python-googlesearch / 0ab1a17
New upstream release. Kali Janitor, 1 year, 5 months ago.
6 changed files with 36 additions and 138 deletions.

  Metadata-Version: 1.1
  Name: google
- Version: 2.0.3
+ Version: 3.0.0
  Summary: Python bindings to the Google search engine.
  Home-page: http://breakingcode.wordpress.com/
  Author: Mario Vilas

+ python-googlesearch (3.0.0-0kali1) UNRELEASED; urgency=low
+
+   * New upstream release.
+
+  -- Kali Janitor <[email protected]>  Sun, 04 Dec 2022 00:55:38 -0000
+
  python-googlesearch (2.0.3-0kali1) kali-dev; urgency=medium

    [ Sophie Brun ]

  Metadata-Version: 1.1
  Name: google
- Version: 2.0.3
+ Version: 3.0.0
  Summary: Python bindings to the Google search engine.
  Home-page: http://breakingcode.wordpress.com/
  Author: Mario Vilas

  #!/usr/bin/env python

- # Python bindings to the Google search engine
- # Copyright (c) 2009-2019, Mario Vilas
+ # Copyright (c) 2009-2020, Mario Vilas
  # All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without

  ...

  import random
  import sys
  import time
+ import ssl

  if sys.version_info[0] > 2:
      from http.cookiejar import LWPCookieJar

  ...

      # Main search function.
      'search',

-     # Specialized search functions.
-     'search_images', 'search_news',
-     'search_videos', 'search_shop',
-     'search_books', 'search_apps',
-
      # Shortcut for "get lucky" search.
      'lucky',

  ...

  # URL templates to make Google searches.
  url_home = "https://www.google.%(tld)s/"
  url_search = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
-     "btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s&" \
+     "btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&" \
      "cr=%(country)s"
  url_next_page = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
-     "start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s&" \
+     "start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&" \
      "cr=%(country)s"
  url_search_num = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
      "num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&" \
-     "tbm=%(tpe)s&cr=%(country)s"
+     "cr=%(country)s"
  url_next_page_num = "https://www.google.%(tld)s/search?hl=%(lang)s&" \
      "q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&" \
-     "safe=%(safe)s&tbm=%(tpe)s&cr=%(country)s"
+     "safe=%(safe)s&cr=%(country)s"
  url_parameters = (
-     'hl', 'q', 'num', 'btnG', 'start', 'tbs', 'safe', 'tbm', 'cr')
+     'hl', 'q', 'num', 'btnG', 'start', 'tbs', 'safe', 'cr')

  # Cookie jar. Stored at the user's home folder.
  # If the cookie jar is inaccessible, the errors are ignored.

  ...

  # Request the given URL and return the response page, using the cookie jar.
  # If the cookie jar is inaccessible, the errors are ignored.
- def get_page(url, user_agent=None):
+ def get_page(url, user_agent=None, verify_ssl=True):
      """
      Request the given URL and return the response page, using the cookie jar.

      :param str url: URL to retrieve.
      :param str user_agent: User agent for the HTTP requests.
          Use None for the default.
+     :param bool verify_ssl: Verify the SSL certificate to prevent
+         traffic interception attacks. Defaults to True.

      :rtype: str
      :return: Web page retrieved for the given URL.

  ...

      request = Request(url)
      request.add_header('User-Agent', user_agent)
      cookie_jar.add_cookie_header(request)
-     response = urlopen(request)
+     if verify_ssl:
+         response = urlopen(request)
+     else:
+         context = ssl._create_unverified_context()
+         response = urlopen(request, context=context)
      cookie_jar.extract_cookies(response, request)
      html = response.read()
      response.close()
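
For context on the hunk above: the new verify_ssl flag only chooses between a
verified urlopen() call and one built on ssl._create_unverified_context(). A
minimal usage sketch, assuming the module is importable as googlesearch (the
URL is illustrative):

    # Sketch: exercising the new verify_ssl parameter of get_page().
    from googlesearch import get_page

    # Default behaviour: the SSL certificate is verified.
    html = get_page("https://www.google.com/")

    # Skip verification, e.g. behind an intercepting proxy; internally this
    # takes the ssl._create_unverified_context() branch shown above.
    html = get_page("https://www.google.com/", verify_ssl=False)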

  ...

  # Returns a generator that yields URLs.
  def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
-            stop=None, domains=None, pause=2.0, tpe='', country='',
-            extra_params=None, user_agent=None):
+            stop=None, pause=2.0, country='', extra_params=None,
+            user_agent=None, verify_ssl=True):
      """
      Search the given query string using Google.

  ...

      :param int start: First result to retrieve.
      :param int stop: Last result to retrieve.
          Use None to keep searching forever.
-     :param list domains: A list of web domains to constrain
-         the search.
      :param float pause: Lapse to wait between HTTP requests.
          A lapse too long will make the search slow, but a lapse too short may
          cause Google to block your IP. Your mileage may vary!
-     :param str tpe: Search type (images, videos, news, shopping, books, apps)
-         Use the following values {videos: 'vid', images: 'isch',
-         news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
      :param str country: Country or region to focus the search on. Similar to
          changing the TLD, but does not yield exactly the same results.
          Only Google knows why...

  ...

          {'filter': '0'} which will append '&filter=0' to every query.
      :param str user_agent: User agent for the HTTP requests.
          Use None for the default.
+     :param bool verify_ssl: Verify the SSL certificate to prevent
+         traffic interception attacks. Defaults to True.

      :rtype: generator of str
      :return: Generator (iterator) that yields found URLs.
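
A short sketch of calling search() with the 3.0.0 signature shown above; the
query and keyword values are illustrative, and the old domains and tpe keyword
arguments are no longer accepted:

    # Sketch: iterating the 3.0.0 search() generator.
    from googlesearch import search

    for url in search('python packaging', tld='com', lang='en', num=10,
                      stop=20, pause=2.0, verify_ssl=True):
        print(url)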

  ...

      # Count the number of links yielded.
      count = 0
-
-     # Prepare domain list if it exists.
-     if domains:
-         query = query + ' ' + ' OR '.join(
-             'site:' + domain for domain in domains)

      # Prepare the search string.
      query = quote_plus(query)
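
Callers that relied on the removed domains handling can rebuild the same query
themselves before calling search(); this sketch reuses the exact expression the
deleted code used (the domain list and query are illustrative):

    # Sketch: reproduce the old "domains" behaviour on the caller side.
    domains = ['kali.org', 'debian.org']
    query = 'vulnerability scanner'
    query = query + ' ' + ' OR '.join('site:' + domain for domain in domains)
    # query == 'vulnerability scanner site:kali.org OR site:debian.org'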

  ...

              )

      # Grab the cookie from the home page.
-     get_page(url_home % vars(), user_agent)
+     get_page(url_home % vars(), user_agent, verify_ssl)

      # Prepare the URL of the first request.
      if start:

  ...

          time.sleep(pause)

          # Request the Google Search results page.
-         html = get_page(url, user_agent)
+         html = get_page(url, user_agent, verify_ssl)

          # Parse the response and get every anchored URL.
          if is_bs4:

  ...

              url = url_next_page_num % vars()


- # Shortcut to search images.
- # Beware, this does not return the image link.
- def search_images(*args, **kwargs):
-     """
-     Shortcut to search images.
-
-     Same arguments and return value as the main search function.
-
-     :note: Beware, this does not return the image link.
-     """
-     kwargs['tpe'] = 'isch'
-     return search(*args, **kwargs)
-
-
- # Shortcut to search news.
- def search_news(*args, **kwargs):
-     """
-     Shortcut to search news.
-
-     Same arguments and return value as the main search function.
-     """
-     kwargs['tpe'] = 'nws'
-     return search(*args, **kwargs)
-
-
- # Shortcut to search videos.
- def search_videos(*args, **kwargs):
-     """
-     Shortcut to search videos.
-
-     Same arguments and return value as the main search function.
-     """
-     kwargs['tpe'] = 'vid'
-     return search(*args, **kwargs)
-
-
- # Shortcut to search shop.
- def search_shop(*args, **kwargs):
-     """
-     Shortcut to search shop.
-
-     Same arguments and return value as the main search function.
-     """
-     kwargs['tpe'] = 'shop'
-     return search(*args, **kwargs)
-
-
- # Shortcut to search books.
- def search_books(*args, **kwargs):
-     """
-     Shortcut to search books.
-
-     Same arguments and return value as the main search function.
-     """
-     kwargs['tpe'] = 'bks'
-     return search(*args, **kwargs)
-
-
- # Shortcut to search apps.
- def search_apps(*args, **kwargs):
-     """
-     Shortcut to search apps.
-
-     Same arguments and return value as the main search function.
-     """
-     kwargs['tpe'] = 'app'
-     return search(*args, **kwargs)
-
-
  # Shortcut to single-item search.
  # Evaluates the iterator to return the single URL as a string.
  def lucky(*args, **kwargs):
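
All of the specialized helpers above are dropped in 3.0.0 together with the
tbm/tpe URL parameter, so there is no image, news, video, shopping, books or
apps mode to migrate to; callers fall back to the generic entry points. A
hedged sketch of that fallback (the query is illustrative):

    # Sketch: only the generic entry points remain in 3.0.0.
    from googlesearch import search, lucky

    # 2.0.3: search_images('kittens'), search_news('kittens'), ...
    # 3.0.0: plain web search only.
    first_hit = lucky('kittens')                  # single URL, as before
    some_hits = list(search('kittens', stop=10))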

  #!/usr/bin/env python

- # Python bindings to the Google search engine
- # Copyright (c) 2009-2019, Mario Vilas
+ # Copyright (c) 2009-2020, Mario Vilas
  # All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without

  ...

      '--lang', metavar='LANGUAGE', type='string', default='en',
      help="produce results in the given language [default: en]")
  parser.add_option(
-     '--domains', metavar='DOMAINS', type='string', default='',
-     help="comma separated list of domains to constrain the search to")
- parser.add_option(
      '--tbs', metavar='TBS', type='string', default='0',
      help="produce results from period [default: 0]")
  parser.add_option(
      '--safe', metavar='SAFE', type='string', default='off',
      help="kids safe search [default: off]")
- parser.add_option(
-     '--type', metavar='TYPE', type='string', default='search', dest='tpe',
-     help="search type (search, images, videos, news, shopping, books,"
-          " apps) [default: search]")
  parser.add_option(
      '--country', metavar='COUNTRY', type='string', default='',
      help="region to restrict search on [default: not restricted]")

  ...

      '--pause', metavar='SECONDS', type='float', default=2.0,
      help="pause between HTTP requests [default: 2.0]")
  parser.add_option(
-     '--rua', metavar='USERAGENT', action='store_true', default=False,
+     '--rua', action='store_true', default=False,
+     help="Randomize the User-Agent [default: no]")
+ parser.add_option(
+     '--insecure', dest="verify_ssl", action='store_false', default=True,
      help="Randomize the User-Agent [default: no]")
  (options, args) = parser.parse_args()
  query = ' '.join(args)

  ...

      (k, v) for (k, v) in options.__dict__.items()
      if not k.startswith('_')]
  params = dict(params)
-
- # Split the comma separated list of domains, if present.
- if 'domains' in params:
-     params['domains'] = [x.strip() for x in params['domains'].split(',')]
-
- # Use a special search type if requested.
- if 'tpe' in params:
-     tpe = params['tpe']
-     if tpe and tpe not in (
-             'search', 'images', 'videos', 'news',
-             'shopping', 'books', 'apps'):
-         parser.error("invalid type: %r" % tpe)
-     if tpe == 'search':
-         params['tpe'] = ''

  # Randomize the user agent if requested.
  if 'rua' in params and params.pop('rua'):
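
A self-contained sketch of the option wiring added above: --insecure is
declared with dest="verify_ssl" and action='store_false', so passing the flag
flips the default True to False before the options land in the params dict.
The help string for --insecure below and the sample command line are our own
illustration, not the upstream text:

    # Sketch: how --insecure becomes verify_ssl=False in the params dict.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        '--rua', action='store_true', default=False,
        help="Randomize the User-Agent [default: no]")
    parser.add_option(
        '--insecure', dest="verify_ssl", action='store_false', default=True,
        help="do not verify the SSL certificate")

    (options, args) = parser.parse_args(['--insecure', 'kali', 'linux'])
    params = dict(
        (k, v) for (k, v) in options.__dict__.items()
        if not k.startswith('_'))
    # params == {'rua': False, 'verify_ssl': False}; args == ['kali', 'linux']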

  #!/usr/bin/env python

- # Copyright (c) 2009-2019, Mario Vilas
+ # Copyright (c) 2009-2020, Mario Vilas
  # All rights reserved.
  #
  # Redistribution and use in source and binary forms, with or without

  ...

      scripts=[join('scripts', 'google')],
      package_data={'googlesearch': ['user_agents.txt.gz']},
      include_package_data=True,
-     version="2.0.3",
+     version="3.0.0",
      description="Python bindings to the Google search engine.",
      author="Mario Vilas",
      author_email="[email protected]",

  ...

      metadata['install_requires'] = metadata['requires']
  except ImportError:
      from distutils.core import setup
- """
- # Get the long description from the readme file.
- try:
-     metadata['long_description'] = open(join(here, 'README.md'), 'rU').read()
- except Exception:
-     pass

- # If twine is installed, set the long description content type.
- try:
-     import twine
-     metadata['long_description_content_type'] = 'text/markdown'
- except ImportError:
-     pass
- """
  # Run the setup script.
  setup(**metadata)