Codebase list python-googlesearch / e5d9777 scripts / google
e5d9777

Tree @e5d9777 (Download .tar.gz)

google @e5d9777raw · history · blame

#!/usr/bin/env python

# Python bindings to the Google search engine
# Copyright (c) 2009-2019, Mario Vilas
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the copyright holder nor the names of its
#       contributors may be used to endorse or promote products derived from
#       this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import sys

from googlesearch import search, get_random_user_agent

# TODO port to argparse
from optparse import OptionParser, IndentedHelpFormatter


class BannerHelpFormatter(IndentedHelpFormatter):
    """Indented optparse help formatter that prints a banner first.

    The banner text is stored on the instance and placed on its own line
    in front of the usage message produced by the base formatter.
    """

    def __init__(self, banner, *argv, **argd):
        """Store the banner; remaining arguments go to the base formatter."""
        IndentedHelpFormatter.__init__(self, *argv, **argd)
        self.banner = banner

    def format_usage(self, usage):
        """Return the banner, a newline, then the regular usage text."""
        usage_text = IndentedHelpFormatter.format_usage(self, usage)
        return '{0}\n{1}'.format(self.banner, usage_text)


def _build_option_parser():
    """Create the option parser with the banner and every search option."""
    formatter = BannerHelpFormatter(
        "Python script to use the Google search engine\n"
        "By Mario Vilas (mvilas at gmail dot com)\n"
        "https://github.com/MarioVilas/googlesearch\n"
    )
    parser = OptionParser(formatter=formatter)
    parser.set_usage("%prog [options] query")
    parser.add_option(
        '--tld', metavar='TLD', type='string', default='com',
        help="top level domain to use [default: com]")
    parser.add_option(
        '--lang', metavar='LANGUAGE', type='string', default='en',
        help="produce results in the given language [default: en]")
    parser.add_option(
        '--domains', metavar='DOMAINS', type='string', default='',
        help="comma separated list of domains to constrain the search to")
    parser.add_option(
        '--tbs', metavar='TBS', type='string', default='0',
        help="produce results from period [default: 0]")
    parser.add_option(
        '--safe', metavar='SAFE', type='string', default='off',
        help="kids safe search [default: off]")
    parser.add_option(
        '--type', metavar='TYPE', type='string', default='search', dest='tpe',
        help="search type (search, images, videos, news, shopping, books,"
        " apps) [default: search]")
    parser.add_option(
        '--country', metavar='COUNTRY', type='string', default='',
        help="region to restrict search on [default: not restricted]")
    parser.add_option(
        '--num', metavar='NUMBER', type='int', default=10,
        help="number of results per page [default: 10]")
    parser.add_option(
        '--start', metavar='NUMBER', type='int', default=0,
        help="first result to retrieve [default: 0]")
    parser.add_option(
        '--stop', metavar='NUMBER', type='int', default=0,
        help="last result to retrieve [default: unlimited]")
    parser.add_option(
        '--pause', metavar='SECONDS', type='float', default=2.0,
        help="pause between HTTP requests [default: 2.0]")
    parser.add_option(
        '--rua', metavar='USERAGENT', action='store_true', default=False,
        help="Randomize the User-Agent [default: no]")
    return parser


def _split_domains(raw):
    """Split a comma separated domain list, dropping blank entries."""
    stripped = (part.strip() for part in raw.split(','))
    return [name for name in stripped if name]


def main():
    """Run a search from the command line and print one URL per line.

    The positional arguments are joined with spaces to form the query.
    When no query is given, the help text is printed and the process exits
    with status 2. An unknown ``--type`` value is reported through
    ``parser.error``. Every parsed option is forwarded to ``search`` as a
    keyword argument, except ``--rua``, which is replaced by a random
    ``user_agent`` when set.
    """

    # Parse the command line arguments.
    parser = _build_option_parser()
    (options, args) = parser.parse_args()
    query = ' '.join(args)
    if not query:
        parser.print_help()
        sys.exit(2)

    # Turn the parsed options into keyword arguments for search().
    params = dict(
        (k, v) for (k, v) in vars(options).items()
        if not k.startswith('_'))

    # Split the comma separated list of domains. The option defaults to an
    # empty string, so blank entries must be dropped: otherwise an omitted
    # --domains (or a stray comma) would pass an empty domain name along
    # instead of leaving the search unconstrained.
    params['domains'] = _split_domains(params['domains'])

    # Use a special search type if requested.
    tpe = params['tpe']
    if tpe and tpe not in (
            'search', 'images', 'videos', 'news',
            'shopping', 'books', 'apps'):
        parser.error("invalid type: %r" % tpe)
    if tpe == 'search':
        # The plain web search is requested with an empty type string.
        params['tpe'] = ''

    # Randomize the user agent if requested. The flag itself is always
    # removed because it is not forwarded to search().
    if params.pop('rua', False):
        params['user_agent'] = get_random_user_agent()

    # Run the query.
    for url in search(query, **params):
        print(url)
        # Flush after every result, presumably so that output shows up
        # immediately when piped. Flushing is best-effort on purpose: a
        # failure here must not abort the remaining results.
        try:
            sys.stdout.flush()
        except Exception:
            pass


# Run the command line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()