Codebase list osrframework / a008289 osrframework / utils / browser.py
a008289

Tree @a008289 (Download .tar.gz)

browser.py @a008289raw · history · blame

################################################################################
#
#    Copyright 2015-2020 Félix Brezo and Yaiza Rubio
#
#    This program is part of OSRFramework. You can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
################################################################################

from configparser import ConfigParser
import os
import random

import requests

import osrframework.utils.configuration as configuration


class Browser():
    """Utility used to code a Browser and to wrap the requests methods.

    Attributes:
        auth (tuple): The username and password authentication, or None when
            no credentials have been set.
        proxies (dict): Maps a protocol name (the configuration section name,
            lowercased and stripped of its first five characters, e.g.
            ``ProxyHTTP`` -> ``http``) to a ``[user:password@]host:port``
            string.
        timeout (int): The number of seconds to wait until timeout.
        user_agents (list): The list of User Agents recognised for this browser.
    """

    # User Agent used whenever the configuration does not provide one.
    _DEFAULT_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/55.0.2883.87 Chrome/55.0.2883.87 Safari/537.36'

    def __init__(self):
        """Build a new Browser, loading its settings from ``browser.cfg``.

        If the file is missing from the application folder, it is first
        copied from the defaults folder. If that copy fails for any reason, a
        warning is printed and the instance keeps built-in default values.
        """
        self.auth = None
        self.user_agents = []
        self.proxies = {}
        self.timeout = 2

        # Trying to read the configuration
        # --------------------------------
        # If browser.cfg has not been found, creating it by copying from default
        config_path = os.path.join(configuration.get_config_path()["appPath"], "browser.cfg")

        # Checking if the configuration file exists
        if not os.path.exists(config_path):
            try:
                # Copy the data from the default folder
                default_config_path = os.path.join(configuration.get_config_path()["appPathDefaults"], "browser.cfg")

                # Read first so that nothing is created if the default file
                # cannot be read.
                with open(default_config_path) as file:
                    cont = file.read()
                with open(config_path, "w") as output_file:
                    output_file.write(cont)
            except Exception as e:
                # Deliberate best-effort: any failure here must not prevent
                # the Browser from being created. The exception has to be
                # bound to a name to be reported below.
                print("WARNING. No configuration file could be found and the default file was not found either, so configuration will be set as default.")
                print(str(e))
                print()
                # Storing configuration as default
                self.user_agents = [self._DEFAULT_USER_AGENT]
                self.proxies = {}

                return

        # Reading the configuration file
        config = ConfigParser()
        config.read(config_path)

        proxy = {}

        # Iterating through all the sections: "Browser" holds the general
        # settings and every other section is treated as a proxy definition
        for conf in config.sections():
            if conf == "Browser":
                # Iterating through parameters
                for (param, value) in config.items(conf):
                    if param == "user_agent":
                        if value != '':
                            self.user_agents.append(value)
                        else:
                            self.user_agents = [self._DEFAULT_USER_AGENT]
                    if param == "timeout":
                        try:
                            self.timeout = int(value)
                        except ValueError:
                            # Empty or non-numeric value: keep the default
                            self.timeout = 2
            else:
                # Keeping only the parameters that have a value
                proxy[conf] = {
                    param: value
                    for (param, value) in config.items(conf)
                    if value != ''
                }

        # Building one proxy string per protocol
        for section, params in proxy.items():
            # section ~= ProxyHTTP --> protocol = section.lower()[5:]
            protocol = section.lower()[5:]
            try:
                endpoint = params["host"] + ":" + params["port"]
            except KeyError:
                # Without host and port this protocol is not proxied
                continue

            try:
                # Adding credentials if they exist
                self.proxies[protocol] = params["username"] + ":" + params["password"] + "@" + endpoint
            except KeyError:
                self.proxies[protocol] = endpoint

    def recover_url(self, url):
        """Public method to recover a resource.

        Args:
            url (str): The URL to be collected.

        Returns:
            The body of the response as text, or None if the request raised
            any exception.
        """
        headers = {
            "User-Agent": self.getUserAgent(),
        }

        # NOTE(review): self.timeout and self.proxies are loaded from the
        # configuration but are not passed to the request here. Confirm
        # whether they should be before wiring them in, as it would change
        # the network behaviour for existing users.
        try:
            r = requests.get(
                url,
                headers=headers,
                auth=self.auth
            )
            return r.text
        except Exception:
            # Something happened. Maybe the request was forbidden?
            return None

    def setNewPassword(self, username, password):
        """Public method to manually set the credentials used by the browser.

        Args:
            username (str): The username of the session.
            password (str): The password of the session.
        """
        self.auth = (username, password)

    def getUserAgent(self):
        """This method will be called whenever a new query will be executed.

        Returns:
            A User Agent string chosen at random from the known ones, or
            "Python3" if none is available.
        """
        if self.user_agents:
            # User-Agent (this is cheating, ok?)
            return random.choice(self.user_agents)
        else:
            return "Python3"