New upstream version 1.1.3
Sophie Brun · 2 years ago
--- /dev/null
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
--- a/finalrecon.py
+++ b/finalrecon.py
@@ … @@
 import os
 import sys
 import atexit
-import importlib.util
-
-R = '\033[31m' # red
-G = '\033[32m' # green
-C = '\033[36m' # cyan
-W = '\033[0m' # white
+
+R = '\033[31m'  # red
+G = '\033[32m'  # green
+C = '\033[36m'  # cyan
+W = '\033[0m'  # white
 
 home = os.getenv('HOME')
 pid_path = home + '/.local/share/finalrecon/finalrecon.pid'
@@ … @@
 conf_path = home + '/.config/finalrecon'
 path_to_script = os.path.dirname(os.path.realpath(__file__))
 src_conf_path = path_to_script + '/conf/'
+meta_file_path = path_to_script + '/metadata.json'
 fail = False
 
 if os.path.isfile(pid_path):
-    print(R + '[-]' + C + ' One instance of FinalRecon is already running!' + W)
+    print(f'{R}[-] {C}One instance of FinalRecon is already running!{W}')
     with open(pid_path, 'r') as pidfile:
         pid = pidfile.read()
-    print(G + '[+]' + C + ' PID : ' + W + str(pid))
-    print(G + '[>]' + C + ' If FinalRecon crashed, execute : ' + W + 'rm {}'.format(pid_path))
-    sys.exit()
+    print(f'{G}[+] {C}PID :{W} {str(pid)}')
+    print(f'{G}[>] {C}If FinalRecon crashed, execute :{W} rm {pid_path}')
+    sys.exit(1)
 else:
     os.makedirs(os.path.dirname(pid_path), exist_ok=True)
     with open(pid_path, 'w') as pidfile:
@@ … @@
     import shutil
     shutil.copytree(src_conf_path, conf_path, dirs_exist_ok=True)
 
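
Note: the block above makes FinalRecon single-instance: if the PID file already exists, a previous run is assumed to be alive and the new one aborts, printing the recorded PID and the rm command that clears a stale file after a crash; otherwise the script records its own PID and seeds ~/.config/finalrecon from the bundled conf/ directory. A minimal standalone sketch of the same PID-file pattern (hypothetical paths; the lines that actually write the PID are collapsed in this view):

    import os
    import sys

    pid_file = os.path.expanduser('~/.local/share/example/example.pid')  # hypothetical path

    if os.path.isfile(pid_file):
        sys.exit('already running (or a stale PID file was left by a crash)')

    os.makedirs(os.path.dirname(pid_file), exist_ok=True)
    with open(pid_file, 'w') as fh:
        fh.write(str(os.getpid()))  # record this process so later runs can detect it

    try:
        pass  # ... actual work goes here ...
    finally:
        os.remove(pid_file)  # always release the lock on the way out
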
-with open(path_to_script + '/requirements.txt', 'r') as rqr:
-    pkg_list = rqr.read().strip().split('\n')
-
-print('\n' + G + '[+]' + C + ' Checking Dependencies...' + W + '\n')
-
-for pkg in pkg_list:
-    spec = importlib.util.find_spec(pkg)
-    if spec is None:
-        print(R + '[-]' + W + ' {}'.format(pkg) + C + ' is not Installed!' + W)
-        fail = True
-    else:
-        pass
-if fail == True:
-    print('\n' + R + '[-]' + C + ' Please Execute ' + W + 'pip3 install -r requirements.txt' + C + ' to Install Missing Packages' + W + '\n')
-    os.remove(pid_path)
-    sys.exit()
-
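
Note: 1.1.3 drops this startup dependency check entirely, together with the now-unused import importlib.util at the top of the file. A known pitfall of the removed approach is that importlib.util.find_spec() expects *import* names, while requirements.txt lists *PyPI distribution* names, and the two often differ, so a valid install could be reported as missing. For example (assuming the beautifulsoup4 package is installed):

    import importlib.util

    print(importlib.util.find_spec('bs4') is not None)             # True: the import name
    print(importlib.util.find_spec('beautifulsoup4') is not None)  # False: the PyPI name is not importable
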
 import argparse
 
-version = '1.1.2'
+version = '1.1.3'
 gh_version = ''
 twitter_url = ''
 discord_url = ''
 
-parser = argparse.ArgumentParser(description='FinalRecon - The Last Web Recon Tool You Will Need | v{}'.format(version))
+parser = argparse.ArgumentParser(description=f'FinalRecon - The Last Web Recon Tool You Will Need | v{version}')
 parser.add_argument('url', help='Target URL')
 parser.add_argument('--headers', help='Header Information', action='store_true')
 parser.add_argument('--sslinfo', help='SSL Certificate Information', action='store_true')
@@ … @@
 ext_help.add_argument('-tt', type=float, help='Traceroute Timeout [ Default : 1.0 ]')
 ext_help.add_argument('-o', help='Export Output [ Default : txt ] [ Available : xml, csv ]')
 ext_help.set_defaults(
-    t = 30,
-    T = 30.0,
-    w = path_to_script + '/wordlists/dirb_common.txt',
-    r = False,
-    s = True,
-    sp = 443,
-    d = '1.1.1.1',
-    e = '',
-    m = 'UDP',
-    p = 33434,
-    tt = 1.0,
-    o = 'txt')
+    t=30,
+    T=30.0,
+    w=path_to_script + '/wordlists/dirb_common.txt',
+    r=False,
+    s=True,
+    sp=443,
+    d='1.1.1.1',
+    e='',
+    m='UDP',
+    p=33434,
+    tt=1.0,
+    o='txt')
 
 try:
     args = parser.parse_args()
@@ … @@
 tr_tout = args.tt
 output = args.o
 
-import json
 import socket
 import requests
 import datetime
 import ipaddress
 import tldextract
+from json import loads
 
 type_ip = False
 data = {}
 meta = {}
 
-def fetch_meta():
-    global gh_version, twitter_url, discord_url
-    try:
-        rqst = requests.get('https://raw.githubusercontent.com/thewhiteh4t/finalrecon/master/metadata.json', timeout=5)
-        sc = rqst.status_code
-        if sc == 200:
-            metadata = rqst.text
-            json_data = json.loads(metadata)
-            gh_version = json_data['metadata']['version']
-            twitter_url = json_data['metadata']['twitter']
-            discord_url = json_data['metadata']['discord']
-        else:
-            with open('metadata.json', 'r') as metadata:
-                json_data = json.loads(metadata.read())
-                gh_version = json_data['metadata']['version']
-                twitter_url = json_data['metadata']['twitter']
-                discord_url = json_data['metadata']['discord']
-    except Exception as exc:
-        print('\n' + R + '[-]' + C + ' Exception : ' + W + str(exc))
-        with open('metadata.json', 'r') as metadata:
-            json_data = json.loads(metadata.read())
-            gh_version = json_data['metadata']['version']
-            twitter_url = json_data['metadata']['twitter']
-            discord_url = json_data['metadata']['discord']
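
Note: the removed fetch_meta() downloaded metadata.json from the project's GitHub raw URL on every launch, falling back to the local copy on a non-200 response or an exception, so startup quality depended on network reachability; 1.1.3 reads only the bundled file (see banner() below). For reference, a trimmed sketch of the remote check the old code performed (same URL, keys as in the 1.1.2 schema, error handling omitted):

    import requests

    rqst = requests.get(
        'https://raw.githubusercontent.com/thewhiteh4t/finalrecon/master/metadata.json',
        timeout=5)
    if rqst.status_code == 200:
        gh_version = rqst.json()['metadata']['version']  # nested 1.1.2 schema
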
 
 def banner():
-    banner = r'''
+    with open(meta_file_path, 'r') as metadata:
+        json_data = loads(metadata.read())
+    twitter_url = json_data['twitter']
+    comms_url = json_data['comms']
+
+    art = r'''
  ______  __   __   __   ______   __
 /\  ___\/\ \ /\ "-.\ \ /\  __ \ /\ \
 \ \  __\\ \ \\ \ \-.  \\ \  __ \\ \ \____
@@ … @@
  \ \  __< \ \  __\ \ \ \____\ \ \/\ \\ \ \-.  \
  \ \_\ \_\\ \_____\\ \_____\\ \_____\\ \_\\"\_\
   \/_/ /_/ \/_____/ \/_____/ \/_____/ \/_/ \/_/'''
-    print(G + banner + W + '\n')
-    print(G + '[>]' + C + ' Created By : ' + W + 'thewhiteh4t')
-    print(G + ' |---> ' + C + 'Twitter : ' + W + twitter_url)
-    print(G + ' |---> ' + C + 'Discord : ' + W + discord_url)
-    print(G + '[>]' + C + ' Version : ' + W + version + '\n')
-
-def ver_check():
-    print(G + '[+]' + C + ' Checking for Updates...', end='')
-    if version == gh_version:
-        print(C + '[' + G + ' Up-To-Date ' + C + ']' + '\n')
-    else:
-        print(C + '[' + G + ' Available : {} '.format(gh_version) + C + ']' + '\n')
+    print(f'{G}{art}{W}\n')
+    print(f'{G}[>]{C} Created By :{W} thewhiteh4t')
+    print(f'{G} |--->{C} Twitter :{W} {twitter_url}')
+    print(f'{G} |--->{C} Community :{W} {comms_url}')
+    print(f'{G}[>]{C} Version :{W} {version}\n')
+
 
 def full_recon():
     from modules.sslinfo import cert
@@ … @@
     cert(hostname, sslp, output, data)
     whois_lookup(ip, output, data)
     dnsrec(domain, output, data)
-    if type_ip == False:
+    if type_ip is False:
         subdomains(domain, tout, output, data, conf_path)
     else:
         pass
@@ … @@
     crawler(target, output, data)
     hammer(target, threads, tout, wdlist, redir, sslv, dserv, output, data, filext)
 
+
 try:
-    fetch_meta()
     banner()
-    ver_check()
-
-    if target.startswith(('http', 'https')) == False:
-        print(R + '[-]' + C + ' Protocol Missing, Include ' + W + 'http://' + C + ' or ' + W + 'https://' + '\n')
+
+    if target.startswith(('http', 'https')) is False:
+        print(f'{R}[-] {C}Protocol Missing, Include {W}http:// {C}or{W} https:// \n')
         os.remove(pid_path)
-        sys.exit()
+        sys.exit(1)
     else:
         pass
 
-    if target.endswith('/') == True:
+    if target.endswith('/') is True:
         target = target[:-1]
     else:
         pass
 
-    print (G + '[+]' + C + ' Target : ' + W + target)
+    print(f'{G}[+] {C}Target : {W}{target}')
     ext = tldextract.extract(target)
     domain = ext.registered_domain
     hostname = '.'.join(part for part in ext if part)
@@ … @@
         ipaddress.ip_address(hostname)
         type_ip = True
         ip = hostname
-    except:
+    except Exception:
         try:
             ip = socket.gethostbyname(hostname)
-            print ('\n' + G + '[+]' + C + ' IP Address : ' + W + str(ip))
+            print(f'\n{G}[+] {C}IP Address : {W}{str(ip)}')
         except Exception as e:
-            print ('\n' + R + '[-]' + C + ' Unable to Get IP : ' + W + str(e))
+            print(f'\n{R}[-] {C}Unable to Get IP : {W}{str(e)}')
             os.remove(pid_path)
-            sys.exit()
+            sys.exit(1)
 
     start_time = datetime.datetime.now()
 
@@ … @@
     fpath = usr_data
     fname = fpath + hostname + '.' + output
     if not os.path.exists(fpath):
-        os.makedirs(fpath)
+        os.makedirs(fpath)
     output = {
         'format': output,
         'file': fname,
         'export': False
-        }
+    }
 
     from modules.export import export
 
-    if full == True:
+    if full is True:
         full_recon()
 
-    if headinfo == True:
+    if headinfo is True:
         from modules.headers import headers
         headers(target, output, data)
 
-    if sslinfo == True:
+    if sslinfo is True:
         from modules.sslinfo import cert
         cert(hostname, sslp, output, data)
 
-    if whois == True:
+    if whois is True:
         from modules.whois import whois_lookup
         whois_lookup(ip, output, data)
 
-    if crawl == True:
+    if crawl is True:
         from modules.crawler import crawler
         crawler(target, output, data)
 
-    if dns == True:
+    if dns is True:
         from modules.dns import dnsrec
         dnsrec(domain, output, data)
 
-    if subd == True and type_ip == False:
+    if subd is True and type_ip is False:
         from modules.subdom import subdomains
         subdomains(domain, tout, output, data, conf_path)
-    elif subd == True and type_ip == True:
-        print(R + '[-]' + C + ' Sub-Domain Enumeration is Not Supported for IP Addresses' + W + '\n')
+    elif subd is True and type_ip is True:
+        print(f'{R}[-] {C}Sub-Domain Enumeration is Not Supported for IP Addresses{W}\n')
         os.remove(pid_path)
-        sys.exit()
+        sys.exit(1)
     else:
         pass
 
-    if trace == True:
+    if trace is True:
         from modules.traceroute import troute
         if mode == 'TCP' and port == 33434:
             port = 80
@@ … @@
         else:
             troute(ip, mode, port, tr_tout, output, data)
 
-    if pscan == True:
+    if pscan is True:
         from modules.portscan import ps
         ps(ip, output, data)
 
-    if dirrec == True:
+    if dirrec is True:
         from modules.dirrec import hammer
         hammer(target, threads, tout, wdlist, redir, sslv, dserv, output, data, filext)
 
-    if any([full, headinfo, sslinfo, whois, crawl, dns, subd, trace, pscan, dirrec]) != True:
-        print ('\n' + R + '[-] Error : ' + C + 'At least One Argument is Required with URL' + W)
+    if any([full, headinfo, sslinfo, whois, crawl, dns, subd, trace, pscan, dirrec]) is not True:
+        print(f'\n{R}[-] Error : {C}At least One Argument is Required with URL{W}')
         output = 'None'
         os.remove(pid_path)
-        sys.exit()
+        sys.exit(1)
 
     end_time = datetime.datetime.now() - start_time
-    print ('\n' + G + '[+]' + C + ' Completed in ' + W + str(end_time) + '\n')
+    print(f'\n{G}[+] {C}Completed in {W}{str(end_time)}\n')
 
     @atexit.register
     def call_export():
@@ … @@
         os.remove(pid_path)
         sys.exit()
 except KeyboardInterrupt:
-    print (R + '[-]' + C + ' Keyboard Interrupt.' + W + '\n')
+    print(f'{R}[-] {C}Keyboard Interrupt.{W}\n')
     os.remove(pid_path)
-    sys.exit()
+    sys.exit(130)
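
Note: two behavioural changes in this file beyond the f-string cleanup. First, error paths now call sys.exit(1) instead of bare sys.exit(), which exits 0, and the KeyboardInterrupt handler exits 130, the usual 128+SIGINT convention, so wrapper scripts can tell failure from success. Second, the export hook is registered with @atexit.register, so it runs at interpreter shutdown, including shutdown initiated by sys.exit(). A minimal sketch of that mechanism:

    import atexit
    import sys

    @atexit.register
    def cleanup():
        # runs on normal interpreter shutdown, including sys.exit();
        # not after os._exit() or a fatal signal
        print('export / cleanup happens here')

    print('work done')
    sys.exit(1)  # cleanup() still fires; the process exits with status 1
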

--- a/metadata.json
+++ b/metadata.json
 {
-    "metadata":
-    {
-        "name": "FinalRecon",
-        "author": "thewhiteh4t",
-        "version": "1.1.2",
-        "twitter": "https://twitter.com/thewhiteh4t",
-        "discord": "https://discord.gg/UM92zUn"
-    }
+    "name": "FinalRecon",
+    "author": "thewhiteh4t",
+    "version": "1.1.3",
+    "twitter": "https://twitter.com/thewhiteh4t",
+    "comms": "https://twc1rcle.com/"
 }
\ No newline at end of file
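
Note: the schema was flattened: the wrapper "metadata" object is gone, the version is bumped, and the Discord invite gives way to a generic "comms" URL. Consumers now index one level shallower, matching the banner() change above:

    from json import loads

    with open('metadata.json', 'r') as fh:
        json_data = loads(fh.read())

    # 1.1.2 layout: json_data['metadata']['version']
    # 1.1.3 layout:
    print(json_data['version'], json_data['twitter'], json_data['comms'])
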

--- a/modules/crawler.py
+++ b/modules/crawler.py
 #!/usr/bin/env python3
 
-import os
 import re
 import bs4
 import lxml
@@ … @@
 from datetime import date
 requests.packages.urllib3.disable_warnings()
 
-R = '\033[31m' # red
-G = '\033[32m' # green
-C = '\033[36m' # cyan
-W = '\033[0m' # white
-Y = '\033[33m' # yellow
+R = '\033[31m'  # red
+G = '\033[32m'  # green
+C = '\033[36m'  # cyan
+W = '\033[0m'  # white
+Y = '\033[33m'  # yellow
 
 user_agent = {
-    'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
-    }
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
+}
 
 soup = ''
 r_url = ''
@@ … @@
 sm_crawl_total = []
 wayback_total = []
 
+
 def crawler(target, output, data):
     global soup, r_url, sm_url
     print('\n' + Y + '[!]' + Y + ' Starting Crawler...' + W + '\n')
@@ … @@
     temp_tgt = target.split('://')[1]
     pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}'
     custom = bool(re.match(pattern, temp_tgt))
-    if custom == True:
+    if custom is True:
         r_url = protocol + '://' + temp_tgt + '/robots.txt'
         sm_url = protocol + '://' + temp_tgt + '/sitemap.xml'
     else:
@@ … @@
 
         out(target, output, data)
     else:
-        print (R + '[-]' + C + ' Status : ' + W + str(sc))
+        print(R + '[-]' + C + ' Status : ' + W + str(sc))
+
 
 def url_filter(target):
     global url
 
-    if all([url.startswith('/') == True, url.startswith('//') == False]):
+    if all([url.startswith('/') is True, url.startswith('//') is False]):
         url = target + url
     else:
         pass
 
-    if all([url.find('http://') == -1,
+    if all([
+        url.find('http://') == -1,
         url.find('https://') == -1]):
-
         url = url.replace('//', 'http://')
         url = url.replace('../', target + '/')
         url = url.replace('./', target + '/')
     else:
         pass
 
-    if all([url.find('//') == -1,
+    if all([
+        url.find('//') == -1,
         url.find('../') == -1,
         url.find('./') == -1,
         url.find('http://') == -1,
@@ … @@
         url = target + '/' + url
     else:
         pass
+
 
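
Note: url_filter() normalises scraped hrefs by plain string surgery: rooting paths that start with '/', forcing a scheme onto protocol-relative '//' links, and rewriting './' and '../' against the target. The standard library covers the same ground in one call; a sketch of the equivalent with urllib.parse.urljoin (an alternative, not what this module uses):

    from urllib.parse import urljoin

    base = 'https://example.com/docs/'  # hypothetical page the links came from
    for href in ('/about', '//cdn.example.com/a.js', '../img/logo.png', 'page2.html'):
        print(urljoin(base, href))
    # https://example.com/about
    # https://cdn.example.com/a.js
    # https://example.com/img/logo.png
    # https://example.com/docs/page2.html
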
 async def wayback(target):
     global wayback_total
@@ … @@
     domain = ext.domain
     domain_query = domain + '/*'
 
-    #today = date.today().strftime("%Y%m%d")
-    #past = date.today() + relativedelta(months=-6)
-    #past = past.strftime("%Y%m%d")
-
     curr_yr = date.today().year
     last_yr = curr_yr - 1
 
-    print(Y + '[!]' + C + ' Checking Availability on Wayback Machine' + W, end = '')
+    print(Y + '[!]' + C + ' Checking Availability on Wayback Machine' + W, end='')
     wm_avail = 'http://archive.org/wayback/available'
-    avail_data = { 'url': domain }
+    avail_data = {'url': domain}
 
     try:
         check_rqst = requests.get(wm_avail, params=avail_data, timeout=10)
@@ … @@
     except Exception as e:
         print('\n' + R + '[-] Exception : ' + C + str(e) + W)
 
-    if is_avail == True:
-        print(Y + '[!]' + C + ' Requesting Wayback Machine' + W, end = '')
+    if is_avail is True:
+        print(Y + '[!]' + C + ' Requesting Wayback Machine' + W, end='')
         wm_url = 'http://web.archive.org/cdx/search/cdx'
 
         data = {
-            'url':  domain_query,
-            'fl':  'original',
-            'fastLatest':  'true',
+            'url': domain_query,
+            'fl': 'original',
+            'fastLatest': 'true',
             'from': '{}'.format(str(last_yr)),
             'to': '{}'.format(str(curr_yr)),
             'filter': 'statuscode:200'
@@ … @@
     except Exception as e:
         print('\n' + R + '[-] Exception : ' + C + str(e) + W)
 
+
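
Note: wayback() first asks archive.org's availability endpoint whether the domain is archived at all, then queries the CDX server for URLs captured with status 200 over roughly the last two years. The CDX query it builds can be reproduced standalone (same endpoint and parameters as above; 'example' stands in for the extracted domain):

    import requests
    from datetime import date

    curr_yr = date.today().year
    params = {
        'url': 'example/*',        # domain_query: registered domain plus wildcard
        'fl': 'original',          # return only the original URL column
        'fastLatest': 'true',
        'from': str(curr_yr - 1),  # last_yr
        'to': str(curr_yr),
        'filter': 'statuscode:200',
    }
    resp = requests.get('http://web.archive.org/cdx/search/cdx', params=params, timeout=10)
    print(resp.text.splitlines()[:5])  # one captured URL per line
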
 async def robots(target):
     global url, r_url, r_total
-    print(G + '[+]' + C + ' Looking for robots.txt' + W, end = '')
+    print(G + '[+]' + C + ' Looking for robots.txt' + W, end='')
 
     try:
         r_rqst = requests.get(r_url, headers=user_agent, verify=False, timeout=10)
         r_sc = r_rqst.status_code
         if r_sc == 200:
             print(G + '['.rjust(9, '.') + ' Found ]' + W)
-            print(G + '[+]' + C + ' Extracting robots Links', end = '')
+            print(G + '[+]' + C + ' Extracting robots Links', end='')
             r_page = r_rqst.text
             r_scrape = r_page.split('\n')
             for entry in r_scrape:
-                if (entry.find('Disallow') == 0 or
-                    entry.find('Allow') == 0 or
-                    entry.find('Sitemap') == 0):
+                if any([
+                    entry.find('Disallow') == 0,
+                    entry.find('Allow') == 0,
+                    entry.find('Sitemap') == 0]):
 
                     url = entry.split(': ')
                     try:
@@ … @@
                         url = url.strip()
                         url_filter(target)
                         r_total.append(url)
-                        if url.endswith('xml') == True:
+                        if url.endswith('xml') is True:
                             sm_total.append(url)
-                    except:
+                    except Exception:
                         pass
 
             r_total = set(r_total)
@@ … @@
     except Exception as e:
         print('\n' + R + '[-] Exception : ' + C + str(e) + W)
 
+
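
Note: the rewritten condition in robots() behaves exactly like the old chained or, though the list literal inside any([...]) evaluates all three tests up front instead of short-circuiting. Since each test is entry.find(...) == 0, i.e. "starts with", the whole check also collapses to a single tuple-argument startswith call; a compact alternative (not what the module uses):

    entry = 'Disallow: /admin/'
    if entry.startswith(('Disallow', 'Allow', 'Sitemap')):
        url = entry.split(': ')[1].strip()  # same extraction the crawler performs
        print(url)  # -> /admin/
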
 async def sitemap():
     global url, sm_url, total, sm_total
-    print(G + '[+]' + C + ' Looking for sitemap.xml' + W, end = '')
+    print(G + '[+]' + C + ' Looking for sitemap.xml' + W, end='')
     try:
         sm_rqst = requests.get(sm_url, headers=user_agent, verify=False, timeout=10)
         sm_sc = sm_rqst.status_code
         if sm_sc == 200:
             print(G + '['.rjust(8, '.') + ' Found ]' + W)
-            print(G + '[+]' + C + ' Extracting sitemap Links', end = '')
+            print(G + '[+]' + C + ' Extracting sitemap Links', end='')
             sm_page = sm_rqst.content
             sm_soup = bs4.BeautifulSoup(sm_page, 'xml')
             links = sm_soup.find_all('loc')
             for url in links:
                 url = url.get_text()
-                if url != None:
+                if url is not None:
                     sm_total.append(url)
 
             sm_total = set(sm_total)
@@ … @@
     except Exception as e:
         print('\n' + R + '[-] Exception : ' + C + str(e))
 
+
 async def css(target):
     global url, soup, total, css_total
-    print(G + '[+]' + C + ' Extracting CSS Links' + W, end = '')
+    print(G + '[+]' + C + ' Extracting CSS Links' + W, end='')
     css = soup.find_all('link')
 
     for link in css:
         url = link.get('href')
-        if url != None and '.css' in url:
+        if url is not None and '.css' in url:
             url_filter(target)
             css_total.append(url)
 
     css_total = set(css_total)
     print(G + '['.rjust(11, '.') + ' {} ]'.format(str(len(css_total))) + W)
 
+
 async def js(target):
     global url, total, js_total
-    print(G + '[+]' + C + ' Extracting Javascript Links' + W, end = '')
+    print(G + '[+]' + C + ' Extracting Javascript Links' + W, end='')
     js = soup.find_all('script')
 
     for link in js:
         url = link.get('src')
-        if url != None and '.js' in url:
+        if url is not None and '.js' in url:
             url_filter(target)
             js_total.append(url)
 
     js_total = set(js_total)
     print(G + '['.rjust(4, '.') + ' {} ]'.format(str(len(js_total))))
 
+
 async def internal_links(target):
     global total, int_total
-    print(G + '[+]' + C + ' Extracting Internal Links' + W, end = '')
+    print(G + '[+]' + C + ' Extracting Internal Links' + W, end='')
 
     ext = tldextract.extract(target)
     domain = ext.registered_domain
@@ … @@
     links = soup.find_all('a')
     for link in links:
         url = link.get('href')
-        if url != None:
+        if url is not None:
             if domain in url:
                 int_total.append(url)
 
     int_total = set(int_total)
     print(G + '['.rjust(6, '.') + ' {} ]'.format(str(len(int_total))))
 
+
 async def external_links(target):
     global total, ext_total
-    print(G + '[+]' + C + ' Extracting External Links' + W, end = '')
+    print(G + '[+]' + C + ' Extracting External Links' + W, end='')
 
     ext = tldextract.extract(target)
     domain = ext.registered_domain
@@ … @@
     links = soup.find_all('a')
     for link in links:
         url = link.get('href')
-        if url != None:
+        if url is not None:
             if domain not in url and 'http' in url:
                 ext_total.append(url)
 
     ext_total = set(ext_total)
     print(G + '['.rjust(6, '.') + ' {} ]'.format(str(len(ext_total))))
 
+
 async def images(target):
     global url, total, img_total
-    print(G + '[+]' + C + ' Extracting Images' + W, end = '')
+    print(G + '[+]' + C + ' Extracting Images' + W, end='')
     images = soup.find_all('img')
 
     for link in images:
         url = link.get('src')
-        if url != None and len(url) > 1:
+        if url is not None and len(url) > 1:
             url_filter(target)
             img_total.append(url)
 
     img_total = set(img_total)
     print(G + '['.rjust(14, '.') + ' {} ]'.format(str(len(img_total))))
 
+
 async def sm_crawl():
     global sm_crawl_total
-    print(G + '[+]' + C + ' Crawling Sitemaps' + W, end = '')
+    print(G + '[+]' + C + ' Crawling Sitemaps' + W, end='')
 
     threads = []
 
@@ … @@
                 links = sm_soup.find_all('loc')
                 for url in links:
                     url = url.get_text()
-                    if url != None:
+                    if url is not None:
                         sm_crawl_total.append(url)
             elif sm_sc == 404:
                 print(R + '['.rjust(8, '.') + ' Not Found ]' + W)
@@ … @@
 
     for site_url in sm_total:
         if site_url != sm_url:
-            if site_url.endswith('xml') == True:
+            if site_url.endswith('xml') is True:
                 t = threading.Thread(target=fetch, args=[site_url])
                 t.daemon = True
                 threads.append(t)
@@ … @@
     sm_crawl_total = set(sm_crawl_total)
     print(G + '['.rjust(14, '.') + ' {} ]'.format(str(len(sm_crawl_total))))
 
+
 async def js_crawl():
     global js_crawl_total
-    print(G + '[+]' + C + ' Crawling Javascripts' + W, end = '')
+    print(G + '[+]' + C + ' Crawling Javascripts' + W, end='')
 
     threads = []
 
@@ … @@
     js_crawl_total = set(js_crawl_total)
     print(G + '['.rjust(11, '.') + ' {} ]'.format(str(len(js_crawl_total))))
 
+
 def out(target, output, data):
     global total
 
@@ … @@
 
     data['module-Crawler'].update(
         {
-            'Count ( Robots )':  str(len(r_total)),
-            'Count ( Sitemap )':  str(len(sm_total)),
-            'Count ( CSS )':  str(len(css_total)),
-            'Count ( JS )':  str(len(js_total)),
-            'Count ( Links in JS )':  str(len(js_crawl_total)),
+            'Count ( Robots )': str(len(r_total)),
+            'Count ( Sitemap )': str(len(sm_total)),
+            'Count ( CSS )': str(len(css_total)),
+            'Count ( JS )': str(len(js_total)),
+            'Count ( Links in JS )': str(len(js_crawl_total)),
             'Count ( Links in Sitemaps )': str(len(sm_crawl_total)),
-            'Count ( Internal )':  str(len(int_total)),
-            'Count ( External )':  str(len(ext_total)),
-            'Count ( Images )':  str(len(img_total)),
+            'Count ( Internal )': str(len(int_total)),
+            'Count ( External )': str(len(ext_total)),
+            'Count ( Images )': str(len(img_total)),
             'count ( Wayback Machine )': str(len(wayback_total)),
             'Count ( Total )': str(len(total))
         })
-
+
     if len(r_total) != 0:
         data['module-Crawler'].update({'Robots': list(r_total)})
-
+
     if len(sm_total) != 0:
         data['module-Crawler'].update({'Sitemaps': list(sm_total)})
-
+
     if len(css_total) != 0:
         data['module-Crawler'].update({'CSS': list(css_total)})
-
+
     if len(js_total) != 0:
         data['module-Crawler'].update({'Javascripts': list(js_total)})
 
     if len(js_crawl_total) != 0:
         data['module-Crawler'].update({'Links inside Javascripts': list(js_crawl_total)})
-
+
     if len(sm_crawl_total) != 0:
         data['module-Crawler'].update({'Links Inside Sitemaps': list(sm_crawl_total)})
-
+
     if len(int_total) != 0:
         data['module-Crawler'].update({'Internal Links': list(int_total)})
-
+
     if len(ext_total) != 0:
         data['module-Crawler'].update({'External Links': list(ext_total)})
-
+
     if len(img_total) != 0:
         data['module-Crawler'].update({'Images': list(img_total)})
-
+
     if len(wayback_total) != 0:
-        data['module-Crawler'].update({'Wayback Machine': list(wayback_total)})
\ No newline at end of file
+        data['module-Crawler'].update({'Wayback Machine': list(wayback_total)})
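
Note: after every collector has run, out() folds the de-duplicated sets into the shared data dict under 'module-Crawler': the count entries first, then one list per non-empty category, for the export step. Sketched with toy values (key names exactly as in the code; 'count ( Wayback Machine )' really is lower-case in the source):

    data = {
        'module-Crawler': {
            'Count ( Robots )': '2',
            'Count ( Total )': '42',
            'Robots': ['https://example.com/admin', 'https://example.com/private'],
            'Internal Links': ['https://example.com/about'],
        }
    }
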