finalrecon / bcf1c21
New upstream version 1.1.3 (Sophie Brun, 2 years ago)
7 changed files with 307 additions and 191 deletions.
0 # Byte-compiled / optimized / DLL files
1 __pycache__/
2 *.py[cod]
3 *$py.class
4
5 # C extensions
6 *.so
7
8 # Distribution / packaging
9 .Python
10 build/
11 develop-eggs/
12 dist/
13 downloads/
14 eggs/
15 .eggs/
16 lib/
17 lib64/
18 parts/
19 sdist/
20 var/
21 wheels/
22 share/python-wheels/
23 *.egg-info/
24 .installed.cfg
25 *.egg
26 MANIFEST
27
28 # PyInstaller
29 # Usually these files are written by a python script from a template
30 # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 *.manifest
32 *.spec
33
34 # Installer logs
35 pip-log.txt
36 pip-delete-this-directory.txt
37
38 # Unit test / coverage reports
39 htmlcov/
40 .tox/
41 .nox/
42 .coverage
43 .coverage.*
44 .cache
45 nosetests.xml
46 coverage.xml
47 *.cover
48 *.py,cover
49 .hypothesis/
50 .pytest_cache/
51 cover/
52
53 # Translations
54 *.mo
55 *.pot
56
57 # Django stuff:
58 *.log
59 local_settings.py
60 db.sqlite3
61 db.sqlite3-journal
62
63 # Flask stuff:
64 instance/
65 .webassets-cache
66
67 # Scrapy stuff:
68 .scrapy
69
70 # Sphinx documentation
71 docs/_build/
72
73 # PyBuilder
74 .pybuilder/
75 target/
76
77 # Jupyter Notebook
78 .ipynb_checkpoints
79
80 # IPython
81 profile_default/
82 ipython_config.py
83
84 # pyenv
85 # For a library or package, you might want to ignore these files since the code is
86 # intended to run in multiple environments; otherwise, check them in:
87 # .python-version
88
89 # pipenv
90 # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 # install all needed dependencies.
94 #Pipfile.lock
95
96 # poetry
97 # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
98 # This is especially recommended for binary packages to ensure reproducibility, and is more
99 # commonly ignored for libraries.
100 # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
101 #poetry.lock
102
103 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 __pypackages__/
105
106 # Celery stuff
107 celerybeat-schedule
108 celerybeat.pid
109
110 # SageMath parsed files
111 *.sage.py
112
113 # Environments
114 .env
115 .venv
116 env/
117 venv/
118 ENV/
119 env.bak/
120 venv.bak/
121
122 # Spyder project settings
123 .spyderproject
124 .spyproject
125
126 # Rope project settings
127 .ropeproject
128
129 # mkdocs documentation
130 /site
131
132 # mypy
133 .mypy_cache/
134 .dmypy.json
135 dmypy.json
136
137 # Pyre type checker
138 .pyre/
139
140 # pytype static type analyzer
141 .pytype/
142
143 # Cython debug symbols
144 cython_debug/
145
146 # PyCharm
147 # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
148 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
149 # and can be added to the global gitignore or merged into this file. For a more nuclear
150 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
151 .idea/
0 FROM alpine:latest
0 FROM alpine:3.15.4
11 RUN apk update
22 RUN apk add \
33 git \
22 import os
33 import sys
44 import atexit
5 import importlib.util
6
7 R = '\033[31m' # red
8 G = '\033[32m' # green
9 C = '\033[36m' # cyan
10 W = '\033[0m' # white
5
6 R = '\033[31m' # red
7 G = '\033[32m' # green
8 C = '\033[36m' # cyan
9 W = '\033[0m' # white
1110
1211 home = os.getenv('HOME')
1312 pid_path = home + '/.local/share/finalrecon/finalrecon.pid'
1514 conf_path = home + '/.config/finalrecon'
1615 path_to_script = os.path.dirname(os.path.realpath(__file__))
1716 src_conf_path = path_to_script + '/conf/'
17 meta_file_path = path_to_script + '/metadata.json'
1818 fail = False
1919
2020 if os.path.isfile(pid_path):
21 print(R + '[-]' + C + ' One instance of FinalRecon is already running!' + W)
21 print(f'{R}[-] {C}One instance of FinalRecon is already running!{W}')
2222 with open(pid_path, 'r') as pidfile:
2323 pid = pidfile.read()
24 print(G + '[+]' + C + ' PID : ' + W + str(pid))
25 print(G + '[>]' + C + ' If FinalRecon crashed, execute : ' + W + 'rm {}'.format(pid_path))
26 sys.exit()
24 print(f'{G}[+] {C}PID :{W} {str(pid)}')
25 print(f'{G}[>] {C}If FinalRecon crashed, execute :{W} rm {pid_path}')
26 sys.exit(1)
2727 else:
2828 os.makedirs(os.path.dirname(pid_path), exist_ok=True)
2929 with open(pid_path, 'w') as pidfile:
3535 import shutil
3636 shutil.copytree(src_conf_path, conf_path, dirs_exist_ok=True)
3737
38 with open(path_to_script + '/requirements.txt', 'r') as rqr:
39 pkg_list = rqr.read().strip().split('\n')
40
41 print('\n' + G + '[+]' + C + ' Checking Dependencies...' + W + '\n')
42
43 for pkg in pkg_list:
44 spec = importlib.util.find_spec(pkg)
45 if spec is None:
46 print(R + '[-]' + W + ' {}'.format(pkg) + C + ' is not Installed!' + W)
47 fail = True
48 else:
49 pass
50 if fail == True:
51 print('\n' + R + '[-]' + C + ' Please Execute ' + W + 'pip3 install -r requirements.txt' + C + ' to Install Missing Packages' + W + '\n')
52 os.remove(pid_path)
53 sys.exit()
54
5538 import argparse
5639
57 version = '1.1.2'
40 version = '1.1.3'
5841 gh_version = ''
5942 twitter_url = ''
6043 discord_url = ''
6144
62 parser = argparse.ArgumentParser(description='FinalRecon - The Last Web Recon Tool You Will Need | v{}'.format(version))
45 parser = argparse.ArgumentParser(description=f'FinalRecon - The Last Web Recon Tool You Will Need | v{version}')
6346 parser.add_argument('url', help='Target URL')
6447 parser.add_argument('--headers', help='Header Information', action='store_true')
6548 parser.add_argument('--sslinfo', help='SSL Certificate Information', action='store_true')
8669 ext_help.add_argument('-tt', type=float, help='Traceroute Timeout [ Default : 1.0 ]')
8770 ext_help.add_argument('-o', help='Export Output [ Default : txt ] [ Available : xml, csv ]')
8871 ext_help.set_defaults(
89 t = 30,
90 T = 30.0,
91 w = path_to_script + '/wordlists/dirb_common.txt',
92 r = False,
93 s = True,
94 sp = 443,
95 d = '1.1.1.1',
96 e = '',
97 m = 'UDP',
98 p = 33434,
99 tt = 1.0,
100 o = 'txt')
72 t=30,
73 T=30.0,
74 w=path_to_script + '/wordlists/dirb_common.txt',
75 r=False,
76 s=True,
77 sp=443,
78 d='1.1.1.1',
79 e='',
80 m='UDP',
81 p=33434,
82 tt=1.0,
83 o='txt')
10184
10285 try:
10386 args = parser.parse_args()
129112 tr_tout = args.tt
130113 output = args.o
131114
132 import json
133115 import socket
134116 import requests
135117 import datetime
136118 import ipaddress
137119 import tldextract
120 from json import loads
138121
139122 type_ip = False
140123 data = {}
141124 meta = {}
142125
143 def fetch_meta():
144 global gh_version, twitter_url, discord_url
145 try:
146 rqst = requests.get('https://raw.githubusercontent.com/thewhiteh4t/finalrecon/master/metadata.json', timeout=5)
147 sc = rqst.status_code
148 if sc == 200:
149 metadata = rqst.text
150 json_data = json.loads(metadata)
151 gh_version = json_data['metadata']['version']
152 twitter_url = json_data['metadata']['twitter']
153 discord_url = json_data['metadata']['discord']
154 else:
155 with open('metadata.json', 'r') as metadata:
156 json_data = json.loads(metadata.read())
157 gh_version = json_data['metadata']['version']
158 twitter_url = json_data['metadata']['twitter']
159 discord_url = json_data['metadata']['discord']
160 except Exception as exc:
161 print('\n' + R + '[-]' + C + ' Exception : ' + W + str(exc))
162 with open('metadata.json', 'r') as metadata:
163 json_data = json.loads(metadata.read())
164 gh_version = json_data['metadata']['version']
165 twitter_url = json_data['metadata']['twitter']
166 discord_url = json_data['metadata']['discord']
167126
168127 def banner():
169 banner = r'''
128 with open(meta_file_path, 'r') as metadata:
129 json_data = loads(metadata.read())
130 twitter_url = json_data['twitter']
131 comms_url = json_data['comms']
132
133 art = r'''
170134 ______ __ __ __ ______ __
171135 /\ ___\/\ \ /\ "-.\ \ /\ __ \ /\ \
172136 \ \ __\\ \ \\ \ \-. \\ \ __ \\ \ \____
177141 \ \ __< \ \ __\ \ \ \____\ \ \/\ \\ \ \-. \
178142 \ \_\ \_\\ \_____\\ \_____\\ \_____\\ \_\\"\_\
179143 \/_/ /_/ \/_____/ \/_____/ \/_____/ \/_/ \/_/'''
180 print(G + banner + W + '\n')
181 print(G + '[>]' + C + ' Created By : ' + W + 'thewhiteh4t')
182 print(G + ' |---> ' + C + 'Twitter : ' + W + twitter_url)
183 print(G + ' |---> ' + C + 'Discord : ' + W + discord_url)
184 print(G + '[>]' + C + ' Version : ' + W + version + '\n')
185
186 def ver_check():
187 print(G + '[+]' + C + ' Checking for Updates...', end='')
188 if version == gh_version:
189 print(C + '[' + G + ' Up-To-Date ' + C +']' + '\n')
190 else:
191 print(C + '[' + G + ' Available : {} '.format(gh_version) + C + ']' + '\n')
144 print(f'{G}{art}{W}\n')
145 print(f'{G}[>]{C} Created By :{W} thewhiteh4t')
146 print(f'{G} |--->{C} Twitter :{W} {twitter_url}')
147 print(f'{G} |--->{C} Community :{W} {comms_url}')
148 print(f'{G}[>]{C} Version :{W} {version}\n')
149
192150
193151 def full_recon():
194152 from modules.sslinfo import cert
204162 cert(hostname, sslp, output, data)
205163 whois_lookup(ip, output, data)
206164 dnsrec(domain, output, data)
207 if type_ip == False:
165 if type_ip is False:
208166 subdomains(domain, tout, output, data, conf_path)
209167 else:
210168 pass
213171 crawler(target, output, data)
214172 hammer(target, threads, tout, wdlist, redir, sslv, dserv, output, data, filext)
215173
174
216175 try:
217 fetch_meta()
218176 banner()
219 ver_check()
220
221 if target.startswith(('http', 'https')) == False:
222 print(R + '[-]' + C + ' Protocol Missing, Include ' + W + 'http://' + C + ' or ' + W + 'https://' + '\n')
177
178 if target.startswith(('http', 'https')) is False:
179 print(f'{R}[-] {C}Protocol Missing, Include {W}http:// {C}or{W} https:// \n')
223180 os.remove(pid_path)
224 sys.exit()
181 sys.exit(1)
225182 else:
226183 pass
227184
228 if target.endswith('/') == True:
185 if target.endswith('/') is True:
229186 target = target[:-1]
230187 else:
231188 pass
232189
233 print (G + '[+]' + C + ' Target : ' + W + target)
190 print(f'{G}[+] {C}Target : {W}{target}')
234191 ext = tldextract.extract(target)
235192 domain = ext.registered_domain
236193 hostname = '.'.join(part for part in ext if part)
239196 ipaddress.ip_address(hostname)
240197 type_ip = True
241198 ip = hostname
242 except:
199 except Exception:
243200 try:
244201 ip = socket.gethostbyname(hostname)
245 print ('\n' + G + '[+]' + C + ' IP Address : ' + W + str(ip))
202 print(f'\n{G}[+] {C}IP Address : {W}{str(ip)}')
246203 except Exception as e:
247 print ('\n' + R + '[-]' + C + ' Unable to Get IP : ' + W + str(e))
204 print(f'\n{R}[-] {C}Unable to Get IP : {W}{str(e)}')
248205 os.remove(pid_path)
249 sys.exit()
206 sys.exit(1)
250207
251208 start_time = datetime.datetime.now()
252209
261218 fpath = usr_data
262219 fname = fpath + hostname + '.' + output
263220 if not os.path.exists(fpath):
264 os.makedirs(fpath)
221 os.makedirs(fpath)
265222 output = {
266223 'format': output,
267224 'file': fname,
268225 'export': False
269 }
226 }
270227
271228 from modules.export import export
272229
273 if full == True:
230 if full is True:
274231 full_recon()
275232
276 if headinfo == True:
233 if headinfo is True:
277234 from modules.headers import headers
278235 headers(target, output, data)
279236
280 if sslinfo == True:
237 if sslinfo is True:
281238 from modules.sslinfo import cert
282239 cert(hostname, sslp, output, data)
283240
284 if whois == True:
241 if whois is True:
285242 from modules.whois import whois_lookup
286243 whois_lookup(ip, output, data)
287244
288 if crawl == True:
245 if crawl is True:
289246 from modules.crawler import crawler
290247 crawler(target, output, data)
291248
292 if dns == True:
249 if dns is True:
293250 from modules.dns import dnsrec
294251 dnsrec(domain, output, data)
295252
296 if subd == True and type_ip == False:
253 if subd is True and type_ip is False:
297254 from modules.subdom import subdomains
298255 subdomains(domain, tout, output, data, conf_path)
299 elif subd == True and type_ip == True:
300 print(R + '[-]' + C + ' Sub-Domain Enumeration is Not Supported for IP Addresses' + W + '\n')
256 elif subd is True and type_ip is True:
257 print(f'{R}[-] {C}Sub-Domain Enumeration is Not Supported for IP Addresses{W}\n')
301258 os.remove(pid_path)
302 sys.exit()
259 sys.exit(1)
303260 else:
304261 pass
305262
306 if trace == True:
263 if trace is True:
307264 from modules.traceroute import troute
308265 if mode == 'TCP' and port == 33434:
309266 port = 80
311268 else:
312269 troute(ip, mode, port, tr_tout, output, data)
313270
314 if pscan == True:
271 if pscan is True:
315272 from modules.portscan import ps
316273 ps(ip, output, data)
317274
318 if dirrec == True:
275 if dirrec is True:
319276 from modules.dirrec import hammer
320277 hammer(target, threads, tout, wdlist, redir, sslv, dserv, output, data, filext)
321278
322 if any([full, headinfo, sslinfo, whois, crawl, dns, subd, trace, pscan, dirrec]) != True:
323 print ('\n' + R + '[-] Error : ' + C + 'At least One Argument is Required with URL' + W)
279 if any([full, headinfo, sslinfo, whois, crawl, dns, subd, trace, pscan, dirrec]) is not True:
280 print(f'\n{R}[-] Error : {C}At least One Argument is Required with URL{W}')
324281 output = 'None'
325282 os.remove(pid_path)
326 sys.exit()
283 sys.exit(1)
327284
328285 end_time = datetime.datetime.now() - start_time
329 print ('\n' + G + '[+]' + C + ' Completed in ' + W + str(end_time) + '\n')
286 print(f'\n{G}[+] {C}Completed in {W}{str(end_time)}\n')
330287
331288 @atexit.register
332289 def call_export():
339296 os.remove(pid_path)
340297 sys.exit()
341298 except KeyboardInterrupt:
342 print (R + '[-]' + C + ' Keyboard Interrupt.' + W + '\n')
299 print(f'{R}[-] {C}Keyboard Interrupt.{W}\n')
343300 os.remove(pid_path)
344 sys.exit()
301 sys.exit(130)
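
The finalrecon.py hunks above are mostly mechanical modernisation: f-strings replace string concatenation, `is`/`is not` replace `== True`-style comparisons, the remote version check and runtime dependency check are dropped, and bare `sys.exit()` calls gain explicit exit codes (1 for errors, 130 after a keyboard interrupt). The single-instance guard around the PID file keeps the same shape; the following is a minimal, hedged sketch of that pattern, not the upstream code (the `PID_PATH` constant, the `acquire_pid_lock` name and the `atexit`-based cleanup are illustrative; the real script builds the path from `$HOME` and removes the file explicitly before each exit).

```python
import atexit
import os
import sys

# Illustrative constant; the upstream script concatenates $HOME with this path.
PID_PATH = os.path.expanduser('~/.local/share/finalrecon/finalrecon.pid')


def _release_pid_lock() -> None:
    # Remove the PID file no matter how the process exits.
    if os.path.isfile(PID_PATH):
        os.remove(PID_PATH)


def acquire_pid_lock() -> None:
    """Refuse to start if another instance already holds the PID file."""
    if os.path.isfile(PID_PATH):
        with open(PID_PATH) as pidfile:
            print(f'[-] Already running, PID : {pidfile.read().strip()}')
        print(f'[>] If the previous run crashed, execute : rm {PID_PATH}')
        sys.exit(1)
    os.makedirs(os.path.dirname(PID_PATH), exist_ok=True)
    with open(PID_PATH, 'w') as pidfile:
        pidfile.write(str(os.getpid()))
    atexit.register(_release_pid_lock)


if __name__ == '__main__':
    acquire_pid_lock()
    print('[+] Lock acquired, running...')
```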
00 {
1 "metadata":
2 {
3 "name": "FinalRecon",
4 "author": "thewhiteh4t",
5 "version": "1.1.2",
6 "twitter": "https://twitter.com/thewhiteh4t",
7 "discord": "https://discord.gg/UM92zUn"
8 }
1 "name": "FinalRecon",
2 "author": "thewhiteh4t",
3 "version": "1.1.3",
4 "twitter": "https://twitter.com/thewhiteh4t",
5 "comms": "https://twc1rcle.com/"
96 }
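
With the GitHub metadata fetch gone, metadata.json is flattened: the nested "metadata" object disappears, "discord" becomes "comms", and the file is now read locally from the script directory at startup. A hedged sketch of consuming the new layout, mirroring the diff (the `meta_file_path` construction matches the diff; accessing `version` and printing the values is illustrative, since the script also hardcodes its own `version` string):

```python
import os
from json import loads

# As in the diff: metadata.json sits next to the script.
path_to_script = os.path.dirname(os.path.realpath(__file__))
meta_file_path = os.path.join(path_to_script, 'metadata.json')

with open(meta_file_path, 'r') as metadata:
    json_data = loads(metadata.read())

# Flat layout in 1.1.3: no nested "metadata" object any more.
version = json_data['version']
twitter_url = json_data['twitter']
comms_url = json_data['comms']
print(f'FinalRecon v{version} | {twitter_url} | {comms_url}')
```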
00 #!/usr/bin/env python3
11
2 import os
32 import re
43 import bs4
54 import lxml
1110 from datetime import date
1211 requests.packages.urllib3.disable_warnings()
1312
14 R = '\033[31m' # red
15 G = '\033[32m' # green
16 C = '\033[36m' # cyan
17 W = '\033[0m' # white
18 Y = '\033[33m' # yellow
13 R = '\033[31m' # red
14 G = '\033[32m' # green
15 C = '\033[36m' # cyan
16 W = '\033[0m' # white
17 Y = '\033[33m' # yellow
1918
2019 user_agent = {
21 'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
22 }
20 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
21 }
2322
2423 soup = ''
2524 r_url = ''
3635 sm_crawl_total = []
3736 wayback_total = []
3837
38
3939 def crawler(target, output, data):
4040 global soup, r_url, sm_url
4141 print('\n' + Y + '[!]' + Y + ' Starting Crawler...' + W + '\n')
5656 temp_tgt = target.split('://')[1]
5757 pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}'
5858 custom = bool(re.match(pattern, temp_tgt))
59 if custom == True:
59 if custom is True:
6060 r_url = protocol + '://' + temp_tgt + '/robots.txt'
6161 sm_url = protocol + '://' + temp_tgt + '/sitemap.xml'
6262 else:
8383
8484 out(target, output, data)
8585 else:
86 print (R + '[-]' + C + ' Status : ' + W + str(sc))
86 print(R + '[-]' + C + ' Status : ' + W + str(sc))
87
8788
8889 def url_filter(target):
8990 global url
9091
91 if all([url.startswith('/') == True, url.startswith('//') == False]):
92 if all([url.startswith('/') is True, url.startswith('//') is False]):
9293 url = target + url
9394 else:
9495 pass
9596
96 if all([url.find('http://') == -1,
97 if all([
98 url.find('http://') == -1,
9799 url.find('https://') == -1]):
98
99100 url = url.replace('//', 'http://')
100101 url = url.replace('../', target + '/')
101102 url = url.replace('./', target + '/')
102103 else:
103104 pass
104105
105 if all([url.find('//') == -1,
106 if all([
107 url.find('//') == -1,
106108 url.find('../') == -1,
107109 url.find('./') == -1,
108110 url.find('http://') == -1,
111113 url = target + '/' + url
112114 else:
113115 pass
116
114117
115118 async def wayback(target):
116119 global wayback_total
121124 domain = ext.domain
122125 domain_query = domain + '/*'
123126
124 #today = date.today().strftime("%Y%m%d")
125 #past = date.today() + relativedelta(months=-6)
126 #past = past.strftime("%Y%m%d")
127
128127 curr_yr = date.today().year
129128 last_yr = curr_yr - 1
130129
131 print(Y + '[!]' + C + ' Checking Availability on Wayback Machine' + W, end = '')
130 print(Y + '[!]' + C + ' Checking Availability on Wayback Machine' + W, end='')
132131 wm_avail = 'http://archive.org/wayback/available'
133 avail_data = { 'url': domain }
132 avail_data = {'url': domain}
134133
135134 try:
136135 check_rqst = requests.get(wm_avail, params=avail_data, timeout=10)
149148 except Exception as e:
150149 print('\n' + R + '[-] Exception : ' + C + str(e) + W)
151150
152 if is_avail == True:
153 print(Y + '[!]' + C + ' Requesting Wayback Machine' + W, end = '')
151 if is_avail is True:
152 print(Y + '[!]' + C + ' Requesting Wayback Machine' + W, end='')
154153 wm_url = 'http://web.archive.org/cdx/search/cdx'
155154
156155 data = {
157 'url': domain_query,
158 'fl': 'original',
159 'fastLatest': 'true',
156 'url': domain_query,
157 'fl': 'original',
158 'fastLatest': 'true',
160159 'from': '{}'.format(str(last_yr)),
161160 'to': '{}'.format(str(curr_yr)),
162161 'filter': 'statuscode:200'
179178 except Exception as e:
180179 print('\n' + R + '[-] Exception : ' + C + str(e) + W)
181180
181
182182 async def robots(target):
183183 global url, r_url, r_total
184 print(G + '[+]' + C + ' Looking for robots.txt' + W, end = '')
184 print(G + '[+]' + C + ' Looking for robots.txt' + W, end='')
185185
186186 try:
187187 r_rqst = requests.get(r_url, headers=user_agent, verify=False, timeout=10)
188188 r_sc = r_rqst.status_code
189189 if r_sc == 200:
190190 print(G + '['.rjust(9, '.') + ' Found ]' + W)
191 print(G + '[+]' + C + ' Extracting robots Links', end = '')
191 print(G + '[+]' + C + ' Extracting robots Links', end='')
192192 r_page = r_rqst.text
193193 r_scrape = r_page.split('\n')
194194 for entry in r_scrape:
195 if (entry.find('Disallow') == 0 or
196 entry.find('Allow') == 0 or
197 entry.find('Sitemap') == 0):
195 if any([
196 entry.find('Disallow') == 0,
197 entry.find('Allow') == 0,
198 entry.find('Sitemap') == 0]):
198199
199200 url = entry.split(': ')
200201 try:
202203 url = url.strip()
203204 url_filter(target)
204205 r_total.append(url)
205 if url.endswith('xml') == True:
206 if url.endswith('xml') is True:
206207 sm_total.append(url)
207 except:
208 except Exception:
208209 pass
209210
210211 r_total = set(r_total)
218219 except Exception as e:
219220 print('\n' + R + '[-] Exception : ' + C + str(e) + W)
220221
222
221223 async def sitemap():
222224 global url, sm_url, total, sm_total
223 print(G + '[+]' + C + ' Looking for sitemap.xml' + W, end = '')
225 print(G + '[+]' + C + ' Looking for sitemap.xml' + W, end='')
224226 try:
225227 sm_rqst = requests.get(sm_url, headers=user_agent, verify=False, timeout=10)
226228 sm_sc = sm_rqst.status_code
227229 if sm_sc == 200:
228230 print(G + '['.rjust(8, '.') + ' Found ]' + W)
229 print(G + '[+]' + C + ' Extracting sitemap Links', end = '')
231 print(G + '[+]' + C + ' Extracting sitemap Links', end='')
230232 sm_page = sm_rqst.content
231233 sm_soup = bs4.BeautifulSoup(sm_page, 'xml')
232234 links = sm_soup.find_all('loc')
233235 for url in links:
234236 url = url.get_text()
235 if url != None:
237 if url is not None:
236238 sm_total.append(url)
237239
238240 sm_total = set(sm_total)
245247 except Exception as e:
246248 print('\n' + R + '[-] Exception : ' + C + str(e))
247249
250
248251 async def css(target):
249252 global url, soup, total, css_total
250 print(G + '[+]' + C + ' Extracting CSS Links' + W, end = '')
253 print(G + '[+]' + C + ' Extracting CSS Links' + W, end='')
251254 css = soup.find_all('link')
252255
253256 for link in css:
254257 url = link.get('href')
255 if url != None and '.css' in url:
258 if url is not None and '.css' in url:
256259 url_filter(target)
257260 css_total.append(url)
258261
259262 css_total = set(css_total)
260263 print(G + '['.rjust(11, '.') + ' {} ]'.format(str(len(css_total))) + W)
261264
265
262266 async def js(target):
263267 global url, total, js_total
264 print(G + '[+]' + C + ' Extracting Javascript Links' + W, end = '')
268 print(G + '[+]' + C + ' Extracting Javascript Links' + W, end='')
265269 js = soup.find_all('script')
266270
267271 for link in js:
268272 url = link.get('src')
269 if url != None and '.js' in url:
273 if url is not None and '.js' in url:
270274 url_filter(target)
271275 js_total.append(url)
272276
273277 js_total = set(js_total)
274278 print(G + '['.rjust(4, '.') + ' {} ]'.format(str(len(js_total))))
275279
280
276281 async def internal_links(target):
277282 global total, int_total
278 print(G + '[+]' + C + ' Extracting Internal Links' + W, end = '')
283 print(G + '[+]' + C + ' Extracting Internal Links' + W, end='')
279284
280285 ext = tldextract.extract(target)
281286 domain = ext.registered_domain
283288 links = soup.find_all('a')
284289 for link in links:
285290 url = link.get('href')
286 if url != None:
291 if url is not None:
287292 if domain in url:
288293 int_total.append(url)
289294
290295 int_total = set(int_total)
291296 print(G + '['.rjust(6, '.') + ' {} ]'.format(str(len(int_total))))
292297
298
293299 async def external_links(target):
294300 global total, ext_total
295 print(G + '[+]' + C + ' Extracting External Links' + W, end = '')
301 print(G + '[+]' + C + ' Extracting External Links' + W, end='')
296302
297303 ext = tldextract.extract(target)
298304 domain = ext.registered_domain
300306 links = soup.find_all('a')
301307 for link in links:
302308 url = link.get('href')
303 if url != None:
309 if url is not None:
304310 if domain not in url and 'http' in url:
305311 ext_total.append(url)
306312
307313 ext_total = set(ext_total)
308314 print(G + '['.rjust(6, '.') + ' {} ]'.format(str(len(ext_total))))
309315
316
310317 async def images(target):
311318 global url, total, img_total
312 print(G + '[+]' + C + ' Extracting Images' + W, end = '')
319 print(G + '[+]' + C + ' Extracting Images' + W, end='')
313320 images = soup.find_all('img')
314321
315322 for link in images:
316323 url = link.get('src')
317 if url != None and len(url) > 1:
324 if url is not None and len(url) > 1:
318325 url_filter(target)
319326 img_total.append(url)
320327
321328 img_total = set(img_total)
322329 print(G + '['.rjust(14, '.') + ' {} ]'.format(str(len(img_total))))
323330
331
324332 async def sm_crawl():
325333 global sm_crawl_total
326 print(G + '[+]' + C + ' Crawling Sitemaps' + W, end = '')
334 print(G + '[+]' + C + ' Crawling Sitemaps' + W, end='')
327335
328336 threads = []
329337
337345 links = sm_soup.find_all('loc')
338346 for url in links:
339347 url = url.get_text()
340 if url != None:
348 if url is not None:
341349 sm_crawl_total.append(url)
342350 elif sm_sc == 404:
343351 print(R + '['.rjust(8, '.') + ' Not Found ]' + W)
348356
349357 for site_url in sm_total:
350358 if site_url != sm_url:
351 if site_url.endswith('xml') == True:
359 if site_url.endswith('xml') is True:
352360 t = threading.Thread(target=fetch, args=[site_url])
353361 t.daemon = True
354362 threads.append(t)
360368 sm_crawl_total = set(sm_crawl_total)
361369 print(G + '['.rjust(14, '.') + ' {} ]'.format(str(len(sm_crawl_total))))
362370
371
363372 async def js_crawl():
364373 global js_crawl_total
365 print(G + '[+]' + C + ' Crawling Javascripts' + W, end = '')
374 print(G + '[+]' + C + ' Crawling Javascripts' + W, end='')
366375
367376 threads = []
368377
394403 js_crawl_total = set(js_crawl_total)
395404 print(G + '['.rjust(11, '.') + ' {} ]'.format(str(len(js_crawl_total))))
396405
406
397407 def out(target, output, data):
398408 global total
399409
422432
423433 data['module-Crawler'].update(
424434 {
425 'Count ( Robots )': str(len(r_total)),
426 'Count ( Sitemap )': str(len(sm_total)),
427 'Count ( CSS )': str(len(css_total)),
428 'Count ( JS )': str(len(js_total)),
429 'Count ( Links in JS )': str(len(js_crawl_total)),
435 'Count ( Robots )': str(len(r_total)),
436 'Count ( Sitemap )': str(len(sm_total)),
437 'Count ( CSS )': str(len(css_total)),
438 'Count ( JS )': str(len(js_total)),
439 'Count ( Links in JS )': str(len(js_crawl_total)),
430440 'Count ( Links in Sitemaps )': str(len(sm_crawl_total)),
431 'Count ( Internal )': str(len(int_total)),
432 'Count ( External )': str(len(ext_total)),
433 'Count ( Images )': str(len(img_total)),
441 'Count ( Internal )': str(len(int_total)),
442 'Count ( External )': str(len(ext_total)),
443 'Count ( Images )': str(len(img_total)),
434444 'count ( Wayback Machine )': str(len(wayback_total)),
435445 'Count ( Total )': str(len(total))
436446 })
437
447
438448 if len(r_total) != 0:
439449 data['module-Crawler'].update({'Robots': list(r_total)})
440
450
441451 if len(sm_total) != 0:
442452 data['module-Crawler'].update({'Sitemaps': list(sm_total)})
443
453
444454 if len(css_total) != 0:
445455 data['module-Crawler'].update({'CSS': list(css_total)})
446
456
447457 if len(js_total) != 0:
448458 data['module-Crawler'].update({'Javascripts': list(js_total)})
449459
450460 if len(js_crawl_total) != 0:
451461 data['module-Crawler'].update({'Links inside Javascripts': list(js_crawl_total)})
452
462
453463 if len(sm_crawl_total) != 0:
454464 data['module-Crawler'].update({'Links Inside Sitemaps': list(sm_crawl_total)})
455
465
456466 if len(int_total) != 0:
457467 data['module-Crawler'].update({'Internal Links': list(int_total)})
458
468
459469 if len(ext_total) != 0:
460470 data['module-Crawler'].update({'External Links': list(ext_total)})
461
471
462472 if len(img_total) != 0:
463473 data['module-Crawler'].update({'Images': list(img_total)})
464
474
465475 if len(wayback_total) != 0:
466 data['module-Crawler'].update({'Wayback Machine': list(wayback_total)})
476 data['module-Crawler'].update({'Wayback Machine': list(wayback_total)})
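
The modules/crawler.py changes are style clean-ups (`is`/`is not` comparisons, PEP 8 spacing around keyword arguments, two blank lines between functions) and do not change the crawl logic. The Wayback Machine step touched here works in two requests: an availability probe against archive.org/wayback/available, then a CDX query limited to HTTP 200 captures from the current and previous year. A standalone, hedged sketch of that flow (the CDX parameters are taken from the diff; the availability-response handling and the `wayback_urls` helper are assumptions, not the upstream functions):

```python
import datetime
import requests


def wayback_urls(domain: str, timeout: int = 10) -> list[str]:
    """Illustrative two-step Wayback lookup mirroring the crawler's flow."""
    # Step 1: is the domain archived at all?
    avail = requests.get(
        'http://archive.org/wayback/available',
        params={'url': domain},
        timeout=timeout,
    ).json()
    if not avail.get('archived_snapshots'):
        return []

    # Step 2: pull 200-status captures from the last two calendar years.
    curr_yr = datetime.date.today().year
    resp = requests.get(
        'http://web.archive.org/cdx/search/cdx',
        params={
            'url': f'{domain}/*',
            'fl': 'original',
            'fastLatest': 'true',
            'from': str(curr_yr - 1),
            'to': str(curr_yr),
            'filter': 'statuscode:200',
        },
        timeout=timeout,
    )
    return list(set(filter(None, resp.text.split('\n'))))


if __name__ == '__main__':
    print(len(wayback_urls('example.com')))
```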
229229 loop.run_until_complete(wayback(target, dserv, tout))
230230 wm_filter()
231231 dir_output(output, data)
232 loop.close()
232 loop.close()
44 dnslib
55 aiohttp
66 aiodns
7 psycopg2
7 psycopg2-binary
88 tldextract
99 icmplib
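
In requirements.txt, `psycopg2` becomes `psycopg2-binary`, which ships a pre-built wheel and therefore installs without the PostgreSQL client headers and a C toolchain. The rename also illustrates why the release could drop the old runtime dependency check that walked requirements.txt through `importlib.util.find_spec()`: a PyPI distribution name is not always the import name (`psycopg2-binary` is imported as `psycopg2`). If such a sanity check is still wanted, a hedged sketch with an explicit distribution-to-module mapping follows (the mapping entries and the plain-text requirements parsing are assumptions for illustration):

```python
import importlib.util

# Assumed mapping from PyPI distribution names to import names; extend as needed.
IMPORT_NAMES = {'psycopg2-binary': 'psycopg2', 'beautifulsoup4': 'bs4'}

with open('requirements.txt') as rqr:
    packages = [line.strip() for line in rqr if line.strip()]

missing = [
    pkg for pkg in packages
    if importlib.util.find_spec(IMPORT_NAMES.get(pkg, pkg)) is None
]

if missing:
    print('[-] Missing packages: ' + ', '.join(missing))
    print('[>] Run: pip3 install -r requirements.txt')
```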