finalrecon / bcf1c21
New upstream version 1.1.3 (Sophie Brun, 2 years ago)
7 changed files with 307 additions and 191 deletions.
0 # Byte-compiled / optimized / DLL files
1 __pycache__/
2 *.py[cod]
3 *$py.class
4
5 # C extensions
6 *.so
7
8 # Distribution / packaging
9 .Python
10 build/
11 develop-eggs/
12 dist/
13 downloads/
14 eggs/
15 .eggs/
16 lib/
17 lib64/
18 parts/
19 sdist/
20 var/
21 wheels/
22 share/python-wheels/
23 *.egg-info/
24 .installed.cfg
25 *.egg
26 MANIFEST
27
28 # PyInstaller
29 # Usually these files are written by a python script from a template
30 # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 *.manifest
32 *.spec
33
34 # Installer logs
35 pip-log.txt
36 pip-delete-this-directory.txt
37
38 # Unit test / coverage reports
39 htmlcov/
40 .tox/
41 .nox/
42 .coverage
43 .coverage.*
44 .cache
45 nosetests.xml
46 coverage.xml
47 *.cover
48 *.py,cover
49 .hypothesis/
50 .pytest_cache/
51 cover/
52
53 # Translations
54 *.mo
55 *.pot
56
57 # Django stuff:
58 *.log
59 local_settings.py
60 db.sqlite3
61 db.sqlite3-journal
62
63 # Flask stuff:
64 instance/
65 .webassets-cache
66
67 # Scrapy stuff:
68 .scrapy
69
70 # Sphinx documentation
71 docs/_build/
72
73 # PyBuilder
74 .pybuilder/
75 target/
76
77 # Jupyter Notebook
78 .ipynb_checkpoints
79
80 # IPython
81 profile_default/
82 ipython_config.py
83
84 # pyenv
85 # For a library or package, you might want to ignore these files since the code is
86 # intended to run in multiple environments; otherwise, check them in:
87 # .python-version
88
89 # pipenv
90 # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 # install all needed dependencies.
94 #Pipfile.lock
95
96 # poetry
97 # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
98 # This is especially recommended for binary packages to ensure reproducibility, and is more
99 # commonly ignored for libraries.
100 # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
101 #poetry.lock
102
103 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 __pypackages__/
105
106 # Celery stuff
107 celerybeat-schedule
108 celerybeat.pid
109
110 # SageMath parsed files
111 *.sage.py
112
113 # Environments
114 .env
115 .venv
116 env/
117 venv/
118 ENV/
119 env.bak/
120 venv.bak/
121
122 # Spyder project settings
123 .spyderproject
124 .spyproject
125
126 # Rope project settings
127 .ropeproject
128
129 # mkdocs documentation
130 /site
131
132 # mypy
133 .mypy_cache/
134 .dmypy.json
135 dmypy.json
136
137 # Pyre type checker
138 .pyre/
139
140 # pytype static type analyzer
141 .pytype/
142
143 # Cython debug symbols
144 cython_debug/
145
146 # PyCharm
147 # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
148 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
149 # and can be added to the global gitignore or merged into this file. For a more nuclear
150 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
151 .idea/
0 FROM alpine:latest
0 FROM alpine:3.15.4
11 RUN apk update
22 RUN apk add \
33 git \
22 import os
33 import sys
44 import atexit
5 import importlib.util
6
7 R = '\033[31m' # red
8 G = '\033[32m' # green
9 C = '\033[36m' # cyan
10 W = '\033[0m' # white
5
6 R = '\033[31m' # red
7 G = '\033[32m' # green
8 C = '\033[36m' # cyan
9 W = '\033[0m' # white
1110
1211 home = os.getenv('HOME')
1312 pid_path = home + '/.local/share/finalrecon/finalrecon.pid'
1514 conf_path = home + '/.config/finalrecon'
1615 path_to_script = os.path.dirname(os.path.realpath(__file__))
1716 src_conf_path = path_to_script + '/conf/'
17 meta_file_path = path_to_script + '/metadata.json'
1818 fail = False
1919
2020 if os.path.isfile(pid_path):
21 print(R + '[-]' + C + ' One instance of FinalRecon is already running!' + W)
21 print(f'{R}[-] {C}One instance of FinalRecon is already running!{W}')
2222 with open(pid_path, 'r') as pidfile:
2323 pid = pidfile.read()
24 print(G + '[+]' + C + ' PID : ' + W + str(pid))
25 print(G + '[>]' + C + ' If FinalRecon crashed, execute : ' + W + 'rm {}'.format(pid_path))
26 sys.exit()
24 print(f'{G}[+] {C}PID :{W} {str(pid)}')
25 print(f'{G}[>] {C}If FinalRecon crashed, execute :{W} rm {pid_path}')
26 sys.exit(1)
2727 else:
2828 os.makedirs(os.path.dirname(pid_path), exist_ok=True)
2929 with open(pid_path, 'w') as pidfile:
3535 import shutil
3636 shutil.copytree(src_conf_path, conf_path, dirs_exist_ok=True)
3737
38 with open(path_to_script + '/requirements.txt', 'r') as rqr:
39 pkg_list = rqr.read().strip().split('\n')
40
41 print('\n' + G + '[+]' + C + ' Checking Dependencies...' + W + '\n')
42
43 for pkg in pkg_list:
44 spec = importlib.util.find_spec(pkg)
45 if spec is None:
46 print(R + '[-]' + W + ' {}'.format(pkg) + C + ' is not Installed!' + W)
47 fail = True
48 else:
49 pass
50 if fail == True:
51 print('\n' + R + '[-]' + C + ' Please Execute ' + W + 'pip3 install -r requirements.txt' + C + ' to Install Missing Packages' + W + '\n')
52 os.remove(pid_path)
53 sys.exit()
54
5538 import argparse
5639
57 version = '1.1.2'
40 version = '1.1.3'
5841 gh_version = ''
5942 twitter_url = ''
6043 discord_url = ''
6144
62 parser = argparse.ArgumentParser(description='FinalRecon - The Last Web Recon Tool You Will Need | v{}'.format(version))
45 parser = argparse.ArgumentParser(description=f'FinalRecon - The Last Web Recon Tool You Will Need | v{version}')
6346 parser.add_argument('url', help='Target URL')
6447 parser.add_argument('--headers', help='Header Information', action='store_true')
6548 parser.add_argument('--sslinfo', help='SSL Certificate Information', action='store_true')
8669 ext_help.add_argument('-tt', type=float, help='Traceroute Timeout [ Default : 1.0 ]')
8770 ext_help.add_argument('-o', help='Export Output [ Default : txt ] [ Available : xml, csv ]')
8871 ext_help.set_defaults(
89 t = 30,
90 T = 30.0,
91 w = path_to_script + '/wordlists/dirb_common.txt',
92 r = False,
93 s = True,
94 sp = 443,
95 d = '1.1.1.1',
96 e = '',
97 m = 'UDP',
98 p = 33434,
99 tt = 1.0,
100 o = 'txt')
72 t=30,
73 T=30.0,
74 w=path_to_script + '/wordlists/dirb_common.txt',
75 r=False,
76 s=True,
77 sp=443,
78 d='1.1.1.1',
79 e='',
80 m='UDP',
81 p=33434,
82 tt=1.0,
83 o='txt')
10184
10285 try:
10386 args = parser.parse_args()
129112 tr_tout = args.tt
130113 output = args.o
131114
132 import json
133115 import socket
134116 import requests
135117 import datetime
136118 import ipaddress
137119 import tldextract
120 from json import loads
138121
139122 type_ip = False
140123 data = {}
141124 meta = {}
142125
143 def fetch_meta():
144 global gh_version, twitter_url, discord_url
145 try:
146 rqst = requests.get('https://raw.githubusercontent.com/thewhiteh4t/finalrecon/master/metadata.json', timeout=5)
147 sc = rqst.status_code
148 if sc == 200:
149 metadata = rqst.text
150 json_data = json.loads(metadata)
151 gh_version = json_data['metadata']['version']
152 twitter_url = json_data['metadata']['twitter']
153 discord_url = json_data['metadata']['discord']
154 else:
155 with open('metadata.json', 'r') as metadata:
156 json_data = json.loads(metadata.read())
157 gh_version = json_data['metadata']['version']
158 twitter_url = json_data['metadata']['twitter']
159 discord_url = json_data['metadata']['discord']
160 except Exception as exc:
161 print('\n' + R + '[-]' + C + ' Exception : ' + W + str(exc))
162 with open('metadata.json', 'r') as metadata:
163 json_data = json.loads(metadata.read())
164 gh_version = json_data['metadata']['version']
165 twitter_url = json_data['metadata']['twitter']
166 discord_url = json_data['metadata']['discord']
167126
168127 def banner():
169 banner = r'''
128 with open(meta_file_path, 'r') as metadata:
129 json_data = loads(metadata.read())
130 twitter_url = json_data['twitter']
131 comms_url = json_data['comms']
132
133 art = r'''
170134 ______ __ __ __ ______ __
171135 /\ ___\/\ \ /\ "-.\ \ /\ __ \ /\ \
172136 \ \ __\\ \ \\ \ \-. \\ \ __ \\ \ \____
177141 \ \ __< \ \ __\ \ \ \____\ \ \/\ \\ \ \-. \
178142 \ \_\ \_\\ \_____\\ \_____\\ \_____\\ \_\\"\_\
179143 \/_/ /_/ \/_____/ \/_____/ \/_____/ \/_/ \/_/'''
180 print(G + banner + W + '\n')
181 print(G + '[>]' + C + ' Created By : ' + W + 'thewhiteh4t')
182 print(G + ' |---> ' + C + 'Twitter : ' + W + twitter_url)
183 print(G + ' |---> ' + C + 'Discord : ' + W + discord_url)
184 print(G + '[>]' + C + ' Version : ' + W + version + '\n')
185
186 def ver_check():
187 print(G + '[+]' + C + ' Checking for Updates...', end='')
188 if version == gh_version:
189 print(C + '[' + G + ' Up-To-Date ' + C +']' + '\n')
190 else:
191 print(C + '[' + G + ' Available : {} '.format(gh_version) + C + ']' + '\n')
144 print(f'{G}{art}{W}\n')
145 print(f'{G}[>]{C} Created By :{W} thewhiteh4t')
146 print(f'{G} |--->{C} Twitter :{W} {twitter_url}')
147 print(f'{G} |--->{C} Community :{W} {comms_url}')
148 print(f'{G}[>]{C} Version :{W} {version}\n')
149
192150
193151 def full_recon():
194152 from modules.sslinfo import cert
204162 cert(hostname, sslp, output, data)
205163 whois_lookup(ip, output, data)
206164 dnsrec(domain, output, data)
207 if type_ip == False:
165 if type_ip is False:
208166 subdomains(domain, tout, output, data, conf_path)
209167 else:
210168 pass
213171 crawler(target, output, data)
214172 hammer(target, threads, tout, wdlist, redir, sslv, dserv, output, data, filext)
215173
174
216175 try:
217 fetch_meta()
218176 banner()
219 ver_check()
220
221 if target.startswith(('http', 'https')) == False:
222 print(R + '[-]' + C + ' Protocol Missing, Include ' + W + 'http://' + C + ' or ' + W + 'https://' + '\n')
177
178 if target.startswith(('http', 'https')) is False:
179 print(f'{R}[-] {C}Protocol Missing, Include {W}http:// {C}or{W} https:// \n')
223180 os.remove(pid_path)
224 sys.exit()
181 sys.exit(1)
225182 else:
226183 pass
227184
228 if target.endswith('/') == True:
185 if target.endswith('/') is True:
229186 target = target[:-1]
230187 else:
231188 pass
232189
233 print (G + '[+]' + C + ' Target : ' + W + target)
190 print(f'{G}[+] {C}Target : {W}{target}')
234191 ext = tldextract.extract(target)
235192 domain = ext.registered_domain
236193 hostname = '.'.join(part for part in ext if part)
239196 ipaddress.ip_address(hostname)
240197 type_ip = True
241198 ip = hostname
242 except:
199 except Exception:
243200 try:
244201 ip = socket.gethostbyname(hostname)
245 print ('\n' + G + '[+]' + C + ' IP Address : ' + W + str(ip))
202 print(f'\n{G}[+] {C}IP Address : {W}{str(ip)}')
246203 except Exception as e:
247 print ('\n' + R + '[-]' + C + ' Unable to Get IP : ' + W + str(e))
204 print(f'\n{R}[-] {C}Unable to Get IP : {W}{str(e)}')
248205 os.remove(pid_path)
249 sys.exit()
206 sys.exit(1)
250207
251208 start_time = datetime.datetime.now()
252209
261218 fpath = usr_data
262219 fname = fpath + hostname + '.' + output
263220 if not os.path.exists(fpath):
264 os.makedirs(fpath)
221 os.makedirs(fpath)
265222 output = {
266223 'format': output,
267224 'file': fname,
268225 'export': False
269 }
226 }
270227
271228 from modules.export import export
272229
273 if full == True:
230 if full is True:
274231 full_recon()
275232
276 if headinfo == True:
233 if headinfo is True:
277234 from modules.headers import headers
278235 headers(target, output, data)
279236
280 if sslinfo == True:
237 if sslinfo is True:
281238 from modules.sslinfo import cert
282239 cert(hostname, sslp, output, data)
283240
284 if whois == True:
241 if whois is True:
285242 from modules.whois import whois_lookup
286243 whois_lookup(ip, output, data)
287244
288 if crawl == True:
245 if crawl is True:
289246 from modules.crawler import crawler
290247 crawler(target, output, data)
291248
292 if dns == True:
249 if dns is True:
293250 from modules.dns import dnsrec
294251 dnsrec(domain, output, data)
295252
296 if subd == True and type_ip == False:
253 if subd is True and type_ip is False:
297254 from modules.subdom import subdomains
298255 subdomains(domain, tout, output, data, conf_path)
299 elif subd == True and type_ip == True:
300 print(R + '[-]' + C + ' Sub-Domain Enumeration is Not Supported for IP Addresses' + W + '\n')
256 elif subd is True and type_ip is True:
257 print(f'{R}[-] {C}Sub-Domain Enumeration is Not Supported for IP Addresses{W}\n')
301258 os.remove(pid_path)
302 sys.exit()
259 sys.exit(1)
303260 else:
304261 pass
305262
306 if trace == True:
263 if trace is True:
307264 from modules.traceroute import troute
308265 if mode == 'TCP' and port == 33434:
309266 port = 80
311268 else:
312269 troute(ip, mode, port, tr_tout, output, data)
313270
314 if pscan == True:
271 if pscan is True:
315272 from modules.portscan import ps
316273 ps(ip, output, data)
317274
318 if dirrec == True:
275 if dirrec is True:
319276 from modules.dirrec import hammer
320277 hammer(target, threads, tout, wdlist, redir, sslv, dserv, output, data, filext)
321278
322 if any([full, headinfo, sslinfo, whois, crawl, dns, subd, trace, pscan, dirrec]) != True:
323 print ('\n' + R + '[-] Error : ' + C + 'At least One Argument is Required with URL' + W)
279 if any([full, headinfo, sslinfo, whois, crawl, dns, subd, trace, pscan, dirrec]) is not True:
280 print(f'\n{R}[-] Error : {C}At least One Argument is Required with URL{W}')
324281 output = 'None'
325282 os.remove(pid_path)
326 sys.exit()
283 sys.exit(1)
327284
328285 end_time = datetime.datetime.now() - start_time
329 print ('\n' + G + '[+]' + C + ' Completed in ' + W + str(end_time) + '\n')
286 print(f'\n{G}[+] {C}Completed in {W}{str(end_time)}\n')
330287
331288 @atexit.register
332289 def call_export():
339296 os.remove(pid_path)
340297 sys.exit()
341298 except KeyboardInterrupt:
342 print (R + '[-]' + C + ' Keyboard Interrupt.' + W + '\n')
299 print(f'{R}[-] {C}Keyboard Interrupt.{W}\n')
343300 os.remove(pid_path)
344 sys.exit()
301 sys.exit(130)
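
The finalrecon.py hunks above are mostly mechanical modernisation: f-strings replace string concatenation, `is`/`is not` replace `== True`-style comparisons, the remote version check and runtime dependency check are dropped, and bare `sys.exit()` calls gain explicit exit codes (1 for errors, 130 after a keyboard interrupt). The single-instance guard around the PID file keeps the same shape; the following is a minimal, hedged sketch of that pattern, not the upstream code (the `PID_PATH` constant, the `acquire_pid_lock` name and the `atexit`-based cleanup are illustrative; the real script builds the path from `$HOME` and removes the file explicitly before each exit).

```python
import atexit
import os
import sys

# Illustrative constant; the upstream script concatenates $HOME with this path.
PID_PATH = os.path.expanduser('~/.local/share/finalrecon/finalrecon.pid')


def _release_pid_lock() -> None:
    # Remove the PID file no matter how the process exits.
    if os.path.isfile(PID_PATH):
        os.remove(PID_PATH)


def acquire_pid_lock() -> None:
    """Refuse to start if another instance already holds the PID file."""
    if os.path.isfile(PID_PATH):
        with open(PID_PATH) as pidfile:
            print(f'[-] Already running, PID : {pidfile.read().strip()}')
        print(f'[>] If the previous run crashed, execute : rm {PID_PATH}')
        sys.exit(1)
    os.makedirs(os.path.dirname(PID_PATH), exist_ok=True)
    with open(PID_PATH, 'w') as pidfile:
        pidfile.write(str(os.getpid()))
    atexit.register(_release_pid_lock)


if __name__ == '__main__':
    acquire_pid_lock()
    print('[+] Lock acquired, running...')
```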
00 {
1 "metadata":
2 {
3 "name": "FinalRecon",
4 "author": "thewhiteh4t",
5 "version": "1.1.2",
6 "twitter": "https://twitter.com/thewhiteh4t",
7 "discord": "https://discord.gg/UM92zUn"
8 }
1 "name": "FinalRecon",
2 "author": "thewhiteh4t",
3 "version": "1.1.3",
4 "twitter": "https://twitter.com/thewhiteh4t",
5 "comms": "https://twc1rcle.com/"
96 }
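
With the GitHub metadata fetch gone, metadata.json is flattened: the nested "metadata" object disappears, "discord" becomes "comms", and the file is now read locally from the script directory at startup. A hedged sketch of consuming the new layout, mirroring the diff (the `meta_file_path` construction matches the diff; accessing `version` and printing the values is illustrative, since the script also hardcodes its own `version` string):

```python
import os
from json import loads

# As in the diff: metadata.json sits next to the script.
path_to_script = os.path.dirname(os.path.realpath(__file__))
meta_file_path = os.path.join(path_to_script, 'metadata.json')

with open(meta_file_path, 'r') as metadata:
    json_data = loads(metadata.read())

# Flat layout in 1.1.3: no nested "metadata" object any more.
version = json_data['version']
twitter_url = json_data['twitter']
comms_url = json_data['comms']
print(f'FinalRecon v{version} | {twitter_url} | {comms_url}')
```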
00 #!/usr/bin/env python3
11
2 import os
32 import re
43 import bs4
54 import lxml
1110 from datetime import date
1211 requests.packages.urllib3.disable_warnings()
1312
14 R = '\033[31m' # red
15 G = '\033[32m' # green
16 C = '\033[36m' # cyan
17 W = '\033[0m' # white
18 Y = '\033[33m' # yellow
13 R = '\033[31m' # red
14 G = '\033[32m' # green
15 C = '\033[36m' # cyan
16 W = '\033[0m' # white
17 Y = '\033[33m' # yellow
1918
2019 user_agent = {
21 'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
22 }
20 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'
21 }
2322
2423 soup = ''
2524 r_url = ''
3635 sm_crawl_total = []
3736 wayback_total = []
3837
38
3939 def crawler(target, output, data):
4040 global soup, r_url, sm_url
4141 print('\n' + Y + '[!]' + Y + ' Starting Crawler...' + W + '\n')
5656 temp_tgt = target.split('://')[1]
5757 pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}'
5858 custom = bool(re.match(pattern, temp_tgt))
59 if custom == True:
59 if custom is True:
6060 r_url = protocol + '://' + temp_tgt + '/robots.txt'
6161 sm_url = protocol + '://' + temp_tgt + '/sitemap.xml'
6262 else:
8383
8484 out(target, output, data)
8585 else:
86 print (R + '[-]' + C + ' Status : ' + W + str(sc))
86 print(R + '[-]' + C + ' Status : ' + W + str(sc))
87
8788
8889 def url_filter(target):
8990 global url
9091
91 if all([url.startswith('/') == True, url.startswith('//') == False]):
92 if all([url.startswith('/') is True, url.startswith('//') is False]):
9293 url = target + url
9394 else:
9495 pass
9596
96 if all([url.find('http://') == -1,
97 if all([
98 url.find('http://') == -1,
9799 url.find('https://') == -1]):
98
99100 url = url.replace('//', 'http://')
100101 url = url.replace('../', target + '/')
101102 url = url.replace('./', target + '/')
102103 else:
103104 pass
104105
105 if all([url.find('//') == -1,
106 if all([
107 url.find('//') == -1,
106108 url.find('../') == -1,
107109 url.find('./') == -1,
108110 url.find('http://') == -1,
111113 url = target + '/' + url
112114 else:
113115 pass
116
114117
115118 async def wayback(target):
116119 global wayback_total
121124 domain = ext.domain
122125 domain_query = domain + '/*'
123126
124 #today = date.today().strftime("%Y%m%d")
125 #past = date.today() + relativedelta(months=-6)
126 #past = past.strftime("%Y%m%d")
127
128127 curr_yr = date.today().year
129128 last_yr = curr_yr - 1
130129
131 print(Y + '[!]' + C + ' Checking Availability on Wayback Machine' + W, end = '')
130 print(Y + '[!]' + C + ' Checking Availability on Wayback Machine' + W, end='')
132131 wm_avail = 'http://archive.org/wayback/available'
133 avail_data = { 'url': domain }
132 avail_data = {'url': domain}
134133
135134 try:
136135 check_rqst = requests.get(wm_avail, params=avail_data, timeout=10)
149148 except Exception as e:
150149 print('\n' + R + '[-] Exception : ' + C + str(e) + W)
151150
152 if is_avail == True:
153 print(Y + '[!]' + C + ' Requesting Wayback Machine' + W, end = '')
151 if is_avail is True:
152 print(Y + '[!]' + C + ' Requesting Wayback Machine' + W, end='')
154153 wm_url = 'http://web.archive.org/cdx/search/cdx'
155154
156155 data = {
157 'url': domain_query,
158 'fl': 'original',
159 'fastLatest': 'true',
156 'url': domain_query,
157 'fl': 'original',
158 'fastLatest': 'true',
160159 'from': '{}'.format(str(last_yr)),
161160 'to': '{}'.format(str(curr_yr)),
162161 'filter': 'statuscode:200'
179178 except Exception as e:
180179 print('\n' + R + '[-] Exception : ' + C + str(e) + W)
181180
181
182182 async def robots(target):
183183 global url, r_url, r_total
184 print(G + '[+]' + C + ' Looking for robots.txt' + W, end = '')
184 print(G + '[+]' + C + ' Looking for robots.txt' + W, end='')
185185
186186 try:
187187 r_rqst = requests.get(r_url, headers=user_agent, verify=False, timeout=10)
188188 r_sc = r_rqst.status_code
189189 if r_sc == 200:
190190 print(G + '['.rjust(9, '.') + ' Found ]' + W)
191 print(G + '[+]' + C + ' Extracting robots Links', end = '')
191 print(G + '[+]' + C + ' Extracting robots Links', end='')
192192 r_page = r_rqst.text
193193 r_scrape = r_page.split('\n')
194194 for entry in r_scrape:
195 if (entry.find('Disallow') == 0 or
196 entry.find('Allow') == 0 or
197 entry.find('Sitemap') == 0):
195 if any([
196 entry.find('Disallow') == 0,
197 entry.find('Allow') == 0,
198 entry.find('Sitemap') == 0]):
198199
199200 url = entry.split(': ')
200201 try:
202203 url = url.strip()
203204 url_filter(target)
204205 r_total.append(url)
205 if url.endswith('xml') == True:
206 if url.endswith('xml') is True:
206207 sm_total.append(url)
207 except:
208 except Exception:
208209 pass
209210
210211 r_total = set(r_total)
218219 except Exception as e:
219220 print('\n' + R + '[-] Exception : ' + C + str(e) + W)
220221
222
221223 async def sitemap():
222224 global url, sm_url, total, sm_total
223 print(G + '[+]' + C + ' Looking for sitemap.xml' + W, end = '')
225 print(G + '[+]' + C + ' Looking for sitemap.xml' + W, end='')
224226 try:
225227 sm_rqst = requests.get(sm_url, headers=user_agent, verify=False, timeout=10)
226228 sm_sc = sm_rqst.status_code
227229 if sm_sc == 200:
228230 print(G + '['.rjust(8, '.') + ' Found ]' + W)
229 print(G + '[+]' + C + ' Extracting sitemap Links', end = '')
231 print(G + '[+]' + C + ' Extracting sitemap Links', end='')
230232 sm_page = sm_rqst.content
231233 sm_soup = bs4.BeautifulSoup(sm_page, 'xml')
232234 links = sm_soup.find_all('loc')
233235 for url in links:
234236 url = url.get_text()
235 if url != None:
237 if url is not None:
236238 sm_total.append(url)
237239
238240 sm_total = set(sm_total)
245247 except Exception as e:
246248 print('\n' + R + '[-] Exception : ' + C + str(e))
247249
250
248251 async def css(target):
249252 global url, soup, total, css_total
250 print(G + '[+]' + C + ' Extracting CSS Links' + W, end = '')
253 print(G + '[+]' + C + ' Extracting CSS Links' + W, end='')
251254 css = soup.find_all('link')
252255
253256 for link in css:
254257 url = link.get('href')
255 if url != None and '.css' in url:
258 if url is not None and '.css' in url:
256259 url_filter(target)
257260 css_total.append(url)
258261
259262 css_total = set(css_total)
260263 print(G + '['.rjust(11, '.') + ' {} ]'.format(str(len(css_total))) + W)
261264
265
262266 async def js(target):
263267 global url, total, js_total
264 print(G + '[+]' + C + ' Extracting Javascript Links' + W, end = '')
268 print(G + '[+]' + C + ' Extracting Javascript Links' + W, end='')
265269 js = soup.find_all('script')
266270
267271 for link in js:
268272 url = link.get('src')
269 if url != None and '.js' in url:
273 if url is not None and '.js' in url:
270274 url_filter(target)
271275 js_total.append(url)
272276
273277 js_total = set(js_total)
274278 print(G + '['.rjust(4, '.') + ' {} ]'.format(str(len(js_total))))
275279
280
276281 async def internal_links(target):
277282 global total, int_total
278 print(G + '[+]' + C + ' Extracting Internal Links' + W, end = '')
283 print(G + '[+]' + C + ' Extracting Internal Links' + W, end='')
279284
280285 ext = tldextract.extract(target)
281286 domain = ext.registered_domain
283288 links = soup.find_all('a')
284289 for link in links:
285290 url = link.get('href')
286 if url != None:
291 if url is not None:
287292 if domain in url:
288293 int_total.append(url)
289294
290295 int_total = set(int_total)
291296 print(G + '['.rjust(6, '.') + ' {} ]'.format(str(len(int_total))))
292297
298
293299 async def external_links(target):
294300 global total, ext_total
295 print(G + '[+]' + C + ' Extracting External Links' + W, end = '')
301 print(G + '[+]' + C + ' Extracting External Links' + W, end='')
296302
297303 ext = tldextract.extract(target)
298304 domain = ext.registered_domain
300306 links = soup.find_all('a')
301307 for link in links:
302308 url = link.get('href')
303 if url != None:
309 if url is not None:
304310 if domain not in url and 'http' in url:
305311 ext_total.append(url)
306312
307313 ext_total = set(ext_total)
308314 print(G + '['.rjust(6, '.') + ' {} ]'.format(str(len(ext_total))))
309315
316
310317 async def images(target):
311318 global url, total, img_total
312 print(G + '[+]' + C + ' Extracting Images' + W, end = '')
319 print(G + '[+]' + C + ' Extracting Images' + W, end='')
313320 images = soup.find_all('img')
314321
315322 for link in images:
316323 url = link.get('src')
317 if url != None and len(url) > 1:
324 if url is not None and len(url) > 1:
318325 url_filter(target)
319326 img_total.append(url)
320327
321328 img_total = set(img_total)
322329 print(G + '['.rjust(14, '.') + ' {} ]'.format(str(len(img_total))))
323330
331
324332 async def sm_crawl():
325333 global sm_crawl_total
326 print(G + '[+]' + C + ' Crawling Sitemaps' + W, end = '')
334 print(G + '[+]' + C + ' Crawling Sitemaps' + W, end='')
327335
328336 threads = []
329337
337345 links = sm_soup.find_all('loc')
338346 for url in links:
339347 url = url.get_text()
340 if url != None:
348 if url is not None:
341349 sm_crawl_total.append(url)
342350 elif sm_sc == 404:
343351 print(R + '['.rjust(8, '.') + ' Not Found ]' + W)
348356
349357 for site_url in sm_total:
350358 if site_url != sm_url:
351 if site_url.endswith('xml') == True:
359 if site_url.endswith('xml') is True:
352360 t = threading.Thread(target=fetch, args=[site_url])
353361 t.daemon = True
354362 threads.append(t)
360368 sm_crawl_total = set(sm_crawl_total)
361369 print(G + '['.rjust(14, '.') + ' {} ]'.format(str(len(sm_crawl_total))))
362370
371
363372 async def js_crawl():
364373 global js_crawl_total
365 print(G + '[+]' + C + ' Crawling Javascripts' + W, end = '')
374 print(G + '[+]' + C + ' Crawling Javascripts' + W, end='')
366375
367376 threads = []
368377
394403 js_crawl_total = set(js_crawl_total)
395404 print(G + '['.rjust(11, '.') + ' {} ]'.format(str(len(js_crawl_total))))
396405
406
397407 def out(target, output, data):
398408 global total
399409
422432
423433 data['module-Crawler'].update(
424434 {
425 'Count ( Robots )': str(len(r_total)),
426 'Count ( Sitemap )': str(len(sm_total)),
427 'Count ( CSS )': str(len(css_total)),
428 'Count ( JS )': str(len(js_total)),
429 'Count ( Links in JS )': str(len(js_crawl_total)),
435 'Count ( Robots )': str(len(r_total)),
436 'Count ( Sitemap )': str(len(sm_total)),
437 'Count ( CSS )': str(len(css_total)),
438 'Count ( JS )': str(len(js_total)),
439 'Count ( Links in JS )': str(len(js_crawl_total)),
430440 'Count ( Links in Sitemaps )': str(len(sm_crawl_total)),
431 'Count ( Internal )': str(len(int_total)),
432 'Count ( External )': str(len(ext_total)),
433 'Count ( Images )': str(len(img_total)),
441 'Count ( Internal )': str(len(int_total)),
442 'Count ( External )': str(len(ext_total)),
443 'Count ( Images )': str(len(img_total)),
434444 'count ( Wayback Machine )': str(len(wayback_total)),
435445 'Count ( Total )': str(len(total))
436446 })
437
447
438448 if len(r_total) != 0:
439449 data['module-Crawler'].update({'Robots': list(r_total)})
440
450
441451 if len(sm_total) != 0:
442452 data['module-Crawler'].update({'Sitemaps': list(sm_total)})
443
453
444454 if len(css_total) != 0:
445455 data['module-Crawler'].update({'CSS': list(css_total)})
446
456
447457 if len(js_total) != 0:
448458 data['module-Crawler'].update({'Javascripts': list(js_total)})
449459
450460 if len(js_crawl_total) != 0:
451461 data['module-Crawler'].update({'Links inside Javascripts': list(js_crawl_total)})
452
462
453463 if len(sm_crawl_total) != 0:
454464 data['module-Crawler'].update({'Links Inside Sitemaps': list(sm_crawl_total)})
455
465
456466 if len(int_total) != 0:
457467 data['module-Crawler'].update({'Internal Links': list(int_total)})
458
468
459469 if len(ext_total) != 0:
460470 data['module-Crawler'].update({'External Links': list(ext_total)})
461
471
462472 if len(img_total) != 0:
463473 data['module-Crawler'].update({'Images': list(img_total)})
464
474
465475 if len(wayback_total) != 0:
466 data['module-Crawler'].update({'Wayback Machine': list(wayback_total)})
476 data['module-Crawler'].update({'Wayback Machine': list(wayback_total)})
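
The modules/crawler.py changes are style clean-ups (`is`/`is not` comparisons, PEP 8 spacing around keyword arguments, two blank lines between functions) and do not change the crawl logic. The Wayback Machine step touched here works in two requests: an availability probe against archive.org/wayback/available, then a CDX query limited to HTTP 200 captures from the current and previous year. A standalone, hedged sketch of that flow (the CDX parameters are taken from the diff; the availability-response handling and the `wayback_urls` helper are assumptions, not the upstream functions):

```python
import datetime
import requests


def wayback_urls(domain: str, timeout: int = 10) -> list[str]:
    """Illustrative two-step Wayback lookup mirroring the crawler's flow."""
    # Step 1: is the domain archived at all?
    avail = requests.get(
        'http://archive.org/wayback/available',
        params={'url': domain},
        timeout=timeout,
    ).json()
    if not avail.get('archived_snapshots'):
        return []

    # Step 2: pull 200-status captures from the last two calendar years.
    curr_yr = datetime.date.today().year
    resp = requests.get(
        'http://web.archive.org/cdx/search/cdx',
        params={
            'url': f'{domain}/*',
            'fl': 'original',
            'fastLatest': 'true',
            'from': str(curr_yr - 1),
            'to': str(curr_yr),
            'filter': 'statuscode:200',
        },
        timeout=timeout,
    )
    return list(set(filter(None, resp.text.split('\n'))))


if __name__ == '__main__':
    print(len(wayback_urls('example.com')))
```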
229229 loop.run_until_complete(wayback(target, dserv, tout))
230230 wm_filter()
231231 dir_output(output, data)
232 loop.close()
232 loop.close()
44 dnslib
55 aiohttp
66 aiodns
7 psycopg2
7 psycopg2-binary
88 tldextract
99 icmplib
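
In requirements.txt, `psycopg2` becomes `psycopg2-binary`, which ships a pre-built wheel and therefore installs without the PostgreSQL client headers and a C toolchain. The rename also illustrates why the release could drop the old runtime dependency check that walked requirements.txt through `importlib.util.find_spec()`: a PyPI distribution name is not always the import name (`psycopg2-binary` is imported as `psycopg2`). If such a sanity check is still wanted, a hedged sketch with an explicit distribution-to-module mapping follows (the mapping entries and the plain-text requirements parsing are assumptions for illustration):

```python
import importlib.util

# Assumed mapping from PyPI distribution names to import names; extend as needed.
IMPORT_NAMES = {'psycopg2-binary': 'psycopg2', 'beautifulsoup4': 'bs4'}

with open('requirements.txt') as rqr:
    packages = [line.strip() for line in rqr if line.strip()]

missing = [
    pkg for pkg in packages
    if importlib.util.find_spec(IMPORT_NAMES.get(pkg, pkg)) is None
]

if missing:
    print('[-] Missing packages: ' + ', '.join(missing))
    print('[>] Run: pip3 install -r requirements.txt')
```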