151 | 151 |
self.silent = silent
|
152 | 152 |
self.verbose = verbose
|
153 | 153 |
self.headers = {
|
154 | |
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
|
155 | |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
156 | |
'Accept-Language': 'en-US,en;q=0.8',
|
157 | |
'Accept-Encoding': 'gzip',
|
158 | |
}
|
|
154 |
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
|
|
155 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
156 |
'Accept-Language': 'en-US,en;q=0.8',
|
|
157 |
'Accept-Encoding': 'gzip',
|
|
158 |
}
|
159 | 159 |
self.print_banner()
|
160 | 160 |
|
161 | 161 |
def print_(self, text):
|
|
537 | 537 |
self.print_(e)
|
538 | 538 |
resp = None
|
539 | 539 |
return resp
|
|
540 |
|
|
541 |
def should_sleep(self):
    """Pause the caller for a randomly chosen whole number of seconds (1 or 2)."""
    # random.randint is inclusive on both ends, so the delay is 1 or 2.
    delay_seconds = random.randint(1, 2)
    time.sleep(delay_seconds)
|
540 | 544 |
|
541 | 545 |
def get_next(self, resp):
    """Return the absolute URL of the next results page.

    Looks in ``resp`` (the HTML text of the current results page) for the
    anchor whose text starts with ``Next Page`` and joins its ``href`` value
    onto the ``http://searchdns.netcraft.com`` base.

    Raises:
        IndexError: if ``resp`` contains no such anchor. The exception type
            matches what indexing an empty ``findall`` result would raise.
    """
    # [^>] and [^"] keep the match inside a single <a ...> tag, so an earlier
    # anchor on the same line cannot be swallowed into the captured href.
    link_regx = re.compile(r'<a[^>]*?href="([^"]*)">Next Page')
    match = link_regx.search(resp)
    if match is None:
        raise IndexError("no 'Next Page' link found in response")
    return 'http://searchdns.netcraft.com' + match.group(1)
|
547 | 550 |
|
548 | 551 |
def create_cookies(self, cookie):
|
|
568 | 571 |
while True:
|
569 | 572 |
resp = self.get_response(self.req(url, cookies))
|
570 | 573 |
self.extract_domains(resp)
|
571 | |
if 'Next page' not in resp:
|
|
574 |
if 'Next Page' not in resp:
|
572 | 575 |
return self.subdomains
|
573 | 576 |
break
|
574 | 577 |
url = self.get_next(resp)
|
|
578 |
self.should_sleep()
|
575 | 579 |
|
576 | 580 |
def extract_domains(self, resp):
|
577 | 581 |
links_list = list()
|
578 | |
link_regx = re.compile('<a href="http://toolbar.netcraft.com/site_report\?url=(.*)">')
|
|
582 |
link_regx = re.compile('<a class="results-table__host" href="(.*?)"')
|
579 | 583 |
try:
|
580 | 584 |
links_list = link_regx.findall(resp)
|
581 | 585 |
for link in links_list:
|
|
791 | 795 |
try:
|
792 | 796 |
links = link_regx.findall(resp)
|
793 | 797 |
for link in links:
|
794 | |
subdomain = link.strip()
|
795 | |
if not subdomain.endswith(self.domain) or '*' in subdomain:
|
796 | |
continue
|
797 | |
|
798 | |
if '@' in subdomain:
|
799 | |
subdomain = subdomain[subdomain.find('@')+1:]
|
800 | |
|
801 | |
if subdomain not in self.subdomains and subdomain != self.domain:
|
802 | |
if self.verbose:
|
803 | |
self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
|
804 | |
self.subdomains.append(subdomain.strip())
|
|
798 |
link = link.strip()
|
|
799 |
subdomains = []
|
|
800 |
if '<BR>' in link:
|
|
801 |
subdomains = link.split('<BR>')
|
|
802 |
else:
|
|
803 |
subdomains.append(link)
|
|
804 |
|
|
805 |
for subdomain in subdomains:
|
|
806 |
if not subdomain.endswith(self.domain) or '*' in subdomain:
|
|
807 |
continue
|
|
808 |
|
|
809 |
if '@' in subdomain:
|
|
810 |
subdomain = subdomain[subdomain.find('@')+1:]
|
|
811 |
|
|
812 |
if subdomain not in self.subdomains and subdomain != self.domain:
|
|
813 |
if self.verbose:
|
|
814 |
self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
|
|
815 |
self.subdomains.append(subdomain.strip())
|
805 | 816 |
except Exception as e:
|
|
817 |
print(e)
|
806 | 818 |
pass
|
807 | 819 |
|
808 | 820 |
|