You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
MiaCTFer/client-1/subdomain/oneforall/modules/datasets/netcraft.py

84 lines
2.5 KiB

import hashlib
import re
import time
from urllib import parse
from client.subdomain.oneforall.common.query import Query
class NetCraft(Query):
    """
    Subdomain collector backed by the NetCraft DNS search site.

    Results are fetched page by page and every matched subdomain is merged
    into ``self.subdomains``.
    """

    def __init__(self, domain):
        """
        :param str domain: domain to query; it is passed through
            ``self.register`` before being stored
        """
        Query.__init__(self)
        self.domain = self.register(domain)
        self.module = 'Dataset'
        self.source = 'NetCraftQuery'
        self.init = 'https://searchdns.netcraft.com/'
        self.addr = 'https://searchdns.netcraft.com/?restriction=site+contains'
        self.page_num = 1  # value sent as the ``from`` query parameter
        self.per_page_num = 20  # added to ``page_num`` after each page

    def bypass_verification(self):
        """
        Bypass NetCraft's JS verification.

        Requests the landing page, keeps its cookies, and stores the SHA-1
        hex digest of the URL-decoded challenge cookie as the response
        cookie.

        :return: False when the landing page request fails, True otherwise
        :rtype: bool
        """
        self.header = self.get_header()  # Netcraft checks the User-Agent
        resp = self.get(self.init)
        if not resp:
            return False
        self.cookie = resp.cookies
        try:
            cookie_value = self.cookie['netcraft_js_verification_challenge']
        except KeyError:
            # No challenge cookie was issued, so there is nothing to answer.
            # Carry on instead of crashing; query() stops on its own when a
            # page yields no subdomains.
            return True
        cookie_encode = parse.unquote(cookie_value).encode('utf-8')
        verify_taken = hashlib.sha1(cookie_encode).hexdigest()
        self.cookie['netcraft_js_verification_response'] = verify_taken
        return True

    def query(self):
        """
        Query the search interface for subdomains and match them.

        Stops when a request fails, when a page yields no subdomains, when
        the page has no 'Next page' marker, or when the cursor for the next
        page cannot be found in the response.
        """
        if not self.bypass_verification():
            return
        # The domain is escaped so its dots are matched literally; compiled
        # once because it does not change between pages.
        last_pattern = re.compile(r'&last=.*' + re.escape(self.domain))
        last = ''
        while True:
            time.sleep(self.delay)
            self.header = self.get_header()
            self.proxy = self.get_proxy(self.source)
            params = {'host': '*.' + self.domain, 'from': self.page_num}
            resp = self.get(self.addr + last, params)
            if not resp:
                return
            subdomains = self.match(self.domain, resp.text)
            if not subdomains:  # stop when the search finds no subdomains
                break
            # Merge this page's subdomains into the collected results
            self.subdomains = self.subdomains.union(subdomains)
            if 'Next page' not in resp.text:  # stop when there is no next page
                break
            matched = last_pattern.search(resp.text)
            if matched is None:
                # A next page is advertised but its '&last=' cursor is
                # missing; stop rather than fail on a None match.
                break
            last = matched.group(0)
            self.page_num += self.per_page_num

    def run(self):
        """
        Class execution entry point.

        Calls ``begin``, ``query``, ``finish``, ``save_json``, ``gen_result``
        and ``save_db`` in that order.
        """
        self.begin()
        self.query()
        self.finish()
        self.save_json()
        self.gen_result()
        self.save_db()
def do(domain):  # uniform entry-point name, convenient for multi-threaded callers
    """
    Uniform invocation entry point for this module.

    Builds a ``NetCraft`` instance for the given domain and runs it.

    :param str domain: domain name
    """
    NetCraft(domain).run()
# When executed directly as a script, run the collector once on a sample domain.
if __name__ == '__main__':
    do(domain='example.com')