'''Probe whether a port is running an HTTP service; if so, add it to the web asset table.'''

import os
import sys

sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../../../')

import requests
import chardet
from bs4 import BeautifulSoup
import random
import ipaddress
from concurrent import futures
import time
from urllib.parse import urlparse
import threading

from client.database import session, SrcPorts, SrcAssets
from client.webinfo.run import SelectIP, Check_Waf

requests.packages.urllib3.disable_warnings()

LOCK = threading.RLock()

user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/68.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) '
    'Gecko/20100101 Firefox/68.0',
    'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/68.0',
]


class UrlProbe:

    def __init__(self, ip_dict):
        '''ip_dict: {'ip': 'xxx', 'port': 123}'''
        self.ip_dict = ip_dict

    def _gen_random_ip(self):
        """Generate a random public IP address in dotted-decimal notation."""
        while True:
            ip = ipaddress.IPv4Address(random.randint(0, 2 ** 32 - 1))
            # is_global rejects private, loopback, link-local and other reserved ranges.
            if ip.is_global:
                return ip.exploded
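
    # For example (non-deterministic), _gen_random_ip() might return '93.184.216.34';
    # the result is used below to spoof the X-Forwarded-For / X-Real-IP headers.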

    def _gen_fake_header(self):
        """Generate forged request headers."""
        ua = random.choice(user_agents)
        ip = self._gen_random_ip()
        headers = {
            'Accept': 'text/html,application/xhtml+xml,'
                      'application/xml;q=0.9,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'DNT': '1',
            'Referer': 'https://www.google.com/',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': ua,
            'X-Forwarded-For': ip,
            'X-Real-IP': ip
        }
        return headers
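
    # A typical result (values vary per call) looks like:
    #     {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) ...',
    #      'X-Forwarded-For': '93.184.216.34', 'X-Real-IP': '93.184.216.34', ...}
    # The random UA and source-IP headers make repeated probes look less uniform.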

    def _check_http(self):
        '''Probe for an HTTP service: try plain HTTP first, fall back to HTTPS.'''
        url = f"http://{self.ip_dict['ip']}:{self.ip_dict['port']}"
        headers = self._gen_fake_header()
        try:
            response = requests.get(url, timeout=20, headers=headers)
        except requests.exceptions.SSLError:
            # The port speaks TLS, so retry the same host/port over HTTPS.
            url = f"https://{self.ip_dict['ip']}:{self.ip_dict['port']}"
            try:
                response = requests.get(url, timeout=20, verify=False, headers=headers)
            except Exception:
                return None
            else:
                return response
        except Exception:
            return None
        else:
            return response
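
    # Note: because requests follows redirects, probing a plain-HTTP URL that
    # redirects to HTTPS with an untrusted certificate also surfaces as an SSLError,
    # which the fallback branch above catches before retrying with verify=False.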

    def _get_banner(self, headers):
        '''Extract a banner from the 'Server' and 'X-Powered-By' response headers.'''
        server = headers.get('Server')
        powered = headers.get('X-Powered-By')
        # Join whichever values are present; return '' when both are missing.
        return ','.join(filter(None, [server, powered]))
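
    # For example, headers {'Server': 'nginx/1.16.1', 'X-Powered-By': 'PHP/7.3.11'}
    # yield 'nginx/1.16.1,PHP/7.3.11'; if only Server is set, just 'nginx/1.16.1'.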

    def _get_title(self, markup):
        '''Extract a page title, falling back through h1/h2/h3, meta tags, and short bodies.'''
        try:
            soup = BeautifulSoup(markup, 'lxml')
        except Exception:
            return None
        title = soup.title
        if title:
            return title.text.strip()
        h1 = soup.h1
        if h1:
            return h1.text.strip()
        h2 = soup.h2
        if h2:
            return h2.text.strip()
        h3 = soup.h3
        if h3:
            return h3.text.strip()
        desc = soup.find('meta', attrs={'name': 'description'})
        if desc and desc.get('content'):
            return desc['content'].strip()
        word = soup.find('meta', attrs={'name': 'keywords'})
        if word and word.get('content'):
            return word['content'].strip()
        # As a last resort, treat a very short document (or its visible text) as the title.
        if len(markup) <= 200:
            return markup.strip()
        text = soup.text
        if len(text) <= 200:
            return text.strip()
        return None
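
    # For instance, a page with no <title> but '<h1>Router Login</h1>' in the body
    # is recorded with the title 'Router Login'.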

    def run(self):
        print(f'[+]URL probe started: [{self.ip_dict}]')
        # Check for an HTTP service; bail out if the port doesn't speak HTTP(S).
        response = self._check_http()
        if response is None:
            print(f'[-]URL probe: [{self.ip_dict}] is not an HTTP service')
            return None
        if response.status_code == 200:
            # Detect the response encoding before decoding the body.
            detected = chardet.detect(response.content)
            response.encoding = detected['encoding']
            # Extract the page title and the response-header banner.
            title = self._get_title(markup=response.text)
            banner = self._get_banner(response.headers)
            # Assemble the asset record: IP, port, title, banner and final URL.
            assets_dict = self.ip_dict
            assets_dict['title'] = title
            assets_dict['banner'] = banner
            assets_dict['host'] = response.url
            return assets_dict
        else:
            print(f'[-]URL probe: [{self.ip_dict}] returned a non-200 status code')
            return None
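

# Minimal usage sketch (hypothetical host/port). run() returns an asset dict for
# HTTP services and None otherwise:
#
#     probe = UrlProbe({'ip': 'example.com', 'port': 8443})
#     info = probe.run()
#     if info:
#         print(info['host'], info['title'], info['banner'])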


def ReadPorts():
    '''Read up to 10 unscanned port records and mark them as claimed.'''
    port_sql = session.query(SrcPorts).filter(SrcPorts.port_url_scan == False).limit(10).all()
    session.commit()
    if port_sql:
        for port in port_sql:
            # Flag the row as scanned before probing so other workers skip it.
            port.port_url_scan = True
            session.add(port)
            try:
                session.commit()
            except Exception as error:
                session.rollback()
                print(f'[-]URL scan - failed to update port scan status: {error}')
            else:
                session.refresh(port, ['port_url_scan'])
    return port_sql
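
# Claiming rows (port_url_scan=True) before probing acts as a simple work queue:
# subsequent ReadPorts() calls skip already-claimed ports, at the cost of never
# retrying ports whose probe later fails.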


def WriteAsset(http_info, port_sql):
    '''Insert a probed HTTP service into the asset table, deduplicated by host.'''
    # Serialize access so concurrent worker threads don't race on the dedup check;
    # the context manager also guarantees the lock is released if a query raises.
    with LOCK:
        # Skip hosts that already have an asset record.
        asset_count = session.query(SrcAssets).filter(SrcAssets.asset_host == http_info['host']).count()
        session.commit()
        if not asset_count:
            srcasset_sql = SrcAssets(asset_name=port_sql.port_name, asset_host=http_info['host'],
                                     asset_subdomain=http_info['subdomain'],
                                     asset_title=http_info['title'],
                                     asset_ip=port_sql.port_ip, asset_area=http_info['area'],
                                     asset_waf=http_info['waf'], asset_cdn=False,
                                     asset_banner=http_info['banner'], asset_info='', asset_whois='')
            session.add(srcasset_sql)
            try:
                session.commit()
            except Exception as error:
                # Roll back the failed insert so the session stays usable.
                session.rollback()
                print(f'[-]URL probe - asset insert failed: {error}')


def main():
    print('[+]URL scan started')
    # Thread pool with at most 10 concurrent probe workers.
    pool = futures.ThreadPoolExecutor(max_workers=10)
    while True:
        # Fetch the next batch of unscanned ports.
        port_sql = ReadPorts()
        if not port_sql:
            # Nothing to do; poll again in 30 seconds.
            time.sleep(30)
        else:
            # Probe every port in the batch concurrently.
            wait_for = [pool.submit(action, sql_port) for sql_port in port_sql]
            # Block until the whole batch finishes; result() re-raises any
            # exception thrown inside action().
            for f in futures.as_completed(wait_for):
                f.result()


def action(sql_port):
    # Port 80 is skipped, presumably because the default HTTP service is already
    # captured by the domain-level scan.
    if sql_port.port_port == 80:
        return None
    try:
        # Parse the host out of the stored URL.
        host = urlparse(sql_port.port_host)
    except Exception:
        return None
    # Keep only the network-location part (the bare host).
    host = host.netloc
    print(f'[+]URL probe starting: {host}:{sql_port.port_port}')
    ip_dict = {'ip': host, 'port': sql_port.port_port}
    # Probe the host:port combination for an HTTP service.
    http_info = UrlProbe(ip_dict)
    info = http_info.run()
    if info:
        # Look up the geographic area for the IP and check for a WAF.
        area = SelectIP(sql_port.port_ip)
        flag, waf = Check_Waf(info['host'])
        # Enrich the probe result before persisting it as an asset.
        info['area'] = area
        info['waf'] = waf
        info['subdomain'] = host
        WriteAsset(info, sql_port)


if __name__ == '__main__':
    main()