You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

73 lines
3.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

'''
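Reference: https://blog.csdn.net/weixin_45081575/article/details/112621581
Controls a Chrome browser window that is already open.
Step 1: in a terminal, run chrome.exe --remote-debugging-port=9527 --user-data-dir="D:\CTF\python_project\seebug\selenium_chrome"
--user-data-dir="" starts Chrome with a separate profile directory, which can be thought of as a fresh browser; remember to create that folder first. 9527 is the port number and can be changed freely, as long as it matches the debuggerAddress option set in the code below.
Step 2: open the target URL https://www.seebug.org/vuldb/vulnerabilities?page=1 in that browser.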
'''
import time#控制爬取速度
import pymysql#进行数据库操作
from tqdm import tqdm#进度条
from selenium import webdriver
from selenium.webdriver.chrome.options import Options #selenium配置设置
from fake_useragent import UserAgent
# Source of random User-Agent strings (fake_useragent).
ua = UserAgent()
# Selenium Chrome options used to build the driver below.
options = Options()
# options.add_argument("--disable-blink-features=AutomationControlled")
# Point the driver at the Chrome instance started with
# --remote-debugging-port=9527 (see the module docstring) instead of
# describing a fresh browser launch.
options.add_experimental_option("debuggerAddress", "127.0.0.1:9527")
# Ask for a randomly chosen User-Agent string.
# NOTE(review): when attaching to an already-running browser this launch
# argument may have no effect -- confirm.
options.add_argument(f"user-agent={ua.random}")
# Module-level driver handle; get_data() reads this global directly.
chrome = webdriver.Chrome(options=options)
def get_data(allpage):
    """Scrape table rows from the listing page open in the attached browser.

    Starting from whatever page the module-level ``chrome`` driver is
    currently showing, read every row of the results table, click the
    "Next" pagination link, wait one second, and repeat for at most
    ``allpage`` pages. Scraping stops early when the "Next" link cannot be
    found or clicked, so the last page is never read more than once.

    Args:
        allpage: Maximum number of pages to scrape.

    Returns:
        A list of dicts, one per table row, with the keys ``SSV_ID``,
        ``href``, ``time`` and ``name``.
    """
    # Imported locally so the function stays self-contained; both names come
    # from the selenium package the file already depends on.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    data = []
    for _ in tqdm(range(allpage), desc='数据爬取', unit=''):
        # The find_element(s)_by_xpath helpers were removed in Selenium 4.3;
        # find_element(s)(By.XPATH, ...) works on both old and new releases.
        rows = chrome.find_elements(
            By.XPATH, '/html/body/div[2]/div/div/div/div/table/tbody/tr')
        for tr in rows:
            # The first cell's link carries both the ID text and the URL,
            # so look it up once.
            id_link = tr.find_element(By.XPATH, './td[1]/a')
            data.append({
                'SSV_ID': id_link.text,
                'href': id_link.get_attribute('href'),
                'time': tr.find_element(By.XPATH, './td[2]').text,
                'name': tr.find_element(By.XPATH, './td[4]/a').text,
            })
        try:
            chrome.find_element(By.XPATH, '//a[@aria-label="Next"]').click()
        except WebDriverException:
            print("似乎没有下一页了")
            # Without a next page, further iterations would re-read the same
            # rows and append duplicates.
            break
        # Throttle so the next page has time to load.
        time.sleep(1)
    return data
def save_data(host, user, password, database, data):
    """Store scraped records in the ``seebug`` table of a MySQL database.

    Connects with pymysql, tries to create the ``seebug`` table, then inserts
    one row per record. Each insert is committed on its own, so rows stored
    before a failure are kept. The cursor and connection are closed on every
    exit path.

    Args:
        host: Database server host.
        user: Database user name.
        password: Password for ``user``.
        database: Name of the database to use.
        data: Sized iterable of dicts with the keys ``SSV_ID``, ``href``,
            ``time`` and ``name``.

    Raises:
        SystemExit: If the connection cannot be established or an insert
            fails (exit status 1).
    """
    try:
        db = pymysql.connect(host=host, user=user, password=password,
                             database=database, charset='utf8')
    except (pymysql.MySQLError, EOFError) as e:
        # pymysql reports connection problems as MySQLError subclasses;
        # EOFError is kept so anything the old handler caught still is.
        print("[-]连接数据库失败!!!!!!!", e)
        raise SystemExit(1) from e

    try:
        with db.cursor() as cursor:
            try:
                cursor.execute("CREATE TABLE seebug (Numbering char(50),href char(100),time char(50),name varchar(1000))")
                db.commit()
                print("[+]创建表成功")
            except pymysql.MySQLError:
                # Best effort: the usual cause is that the table already
                # exists, in which case the inserts below still work.
                print("[-]创建表失败,该表似乎已经存在")

            for i in tqdm(data, desc="[+]正在存储数据:", total=len(data), unit='MB'):
                try:
                    cursor.execute(
                        "INSERT INTO seebug (Numbering,href,time,name) VALUES(%s,%s,%s,%s)",
                        (str(i['SSV_ID']), str(i['href']), str(i['time']), str(i['name'])),
                    )
                    db.commit()
                except pymysql.MySQLError as e:
                    print("[-]存储数据出错", e)
                    raise SystemExit(1) from e
    finally:
        # Runs on success, on SystemExit and on unexpected errors, so the
        # connection is never leaked.
        db.close()
    print("[+]数据存储成功")
# Scrape up to 40 pages, starting from the page currently open in the
# browser. (The original note here said 4 pages, but the argument is 40.)
a =get_data(40)
# print(a)
# NOTE(review): the database host, user and password are hard-coded in
# source; consider loading them from the environment or a config file and
# rotating this password.
save_data('124.222.200.220','root','Fei1912760','data',a)