|
|
|
@ -0,0 +1,72 @@
|
|
|
|
|
'''
|
|
|
|
|
参考 https://blog.csdn.net/weixin_45081575/article/details/112621581
|
|
|
|
|
控制当前已经打开的 chrome浏览器窗口
|
|
|
|
|
第一步:在终端运行:chrome.exe --remote-debugging-port=9527 --user-data-dir="D:\CTF\python_project\seebug\selenium_chrome"
|
|
|
|
|
--user-data-dir="" 是在单独的配置文件中启动 chrome 浏览器,可以理解为新的浏览器,记得创建对应文件夹;其中 9527 为端口号,可自行指定。
|
|
|
|
|
|
|
|
|
|
第二步:输入目标网址:https://www.seebug.org/vuldb/vulnerabilities?page=1
|
|
|
|
|
'''
|
|
|
|
|
import os  # environment overrides for the database settings
import sys  # explicit exit status on fatal errors
import time  # throttles the crawl between page turns

import pymysql  # database access
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options  # selenium configuration
from selenium.webdriver.common.by import By
from tqdm import tqdm  # progress bar
|
|
|
|
|
# Source of randomised User-Agent strings.
ua = UserAgent()

# Attach to the Chrome instance that was started by hand with
# --remote-debugging-port=9527 (see the module docstring) rather than
# launching a fresh browser.
options = Options()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9527")
# NOTE(review): command-line arguments are normally only applied when the
# driver launches Chrome itself; presumably this user-agent override is
# ignored when attaching through debuggerAddress -- confirm.
options.add_argument("user-agent={}".format(ua.random))

# Shared driver handle used by the scraping code below.
chrome = webdriver.Chrome(options=options)
|
|
|
|
|
|
|
|
|
|
def get_data(allpage):
    """Scrape table rows from the page currently open in the attached Chrome.

    Reads every row of the results table on the current page, clicks the
    pagination link labelled "Next", waits one second, and repeats.  The
    module-level ``chrome`` driver is used, so scraping starts from whatever
    page the browser is showing when this function is called.

    Args:
        allpage: Maximum number of pages to read.

    Returns:
        A list with one dict per table row, holding the keys ``SSV_ID``,
        ``href``, ``time`` and ``name``.
    """
    data = []
    for _ in tqdm(range(allpage), desc='数据爬取', unit='页'):
        # find_element(By.XPATH, ...) is used because the
        # find_element_by_xpath helpers are gone from current Selenium 4.
        rows = chrome.find_elements(
            By.XPATH, '/html/body/div[2]/div/div/div/div/table/tbody/tr')
        for tr in rows:
            # The first cell's link supplies both the ID text and the URL,
            # so look it up once.
            id_link = tr.find_element(By.XPATH, './td[1]/a')
            data.append({
                'SSV_ID': id_link.text,
                'href': id_link.get_attribute('href'),
                'time': tr.find_element(By.XPATH, './td[2]').text,
                'name': tr.find_element(By.XPATH, './td[4]/a').text,
            })

        try:
            chrome.find_element(By.XPATH, '//a[@aria-label="Next"]').click()
        except WebDriverException:
            print("似乎没有下一页了")
            # Stop here: continuing would re-read the same page on every
            # remaining iteration and append duplicate rows.
            break
        # Give the next page time to load and keep the request rate low.
        time.sleep(1)

    return data
|
|
|
|
|
|
|
|
|
|
def save_data(host, user, password, database, data):
    """Store scraped rows in the ``seebug`` table of a MySQL database.

    Connects with PyMySQL, attempts to create the ``seebug`` table (a failure
    here is reported and otherwise ignored, since the table usually already
    exists), then inserts one row per entry of ``data``.  All inserts are
    committed together after the loop, so a run that fails part-way stores
    none of its rows.

    Args:
        host: Database server address.
        user: Database user name.
        password: Password for ``user``.
        database: Name of the database (schema) to use.
        data: Sized sequence of dicts with the keys ``SSV_ID``, ``href``,
            ``time`` and ``name``.

    Raises:
        SystemExit: With status 1 if the connection or any insert fails.
    """
    try:
        db = pymysql.connect(host=host, user=user, password=password,
                             database=database, charset='utf8')
    except (pymysql.MySQLError, EOFError) as exc:
        # PyMySQL reports connection problems as MySQLError subclasses;
        # EOFError is kept because the original handler caught it.
        print("[-]连接数据库失败!!!!!!!", exc)
        sys.exit(1)

    cursor = db.cursor()
    try:
        try:
            cursor.execute("CREATE TABLE seebug (Numbering char(50),href char(100),time char(50),name varchar(1000))")
            db.commit()
            print("[+]创建表成功")
        except pymysql.MySQLError as exc:
            # Best effort: carry on and try the inserts against the
            # existing table, but show the real reason for the failure.
            print("[-]创建表失败,该表似乎已经存在", exc)

        for i in tqdm(data, desc="[+]正在存储数据:", total=len(data), unit='MB'):
            try:
                cursor.execute(
                    "INSERT INTO seebug (Numbering,href,time,name) VALUES(%s,%s,%s,%s)",
                    (str(i['SSV_ID']), str(i['href']), str(i['time']), str(i['name'])))
            except pymysql.MySQLError as exc:
                print("[-]存储数据出错", exc)
                db.rollback()
                sys.exit(1)
        # One commit for the whole batch instead of one round-trip per row.
        db.commit()
    finally:
        # Release the cursor and connection on the failure paths as well.
        cursor.close()
        db.close()

    print("[+]数据存储成功")
|
|
|
|
|
|
|
|
|
|
# Crawl 40 pages, starting from whichever page the browser currently shows.
a = get_data(40)

# SECURITY: the database address and root password were committed to source.
# They remain as fallbacks only so existing runs keep working; set the
# SEEBUG_DB_* environment variables instead, then rotate the password and
# remove the defaults.
save_data(
    os.environ.get('SEEBUG_DB_HOST', '124.222.200.220'),
    os.environ.get('SEEBUG_DB_USER', 'root'),
    os.environ.get('SEEBUG_DB_PASSWORD', 'Fei1912760'),
    os.environ.get('SEEBUG_DB_NAME', 'data'),
    a,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|