|  |  | @ -0,0 +1,72 @@ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | ''' | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 参考   https://blog.csdn.net/weixin_45081575/article/details/112621581 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 控制当前已经打开的 chrome浏览器窗口 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 第一步:在终端运行:chrome.exe --remote-debugging-port=9527 --user-data-dir="D:\CTF\python_project\seebug\selenium_chrome" | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | --user-data-dir="" 是在单独的配置文件中启动 chrome浏览器,可以理解为 新的浏览器,记得创建对应文件夹;其中 9527 为端口号,可自行指定。 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 第二步:输入目标网址:https://www.seebug.org/vuldb/vulnerabilities?page=1 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | ''' | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | import time#控制爬取速度 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | import pymysql#进行数据库操作 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | from tqdm import tqdm#进度条 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | from selenium import webdriver | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | from selenium.webdriver.chrome.options import Options #selenium配置设置 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | from fake_useragent import UserAgent | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | ua = UserAgent() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | options = Options() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | # options.add_argument("--disable-blink-features=AutomationControlled") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | options.add_experimental_option("debuggerAddress", "127.0.0.1:9527") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | options.add_argument(f"user-agent={ua.random}") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | chrome = webdriver.Chrome(options=options) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | def get_data(allpage): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     data=[] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     for page in tqdm(range(allpage),desc='数据爬取',unit='页'): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         tr_list = chrome.find_elements_by_xpath('/html/body/div[2]/div/div/div/div/table/tbody/tr') | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         for tr in tr_list: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             one_info={} | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             one_info['SSV_ID'] = tr.find_element_by_xpath('./td[1]/a').text | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             one_info['href'] = tr.find_element_by_xpath('./td[1]/a').get_attribute('href') | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             one_info['time'] = tr.find_element_by_xpath('./td[2]').text | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             one_info['name'] = tr.find_element_by_xpath('./td[4]/a').text | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             data.append(one_info) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         try: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             chrome.find_element_by_xpath('//a[@aria-label="Next"]').click() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             time.sleep(1) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         except: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             print("似乎没有下一页了") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     return data | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | def save_data(host, user, password, database, data): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     try: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         db = pymysql.connect(host=host, user=user, password=password, database=database, charset='utf8') | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             # print(db) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         cursor = db.cursor()  # 游标对象 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     except EOFError as e: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         print("[-]连接数据库失败!!!!!!!") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         exit() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     try: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         cursor.execute("CREATE TABLE seebug (Numbering char(50),href char(100),time char(50),name varchar(1000))") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         db.commit() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         print("[+]创建表成功") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     except: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         print("[-]创建表失败,该表似乎已经存在") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     for i in tqdm(data, desc="[+]正在存储数据:", total=len(data), unit='MB'): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         try: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             cursor.execute("INSERT INTO seebug (Numbering,href,time,name) VALUES(%s,%s,%s,%s)",(str(i['SSV_ID']),str(i['href']),str(i['time']),str(i['name']))) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             db.commit() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         except: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             print("[-]存储数据出错") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             exit() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     cursor.close() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     db.close() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     print("[+]数据存储成功") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | a =get_data(40)#爬取4页,程序是从当前页面开始爬取的 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | # print(a) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | save_data('124.222.200.220','root','Fei1912760','data',a) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 |