import time

import pymysql
import requests
from fake_useragent import UserAgent
from lxml import etree
from tqdm import tqdm

# Random User-Agent per run to reduce the chance of simple UA-based blocking.
ua = UserAgent()


class cnnvdSpider:
    """Scrape vulnerability listings from cnnvd.org.cn and store them in MySQL."""

    def spider_page(self, star_page, end_page):
        """Crawl listing pages ``star_page``..``end_page`` (inclusive).

        Returns a list of dicts with keys ``name``, ``Numbering``, ``href``
        and ``time``, one per vulnerability entry found on the pages.
        """
        headers = {
            "User-Agent": ua.random,
            "Referer": "http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?isArea=0&repairLd=",
        }
        # POST form payload expected by the query endpoint; most fields are
        # intentionally empty filters.
        data = {
            'CSRFToken': '',
            'cvHazardRating': '',
            'cvVultype': '',
            'qstartdateXq': '',
            'cvUsedStyle': '',
            'cvCnnvdUpdatedateXq': '',
            'cpvendor': '',
            'relLdKey': '',
            'hotLd': '',
            'isArea': '',
            'qcvCname': '',
            'qcvCnnvdid': 'CNNVD或CVE编号',
            'qstartdate': '',
            'qenddate': '',
        }
        cnnvd_datas = []
        # Reuse one TCP connection for all page requests instead of opening a
        # fresh one per page.
        session = requests.Session()
        for k in tqdm(range(star_page, end_page + 1), desc="[+]正在爬取数据:", unit="页"):
            url = ("http://www.cnnvd.org.cn/web/vulnerability/querylist.tag"
                   "?pageno=" + str(k) + "&repairLd=")
            # timeout guards against the request hanging forever
            html = session.post(url=url, headers=headers, data=data, timeout=30).text
            page = etree.HTML(html)
            li_list = page.xpath("/html/body/div[4]/div/div[1]/div/div[2]/ul/li")
            for i in li_list:
                # Fetch each xpath result first so a malformed row can be
                # skipped instead of raising IndexError and killing the crawl.
                name = i.xpath("./div[1]/a/text()")
                numbering = i.xpath("./div[1]/p/a/text()")
                href = i.xpath("./div[1]/a/@href")
                pub_time = i.xpath("./div[2]/text()")
                if not (name and numbering and href and len(pub_time) > 2):
                    continue
                cnnvd_datas.append({
                    'name': name[0].strip(),
                    'Numbering': numbering[0],
                    'href': "http://www.cnnvd.org.cn/" + href[0].strip(),
                    'time': pub_time[2].strip(),
                })
            # time.sleep(1)
        print("[+]数据爬取完成")
        return cnnvd_datas

    def save_data(self, host, user, password, database, data):
        """Persist crawled records into the MySQL table ``cnnvd``.

        ``data`` is the list of dicts produced by :meth:`spider_page`.
        Exits the process if an insert fails.
        """
        db = pymysql.connect(host=host, user=user, password=password,
                             database=database, charset='utf8')
        try:
            cursor = db.cursor()  # cursor object
            try:
                # IF NOT EXISTS makes table creation idempotent instead of
                # relying on a bare except to detect an existing table.
                cursor.execute(
                    "CREATE TABLE IF NOT EXISTS cnnvd "
                    "(name char(100),Numbering char(20),href char(100),time char(10))"
                )
                db.commit()
                print("[+]创建表成功")
            except pymysql.Error:
                print("[-]创建表失败,该表或许已经存在")
            # Parameterized query: the driver escapes the values, which
            # prevents SQL injection / quoting breakage from scraped text
            # (the original interpolated values with % formatting).
            sql = "INSERT INTO cnnvd (name,Numbering,href,time) VALUES(%s,%s,%s,%s)"
            for i in tqdm(data, desc="[+]正在存储数据:", total=len(data), unit='MB'):
                try:
                    cursor.execute(sql, (i['name'], i['Numbering'], i['href'], i['time']))
                    db.commit()
                except pymysql.Error:
                    print("[-]存储数据出错")
                    raise SystemExit(1)
            cursor.close()
            print("[+]数据存储成功")
        finally:
            # Always release the connection, even when an insert fails
            # (the original leaked it on the error path).
            db.close()


if __name__ == "__main__":
    a = cnnvdSpider()
    c = a.spider_page(1, 5)
    # NOTE(review): hard-coded production host/credentials in source —
    # move these to environment variables or a config file.
    a.save_data('124.222.200.220', 'root', 'Fei1912760', 'data', c)