# Reconstructed from the patch "ADD file via upload", which creates get_data.py.
"""Scrape vulnerability listings from cnnvd.org.cn and store them in MySQL.

Running this file as a script fetches listing pages 1-5 and inserts every
parsed entry into the ``cnnvd`` table of the configured database.
"""
import os
import sys
import time  # not used at present; retained from the original import list
from contextlib import closing

import pymysql
import requests
from fake_useragent import UserAgent  # supplies random User-Agent strings
from lxml import etree
from tqdm import tqdm

ua = UserAgent()


class cnnvdSpider:
    """Fetch CNNVD listing pages and persist the parsed entries."""

    # Seconds to wait on a single page request; without a timeout a stalled
    # server would block the scrape indefinitely.
    REQUEST_TIMEOUT = 30

    @staticmethod
    def _parse_item(li):
        """Build the record dict for one ``<li>`` element of the listing.

        Raises:
            IndexError: if one of the expected child nodes is absent.
        """
        return {
            'name': li.xpath("./div[1]/a/text()")[0].strip(),
            'Numbering': li.xpath("./div[1]/p/a/text()")[0],
            'href': "http://www.cnnvd.org.cn/" + li.xpath("./div[1]/a/@href")[0].strip(),
            'time': li.xpath("./div[2]/text()")[2].strip(),
        }

    def spider_page(self, star_page: int, end_page: int) -> list:
        """Scrape listing pages ``star_page`` through ``end_page`` inclusive.

        Each page is requested with a POST carrying the (mostly empty) query
        form. Entries whose markup lacks an expected node are skipped and
        counted instead of aborting the whole run.

        Args:
            star_page: First page number to fetch.
            end_page: Last page number to fetch (inclusive).

        Returns:
            A list of dicts with the keys ``name``, ``Numbering``, ``href``
            and ``time``, in page order.

        Raises:
            requests.RequestException: if a page request fails or times out.
        """
        headers = {
            # One random User-Agent is picked per call and reused for all pages.
            "User-Agent": ua.random,
            "Referer": "http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?isArea=0&repairLd=",
        }
        data = {
            'CSRFToken': '',
            'cvHazardRating': '',
            'cvVultype': '',
            'qstartdateXq': '',
            'cvUsedStyle': '',
            'cvCnnvdUpdatedateXq': '',
            'cpvendor': '',
            'relLdKey': '',
            'hotLd': '',
            'isArea': '',
            'qcvCname': '',
            'qcvCnnvdid': 'CNNVD或CVE编号',
            'qstartdate': '',
            'qenddate': '',
        }
        cnnvd_datas = []
        skipped = 0

        for k in tqdm(range(star_page, end_page + 1), desc="[+]正在爬取数据:", unit="页"):
            url = "http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?pageno=" + str(k) + "&repairLd="
            html = requests.post(
                url=url, headers=headers, data=data, timeout=self.REQUEST_TIMEOUT
            ).text
            page = etree.HTML(html)
            if page is None:
                # Nothing parseable came back for this page; move on.
                continue
            for li in page.xpath("/html/body/div[4]/div/div[1]/div/div[2]/ul/li"):
                try:
                    cnnvd_datas.append(self._parse_item(li))
                except IndexError:
                    # A single malformed entry should not discard the rest.
                    skipped += 1
        print("[+]数据爬取完成")
        if skipped:
            print(f"[-]有 {skipped} 条记录解析失败,已跳过")
        return cnnvd_datas

    def save_data(self, host: str, user: str, password: str, database: str, data) -> None:
        """Insert scraped records into the ``cnnvd`` table.

        The table is created first; if creation fails (for instance because
        it already exists) a notice is printed and insertion proceeds. Each
        row is committed individually, so rows stored before a failure stay
        in the database.

        Args:
            host: MySQL server address.
            user: MySQL user name.
            password: MySQL password.
            database: Name of the database to use.
            data: Sized sequence of dicts as returned by ``spider_page``.

        Raises:
            SystemExit: with status 1 if an INSERT fails.
            pymysql.MySQLError: if the connection cannot be established.
        """
        connection = pymysql.connect(
            host=host, user=user, password=password, database=database, charset='utf8'
        )
        # closing() guarantees both handles are released on every exit path,
        # including the sys.exit() below.
        with closing(connection) as db, closing(db.cursor()) as cursor:
            try:
                cursor.execute(
                    "CREATE TABLE cnnvd (name char(100),Numbering char(20),href char(100),time char(10))"
                )
                db.commit()
                print("[+]创建表成功")
            except pymysql.MySQLError:
                print("[-]创建表失败,该表或许已经存在")

            # Placeholders are filled in by the driver, which escapes the
            # values; scraped titles may contain quotes.
            sql = "INSERT INTO cnnvd (name,Numbering,href,time) VALUES(%s,%s,%s,%s)"
            # The progress bar counts rows, so label the unit as records.
            for row in tqdm(data, desc="[+]正在存储数据:", total=len(data), unit='条'):
                try:
                    cursor.execute(
                        sql, (row['name'], row['Numbering'], row['href'], row['time'])
                    )
                    db.commit()
                except pymysql.MySQLError as err:
                    db.rollback()
                    print("[-]存储数据出错:", err)
                    sys.exit(1)
        print("[+]数据存储成功")


def main() -> None:
    """Scrape pages 1-5 and store the result in MySQL."""
    spider = cnnvdSpider()
    records = spider.spider_page(1, 5)

    # NOTE(review): the fallback values below are credentials committed to
    # source control. They are kept only so the script behaves as before;
    # rotate them and supply the real values through the environment.
    spider.save_data(
        os.environ.get('CNNVD_DB_HOST', '124.222.200.220'),
        os.environ.get('CNNVD_DB_USER', 'root'),
        os.environ.get('CNNVD_DB_PASSWORD', 'Fei1912760'),
        os.environ.get('CNNVD_DB_NAME', 'data'),
        records,
    )


if __name__ == "__main__":
    main()