"""Move crawled vulnerability records from Redis lists into MySQL tables."""
import json
import re

import pymysql
import redis
import func_timeout  # kept: other modules may still reference it through this file
from func_timeout import func_set_timeout  # kept for importers; no longer applied below

# Matches a quoted "%s" placeholder ('%s' or "%s") as written in legacy
# INSERT templates that were filled in with Python string formatting.
_QUOTED_PLACEHOLDER = re.compile(r"""(['"])%s\1""")


class Vuldboption:
    """Drain JSON items from a Redis list and insert them into a MySQL table."""

    def table_exists(self, con, table_name):
        """Report whether ``table_name`` is listed by ``show tables``.

        Args:
            con: An open database cursor (it is used to execute the query and
                fetch the rows; it is not closed here).
            table_name: Exact table name to look for.

        Returns:
            1 if the table is listed, 0 otherwise.
        """
        con.execute('show tables;')
        for row in con.fetchall():
            # Rows are tuples with the default cursor; dict-style cursors
            # return mappings, so compare against their values instead.
            names = row.values() if isinstance(row, dict) else row
            if table_name in names:
                return 1
        return 0

    def process_item(self, table_name, redis_item_name, sql1, sql2, idle_timeout=1):
        """Pop every queued item from a Redis list and insert it into MySQL.

        The table is created with ``sql1`` when ``show tables`` does not list
        ``table_name``. Each popped value is parsed as JSON and its ``time``,
        ``title``, ``Numbering`` and ``url`` fields are inserted with ``sql2``.
        The loop ends once the list has stayed empty for ``idle_timeout``
        seconds.

        Args:
            table_name: Name of the destination table.
            redis_item_name: Key of the Redis list to drain.
            sql1: ``CREATE TABLE`` statement used when the table is missing.
            sql2: ``INSERT`` statement with four ``%s`` placeholders. Quoted
                placeholders (``'%s'``) from older templates are accepted and
                converted to driver placeholders.
            idle_timeout: Seconds to wait on an empty list before returning.
                A value of 0 makes Redis block indefinitely.

        Returns:
            The number of rows inserted.

        Raises:
            KeyError: If a popped item lacks one of the four expected fields.
            ValueError: If a popped value is not valid JSON.
        """
        pool = redis.ConnectionPool(host='127.0.0.1', port=6379, db=0, password=None)
        redis_cli = redis.Redis(connection_pool=pool)
        conn = pymysql.connect(user='root',        # user name
                               password='root',    # password
                               db='vulcrawl',      # database name
                               host='127.0.0.1',   # address
                               port=3306,
                               charset='utf8')

        # Values are passed to the driver separately so it escapes them; the
        # driver adds its own quoting, hence the quotes around %s are removed.
        insert_sql = _QUOTED_PLACEHOLDER.sub('%s', sql2)

        saved = 0
        try:
            # Create the destination table if it does not exist yet.
            with conn.cursor() as cursor:
                if self.table_exists(cursor, table_name) != 1:
                    cursor.execute(sql1)

            while True:
                # Pop exactly one item per iteration. A second pop used as a
                # loop condition would silently discard every other record.
                popped = redis_cli.blpop(redis_item_name, timeout=idle_timeout)
                if popped is None:
                    # Nothing arrived within idle_timeout: the queue is drained.
                    break
                _source, data = popped
                item = json.loads(data)

                with conn.cursor() as cursor:
                    cursor.execute(
                        insert_sql,
                        (item['time'], item['title'], item['Numbering'], item['url']),
                    )
                conn.commit()  # commit each record as soon as it is inserted

                saved += 1
                print("正在保存第:" + str(saved) + "条记录")
        finally:
            conn.close()
            pool.disconnect()
        return saved


if __name__ == '__main__':
    creat_cnnvd_table = "create table db_cnnvd(time VARCHAR (30),title VARCHAR (1000),Numbering VARCHAR (40),url VARCHAR (200))"
    insert_cnnvd_data = "insert into db_cnnvd(time,title,Numbering,url)VALUES (%s,%s,%s,%s)"
    creat_woodyun_table = "create table db_woodyun(time VARCHAR (100),title VARCHAR (1000),Numbering VARCHAR (40),url VARCHAR (200))"
    insert_woodyun_data = "insert into db_woodyun(time,title,Numbering,url)VALUES (%s,%s,%s,%s)"

    # Drain the CNNVD queue first, then the WooYun queue. Each call returns on
    # its own once its queue has been idle, so no timeout exception is needed
    # to move on to the second queue.
    vul_cnnvd = Vuldboption()
    vul_cnnvd.process_item('db_cnnvd', 'vulcrawl:items', creat_cnnvd_table, insert_cnnvd_data)

    vul_woodyun = Vuldboption()
    vul_woodyun.process_item('db_woodyun', 'vulcrawl2:items', creat_woodyun_table, insert_woodyun_data)