import json
import re

import func_timeout
import pymysql
import redis
from func_timeout import func_set_timeout


class Vuldboption:
    """Move crawled vulnerability records from a Redis list into a MySQL table."""

    def table_exists(self, con, table_name):
        """Report whether ``table_name`` is listed by ``show tables``.

        Args:
            con: An open database cursor (despite the name, not a connection).
            table_name: Name of the table to look for.

        Returns:
            1 if the table is listed, 0 otherwise.
        """
        con.execute('show tables;')
        for row in con.fetchall():
            # Rows are tuples with the default cursor and dicts with a
            # dict-style cursor; compare against the values in both cases
            # instead of regex-parsing the repr of the result set.
            names = row.values() if isinstance(row, dict) else row
            if table_name in names:
                return 1
        return 0

    def process_item(self, table_name, redis_item_name, sql1, sql2,
                     idle_timeout=5):
        """Pop JSON records from a Redis list and insert them into MySQL.

        Each record must be a JSON object with the keys ``time``, ``title``,
        ``Numbering`` and ``url``. The loop ends once the list has stayed
        empty for ``idle_timeout`` seconds. This replaces the earlier
        one-second ``func_set_timeout`` wrapper, so the method no longer
        raises ``FunctionTimedOut`` and is no longer cut off mid-queue.

        Args:
            table_name: Table that must exist before inserting.
            redis_item_name: Key of the Redis list to drain.
            sql1: ``CREATE TABLE`` statement executed when ``table_name`` is
                not found.
            sql2: ``INSERT`` statement with four ``%s`` placeholders, in the
                order time, title, Numbering, url.
            idle_timeout: Seconds to wait on an empty list before stopping.
                0 blocks forever, as the Redis BLPOP command defines it.

        Returns:
            The number of records inserted.
        """
        pool = redis.ConnectionPool(host='127.0.0.1', port=6379, db=0, password=None)
        redis_cli = redis.Redis(connection_pool=pool)

        conn = pymysql.connect(user='root',       # user name
                               password='root',   # password
                               db='vulcrawl',     # database name
                               host='127.0.0.1',  # server address
                               port=3306,
                               charset='utf8')

        # Values are bound by the driver, which quotes and escapes them, so
        # templates written for string interpolation ('%s') are normalized
        # to bare placeholders to avoid double quoting.
        insert_sql = sql2.replace("'%s'", "%s").replace('"%s"', "%s")

        offset = 0
        try:
            # Create the table when it does not exist yet.
            with conn.cursor() as cursor:
                if self.table_exists(cursor, table_name) != 1:
                    cursor.execute(sql1)

            while True:
                # Pop exactly one record per iteration; None means the list
                # stayed empty for idle_timeout seconds.
                popped = redis_cli.blpop(redis_item_name, timeout=idle_timeout)
                if popped is None:
                    break
                _source, data = popped

                # Decode the JSON payload into a dict.
                item = json.loads(data)

                with conn.cursor() as cursor:
                    cursor.execute(
                        insert_sql,
                        (item['time'], item['title'], item['Numbering'], item['url']),
                    )
                conn.commit()

                offset += 1
                print("正在保存第:" + str(offset) + "条记录")
        finally:
            conn.close()
            pool.disconnect()

        return offset


if __name__ == '__main__':
    creat_cnnvd_table = "create table db_cnnvd(time VARCHAR (30),title VARCHAR (1000),Numbering VARCHAR (40),url VARCHAR (200))"
    insert_cnnvd_data = "insert into db_cnnvd(time,title,Numbering,url)VALUES (%s,%s,%s,%s)"
    creat_woodyun_table = "create table db_woodyun(time VARCHAR (100),title VARCHAR (1000),Numbering VARCHAR (40),url VARCHAR (200))"
    insert_woodyun_data = "insert into db_woodyun(time,title,Numbering,url)VALUES (%s,%s,%s,%s)"

    # Drain both queues in turn; the second no longer depends on the first
    # one timing out.
    jobs = (
        ('db_cnnvd', 'vulcrawl:items', creat_cnnvd_table, insert_cnnvd_data),
        ('db_woodyun', 'vulcrawl2:items', creat_woodyun_table, insert_woodyun_data),
    )
    for job in jobs:
        Vuldboption().process_item(*job)