完成 data_cleansing 模块的集成

master
Fisher 3 years ago
parent 758027eb55
commit 8954439fed

@@ -6,9 +6,8 @@ Description:
shop_name 店铺名 shop_name 店铺名
评论数和好评率不爬因为都是0 评论数和好评率不爬因为都是0
Author: Fishermanykx Author: Fishermanykx
Date: 2020-12-11 14:25:04
LastEditors: Fishermanykx LastEditors: Fishermanykx
LastEditTime: 2021-05-28 11:12:07 LastEditTime: 2021-07-12 16:26:41
''' '''
import re import re
import json import json

@@ -3,9 +3,8 @@ Description:
introduction 商品介绍的 .json introduction 商品介绍的 .json
Ptable_params 规格与包装的 .json Ptable_params 规格与包装的 .json
Author: Fishermanykx Author: Fishermanykx
Date: 2020-12-08 20:45:47
LastEditors: Fishermanykx LastEditors: Fishermanykx
LastEditTime: 2021-05-28 11:12:00 LastEditTime: 2021-07-12 16:14:17
''' '''
import json import json

@@ -1,7 +1,6 @@
''' '''
Description: Data Base Configuration Description: Data Base Configuration
Author: Fishermanykx Author: Fishermanykx
Date: 2020-12-30 15:33:37
LastEditors: Fishermanykx LastEditors: Fishermanykx
LastEditTime: 2021-05-28 11:11:42 LastEditTime: 2021-05-28 11:11:42
''' '''

@@ -1,16 +1,15 @@
''' '''
Description: Description:
Author: Fishermanykx Author: Fishermanykx
Date: 2020-12-29 08:21:41
LastEditors: Fishermanykx LastEditors: Fishermanykx
LastEditTime: 2021-05-28 11:11:28 LastEditTime: 2021-07-13 09:17:32
''' '''
import re import re
import json import json
import time import time
import pymysql import pymysql
from pymysql.converters import escape_string # from pymysql.converters import escape_string
from selenium.common.exceptions import ElementNotInteractableException from selenium.common.exceptions import ElementNotInteractableException
from selenium.common.exceptions import ElementClickInterceptedException from selenium.common.exceptions import ElementClickInterceptedException
@@ -659,7 +658,7 @@ class CPUSpider(JDSpider):
page_num = 1 page_num = 1
start_page = 1 start_page = 1
# self.productSpider(cpu_link, page_num, start_page) self.productSpider(cpu_link, page_num, start_page)
self.cleanCPU() self.cleanCPU()
print("Successfully get CPU data!") print("Successfully get CPU data!")
@@ -959,7 +958,7 @@ class GraphicsCardSpider(JDSpider):
else: else:
generation = 1 generation = 1
else: else:
if ('6900' in name) or ('6800' in name): if ('6900' in name) or ('6800' in name) or ('6700' in name):
generation = 3 generation = 3
elif ('5700' in name) or ('5600' in name) or ('5500' in name): elif ('5700' in name) or ('5600' in name) or ('5500' in name):
generation = 2 generation = 2
@@ -1187,7 +1186,7 @@ class MemorySpider(JDSpider):
"Ptable_params, title_name) VALUES (%(id)s, %(name)s, %(comment_num)s, %(praise_rate)s, %(shop_name)s, %(price)s"\ "Ptable_params, title_name) VALUES (%(id)s, %(name)s, %(comment_num)s, %(praise_rate)s, %(shop_name)s, %(price)s"\
", %(link)s, %(brand)s, %(frequency)s, %(total_capacity)s, %(memory_num)s, %(appearance)s, "\ ", %(link)s, %(brand)s, %(frequency)s, %(total_capacity)s, %(memory_num)s, %(appearance)s, "\
"%(ddr_gen)s, %(introduction)s, %(Ptable_params)s, %(title_name)s)" "%(ddr_gen)s, %(introduction)s, %(Ptable_params)s, %(title_name)s)"
# sql_insert = escape_string(sql_insert) # sql_insert = pymysql.escape_string(sql_insert)
# cursor.executemany(sql_insert, new_data) # cursor.executemany(sql_insert, new_data)
for i in range(len(new_data)): for i in range(len(new_data)):
cursor.execute(sql_insert, new_data[i]) cursor.execute(sql_insert, new_data[i])
@@ -2090,7 +2089,7 @@ class CaseSpider(JDSpider):
if __name__ == "__main__": if __name__ == "__main__":
accessory_type = 'all' accessory_type = 'all'
accessory_type = 'motherboard' accessory_type = 'graphics_card'
if accessory_type == 'cpu': if accessory_type == 'cpu':
cpu_spider = CPUSpider('cpu') cpu_spider = CPUSpider('cpu')
cpu_spider.main() cpu_spider.main()

Loading…
Cancel
Save