From 8954439fed15bcd05cd1847da136e11b4e3c115d Mon Sep 17 00:00:00 2001 From: Fisher Date: Tue, 13 Jul 2021 09:29:32 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=20data=5Fcleansing=20?= =?UTF-8?q?=E6=A8=A1=E5=9D=97=E7=9A=84=E9=9B=86=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/backend/JDSpiders/BoardUSuit/BoardUSuit.py | 3 +-- src/backend/JDSpiders/CaseFan/CaseFan.py | 3 +-- src/backend/JDSpiders/Spider/DBConfig.py | 1 - src/backend/JDSpiders/Spider/JDSpider.py | 17 ++++++++--------- .../Spider/__pycache__/DBConfig.cpython-37.pyc | Bin 473 -> 414 bytes 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/backend/JDSpiders/BoardUSuit/BoardUSuit.py b/src/backend/JDSpiders/BoardUSuit/BoardUSuit.py index a572c02..0c6d5a9 100644 --- a/src/backend/JDSpiders/BoardUSuit/BoardUSuit.py +++ b/src/backend/JDSpiders/BoardUSuit/BoardUSuit.py @@ -6,9 +6,8 @@ Description: shop_name :店铺名 评论数和好评率不爬,因为都是0 Author: Fishermanykx -Date: 2020-12-11 14:25:04 LastEditors: Fishermanykx -LastEditTime: 2021-05-28 11:12:07 +LastEditTime: 2021-07-12 16:26:41 ''' import re import json diff --git a/src/backend/JDSpiders/CaseFan/CaseFan.py b/src/backend/JDSpiders/CaseFan/CaseFan.py index 1e6db54..145d470 100644 --- a/src/backend/JDSpiders/CaseFan/CaseFan.py +++ b/src/backend/JDSpiders/CaseFan/CaseFan.py @@ -3,9 +3,8 @@ Description: introduction :商品介绍的 .json Ptable_params :规格与包装的 .json Author: Fishermanykx -Date: 2020-12-08 20:45:47 LastEditors: Fishermanykx -LastEditTime: 2021-05-28 11:12:00 +LastEditTime: 2021-07-12 16:14:17 ''' import json diff --git a/src/backend/JDSpiders/Spider/DBConfig.py b/src/backend/JDSpiders/Spider/DBConfig.py index a15ebae..6555398 100644 --- a/src/backend/JDSpiders/Spider/DBConfig.py +++ b/src/backend/JDSpiders/Spider/DBConfig.py @@ -1,7 +1,6 @@ ''' Description: Data Base Configuration Author: Fishermanykx -Date: 2020-12-30 15:33:37 LastEditors: Fishermanykx LastEditTime: 2021-05-28 11:11:42 ''' diff --git a/src/backend/JDSpiders/Spider/JDSpider.py b/src/backend/JDSpiders/Spider/JDSpider.py index b67bd4e..0e71dc0 100644 --- a/src/backend/JDSpiders/Spider/JDSpider.py +++ b/src/backend/JDSpiders/Spider/JDSpider.py @@ -1,16 +1,15 @@ ''' Description: Author: Fishermanykx -Date: 2020-12-29 08:21:41 LastEditors: Fishermanykx -LastEditTime: 2021-05-28 11:11:28 +LastEditTime: 2021-07-13 09:17:32 ''' import re import json import time import pymysql -from pymysql.converters import escape_string +# from pymysql.converters import escape_string from selenium.common.exceptions import ElementNotInteractableException from selenium.common.exceptions import ElementClickInterceptedException @@ -659,7 +658,7 @@ class CPUSpider(JDSpider): page_num = 1 start_page = 1 - # self.productSpider(cpu_link, page_num, start_page) + self.productSpider(cpu_link, page_num, start_page) self.cleanCPU() print("Successfully get CPU data!") @@ -824,7 +823,7 @@ class MotherboardSpider(JDSpider): # 判定是否为板-U套装 record = data[i] if record['comment_num'] == 100: # 抓到板-U套装了 - continue + continue try: if '套装' in record['title_name']: continue @@ -959,7 +958,7 @@ class GraphicsCardSpider(JDSpider): else: generation = 1 else: - if ('6900' in name) or ('6800' in name): + if ('6900' in name) or ('6800' in name) or ('6700' in name): generation = 3 elif ('5700' in name) or ('5600' in name) or ('5500' in name): generation = 2 @@ -1187,7 +1186,7 @@ class MemorySpider(JDSpider): "Ptable_params, title_name) VALUES (%(id)s, %(name)s, %(comment_num)s, %(praise_rate)s, %(shop_name)s, %(price)s"\ ", %(link)s, %(brand)s, %(frequency)s, %(total_capacity)s, %(memory_num)s, %(appearance)s, "\ "%(ddr_gen)s, %(introduction)s, %(Ptable_params)s, %(title_name)s)" - # sql_insert = escape_string(sql_insert) + # sql_insert = pymysql.escape_string(sql_insert) # cursor.executemany(sql_insert, new_data) for i in range(len(new_data)): cursor.execute(sql_insert, new_data[i]) @@ -1894,7 +1893,7 @@ class PowerSupplySpider(JDSpider): def main(self): power_supply_link = "https://list.jd.com/list.html?cat=670%2C677%2C691&psort=3&psort=3&page=" # page_num = 25 # 抓25页 - page_num = 1 + page_num = 1 start_page = 1 # self.productSpider(power_supply_link, page_num, start_page) @@ -2090,7 +2089,7 @@ class CaseSpider(JDSpider): if __name__ == "__main__": accessory_type = 'all' - accessory_type = 'motherboard' + accessory_type = 'graphics_card' if accessory_type == 'cpu': cpu_spider = CPUSpider('cpu') cpu_spider.main() diff --git a/src/backend/JDSpiders/Spider/__pycache__/DBConfig.cpython-37.pyc b/src/backend/JDSpiders/Spider/__pycache__/DBConfig.cpython-37.pyc index d7ad420a0393dec505f6778ff9f15fde9a5dedc5..017fdbd06f8eb45747ee40611fcb10711f012833 100644 GIT binary patch delta 123 zcmcb~Jdc^ziI@llGLL3;N;BIyyVoF;QX|b^2DOlc*mmTjLeeMd0|i4zK!6ZhmZR!r7mlr?kBOV7+pO)bjIOOJ8R&n+k|NiB+ZOioTMF3vB? sOf8NNN=?qs%}vcqNi50C&x;SPEG|jSogBxgFUSsbLoo-CU}1zn00tT?p8x;=