diff --git a/doc/编译说明.docx b/doc/编译说明.docx new file mode 100644 index 0000000..27c7fab Binary files /dev/null and b/doc/编译说明.docx differ diff --git a/src/backend/JDSpiders/Spider/JDSpider.py b/src/backend/JDSpiders/Spider/JDSpider.py index 0e71dc0..e7000a0 100644 --- a/src/backend/JDSpiders/Spider/JDSpider.py +++ b/src/backend/JDSpiders/Spider/JDSpider.py @@ -2,7 +2,7 @@ Description: Author: Fishermanykx LastEditors: Fishermanykx -LastEditTime: 2021-07-13 09:17:32 +LastEditTime: 2021-07-13 10:04:13 ''' import re @@ -851,10 +851,10 @@ class MotherboardSpider(JDSpider): def main(self): motherboard_link = "https://list.jd.com/list.html?cat=670%2C677%2C681&psort=3&psort=3&pvid=e726d0ee460448b6a16cf24950c1dabb&page=" # 爬取数据 - # page_num = 26 # 一共爬了26页 - page_num = 1 # for testing + page_num = 26 # 一共爬了26页 + # page_num = 1 # for testing start_page = 1 - # self.productSpider(motherboard_link, page_num, start_page) + self.productSpider(motherboard_link, page_num, start_page) # 清洗数据 self.cleanMotherboard() print("Successfully get Motherboard data!") @@ -1015,10 +1015,10 @@ class GraphicsCardSpider(JDSpider): def main(self): graphics_card_link = "https://list.jd.com/list.html?cat=670%2C677%2C679&psort=3&psort=3&pvid=e726d0ee460448b6a16cf24950c1dabb&page=" # 爬取数据 - # page_num = 30 - page_num = 1 + page_num = 30 + # page_num = 1 start_page = 1 - # self.productSpider(graphics_card_link, page_num, start_page) + self.productSpider(graphics_card_link, page_num, start_page) # 清洗数据 self.cleanGraphicsCard() print("Successfully get Graphics Card data!") @@ -1196,10 +1196,10 @@ class MemorySpider(JDSpider): def main(self): memory_link = "https://list.jd.com/list.html?cat=670%2C677%2C680&psort=3&ev=210_1558%5E&psort=3&page=" # 爬取数据 - # page_num = 40 - page_num = 1 + page_num = 40 + # page_num = 1 start_page = 1 - # self.productSpider(memory_link, page_num, start_page) + self.productSpider(memory_link, page_num, start_page) # 清洗数据 self.cleanMemory() print("Successfully get Memory data!") @@ -1384,8 +1384,8 @@ class CPURadiatorSpider(JDSpider): def main(self): radiator_link = "https://list.jd.com/list.html?cat=670%2C677%2C682&psort=3&ev=3680_97402%7C%7C97403%7C%7C106254%7C%7C106255%5E&psort=3&page=" # 爬取数据 - # page_num = 27 - page_num = 1 + page_num = 27 + # page_num = 1 start_page = 1 self.productSpider(radiator_link, page_num, start_page) # 清洗数据 @@ -1557,10 +1557,10 @@ class SSDSpider(JDSpider): def main(self): ssd_link = "https://list.jd.com/list.html?cat=670%2C677%2C11303&psort=3&psort=3&page=" - # page_num = 36 # 一共爬了36页 - page_num = 1 # 一共爬了36页 + page_num = 36 # 一共爬了36页 + # page_num = 1 # 一共爬了36页 start_page = 1 - # self.productSpider(ssd_link, page_num, start_page) + self.productSpider(ssd_link, page_num, start_page) self.cleanSSD() print("Successfully get SSD data!") @@ -1735,10 +1735,10 @@ class HDDSpider(JDSpider): def main(self): hdd_link = "https://list.jd.com/list.html?cat=670%2C677%2C683&psort=3&psort=3&page=" - # page_num = 11 - page_num = 1 + page_num = 11 + # page_num = 1 start_page = 1 - # self.productSpider(hdd_link, page_num, start_page) + self.productSpider(hdd_link, page_num, start_page) self.cleanHDD() print("Successfully get HDD data!") @@ -1892,10 +1892,10 @@ class PowerSupplySpider(JDSpider): def main(self): power_supply_link = "https://list.jd.com/list.html?cat=670%2C677%2C691&psort=3&psort=3&page=" - # page_num = 25 # 抓25页 - page_num = 1 + page_num = 25 # 抓25页 + # page_num = 1 start_page = 1 - # self.productSpider(power_supply_link, page_num, start_page) + self.productSpider(power_supply_link, page_num, start_page) self.cleanPowerSupply() print("Successfully get Power Supply data!") @@ -2078,10 +2078,10 @@ class CaseSpider(JDSpider): def main(self): case_link = "https://list.jd.com/list.html?cat=670%2C677%2C687&psort=3&psort=3&page=" - # page_num = 36 - page_num = 1 + page_num = 36 + # page_num = 1 start_page = 1 - # self.productSpider(case_link, page_num, start_page) + self.productSpider(case_link, page_num, start_page) self.cleanCase() print("Successfully get Computer Case data!") @@ -2089,7 +2089,7 @@ class CaseSpider(JDSpider): if __name__ == "__main__": accessory_type = 'all' - accessory_type = 'graphics_card' + # accessory_type = 'graphics_card' if accessory_type == 'cpu': cpu_spider = CPUSpider('cpu') cpu_spider.main()