|
|
@ -2,7 +2,7 @@
|
|
|
|
Description:
|
|
|
|
Description:
|
|
|
|
Author: Fishermanykx
|
|
|
|
Author: Fishermanykx
|
|
|
|
LastEditors: Fishermanykx
|
|
|
|
LastEditors: Fishermanykx
|
|
|
|
LastEditTime: 2021-07-13 09:17:32
|
|
|
|
LastEditTime: 2021-07-13 10:04:13
|
|
|
|
'''
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
import re
|
|
|
@ -851,10 +851,10 @@ class MotherboardSpider(JDSpider):
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
motherboard_link = "https://list.jd.com/list.html?cat=670%2C677%2C681&psort=3&psort=3&pvid=e726d0ee460448b6a16cf24950c1dabb&page="
|
|
|
|
motherboard_link = "https://list.jd.com/list.html?cat=670%2C677%2C681&psort=3&psort=3&pvid=e726d0ee460448b6a16cf24950c1dabb&page="
|
|
|
|
# 爬取数据
|
|
|
|
# 爬取数据
|
|
|
|
# page_num = 26 # 一共爬了26页
|
|
|
|
page_num = 26 # 一共爬了26页
|
|
|
|
page_num = 1 # for testing
|
|
|
|
# page_num = 1 # for testing
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(motherboard_link, page_num, start_page)
|
|
|
|
self.productSpider(motherboard_link, page_num, start_page)
|
|
|
|
# 清洗数据
|
|
|
|
# 清洗数据
|
|
|
|
self.cleanMotherboard()
|
|
|
|
self.cleanMotherboard()
|
|
|
|
print("Successfully get Motherboard data!")
|
|
|
|
print("Successfully get Motherboard data!")
|
|
|
@ -1015,10 +1015,10 @@ class GraphicsCardSpider(JDSpider):
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
graphics_card_link = "https://list.jd.com/list.html?cat=670%2C677%2C679&psort=3&psort=3&pvid=e726d0ee460448b6a16cf24950c1dabb&page="
|
|
|
|
graphics_card_link = "https://list.jd.com/list.html?cat=670%2C677%2C679&psort=3&psort=3&pvid=e726d0ee460448b6a16cf24950c1dabb&page="
|
|
|
|
# 爬取数据
|
|
|
|
# 爬取数据
|
|
|
|
# page_num = 30
|
|
|
|
page_num = 30
|
|
|
|
page_num = 1
|
|
|
|
# page_num = 1
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(graphics_card_link, page_num, start_page)
|
|
|
|
self.productSpider(graphics_card_link, page_num, start_page)
|
|
|
|
# 清洗数据
|
|
|
|
# 清洗数据
|
|
|
|
self.cleanGraphicsCard()
|
|
|
|
self.cleanGraphicsCard()
|
|
|
|
print("Successfully get Graphics Card data!")
|
|
|
|
print("Successfully get Graphics Card data!")
|
|
|
@ -1196,10 +1196,10 @@ class MemorySpider(JDSpider):
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
memory_link = "https://list.jd.com/list.html?cat=670%2C677%2C680&psort=3&ev=210_1558%5E&psort=3&page="
|
|
|
|
memory_link = "https://list.jd.com/list.html?cat=670%2C677%2C680&psort=3&ev=210_1558%5E&psort=3&page="
|
|
|
|
# 爬取数据
|
|
|
|
# 爬取数据
|
|
|
|
# page_num = 40
|
|
|
|
page_num = 40
|
|
|
|
page_num = 1
|
|
|
|
# page_num = 1
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(memory_link, page_num, start_page)
|
|
|
|
self.productSpider(memory_link, page_num, start_page)
|
|
|
|
# 清洗数据
|
|
|
|
# 清洗数据
|
|
|
|
self.cleanMemory()
|
|
|
|
self.cleanMemory()
|
|
|
|
print("Successfully get Memory data!")
|
|
|
|
print("Successfully get Memory data!")
|
|
|
@ -1384,8 +1384,8 @@ class CPURadiatorSpider(JDSpider):
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
radiator_link = "https://list.jd.com/list.html?cat=670%2C677%2C682&psort=3&ev=3680_97402%7C%7C97403%7C%7C106254%7C%7C106255%5E&psort=3&page="
|
|
|
|
radiator_link = "https://list.jd.com/list.html?cat=670%2C677%2C682&psort=3&ev=3680_97402%7C%7C97403%7C%7C106254%7C%7C106255%5E&psort=3&page="
|
|
|
|
# 爬取数据
|
|
|
|
# 爬取数据
|
|
|
|
# page_num = 27
|
|
|
|
page_num = 27
|
|
|
|
page_num = 1
|
|
|
|
# page_num = 1
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
self.productSpider(radiator_link, page_num, start_page)
|
|
|
|
self.productSpider(radiator_link, page_num, start_page)
|
|
|
|
# 清洗数据
|
|
|
|
# 清洗数据
|
|
|
@ -1557,10 +1557,10 @@ class SSDSpider(JDSpider):
|
|
|
|
|
|
|
|
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
ssd_link = "https://list.jd.com/list.html?cat=670%2C677%2C11303&psort=3&psort=3&page="
|
|
|
|
ssd_link = "https://list.jd.com/list.html?cat=670%2C677%2C11303&psort=3&psort=3&page="
|
|
|
|
# page_num = 36 # 一共爬了36页
|
|
|
|
page_num = 36 # 一共爬了36页
|
|
|
|
page_num = 1 # 一共爬了36页
|
|
|
|
# page_num = 1 # 一共爬了36页
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(ssd_link, page_num, start_page)
|
|
|
|
self.productSpider(ssd_link, page_num, start_page)
|
|
|
|
|
|
|
|
|
|
|
|
self.cleanSSD()
|
|
|
|
self.cleanSSD()
|
|
|
|
print("Successfully get SSD data!")
|
|
|
|
print("Successfully get SSD data!")
|
|
|
@ -1735,10 +1735,10 @@ class HDDSpider(JDSpider):
|
|
|
|
|
|
|
|
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
hdd_link = "https://list.jd.com/list.html?cat=670%2C677%2C683&psort=3&psort=3&page="
|
|
|
|
hdd_link = "https://list.jd.com/list.html?cat=670%2C677%2C683&psort=3&psort=3&page="
|
|
|
|
# page_num = 11
|
|
|
|
page_num = 11
|
|
|
|
page_num = 1
|
|
|
|
# page_num = 1
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(hdd_link, page_num, start_page)
|
|
|
|
self.productSpider(hdd_link, page_num, start_page)
|
|
|
|
|
|
|
|
|
|
|
|
self.cleanHDD()
|
|
|
|
self.cleanHDD()
|
|
|
|
print("Successfully get HDD data!")
|
|
|
|
print("Successfully get HDD data!")
|
|
|
@ -1892,10 +1892,10 @@ class PowerSupplySpider(JDSpider):
|
|
|
|
|
|
|
|
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
power_supply_link = "https://list.jd.com/list.html?cat=670%2C677%2C691&psort=3&psort=3&page="
|
|
|
|
power_supply_link = "https://list.jd.com/list.html?cat=670%2C677%2C691&psort=3&psort=3&page="
|
|
|
|
# page_num = 25 # 抓25页
|
|
|
|
page_num = 25 # 抓25页
|
|
|
|
page_num = 1
|
|
|
|
# page_num = 1
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(power_supply_link, page_num, start_page)
|
|
|
|
self.productSpider(power_supply_link, page_num, start_page)
|
|
|
|
|
|
|
|
|
|
|
|
self.cleanPowerSupply()
|
|
|
|
self.cleanPowerSupply()
|
|
|
|
print("Successfully get Power Supply data!")
|
|
|
|
print("Successfully get Power Supply data!")
|
|
|
@ -2078,10 +2078,10 @@ class CaseSpider(JDSpider):
|
|
|
|
|
|
|
|
|
|
|
|
def main(self):
|
|
|
|
def main(self):
|
|
|
|
case_link = "https://list.jd.com/list.html?cat=670%2C677%2C687&psort=3&psort=3&page="
|
|
|
|
case_link = "https://list.jd.com/list.html?cat=670%2C677%2C687&psort=3&psort=3&page="
|
|
|
|
# page_num = 36
|
|
|
|
page_num = 36
|
|
|
|
page_num = 1
|
|
|
|
# page_num = 1
|
|
|
|
start_page = 1
|
|
|
|
start_page = 1
|
|
|
|
# self.productSpider(case_link, page_num, start_page)
|
|
|
|
self.productSpider(case_link, page_num, start_page)
|
|
|
|
|
|
|
|
|
|
|
|
self.cleanCase()
|
|
|
|
self.cleanCase()
|
|
|
|
print("Successfully get Computer Case data!")
|
|
|
|
print("Successfully get Computer Case data!")
|
|
|
@ -2089,7 +2089,7 @@ class CaseSpider(JDSpider):
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
if __name__ == "__main__":
|
|
|
|
accessory_type = 'all'
|
|
|
|
accessory_type = 'all'
|
|
|
|
accessory_type = 'graphics_card'
|
|
|
|
# accessory_type = 'graphics_card'
|
|
|
|
if accessory_type == 'cpu':
|
|
|
|
if accessory_type == 'cpu':
|
|
|
|
cpu_spider = CPUSpider('cpu')
|
|
|
|
cpu_spider = CPUSpider('cpu')
|
|
|
|
cpu_spider.main()
|
|
|
|
cpu_spider.main()
|
|
|
|