|
|
|
@ -2,6 +2,8 @@
|
|
|
|
|
from lxml import etree
|
|
|
|
|
import csv
|
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
import historyPrice
|
|
|
|
|
|
|
|
|
|
def gethtml(response, gethtml_mode = "url"): # 用etree格式化得到的对象
|
|
|
|
|
try:
|
|
|
|
@ -30,9 +32,11 @@ def getidlist(response) -> list: # 获取id
|
|
|
|
|
idlist = html.xpath(reg)
|
|
|
|
|
return idlist
|
|
|
|
|
|
|
|
|
|
def myreplace(text, mode='') -> str:  # Light clean-up of scraped text for output
    """Return ``text`` with surrounding whitespace removed, optionally cleaned further.

    A single definition replaces the two same-named definitions that appeared
    back to back here; the earlier one-argument version was shadowed by the
    later one, and calling this function with one positional argument gives
    the same result it did (a plain ``strip()``).

    Args:
        text: The string to clean.
        mode: Selects how aggressive the clean-up is.
            ``'all'``   - after stripping, remove every space character and
                          every CR+LF pair.
            ``'strip'`` - after stripping, remove every carriage return.
            any other value (including the default ``''``) - strip only.

    Returns:
        The cleaned string.
    """
    # Every mode starts from the stripped text, so compute it once.
    stripped = text.strip()
    if mode == 'all':
        return stripped.replace(' ', '').replace("\r\n", '')
    if mode == 'strip':
        return stripped.replace('\r', '')
    return stripped
|
|
|
|
|
|
|
|
|
|
def isElementTree(response) -> bool: # 用于判断是否已经为etree对象
|
|
|
|
|
if str(type(response)) == "<class 'lxml.etree._ElementTree'>":
|
|
|
|
@ -91,7 +95,10 @@ class item:
|
|
|
|
|
url = r"https://item.jd.com/" + str(self.id) + r".html"
|
|
|
|
|
return url
|
|
|
|
|
|
|
|
|
|
itemlist = [str(self.id), name(), price(), attribute(), sales(), url()]
|
|
|
|
|
historyPriceItem = historyPrice.historyPriceItem(self.id)
|
|
|
|
|
priceHistoryList = historyPriceItem.gethistoryPrice()
|
|
|
|
|
|
|
|
|
|
itemlist = [str(self.id), name(), price(), attribute(), sales(), url()] + priceHistoryList
|
|
|
|
|
return itemlist
|
|
|
|
|
|
|
|
|
|
def print2console(response): # 输出到命令行
|
|
|
|
@ -147,7 +154,7 @@ def write2csv(response, filename_csv): # 写入到csv文件
|
|
|
|
|
except BaseException as e:
|
|
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
|
headers = ['id', '商品名称', '价格(人民币)', '标签', '促销策略', 'url']
|
|
|
|
|
headers = ['id', '商品名称', '价格(人民币)', '标签', '促销策略', 'url', '数据更新时间', '历史价格趋势']
|
|
|
|
|
writer.writerow(headers)
|
|
|
|
|
|
|
|
|
|
write(writer)
|
|
|
|
@ -160,4 +167,22 @@ def write2csv(response, filename_csv): # 写入到csv文件
|
|
|
|
|
print("sth wrong in pipelines.write2csv")
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Running this module directly is intentionally a no-op; a single
    # placeholder statement is enough to keep the guard syntactically valid.
    pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
# 调试数据
|
|
|
|
|
import pipelines
|
|
|
|
|
from lxml import etree
|
|
|
|
|
response = 'index.html' # 文件名 or url
|
|
|
|
|
html = pipelines.gethtml(response, gethtml_mode = 'cache') # cache or url
|
|
|
|
|
id = '1127466'
|
|
|
|
|
aitem = pipelines.item(id, html)
|
|
|
|
|
a = aitem.getitem()
|
|
|
|
|
|
|
|
|
|
import historyPrice
|
|
|
|
|
bitem = historyPrice.historyPriceItem(id)
|
|
|
|
|
b = bitem.gethistoryPrice()
|
|
|
|
|
|
|
|
|
|
itemList = a + b
|
|
|
|
|
'''
|