You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
74 lines
2.7 KiB
74 lines
2.7 KiB
# -*- coding: utf-8 -*-
|
|
|
|
from lxml import etree
|
|
|
|
import downloader
|
|
import pipelines
|
|
import settings
|
|
|
|
|
|
class historyPriceItem:
    """Fetch one product's price-history page and extract summary fields.

    The page URL is ``settings.HISTORY_PRICE_URL`` followed by the product id.
    ``self.response`` holds whatever ``pipelines.gethtml`` returns; this class
    only relies on it offering an lxml-style ``xpath()`` method.
    """

    def __init__(self, id):
        """Build the page URL for *id* and load the parsed page.

        Args:
            id: Product identifier. It is converted with ``str()`` and
                appended to ``settings.HISTORY_PRICE_URL``. The name shadows
                the builtin but is kept so keyword callers keep working.
        """
        self.url = settings.HISTORY_PRICE_URL + str(id)
        # presumably downloader.useRequests performs the HTTP request and
        # pipelines.gethtml parses the body -- confirm against those modules.
        self.response = pipelines.gethtml(downloader.useRequests(self.url))
        # Offline debugging: replace the line above with a saved page, e.g.
        # self.response = etree.parse('historyPriceMore.html', etree.HTMLParser(encoding = 'utf-8'))
        # self.response = etree.parse('historyPrice.html', etree.HTMLParser(encoding = 'utf-8'))

    def gethistoryPrice(self) -> list:
        """Extract the review, category, update time and price trend.

        Each field is first looked for as a plain substring of the serialized
        ``<div class="container">`` element; a field whose marker is absent,
        or whose XPath query yields no text node, is reported as ``''``.

        Returns:
            ``[review, category, update_time, price_trend]`` -- four strings.
            ``price_trend`` is a run of ``<first><second>;`` entries built from
            consecutive pairs of timeline text nodes.

        Raises:
            IndexError: if the page has no ``<div class="container">``.
        """
        container = self.response.xpath(r"//div[@class='container']")[0]
        # Serialize the container once so marker checks are cheap substring tests.
        html = etree.tostring(container, encoding='utf-8', method='html').decode('utf-8')

        def first_text(marker: str, query: str):
            """Return the first text node matching *query*, or None if absent.

            The marker test alone is not sufficient: a short marker such as
            'p3' can occur in the HTML without the queried node existing, so
            the XPath result is checked before indexing.
            """
            if marker not in html:
                return None
            nodes = self.response.xpath(query)
            return nodes[0] if nodes else None

        def getCommit() -> str:
            """Return the product review text, or '' when the page has none."""
            raw = first_text('商品点评:', "//div[@data-content='商品点评:']/text()")
            if raw is None:
                return ''
            cleaned = pipelines.myreplace(raw, mode='all')
            # Drops the first 5 characters (presumably the 5-character label)
            # and the final character.
            return str(cleaned[5:-1])

        def getTags() -> str:
            """Return the product category text, or '' when the page has none."""
            raw = first_text('商品类别:', "//div[@data-content='商品类别:']/text()")
            if raw is None:
                return ''
            cleaned = pipelines.myreplace(raw, mode='all')
            # Drops the first 5 characters (presumably the 5-character label).
            return str(cleaned[5:])

        def updateTime() -> str:
            """Return the data-update timestamp text, or '' when not recorded."""
            raw = first_text('p3', r"//div[@class='p3']/p[@class='tips']/text()")
            if raw is None:
                return ''
            cleaned = pipelines.myreplace(raw, mode='strip')
            # Drops the first 5 characters (presumably a fixed prefix -- confirm).
            return str(cleaned[5:])

        def priceTrend() -> str:
            """Return the price history as '<first><second>;' entries, or ''."""
            if 'timeline-text' not in html:
                return ''
            nodes = self.response.xpath(r"//div[@class='timeline-text']/p/text()")
            # Text nodes are consumed two at a time; zip() ignores a trailing
            # unpaired node instead of raising IndexError on odd-length input.
            return ''.join(
                pipelines.myreplace(first) + pipelines.myreplace(second) + ';'
                for first, second in zip(nodes[::2], nodes[1::2])
            )

        return [getCommit(), getTags(), updateTime(), priceTrend()]
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: fetch one product page and print the extracted fields.
    # Other product ids that can be substituted here:
    #   "10036840192083"
    #   "100020511880"  # More
    sample_id = "100008227212"
    print(historyPriceItem(sample_id).gethistoryPrice())
|