# -*- coding: utf-8 -*-
from lxml import etree

import downloader
import pipelines
import settings


class historyPriceItem:
    """Extract price-history fields from one product's history page.

    The constructor fetches the page through the project's ``downloader`` and
    ``pipelines`` helpers and stores the result in ``self.response``; that
    object must support ``.xpath()`` (presumably an lxml tree -- the
    commented-out debugging lines below substitute ``etree.parse`` output).
    """

    def __init__(self, id):
        """Build the page URL for *id* and load the page.

        Args:
            id: Product identifier. It is converted with ``str`` and appended
                to ``settings.HISTORY_PRICE_URL``. The name shadows the
                builtin ``id`` but is kept because it is part of the public
                signature.
        """
        self.url = settings.HISTORY_PRICE_URL + str(id)
        self.response = pipelines.gethtml(downloader.useRequests(self.url))
        # For offline debugging, a saved copy of the page can be parsed instead:
        # self.response = etree.parse('historyPriceMore.html', etree.HTMLParser(encoding = 'utf-8'))
        # self.response = etree.parse('historyPrice.html', etree.HTMLParser(encoding = 'utf-8'))

    def gethistoryPrice(self) -> list:
        """Return ``[comment, tags, update_time, price_trend]`` as strings.

        Returns:
            A four-element list, in this order: the product comment, the
            product category tags, the update-time text and the price trend
            (``''`` when the page has no timeline).

        Raises:
            IndexError: If the ``container`` div, the comment div, the
                category div or the update-time paragraph is missing from
                the page.
        """
        container = self.response.xpath(r"//div[@class='container']")[0]
        # The serialized container is only used as a cheap "is there a
        # timeline at all?" substring probe in _price_trend().
        container_html = etree.tostring(
            container, encoding='utf-8', method='html'
        ).decode('utf-8')

        return [
            self._get_commit(),
            self._get_tags(),
            self._update_time(),
            self._price_trend(container_html),
        ]

    def _first(self, query: str):
        """Return the first XPath match for *query* (IndexError if none)."""
        return self.response.xpath(query)[0]

    def _get_commit(self) -> str:
        """Return the product comment text with its label stripped."""
        commit = self._first("//div[@data-content='商品点评:']/text()")
        commit = pipelines.myreplace(commit, mode='all')
        # Drop the first five characters and the final character of the
        # cleaned text (presumably the label prefix and a trailing delimiter).
        return str(commit[5:-1])

    def _get_tags(self) -> str:
        """Return the product category text with its label stripped."""
        tags = self._first("//div[@data-content='商品类别:']/text()")
        tags = pipelines.myreplace(tags, mode='all')
        # Drop the first five characters (presumably the label prefix).
        return str(tags[5:])

    def _update_time(self) -> str:
        """Return the update-time text from the ``tips`` paragraph."""
        stamp = self._first(r"//div[@class='p3']/p[@class='tips']/text()")
        stamp = pipelines.myreplace(stamp, mode='strip')
        # Drop the first five characters (presumably a label prefix).
        return str(stamp[5:])

    def _price_trend(self, container_html: str) -> str:
        """Return the timeline entries joined as ``'<a><b>;<a><b>;...'``.

        Args:
            container_html: Serialized HTML of the page's container div.

        Returns:
            ``''`` when the container has no ``timeline-text`` marker;
            otherwise every consecutive pair of timeline text nodes, each
            cleaned with ``pipelines.myreplace``, concatenated and terminated
            by ``';'``.
        """
        # Used to decide whether the page has any price-history record.
        if 'timeline-text' not in container_html:
            return ''

        nodes = self.response.xpath(r"//div[@class='timeline-text']/p/text()")
        # Pair nodes two by two. zip() stops at the shorter slice, so an
        # unpaired trailing node is ignored instead of raising IndexError as
        # the previous index-based loop did.
        return ''.join(
            pipelines.myreplace(first) + pipelines.myreplace(second) + ';'
            for first, second in zip(nodes[::2], nodes[1::2])
        )


if __name__ == '__main__':
    # item_id = "10036840192083"
    item_id = "11564571796"  # More
    aitem = historyPriceItem(item_id)
    print(aitem.gethistoryPrice())