# milkSpider/historyPrice.py

# -*- coding: utf-8 -*-

from lxml import etree

import downloader
import pipelines
import settings

class historyPriceItem:
    """Fetch one product's history-price page and extract fields from it.

    The page is requested once in the constructor; the object returned by
    ``pipelines.gethtml`` is stored in ``self.response`` and queried with
    XPath by :meth:`gethistoryPrice`.
    """

    def __init__(self, id):
        """Build the page URL for product *id* and load the page.

        Args:
            id: Product identifier. It is converted with ``str`` and appended
                to ``settings.HISTORY_PRICE_URL``. The name shadows the
                builtin but is kept so keyword callers keep working.
        """
        self.url = settings.HISTORY_PRICE_URL + str(id)
        # NOTE: for offline debugging, a saved page can be parsed instead, e.g.
        # etree.parse('historyPrice.html', etree.HTMLParser(encoding='utf-8')).
        self.response = pipelines.gethtml(downloader.useRequests(self.url))

    def gethistoryPrice(self) -> list:
        """Extract review, category, update time and price trend from the page.

        Returns:
            A list of four strings, in this order:
            ``[review, category, update_time, price_trend]``.
            ``price_trend`` is ``''`` when the container markup holds no
            ``timeline-text`` element; otherwise every pair of timeline text
            nodes is cleaned, concatenated and terminated with ``';'``.

        Raises:
            IndexError: If the ``container`` div, the review node, the
                category node or the ``tips`` paragraph is not found.
        """
        # Serialise the main container once; the markup is only used below
        # to detect whether a price timeline is present at all.
        container = self.response.xpath(r"//div[@class='container']")[0]
        container_html = etree.tostring(
            container, encoding='utf-8', method='html'
        ).decode('utf-8')

        def first_text(xpath: str, mode: str) -> str:
            """Return the first node matched by *xpath*, cleaned by myreplace."""
            node = self.response.xpath(xpath)[0]
            return pipelines.myreplace(node, mode=mode)

        def price_trend() -> str:
            """Join the timeline text nodes pairwise into one ';'-separated string."""
            # Used to decide whether any historical price record exists.
            if 'timeline-text' not in container_html:
                return ''
            texts = self.response.xpath(r"//div[@class='timeline-text']/p/text()")
            # Nodes are consumed two at a time. zip() stops at the shorter
            # slice, so a trailing unpaired node is skipped instead of
            # raising IndexError as the index-based loop did.
            return ''.join(
                pipelines.myreplace(first) + pipelines.myreplace(second) + ';'
                for first, second in zip(texts[::2], texts[1::2])
            )

        # The [5:] slices presumably drop a 5-character label prefix (the
        # data-content labels are 5 characters long) -- TODO confirm against
        # the output of pipelines.myreplace. The review additionally loses
        # its final character.
        review = str(first_text("//div[@data-content='商品点评：']/text()", 'all')[5:-1])
        category = str(first_text("//div[@data-content='商品类别：']/text()", 'all')[5:])
        updated = str(
            first_text(r"//div[@class='p3']/p[@class='tips']/text()", 'strip')[5:]
        )

        return [review, category, updated, price_trend()]

if __name__ == '__main__':
    # Manual smoke run: fetch one product's page and print the extracted list.
    # Alternative sample product id: "10036840192083"
    sample_id = "11564571796"  # marked "More" by the author -- presumably a longer price history
    print(historyPriceItem(sample_id).gethistoryPrice())