diff --git a/downloader.py b/downloader.py index 067b540..9454aad 100644 --- a/downloader.py +++ b/downloader.py @@ -38,12 +38,6 @@ def getsource(url): return response def useRequests(url): - - def write2html(res): - filename = 'historyPrice.html' - with open(filename, mode = 'w+', encoding='utf-8') as fd: - fd.write(res) - try: session = requests.Session() res = session.get(url, headers = headers) @@ -52,7 +46,6 @@ def useRequests(url): res.encoding = res.apparent_encoding res = etree.HTML(res.text) source = etree.tostring(res, encoding = 'utf-8', pretty_print = True, method = 'html').decode('utf-8') - # write2html(res) return source except BaseException as e: print(e) @@ -61,7 +54,7 @@ def useRequests(url): if __name__ == "__main__": - jdurl = r"https://item.jd.com/10036840192083.html" + jdurl = r"https://item.jd.com/10023043997421.html" url = r"https://www.vveby.com/search?keyword=" + jdurl print(url) useRequests(url) diff --git a/historyPrice.py b/historyPrice.py index ed43b00..f8db74e 100644 --- a/historyPrice.py +++ b/historyPrice.py @@ -4,13 +4,36 @@ from lxml import etree import settings import downloader +def myreplace(text): + return text.strip().replace(' ', '').replace("\r\n", '') + class historyPriceItem: def __init__(self, id): - self.url = + self.url = settings.HISTORY_PRICE_URL + str(id) + # self.response = downloader.useRequests(self.url) + self.response = etree.parse('historyPrice.html', etree.HTMLParser(encoding = 'utf-8')) + + def gethistoryPrice(self) -> list: + + reg = r"//div[@class='container']" + item = self.response.xpath(reg)[0] + item = etree.tostring(item, encoding = 'utf-8', method = 'html').decode('utf-8') + + def getTag(self) -> str: + reg = r"//div[@data-content='商品类别:']/text()" + tag = self.response.xpath(reg)[0] + tag = etree.HTML(tag) + tag = myreplace(tag) + return tag[5:] + + def get +# tree = etree.tostring(response.xpath(reg)[0], encoding = 'utf-8', method = 'html').decode('utf-8') if __name__ == '__main__': - pass + id = "10036840192083" + aitem = historyPriceItem(id) + aitem.gethistoryPrice() diff --git a/settings.py b/settings.py index f482d73..6c7dcf8 100644 --- a/settings.py +++ b/settings.py @@ -2,6 +2,8 @@ import os +from hyperlink import URL + # 修改要生成的文件名,下面的是默认,注意要用.csv结尾 FILENAME_CSV = { "牛奶": "milk.csv", @@ -40,4 +42,5 @@ USER_AGENT = [ COOKIES_FILENAME = "cookies.json" -# \ No newline at end of file +# 历史价格查询网站 vveby.com +HISTORY_PRICE_URL = r"https://www.vveby.com/search?keyword=" \ No newline at end of file