添加历史价格比对项目

3 years ago · 8ca463292a
parent e88774cc30
commit 8ca463292a
3 changed files with 50 additions and 66 deletions
--- a/README.md
+++ b/README.md
@@ -150,15 +150,43 @@ html1 = etree.parse('test.html',etree.HTMLParser(encoding='utf-8'))
 # 访问 https://httpbin.org/get?show_env=1 可以返回当前浏览器的请求信息
 options.add_argument('lang=zh_CN.UTF-8')
 # 贴一个用json模块保存cookies
 def getCookies():
    """Read the cookie list stored in cookies.json and print each cookie.

    Every printed dict carries the stored domain, httpOnly, name, secure
    and value fields, with the path forced to '/'.
    """
    with open('cookies.json', 'r', encoding='utf-8') as cookie_file:
        stored = json.load(cookie_file)
    for entry in stored:
        # Build the dict in the same key order the cookie is printed in.
        normalized = {key: entry[key] for key in ('domain', 'httpOnly', 'name')}
        normalized['path'] = '/'
        normalized['secure'] = entry['secure']
        normalized['value'] = entry['value']
        print(normalized)
 def saveCookies(driver):
    """Dump the cookies reported by driver.get_cookies() to cookies.json as JSON.

    driver: any object exposing get_cookies(); its return value must be
    JSON-serializable.
    """
    # Serialize before opening the file so a failing get_cookies() call
    # does not truncate an existing cookies.json.
    jsonCookies = json.dumps(driver.get_cookies())
    # The with-block closes the file on exit; no explicit close() is needed.
    with open('cookies.json', 'w', encoding='utf-8') as fd:
        fd.write(jsonCookies)
 ```
 ChromeDriver
 下载 [ChromeDriver](https://chromedriver.chromium.org/home) 放到当前目录就行(如果是放在 python 根目录可以不用在实例化 selenium 时指定chromedriver 路径)
-### Redis
+### Requests
-[介绍，配置](C:\Users\wkyuu\Desktop\my\SQL\Redis\Redis - NoSql高速缓存数据库.md)
+经典老碟
 ```python
 import requests
 ```
 ### Redis
 ```python
 # 安装 redis 模块
@@ -208,4 +236,6 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
 14，[Selenium：添加Cookie的方法](https://cloud.tencent.com/developer/article/1616175)
-15，
+15，
 16，
--- a/downloader.py
+++ b/downloader.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 import json
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from lxml import etree
 import random
 import settings
-import time
+import requests
 import json
 headers = {
    'User-Agent': random.choice(settings.USER_AGENT)
@@ -34,61 +34,14 @@ def getsource(url):
    driver.close()
    return response
-def manmanbuy(url):
+def useRequests(url):
-    initChrome = Options()
+    pass
    '''
    initChrome.add_argument('--no-sandbox')
    initChrome.add_argument('--headless')
    initChrome.add_argument('--disable-gpu')
    initChrome.add_argument("disable-cache")
    initChrome.add_argument('disable-infobars')
    initChrome.add_argument('log-level=3')    # INFO = 0 WARNING = 1 LOG_ERROR = 2 LOG_FATAL = 3 default is 0
    initChrome.add_experimental_option("excludeSwitches",['enable-automation','enable-logging'])
    '''
    # driver = webdriver.Chrome(chrome_options = initChrome, executable_path = './chromedriver.exe')
    driver = webdriver.Chrome(executable_path = './chromedriver.exe')
    # driver.get(url)
    # time.sleep(10)
    with open('cookies.json', 'r', encoding='utf-8') as fd:
            listCookies = json.loads(fd.read())
    for cookie in listCookies:
        cookies = {
            'domain': cookie['domain'],
            'httpOnly': cookie['httpOnly'],
            'name':cookie['name'],
            'path':'/',
            'secure': cookie['secure'],
            'value':cookie['value'],
        }
        print(cookies)
        driver.add_cookie(cookies)
    driver.get(url)
    time.sleep(10)
    exit()
    driver.implicitly_wait(10)
    driver.get(url)
    response = etree.HTML(driver.page_source)
    response = etree.tostring(response, encoding = "utf-8", pretty_print = True, method = "html")
    response = response.decode('utf-8')
    driver.close()
    return response
 def saveCookies(driver):
    """Write the cookies returned by driver.get_cookies() to cookies.json.

    driver: any object exposing get_cookies(); the returned value is
    serialized with json.dumps, so it must be JSON-serializable.
    """
    # Serialize first: if get_cookies() fails, an existing cookies.json
    # is left untouched instead of being truncated.
    jsonCookies = json.dumps(driver.get_cookies())
    # Leaving the with-block closes the file, so a trailing fd.close()
    # would be redundant.
    with open('cookies.json', 'w', encoding='utf-8') as fd:
        fd.write(jsonCookies)
 def buy():
    """Request the manmanbuy HistoryLowest page for a fixed JD item URL and print the result of manmanbuy()."""
    item_url = "https://item.jd.com/10047511027349.html"
    # The item URL is passed to the lookup page through its `url` query parameter.
    lookup_url = "https://tool.manmanbuy.com/HistoryLowest.aspx?url=" + item_url
    print(manmanbuy(lookup_url))
 if __name__ == "__main__":
-    buy()
+    jdurl = "https://item.jd.com/10036840192083.html"
    url = "https://www.vveby.com/search?keyword=" + jdurl
    with open('historyPrice.html', 'w+', encoding = 'utf-8') as fd:
        fd.write(getsource(url))
    fd.close()
    print('done')
--- a/middlewares.py
+++ b/middlewares.py
@@ -44,7 +44,7 @@ def isNullRedis() -> bool: # 判断redis中待处理的url为空
    if redisconn.llen(REDIS_LISTNAME) == 0: return True
    else: return False
-def precheck() -> bool: #
+def precheck() -> bool: # 检查redis队列情况
    while redisconn.llen(REDIS_LISTNAME) == 0:
        print("No queue was found!\nPush some urls to the queue using default settings.\nContinue [c] or Exit [q] ?")
        check = str(input())
@@ -57,6 +57,11 @@ def precheck() -> bool: #
        else: print("invalid input!")
    return True
 def clearRedis():   # Empty the Redis queue
    """Remove every entry from the REDIS_LISTNAME list, then report completion.

    NOTE(review): assumes redisconn is a redis-py client exposing
    delete() -- confirm against the module's connection setup.
    """
    # One DEL replaces the pop-until-empty loop: Redis drops a list key as
    # soon as the list is empty, so the end state is the same, without one
    # LLEN + LPOP round trip per entry and without spinning forever while
    # producers keep pushing.
    redisconn.delete(REDIS_LISTNAME)
    print("Redis queue has cleared.")
 def write2csv(category, response):    # Write to the CSV file
    """Pass response to pipelines.write2csv together with the CSV path for category.

    The path is <current working directory>/Catalogues/<FILENAME_CSV entry>.
    If category has no entry, FILENAME_CSV.get() returns None and building
    the path raises TypeError, as the string concatenation did before.
    """
    # os.path.join uses the platform's separator instead of hard-coded
    # backslashes, so the path is also valid outside Windows.
    filename_csv = os.path.join(os.getcwd(), "Catalogues", FILENAME_CSV.get(category))
    pipelines.write2csv(response, filename_csv)
@@ -106,7 +111,7 @@ def mainThread(threadlines = 16, flag = flag):    # 线程数默认为3
        exit()
 if __name__ == '__main__':
-    pass
+    clearRedis()
@@ -133,7 +138,3 @@ def localtest(category): # 本地加载的源码测试
        print("page " + str(page) + " sleep over at " + time.ctime())
        page += 1
 def clearRedis():   # Empty the Redis queue
    """Drop all entries of the REDIS_LISTNAME list and print a confirmation.

    NOTE(review): assumes redisconn is a redis-py client exposing
    delete() -- confirm against the module's connection setup.
    """
    # Deleting the key empties the list in a single command. Popping until
    # LLEN reaches zero leaves the same state (Redis removes empty list
    # keys) but costs two round trips per entry and may not terminate if
    # other clients push concurrently.
    redisconn.delete(REDIS_LISTNAME)
    print("Redis queue has cleared.")