添加历史价格比对项目

3 years ago · 8ca463292a
parent e88774cc30
commit 8ca463292a
3 changed files with 50 additions and 66 deletions
--- a/README.md
+++ b/README.md
@ -150,15 +150,43 @@ html1 = etree.parse('test.html',etree.HTMLParser(encoding='utf-8'))
 # 访问 https://httpbin.org/get?show_env=1 可以返回当前浏览器的请求信息
 options.add_argument('lang=zh_CN.UTF-8')

+# 贴一个用json模块保存cookies
+def getCookies():
+    """Load cookies from ``cookies.json`` and return them in normalized form.
+
+    Each entry keeps only the domain, httpOnly, name, secure and value
+    fields of the stored cookie and forces ``path`` to ``'/'``. Every
+    normalized cookie is also printed, as before.
+
+    Returns:
+        list[dict]: the normalized cookies, in file order.
+    """
+    with open('cookies.json', 'r', encoding='utf-8') as fd:
+        listCookies = json.load(fd)
+    result = []
+    for cookie in listCookies:
+        cookies = {
+            'domain': cookie['domain'],
+            'httpOnly': cookie['httpOnly'],
+            'name': cookie['name'],
+            'path': '/',    # path is always reset to the site root
+            'secure': cookie['secure'],
+            'value': cookie['value'],
+        }
+        print(cookies)
+        result.append(cookies)
+    return result
+
+def saveCookies(driver):
+    """Serialize ``driver.get_cookies()`` to ``cookies.json`` as JSON text.
+
+    Args:
+        driver: any object exposing ``get_cookies()`` that returns
+            JSON-serializable data.
+    """
+    # Serialize before opening the file so a failure here cannot leave a
+    # truncated cookies.json behind.
+    jsonCookies = json.dumps(driver.get_cookies())
+    # The with-block closes the file on exit; no explicit close() is needed.
+    with open('cookies.json', 'w', encoding='utf-8') as fd:
+        fd.write(jsonCookies)
+
 ```

 ChromeDriver

 下载 [ChromeDriver](https://chromedriver.chromium.org/home) 放到当前目录就行(如果是放在 python 根目录可以不用在实例化 selenium 时指定chromedriver 路径)

-### Redis
+### Requests

-[介绍，配置](C:\Users\wkyuu\Desktop\my\SQL\Redis\Redis - NoSql高速缓存数据库.md)
+经典老碟
+
+```python
+import requests
+
+```
+
+### Redis

 ```python
 # 安装 redis 模块
@ -208,4 +236,6 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =

 14，[Selenium：添加Cookie的方法](https://cloud.tencent.com/developer/article/1616175)

-15，
+15，
+
+16，
--- a/downloader.py
+++ b/downloader.py
@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-

-import json
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from lxml import etree
 import random
 import settings
-import time
+import requests
+import json

 headers = {
    'User-Agent': random.choice(settings.USER_AGENT)
@ -34,61 +34,14 @@ def getsource(url):
    driver.close()
    return response

-def manmanbuy(url):
-    initChrome = Options()
-    '''
-    initChrome.add_argument('--no-sandbox')
-    initChrome.add_argument('--headless')
-    initChrome.add_argument('--disable-gpu')
-    initChrome.add_argument("disable-cache")
-    initChrome.add_argument('disable-infobars')
-    initChrome.add_argument('log-level=3')    # INFO = 0 WARNING = 1 LOG_ERROR = 2 LOG_FATAL = 3 default is 0
-    initChrome.add_experimental_option("excludeSwitches",['enable-automation','enable-logging'])
-    '''
+def useRequests(url):
+    """Placeholder: currently ignores *url*, does nothing and returns None."""
+    # NOTE(review): presumably meant to fetch url with the requests module
+    # imported at the top of this file — confirm before relying on it.
+    pass

-    # driver = webdriver.Chrome(chrome_options = initChrome, executable_path = './chromedriver.exe')
-    driver = webdriver.Chrome(executable_path = './chromedriver.exe')
-    # driver.get(url)
-    # time.sleep(10)
-    with open('cookies.json', 'r', encoding='utf-8') as fd:
-            listCookies = json.loads(fd.read())
-    for cookie in listCookies:
-        cookies = {
-            'domain': cookie['domain'],
-            'httpOnly': cookie['httpOnly'],
-            'name':cookie['name'],
-            'path':'/',
-            'secure': cookie['secure'],
-            'value':cookie['value'],
-        }
-        print(cookies)
-        driver.add_cookie(cookies)
-    driver.get(url)
-    time.sleep(10)
-    exit()
-    driver.implicitly_wait(10)
-    driver.get(url)
-
-    response = etree.HTML(driver.page_source)
-    response = etree.tostring(response, encoding = "utf-8", pretty_print = True, method = "html")
-    response = response.decode('utf-8')
-
-    driver.close()
-    return response
-
-def saveCookies(driver):
-    jsonCookies = json.dumps(driver.get_cookies())
-    with open('cookies.json', 'w', encoding='utf-8') as fd:
-        fd.write(jsonCookies)
-    fd.close()
-
-
-def buy():
-    jdurl = "https://item.jd.com/10047511027349.html"
-    url = "https://tool.manmanbuy.com/HistoryLowest.aspx?url=" + jdurl
-    # print(url)
-    response = manmanbuy(url)
-    print(response)

 if __name__ == "__main__":
-    buy()
+    # Look up one JD item URL through the vveby.com search page and save the
+    # returned page source to historyPrice.html.
+    jdurl = "https://item.jd.com/10036840192083.html"
+    url = "https://www.vveby.com/search?keyword=" + jdurl
+    # Fetch first, so a failed download does not leave an empty output file.
+    html = getsource(url)
+    # The with-block closes the file on exit; no explicit close() is needed.
+    with open('historyPrice.html', 'w+', encoding = 'utf-8') as fd:
+        fd.write(html)
+    print('done')
--- a/middlewares.py
+++ b/middlewares.py
@ -44,7 +44,7 @@ def isNullRedis() -> bool: # 判断redis中待处理的url为空
    if redisconn.llen(REDIS_LISTNAME) == 0: return True
    else: return False

-def precheck() -> bool: #
+def precheck() -> bool: # 检查redis队列情况
    while redisconn.llen(REDIS_LISTNAME) == 0:
        print("No queue was found!\nPush some urls to the queue using default settings.\nContinue [c] or Exit [q] ?")
        check = str(input())
@ -57,6 +57,11 @@ def precheck() -> bool: #
        else: print("invalid input!")
    return True

+def clearRedis():   # Empties the Redis queue
+    """Remove every pending entry from the Redis list ``REDIS_LISTNAME``.
+
+    Prints a confirmation message when done and returns None.
+    """
+    # Deleting the key drops the whole list in a single command, instead of
+    # one LLEN + LPOP round trip per queued element.
+    redisconn.delete(REDIS_LISTNAME)
+    print("Redis queue has cleared.")
+
 def write2csv(category, response):    # 写入csv文件
    filename_csv = os.getcwd() + "\\Catalogues\\" + FILENAME_CSV.get(category)
    pipelines.write2csv(response, filename_csv)
@ -106,7 +111,7 @@ def mainThread(threadlines = 16, flag = flag):    # 线程数默认为3
        exit()

 if __name__ == '__main__':
-    pass
+    clearRedis()    # running this module as a script empties the Redis queue



@ -133,7 +138,3 @@ def localtest(category): # 本地加载的源码测试
        print("page " + str(page) + " sleep over at " + time.ctime())
        page += 1

-def clearRedis():   # 用于清空Redis队列
-    while not isNullRedis():
-        redisconn.lpop(REDIS_LISTNAME)
-    print("Redis queue has cleared.")