添加历史价格比对项目

master
wkyuu 3 years ago
parent e88774cc30
commit 8ca463292a

@ -150,15 +150,43 @@ html1 = etree.parse('test.html',etree.HTMLParser(encoding='utf-8'))
# 访问 https://httpbin.org/get?show_env=1 可以返回当前浏览器的请求信息 # 访问 https://httpbin.org/get?show_env=1 可以返回当前浏览器的请求信息
options.add_argument('lang=zh_CN.UTF-8') options.add_argument('lang=zh_CN.UTF-8')
# 贴一个用 json 模块读取、保存 cookies 的示例
def getCookies():
    """Load the cookies stored in ``cookies.json`` and normalise them.

    Reads the JSON list from ``cookies.json`` (relative to the current
    working directory), copies the ``domain``, ``httpOnly``, ``name``,
    ``secure`` and ``value`` fields of every entry and forces ``path`` to
    ``'/'``.  Each normalised cookie is printed, as before.

    Returns:
        list[dict]: the normalised cookies in file order.  The previous
        version built these dicts and then discarded them.

    Raises:
        FileNotFoundError: if ``cookies.json`` does not exist.
        KeyError: if a stored cookie lacks one of the copied fields.
    """
    with open('cookies.json', 'r', encoding='utf-8') as fd:
        listCookies = json.load(fd)

    normalised = []
    for cookie in listCookies:
        cookies = {
            'domain': cookie['domain'],
            'httpOnly': cookie['httpOnly'],
            'name': cookie['name'],
            'path': '/',
            'secure': cookie['secure'],
            'value': cookie['value'],
        }
        print(cookies)
        normalised.append(cookies)
    return normalised
def saveCookies(driver):
    """Write the driver's current cookies to ``cookies.json`` as JSON.

    Args:
        driver: any object exposing ``get_cookies()`` that returns
            JSON-serialisable data.
    """
    # Serialise first so a failure here does not truncate an existing file.
    jsonCookies = json.dumps(driver.get_cookies())
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('cookies.json', 'w', encoding='utf-8') as fd:
        fd.write(jsonCookies)
``` ```
ChromeDriver ChromeDriver
下载 [ChromeDriver](https://chromedriver.chromium.org/home) 放到当前目录就行(如果是放在 python 根目录可以不用在实例化 selenium 时指定chromedriver 路径) 下载 [ChromeDriver](https://chromedriver.chromium.org/home) 放到当前目录就行(如果是放在 python 根目录可以不用在实例化 selenium 时指定chromedriver 路径)
### Redis ### Requests
[介绍,配置](C:\Users\wkyuu\Desktop\my\SQL\Redis\Redis - NoSql高速缓存数据库.md) 经典老碟
```python
import requests
```
### Redis
```python ```python
# 安装 redis 模块 # 安装 redis 模块
@ -208,4 +236,6 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
14[Selenium添加Cookie的方法](https://cloud.tencent.com/developer/article/1616175) 14[Selenium添加Cookie的方法](https://cloud.tencent.com/developer/article/1616175)
15 15
16

@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from lxml import etree from lxml import etree
import random import random
import settings import settings
import time import requests
import json
headers = { headers = {
'User-Agent': random.choice(settings.USER_AGENT) 'User-Agent': random.choice(settings.USER_AGENT)
@ -34,61 +34,14 @@ def getsource(url):
driver.close() driver.close()
return response return response
def manmanbuy(url):
    """Load ``url`` in Chrome with the saved cookies applied and return its HTML.

    The cookies come from ``cookies.json`` in the current working directory.
    The browser runs with default options (visible window); the headless
    switches tried earlier (``--headless``, ``--no-sandbox``,
    ``--disable-gpu``, ...) stay disabled.

    Args:
        url: address of the page to fetch.

    Returns:
        str: the page source re-serialised by lxml as pretty-printed HTML.

    Raises:
        FileNotFoundError: if ``cookies.json`` does not exist.
        KeyError: if a stored cookie lacks one of the copied fields.
    """
    driver = webdriver.Chrome(executable_path = './chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        # Selenium only accepts cookies for the domain of the page that is
        # currently loaded, so open the page once before adding them.
        driver.get(url)
        with open('cookies.json', 'r', encoding='utf-8') as fd:
            listCookies = json.load(fd)
        for cookie in listCookies:
            cookies = {
                'domain': cookie['domain'],
                'httpOnly': cookie['httpOnly'],
                'name': cookie['name'],
                'path': '/',
                'secure': cookie['secure'],
                'value': cookie['value'],
            }
            print(cookies)
            driver.add_cookie(cookies)
        # Reload so the request is made with the cookies attached.  The old
        # sleep + exit() debugging stop that made the code below unreachable
        # has been removed.
        driver.get(url)
        response = etree.HTML(driver.page_source)
        response = etree.tostring(response, encoding = "utf-8", pretty_print = True, method = "html")
        return response.decode('utf-8')
    finally:
        # Release the browser window even when loading or parsing fails.
        driver.close()


def useRequests(url):
    """Placeholder for fetching ``url`` with ``requests``; not implemented yet."""
    pass
def saveCookies(driver):
    """Dump the cookies currently held by ``driver`` into ``cookies.json``.

    Args:
        driver: any object exposing ``get_cookies()`` that returns
            JSON-serialisable data.
    """
    # Build the JSON text before opening the file, so an error while
    # collecting cookies leaves any existing file untouched.
    jsonCookies = json.dumps(driver.get_cookies())
    # Leaving the ``with`` block closes the file; the extra close() is gone.
    with open('cookies.json', 'w', encoding='utf-8') as fd:
        fd.write(jsonCookies)
def buy(jdurl = "https://item.jd.com/10047511027349.html"):
    """Print the manmanbuy price-history page for a JD product.

    Args:
        jdurl: product page URL to look up; defaults to the item that was
            previously hard-coded, so ``buy()`` behaves as before.
    """
    url = "https://tool.manmanbuy.com/HistoryLowest.aspx?url=" + jdurl
    response = manmanbuy(url)
    print(response)
if __name__ == "__main__":
    # Look up one JD product on vveby and keep the rendered page on disk.
    jdurl = "https://item.jd.com/10036840192083.html"
    url = "https://www.vveby.com/search?keyword=" + jdurl
    # The file is only written, so plain 'w' suffices; the ``with`` block
    # closes it, making an explicit close() unnecessary.
    with open('historyPrice.html', 'w', encoding = 'utf-8') as fd:
        fd.write(getsource(url))
    print('done')

@ -44,7 +44,7 @@ def isNullRedis() -> bool: # 判断redis中待处理的url为空
if redisconn.llen(REDIS_LISTNAME) == 0: return True if redisconn.llen(REDIS_LISTNAME) == 0: return True
else: return False else: return False
def precheck() -> bool: # def precheck() -> bool: # 检查redis队列情况
while redisconn.llen(REDIS_LISTNAME) == 0: while redisconn.llen(REDIS_LISTNAME) == 0:
print("No queue was found!\nPush some urls to the queue using default settings.\nContinue [c] or Exit [q] ?") print("No queue was found!\nPush some urls to the queue using default settings.\nContinue [c] or Exit [q] ?")
check = str(input()) check = str(input())
@ -57,6 +57,11 @@ def precheck() -> bool: #
else: print("invalid input!") else: print("invalid input!")
return True return True
def clearRedis(): # Empty the Redis work queue
    """Remove every pending entry from the ``REDIS_LISTNAME`` list.

    A single ``DEL`` replaces the previous loop, which needed two round
    trips (length check + ``LPOP``) per queued element.  The end state is
    the same, because Redis drops a list key once the list is empty.
    """
    redisconn.delete(REDIS_LISTNAME)
    print("Redis queue has cleared.")
def write2csv(category, response): # 写入csv文件 def write2csv(category, response): # 写入csv文件
filename_csv = os.getcwd() + "\\Catalogues\\" + FILENAME_CSV.get(category) filename_csv = os.getcwd() + "\\Catalogues\\" + FILENAME_CSV.get(category)
pipelines.write2csv(response, filename_csv) pipelines.write2csv(response, filename_csv)
@ -106,7 +111,7 @@ def mainThread(threadlines = 16, flag = flag): # 线程数默认为3
exit() exit()
if __name__ == '__main__':
    # Running this module directly empties the Redis queue.
    clearRedis()
@ -133,7 +138,3 @@ def localtest(category): # 本地加载的源码测试
print("page " + str(page) + " sleep over at " + time.ctime()) print("page " + str(page) + " sleep over at " + time.ctime())
page += 1 page += 1
def clearRedis(): # Empty the Redis work queue
    """Drop all pending entries stored under ``REDIS_LISTNAME``.

    Uses one ``DEL`` command instead of popping elements one at a time
    (a length check plus an ``LPOP`` per element).  Redis removes a list
    key as soon as the list is empty, so the result is identical.
    """
    redisconn.delete(REDIS_LISTNAME)
    print("Redis queue has cleared.")
Loading…
Cancel
Save