添加历史价格比对项目

master
wkyuu 3 years ago
parent e88774cc30
commit 8ca463292a

@ -150,15 +150,43 @@ html1 = etree.parse('test.html',etree.HTMLParser(encoding='utf-8'))
# 访问 https://httpbin.org/get?show_env=1 可以返回当前浏览器的请求信息
options.add_argument('lang=zh_CN.UTF-8')
# 贴一个用json模块保存cookies
def getCookies():
    """Print every cookie stored in ``cookies.json`` as a reduced dict.

    The file is expected to contain a JSON list of cookie objects.  For each
    one, only the ``domain``, ``httpOnly``, ``name``, ``secure`` and ``value``
    fields are copied over; ``path`` is always set to ``'/'``.
    """
    with open('cookies.json', 'r', encoding='utf-8') as fd:
        stored = json.loads(fd.read())
    for entry in stored:
        # Key order is kept so the printed dict looks the same as before.
        reduced = {
            'domain': entry['domain'],
            'httpOnly': entry['httpOnly'],
            'name': entry['name'],
            'path': '/',
            'secure': entry['secure'],
            'value': entry['value'],
        }
        print(reduced)
def saveCookies(driver):
    """Dump the cookies of ``driver`` to ``cookies.json`` in the working directory.

    Args:
        driver: Any object exposing ``get_cookies()`` that returns
            JSON-serialisable data (e.g. a Selenium WebDriver).
    """
    # Fetch first, so a failing driver call does not truncate an existing file.
    cookies = driver.get_cookies()
    # The with-statement closes the file; no explicit close() is required.
    with open('cookies.json', 'w', encoding='utf-8') as fd:
        json.dump(cookies, fd)
```
ChromeDriver
下载 [ChromeDriver](https://chromedriver.chromium.org/home) 并放到当前目录即可(如果放在 Python 根目录,则实例化 selenium 时无需指定 chromedriver 路径)
### Redis
### Requests
[介绍,配置](C:\Users\wkyuu\Desktop\my\SQL\Redis\Redis - NoSql高速缓存数据库.md)
经典老碟
```python
import requests
```
### Redis
```python
# 安装 redis 模块
@ -209,3 +237,5 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
14[Selenium添加Cookie的方法](https://cloud.tencent.com/developer/article/1616175)
15
16

@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree
import random
import settings
import time
import requests
import json
headers = {
'User-Agent': random.choice(settings.USER_AGENT)
@ -34,61 +34,14 @@ def getsource(url):
driver.close()
return response
# Starts Chrome via Selenium, adds the cookies stored in cookies.json to the
# session, and opens `url`.
# NOTE(review): this text is a flattened diff view, so indentation is lost and
# lines from both sides of the change are interleaved. The `useRequests` stub
# below is presumably a separate top-level definition, not part of this
# function; confirm against the real file.
def manmanbuy(url):
initChrome = Options()
# The string literal below holds disabled Chrome option setup; it is never
# executed, and `initChrome` is not passed to the driver that is created.
'''
initChrome.add_argument('--no-sandbox')
initChrome.add_argument('--headless')
initChrome.add_argument('--disable-gpu')
initChrome.add_argument("disable-cache")
initChrome.add_argument('disable-infobars')
initChrome.add_argument('log-level=3') # INFO = 0 WARNING = 1 LOG_ERROR = 2 LOG_FATAL = 3 default is 0
initChrome.add_experimental_option("excludeSwitches",['enable-automation','enable-logging'])
'''
# Placeholder with no behavior yet.
def useRequests(url):
pass
# driver = webdriver.Chrome(chrome_options = initChrome, executable_path = './chromedriver.exe')
driver = webdriver.Chrome(executable_path = './chromedriver.exe')
# driver.get(url)
# time.sleep(10)
# Read the saved cookie list and add a reduced copy of each cookie (path
# forced to '/') to the browser session.
# NOTE(review): Selenium normally requires the browser to already be on the
# cookie's domain before add_cookie(); the initial driver.get(url) above is
# commented out. Verify this works as intended.
with open('cookies.json', 'r', encoding='utf-8') as fd:
listCookies = json.loads(fd.read())
for cookie in listCookies:
cookies = {
'domain': cookie['domain'],
'httpOnly': cookie['httpOnly'],
'name':cookie['name'],
'path':'/',
'secure': cookie['secure'],
'value':cookie['value'],
}
print(cookies)
driver.add_cookie(cookies)
driver.get(url)
time.sleep(10)
# NOTE(review): exit() ends the interpreter here, so the statements below look
# unreachable and the driver is never closed on this path. Confirm intent.
exit()
driver.implicitly_wait(10)
driver.get(url)
# Re-serialise the rendered page through lxml and return it as a UTF-8 string.
response = etree.HTML(driver.page_source)
response = etree.tostring(response, encoding = "utf-8", pretty_print = True, method = "html")
response = response.decode('utf-8')
driver.close()
return response
def saveCookies(driver):
    """Write the cookies reported by ``driver`` to ``cookies.json`` as JSON.

    Args:
        driver: Any object exposing ``get_cookies()`` that returns
            JSON-serialisable data (e.g. a Selenium WebDriver).
    """
    # Query the driver before opening the file, so an error while fetching
    # cookies leaves any previously saved file untouched.
    cookies = driver.get_cookies()
    # Leaving the with-block closes the file; an explicit close() is redundant.
    with open('cookies.json', 'w', encoding='utf-8') as fd:
        json.dump(cookies, fd)
def buy():
    """Fetch the manmanbuy history page for one fixed JD item and print it."""
    item_url = "https://item.jd.com/10047511027349.html"
    # The item URL is appended unmodified as the `url` query parameter.
    lookup_url = "https://tool.manmanbuy.com/HistoryLowest.aspx?url=" + item_url
    page_source = manmanbuy(lookup_url)
    print(page_source)
if __name__ == "__main__":
    # NOTE(review): `buy()` and the statements below appear to come from
    # different sides of the diff; confirm that both are meant to run.
    buy()
    # Fetch the vveby.com search page for a JD item URL and save the returned
    # source to historyPrice.html.
    jdurl = "https://item.jd.com/10036840192083.html"
    url = "https://www.vveby.com/search?keyword=" + jdurl
    # The with-statement closes the file, so no explicit fd.close() is needed.
    with open('historyPrice.html', 'w+', encoding = 'utf-8') as fd:
        fd.write(getsource(url))
    print('done')

@ -44,7 +44,7 @@ def isNullRedis() -> bool: # 判断redis中待处理的url为空
if redisconn.llen(REDIS_LISTNAME) == 0: return True
else: return False
def precheck() -> bool: #
def precheck() -> bool: # 检查redis队列情况
while redisconn.llen(REDIS_LISTNAME) == 0:
print("No queue was found!\nPush some urls to the queue using default settings.\nContinue [c] or Exit [q] ?")
check = str(input())
@ -57,6 +57,11 @@ def precheck() -> bool: #
else: print("invalid input!")
return True
def clearRedis():  # Empty the Redis queue
    """Remove every pending entry from the Redis list ``REDIS_LISTNAME``.

    A single DEL replaces the pop-until-empty loop: it needs one round trip
    instead of one per element, and it is atomic, so a concurrent producer
    cannot keep the loop running.  Deleting a missing key is a no-op.
    """
    redisconn.delete(REDIS_LISTNAME)
    print("Redis queue has cleared.")
def write2csv(category, response):  # Write to the csv file
    """Write ``response`` to the CSV file registered for ``category``.

    The target is ``<cwd>/Catalogues/<name>``, where ``<name>`` is looked up
    in ``FILENAME_CSV``.  The path is built with ``os.path.join`` so it does
    not depend on the Windows ``\\`` separator.  An unknown category still
    raises ``TypeError``, because the lookup yields ``None``.
    """
    filename_csv = os.path.join(os.getcwd(), "Catalogues", FILENAME_CSV.get(category))
    pipelines.write2csv(response, filename_csv)
@ -106,7 +111,7 @@ def mainThread(threadlines = 16, flag = flag): # 线程数默认为3
exit()
# Script entry point: when run directly, calls clearRedis().
# NOTE(review): `pass` followed by `clearRedis()` looks like the old and new
# side of a one-line diff shown together; confirm which line the real file
# contains.
if __name__ == '__main__':
pass
clearRedis()
@ -133,7 +138,3 @@ def localtest(category): # 本地加载的源码测试
print("page " + str(page) + " sleep over at " + time.ctime())
page += 1
def clearRedis():  # Empty the Redis queue
    """Drop all entries queued under ``REDIS_LISTNAME`` in Redis.

    Issues one DEL for the key rather than popping elements one at a time,
    which saves a round trip per element and clears the list atomically.
    DEL on a key that does not exist is harmless.
    """
    redisconn.delete(REDIS_LISTNAME)
    print("Redis queue has cleared.")
Loading…
Cancel
Save