|
|
@ -4,12 +4,15 @@ from selenium import webdriver
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
from lxml import etree
|
|
|
|
from lxml import etree
|
|
|
|
import random
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from zmq import proxy
|
|
|
|
import settings
|
|
|
|
import settings
|
|
|
|
import requests
|
|
|
|
import requests
|
|
|
|
import json
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
|
|
# Default HTTP headers sent with the request issued in useRequests().
headers = {
    # Random selection from the configured list, currently disabled:
    #'User-Agent': random.choice(settings.USER_AGENT)
    # Always use the entry at index 1 of settings.USER_AGENT.
    'User-Agent': settings.USER_AGENT[1]
}
|
|
|
|
|
|
|
|
|
|
|
|
def getsource(url):
|
|
|
|
def getsource(url):
|
|
|
@ -36,34 +39,30 @@ def getsource(url):
|
|
|
|
|
|
|
|
|
|
|
|
def useRequests(url, timeout=10):
    """Download ``url`` with requests and return the page as pretty-printed HTML.

    The response body is decoded with the encoding requests detects from
    the content (``apparent_encoding``), parsed with ``lxml.etree.HTML``
    and serialised back to a string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The re-serialised HTML document as ``str``.

    Raises:
        SystemExit: If the request, the status check or the parsing fails.
            The error is printed first and the exit status is 1.
    """
    try:
        # The context manager releases the session's connections on exit.
        with requests.Session() as session:
            response = session.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()  # raises HTTPError on 4xx/5xx
            # Decode with the encoding detected from the body itself.
            response.encoding = response.apparent_encoding
            html_text = response.text
        tree = etree.HTML(html_text)
        return etree.tostring(
            tree, encoding='utf-8', pretty_print=True, method='html'
        ).decode('utf-8')
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C is not reported as a
        # download failure; SystemExit(1) signals the failure to the shell
        # without relying on the site-provided exit() helper.
        print(e)
        print("sth wrong in your downloader.useRequests. Exiting...")
        raise SystemExit(1) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: fetch the search page for one product URL.
    search_prefix = r"https://www.vveby.com/search?keyword="
    jdurl = r"https://item.jd.com/10036840192083.html"

    # The product page URL is appended as the `keyword` query value.
    url = search_prefix + jdurl
    print(url)
    # The returned HTML is not used here; the call only exercises the fetch.
    useRequests(url)

    print('done')
|