You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.1 KiB

3 years ago
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree
import random
from zmq import proxy
import settings
import requests
import json
3 years ago
# Default request headers used by the downloader. The User-Agent is pinned to
# entry 1 of settings.USER_AGENT; to rotate agents instead, pick one with
# random.choice(settings.USER_AGENT).
headers = {'User-Agent': settings.USER_AGENT[1]}
def getsource(url):
    """Load *url* in headless Chrome and return the page as an HTML string.

    The page source reported by the browser is parsed with ``etree.HTML`` and
    serialised again (pretty-printed, ``html`` method), so the result is the
    lxml-normalised form of the markup rather than the raw bytes served.

    Args:
        url: Address of the page to open.

    Returns:
        The serialised document as a ``str`` (decoded from UTF-8).

    Raises:
        Any exception raised by Selenium or lxml is propagated; the browser
        session is shut down first in every case.
    """
    initChrome = Options()
    initChrome.add_argument('--no-sandbox')
    initChrome.add_argument('--headless')
    initChrome.add_argument('--disable-gpu')
    initChrome.add_argument("disable-cache")
    initChrome.add_argument('disable-infobars')
    # Chrome log levels: INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3
    # (default is 0); 3 keeps the console quiet.
    initChrome.add_argument('log-level=3')
    initChrome.add_experimental_option(
        "excludeSwitches", ['enable-automation', 'enable-logging']
    )

    # The driver binary is looked up relative to the current working directory.
    driver = webdriver.Chrome(
        chrome_options=initChrome, executable_path='./chromedriver.exe'
    )
    try:
        driver.implicitly_wait(10)
        driver.get(url)
        page_source = driver.page_source
    finally:
        # quit() ends the whole WebDriver session, whereas close() only closes
        # the current window. Running it in ``finally`` means a failed page
        # load no longer leaves a browser/driver process behind.
        driver.quit()

    tree = etree.HTML(page_source)
    markup = etree.tostring(
        tree, encoding="utf-8", pretty_print=True, method="html"
    )
    return markup.decode('utf-8')
def useRequests(url, timeout=30):
    """Download *url* with ``requests`` and return it as an HTML string.

    The response is decoded using the encoding detected from its content,
    parsed with ``etree.HTML`` and serialised again (pretty-printed, ``html``
    method).

    Args:
        url: Address to fetch. The module-level ``headers`` are sent with it.
        timeout: Value passed to ``requests`` as the request timeout, in
            seconds. Without one a stalled server would block forever.

    Returns:
        The serialised document as a ``str`` (decoded from UTF-8).

    Raises:
        SystemExit: If downloading or parsing fails. The error and a short
            notice are printed first, and the exit status is 1 so the failure
            is visible to the calling shell.
    """
    def write2html(res):
        """Debug aid: write the markup string *res* to ``historyPrice.html``."""
        filename = 'historyPrice.html'
        with open(filename, mode='w+', encoding='utf-8') as fd:
            fd.write(res)

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as session:
            res = session.get(url, headers=headers, timeout=timeout)
            # Raise on 4xx/5xx responses instead of parsing an error page.
            res.raise_for_status()
            res.encoding = res.apparent_encoding
            text = res.text

        tree = etree.HTML(text)
        source = etree.tostring(
            tree, encoding='utf-8', pretty_print=True, method='html'
        ).decode('utf-8')
        # For debugging, write2html(source) can be called here to keep a copy
        # of the page on disk.
        return source
    except Exception as e:
        # ``Exception`` rather than ``BaseException`` so that Ctrl-C and
        # interpreter shutdown are not reported as download errors.
        print(e)
        print("sth wrong in your downloader.useRequests. Exiting...")
        # Raised directly so the function does not depend on the ``site``
        # module's interactive ``exit`` helper.
        raise SystemExit(1) from e
3 years ago
if __name__ == "__main__":
    # Manual smoke test: look up one JD product page through the vveby search
    # endpoint and fetch the result page with the requests-based downloader.
    product_page = r"https://item.jd.com/10036840192083.html"
    search_prefix = r"https://www.vveby.com/search?keyword="
    search_url = search_prefix + product_page

    print(search_url)
    useRequests(search_url)
    print('done')