# -*- coding: utf-8 -*-
"""Page downloaders: a headless-Chrome fetcher and a plain ``requests`` fetcher."""
import json
import random
import sys

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import settings

# A single User-Agent is chosen when the module is imported and is then
# reused for every request made through useRequests().
headers = {
    'User-Agent': random.choice(settings.USER_AGENT)
}


def getsource(url):
    """Load *url* in headless Chrome and return the page as an HTML string.

    The page source reported by the browser is parsed with lxml and
    re-serialised as pretty-printed HTML.

    Args:
        url: Address of the page to load.

    Returns:
        The serialised page markup as a ``str``.
    """
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument("disable-cache")
    chrome_options.add_argument('disable-infobars')
    # INFO = 0 WARNING = 1 LOG_ERROR = 2 LOG_FATAL = 3 default is 0
    chrome_options.add_argument('log-level=3')
    chrome_options.add_experimental_option(
        "excludeSwitches", ['enable-automation', 'enable-logging'])

    # NOTE(review): `chrome_options=` and `executable_path=` are deprecated
    # in newer Selenium releases; they are kept unchanged here because the
    # installed Selenium version is not visible from this file.
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path='./chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get(url)
        page_source = driver.page_source
    finally:
        # Always end the browser session, even when loading the page fails,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()

    tree = etree.HTML(page_source)
    markup = etree.tostring(tree, encoding="utf-8", pretty_print=True,
                            method="html")
    return markup.decode('utf-8')


def _save_cookies(response):
    """Write the cookies of *response* to ``settings.COOKIES_FILENAME`` as JSON."""
    cookies = dict(response.cookies.items())
    # Overwrite instead of appending: appending a second JSON object to the
    # same file would leave it unparseable as a single JSON document.
    with open(settings.COOKIES_FILENAME, mode='w', encoding='utf-8') as fd:
        fd.write(json.dumps(cookies))
    print("Cookies saved!")


def useRequests(url, timeout=30):
    """Fetch *url* with ``requests`` and persist the cookies it returns.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The ``requests.Response`` object, with ``encoding`` set to the
        response's ``apparent_encoding``.

    Raises:
        SystemExit: With status 1 when the request fails, the server
            answers with an error status, or the cookie file cannot be
            written.
    """
    try:
        res = requests.get(url, headers=headers, timeout=timeout)
        res.raise_for_status()  # raise HTTPError on 4xx/5xx responses
        res.encoding = res.apparent_encoding
        print(res.cookies)
        _save_cookies(res)
    except (requests.RequestException, OSError) as e:
        print(e)
        print("sth wrong in your downloader.useRequests. Exiting...")
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)
    return res


def main():
    """Download the price-history search page for a sample JD item."""
    # Alternative sample item: https://item.jd.com/10036840192083.html
    jdurl = r"https://item.jd.com/59162092942.html"
    url = r"https://www.vveby.com/search?keyword=" + jdurl
    print(url)
    # Fetch first so that a failed download does not truncate an existing
    # output file.
    page = useRequests(url).text
    with open('newhistoryPrice.html', 'w', encoding='utf-8') as fd:
        fd.write(page)
    print('done')


if __name__ == "__main__":
    main()