"""Scrape JD.com search results for "口红" (lipstick) with Selenium.

Workflow: get_cookie() performs a one-time manual login and stores the
session cookies in Jdcookie.txt; crawl_page() replays those cookies and
walks a range of search-result pages, appending one CSV row per product
to JD.csv. main() fans pages 1-100 out across four threads, each with
its own WebDriver instance.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import json
import csv
import threading
from time import sleep

# Serializes appends to JD.csv: the four crawl threads share one output
# file, and unsynchronized concurrent writes could interleave rows.
_csv_lock = threading.Lock()


def get_cookie():
    """Open the JD login page, wait for a manual login, dump cookies to Jdcookie.txt."""
    options = Options()
    driver = webdriver.Chrome(options=options)
    url = 'https://passport.jd.com/new/login.aspx?/'
    driver.get(url)
    print("请手动登录京东账号...")
    input("登录完成后,请按回车键继续...")

    cookie_list = driver.get_cookies()
    cookie_str = json.dumps(cookie_list)
    # utf-8 here matches the encoding crawl_page() uses to read the file back.
    with open('Jdcookie.txt', 'w', encoding='utf-8') as f:
        f.write(cookie_str)
    print('Cookie已保存到Jdcookie.txt')
    driver.quit()


def crawl_page(start_page, end_page):
    """Crawl search pages start_page..end_page (inclusive); append rows to JD.csv.

    Each row is [title, price, shop, comment]. Requires Jdcookie.txt
    (see get_cookie). A failure on one product is logged and skipped so
    a single malformed listing does not abort the whole page.
    """
    options = Options()
    driver = webdriver.Chrome(options=options)
    try:
        with open('Jdcookie.txt', mode='r', encoding='utf-8') as f:
            cookie = json.loads(f.read())
        driver.get('https://www.jd.com/')
        driver.delete_all_cookies()
        for item in cookie:
            driver.add_cookie(item)
        driver.refresh()

        for page in range(start_page, end_page + 1):
            # JD numbers result pages oddly: logical page N maps to URL page 2N-1.
            url = f"https://search.jd.com/Search?keyword=%E5%8F%A3%E7%BA%A2&qrst=1&wq=%E5%8F%A3%E7%BA%A2&stock=1&pvid=a2121da231fd4f5e90a6541711da68a0&isList=0&page={page*2-1}"
            driver.get(url)
            sleep(5)  # crude wait for lazy-loaded results; consider WebDriverWait
            goods = driver.find_elements(By.CLASS_NAME, 'gl-i-wrap')
            for good in goods:
                try:
                    title = good.find_element(By.CSS_SELECTOR, '.p-name em').text.strip()
                    price = good.find_element(By.CSS_SELECTOR, '.p-price strong').text.strip()
                    shop = good.find_element(By.CSS_SELECTOR, '.p-shop span a').text.strip()
                    comment = good.find_element(By.CSS_SELECTOR, '.p-commit strong a').text.strip()
                    print('title: ' + title)
                    print('price: ' + price)
                    print('shop: ' + shop)
                    print('comment: ' + comment)
                    # Hold the lock so rows from concurrent threads cannot interleave.
                    with _csv_lock:
                        with open('JD.csv', mode='a+', encoding='UTF-8', newline='') as file:
                            csv.writer(file).writerow([title, price, shop, comment])
                except Exception as e:
                    # Best-effort scrape: report and continue with the next item.
                    print(f"Error: {e}")
            print(f'第{page}页爬取完毕!')
    finally:
        # Always close the browser, even if a page-level call raised above;
        # otherwise each failed thread leaks a Chrome process.
        driver.quit()


def main():
    """Split pages 1-100 across four crawler threads and wait for them all."""
    # Run once first to capture a logged-in session:
    # get_cookie()

    # Four threads, each with its own independent WebDriver instance.
    threads = [
        threading.Thread(target=crawl_page, args=(1, 25)),
        threading.Thread(target=crawl_page, args=(26, 50)),
        threading.Thread(target=crawl_page, args=(51, 75)),
        threading.Thread(target=crawl_page, args=(76, 100)),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()


if __name__ == '__main__':
    main()