Delete 'new.py'

main
pkyftzsbu 10 months ago
parent 69e572cee8
commit eade3df466

@ -1,82 +0,0 @@
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import json
import csv
import threading
from time import sleep
def get_cookie():
    """Open a Chrome window for a manual JD (jd.com) login, then dump the
    session cookies to ``Jdcookie.txt`` as a JSON array.

    Blocks on ``input()`` until the user confirms the login is complete.
    The file is written with UTF-8 so ``crawl_page`` (which reads it back
    with ``encoding='utf-8'``) can always load it.
    """
    options = Options()
    driver = webdriver.Chrome(options=options)
    try:
        url = 'https://passport.jd.com/new/login.aspx?/'
        driver.get(url)
        print("请手动登录京东账号...")
        input("登录完成后,请按回车键继续...")
        cookie_list = driver.get_cookies()
        cookie_str = json.dumps(cookie_list)
        # encoding must match the utf-8 read in crawl_page()
        with open('Jdcookie.txt', 'w', encoding='utf-8') as f:
            f.write(cookie_str)
        print('Cookie已保存到Jdcookie.txt')
    finally:
        # Always release the browser, even if the user aborts mid-login.
        driver.quit()
def crawl_page(start_page, end_page):
    """Scrape JD search results (keyword: 口红 / lipstick) for logical pages
    ``start_page`` through ``end_page`` inclusive, appending one CSV row
    per product (title, price, shop, comment count) to ``JD.csv``.

    Requires a valid ``Jdcookie.txt`` produced by ``get_cookie``. Each call
    runs its own WebDriver instance, so multiple calls are thread-safe with
    respect to the browser (the shared CSV is append-only).
    """
    options = Options()
    driver = webdriver.Chrome(options=options)
    try:
        # Load the saved login session and inject it before crawling.
        with open('Jdcookie.txt', mode='r', encoding='utf-8') as f:
            cookie = json.loads(f.read())
        driver.get('https://www.jd.com/')
        driver.delete_all_cookies()
        for item in cookie:
            driver.add_cookie(item)
        driver.refresh()
        for page in range(start_page, end_page + 1):
            # JD's URL paginates in half-pages: logical page N is URL page 2N-1.
            url = f"https://search.jd.com/Search?keyword=%E5%8F%A3%E7%BA%A2&qrst=1&wq=%E5%8F%A3%E7%BA%A2&stock=1&pvid=a2121da231fd4f5e90a6541711da68a0&isList=0&page={page*2-1}"
            driver.get(url)
            sleep(5)  # crude wait for lazy-loaded results; TODO: explicit wait
            goods = driver.find_elements(By.CLASS_NAME, 'gl-i-wrap')
            for good in goods:
                try:
                    title = good.find_element(By.CSS_SELECTOR, '.p-name em').text.strip()
                    price = good.find_element(By.CSS_SELECTOR, '.p-price strong').text.strip()
                    shop = good.find_element(By.CSS_SELECTOR, '.p-shop span a').text.strip()
                    comment = good.find_element(By.CSS_SELECTOR, '.p-commit strong a').text.strip()
                    print('title: ' + title)
                    print('price: ' + price)
                    print('shop: ' + shop)
                    print('comment: ' + comment)
                    with open('JD.csv', mode='a+', encoding='UTF-8', newline='') as file:
                        csv.writer(file).writerow([title, price, shop, comment])
                except Exception as e:
                    # Best-effort scrape: a malformed card shouldn't kill the page.
                    print(f"Error: {e}")
            print(f'{page}页爬取完毕!')
    finally:
        # Guarantee the browser process is released even if a page load,
        # cookie injection, or file read raises.
        driver.quit()
def main():
    """Fan out the crawl across four threads, each owning an independent
    WebDriver instance and a disjoint 25-page range (pages 1-100 total).
    """
    # Run get_cookie() once first to capture a login session, then comment
    # it back out for subsequent crawl runs.
    # get_cookie()
    # Four threads, each with its own WebDriver; ranges must not overlap.
    page_ranges = [(1, 25), (26, 50), (51, 75), (76, 100)]
    threads = [threading.Thread(target=crawl_page, args=r) for r in page_ranges]
    for t in threads:
        t.start()
    # Wait for every worker to finish before exiting.
    for t in threads:
        t.join()


if __name__ == '__main__':
    main()
Loading…
Cancel
Save