You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

115 lines
3.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from selenium import webdriver
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import csv
from selenium.webdriver.common.by import By
from time import sleep
from multiprocessing import Process
# Shared Chrome WebDriver session used by getcookie() and by the main script
# below. It is created at module level, so Chrome is launched as soon as this
# file is executed or imported.
driver = webdriver.Chrome()
# Log in manually once so the script can capture the session cookies and save them.
def getcookie():
    """Wait for a manual JD login and dump the browser cookies to Jdcookie.txt.

    Opens the JD login page in the shared module-level ``driver``, pauses so
    the user can log in (QR-code scan), waits for the redirect to the JD home
    page, then writes the session cookies as JSON to ``Jdcookie.txt`` in the
    current working directory.

    Raises whatever ``WebDriverWait.until`` raises if the home page URL is not
    reached within its 20 second limit.

    NOTE(review): the function ends with ``driver.close()``, while the main
    script keeps using the same ``driver`` after the (commented-out) call to
    this function -- confirm the session is still usable in that flow.
    """
    login_page = 'https://passport.jd.com/new/login.aspx?/'
    home_page = 'https://www.jd.com/'

    # Go straight to the passport.jd.com login page.
    driver.get(login_page)
    # Fixed pause that gives the user time to scan the QR code.
    sleep(20)

    # A successful login redirects to the home page; block until the current
    # URL matches it exactly.
    redirect_wait = WebDriverWait(driver, 20)
    redirect_wait.until(EC.url_to_be(home_page))
    # Pause two more seconds after the login.
    sleep(2)

    # get_cookies() hands back a list; serialise it to a JSON string.
    serialized = json.dumps(driver.get_cookies())
    with open('Jdcookie.txt', 'w') as cookie_file:
        cookie_file.write(serialized)

    print('cookie已写入')
    print(driver.current_url)
    driver.close()
def _text_or_blank(card, selector):
    """Return the stripped text of the first element in *card* matching *selector*.

    Returns '' when nothing matches, so a result card that lacks one field
    yields a blank CSV cell instead of aborting the whole crawl with
    NoSuchElementException. find_elements() is used because it returns an
    empty list rather than raising.
    """
    matches = card.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text.strip() if matches else ''


if __name__ == '__main__':
    # Script flow: restore the saved login cookies, search JD for a keyword,
    # then walk up to 50 result pages and write one CSV row per product.
    # getcookie()  # uncomment for the first run to log in and create Jdcookie.txt
    try:
        driver.get('https://www.jd.com/')
        with open('Jdcookie.txt', mode='r', encoding='utf-8') as f:
            # The file holds the JSON text written by getcookie(): a list of
            # cookie dicts.
            cookie = json.load(f)
        # Drop the cookies of the fresh session before restoring the saved ones.
        driver.delete_all_cookies()
        for item in cookie:
            print(type(item))
            print(item)
            driver.add_cookie(item)
        # Reload so the page is requested again with the restored cookies.
        driver.refresh()

        # Type the keyword into the search box (located by id) and submit it
        # through the element with class "button".
        driver.find_element(By.ID, 'key').send_keys("口红")
        driver.find_element(By.CLASS_NAME, 'button').click()
        driver.implicitly_wait(10)

        # A single handle opened with newline='' serves the whole crawl. The
        # csv module requires newline='' (otherwise rows are separated by
        # blank lines on Windows), and one handle avoids reopening the file
        # for every product.
        with open('JD.csv', mode='w', encoding='UTF-8', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['商品', '价格', '店铺', '评论数'])

            for page in range(0, 50):
                sleep(10)
                # Scroll down in 1000px steps, pausing one second after each.
                # scrollTo takes (x, y), so x stays 0 and only the vertical
                # offset advances.
                for offset in range(1000, 8000, 1000):
                    driver.execute_script('window.scrollTo(0, arguments[0])', offset)
                    sleep(1)
                # Remainder of the 10 second pause after the last scroll step.
                sleep(9)

                goods = driver.find_elements(By.CLASS_NAME, 'gl-i-wrap')
                for good in goods:
                    # A missing field becomes '' (after the implicit wait
                    # above has elapsed) instead of raising.
                    title = _text_or_blank(good, '.p-name em')
                    price = _text_or_blank(good, '.p-price strong')
                    shop = _text_or_blank(good, '.p-shop span a')
                    comment = _text_or_blank(good, '.p-commit strong a')
                    print('title: ' + title)
                    print('price: ' + price)
                    print('shop: ' + shop)
                    print('comment: ' + comment)
                    writer.writerow([title, price, shop, comment])
                # Push this page's rows to disk so an interrupted run keeps them.
                file.flush()

                # Move to the next page; stop cleanly when there is no
                # "next" button instead of raising.
                next_buttons = driver.find_elements(By.CLASS_NAME, 'pn-next')
                if next_buttons:
                    next_buttons[0].click()
                print(f'{page+1}页爬取完毕!')
                if not next_buttons:
                    break
                print('下一页加载中……')
                # time.sleep(5)
    finally:
        # quit() ends the WebDriver session and the browser even when the
        # crawl fails part-way; close() would only close the current window.
        driver.quit()