You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
3.8 KiB

from selenium import webdriver
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import csv
from selenium.webdriver.common.by import By
from time import sleep
from multiprocessing import Process
# Single Chrome session shared by getcookie() and the __main__ script below.
# NOTE: this runs at import time, so importing the module opens a browser.
driver = webdriver.Chrome()
def getcookie():
    """Log in to JD by hand and save the session cookies to ``Jdcookie.txt``.

    Opens the JD login page in the module-level ``driver``, pauses so the
    user can complete the QR-code login, waits for the redirect to the JD
    home page, and then writes ``driver.get_cookies()`` to disk as JSON.
    The current browser window is closed before returning.
    """
    login_page = 'https://passport.jd.com/new/login.aspx?/'
    home_page = 'https://www.jd.com/'

    # Go straight to the login page on passport.jd.com.
    driver.get(login_page)
    # Give the user time to scan the QR code.
    time.sleep(20)

    # A successful login redirects to the home page; block (up to 20 s)
    # until the browser URL matches it exactly.
    redirect_wait = WebDriverWait(driver, 20)
    redirect_wait.until(EC.url_to_be(home_page))
    # Short pause after the redirect before reading the cookies.
    time.sleep(2)

    # get_cookies() returns a list; serialise it to a JSON string.
    serialized = json.dumps(driver.get_cookies())
    with open('Jdcookie.txt', 'w') as cookie_file:
        cookie_file.write(serialized)

    print('cookie已写入')
    print(driver.current_url)
    driver.close()
def _first_text(parent, css_selector):
    """Return the stripped text of the first match of ``css_selector``.

    The lookup uses ``find_elements`` under ``parent`` so that a product card
    lacking the field yields ``''`` instead of raising and aborting the
    whole crawl.
    """
    matches = parent.find_elements(By.CSS_SELECTOR, css_selector)
    return matches[0].text.strip() if matches else ''


def _scroll_through_page(browser):
    """Scroll down to 7000px in 1000px steps, pausing one second per step.

    Presumably this gives lazily loaded product cards time to render;
    confirm against the live page.
    """
    for offset in range(1000, 8000, 1000):
        # scrollTo takes (x, y): keep x at 0 and move only vertically.
        browser.execute_script(f'window.scrollTo(0,{offset})')
        sleep(1)


if __name__ == '__main__':
    search_keyword = "口红"
    max_pages = 50

    try:
        # getcookie()  # Uncomment for the first login to create Jdcookie.txt.
        # NOTE(review): getcookie() ends with driver.close(), so the steps
        # below probably need a second run with the call commented out again.
        driver.get('https://www.jd.com/')

        # The cookie file holds a JSON list of cookie dicts.
        with open('Jdcookie.txt', mode='r', encoding='utf-8') as f:
            saved_cookies = json.loads(f.read())

        # Replace whatever cookies the fresh session has with the saved ones.
        # The cookies are deliberately not printed: they are credentials.
        driver.delete_all_cookies()
        for item in saved_cookies:
            driver.add_cookie(item)
        driver.refresh()

        # Type the keyword into the search box (found by id) and submit it
        # through the search button (found by class name).
        driver.find_element(By.ID, 'key').send_keys(search_keyword)
        driver.find_element(By.CLASS_NAME, 'button').click()
        driver.implicitly_wait(10)

        # Open the CSV once; newline='' stops the csv module from emitting
        # blank rows on Windows.
        with open('JD.csv', mode='w', encoding='UTF-8', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['商品', '价格','店铺','评论数'])

            for page in range(max_pages):
                sleep(10)
                _scroll_through_page(driver)
                # Extra settle time after the last scroll step.
                sleep(9)

                for good in driver.find_elements(By.CLASS_NAME, 'gl-i-wrap'):
                    title = _first_text(good, '.p-name em')
                    price = _first_text(good, '.p-price strong')
                    shop = _first_text(good, '.p-shop span a')
                    comment = _first_text(good, '.p-commit strong a')
                    print('title: ' + title)
                    print('price: ' + price)
                    print('shop: ' + shop)
                    print('comment: ' + comment)
                    writer.writerow([title, price, shop, comment])
                # Persist each finished page in case a later page fails.
                file.flush()

                next_buttons = driver.find_elements(By.CLASS_NAME, 'pn-next')
                print(f'{page+1}页爬取完毕!')
                if not next_buttons:
                    # No next-page control: stop instead of crashing.
                    break
                next_buttons[0].click()
                print('下一页加载中……')
    finally:
        # Always release the browser, even when a step above raises.
        driver.quit()