"""Scrape JD.com search results for a keyword into a CSV file with Selenium.

The script restores a previously saved login session from a cookie file,
searches for a keyword, walks through the result pages and writes one CSV
row per product (title, price, shop, comment count).
"""
import csv
import json
import time
from multiprocessing import Process
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

LOGIN_URL = 'https://passport.jd.com/new/login.aspx?/'
HOME_URL = 'https://www.jd.com/'
COOKIE_FILE = 'Jdcookie.txt'
OUTPUT_FILE = 'JD.csv'
MAX_PAGES = 50

# Shared browser session used by every function in this script.
driver = webdriver.Chrome()


def getcookie(close_driver=True):
    """Log in manually once and save the session cookies to COOKIE_FILE.

    Opens the login page, pauses so the user can scan the QR code, waits
    until the browser has been redirected to the home page, then dumps the
    browser cookies as JSON.

    Args:
        close_driver: When true (the default, matching the historical
            behaviour) the browser window is closed afterwards. Pass
            ``False`` when the same ``driver`` is going to be used again,
            e.g. when this is called right before the scraping flow.

    Raises:
        selenium.common.exceptions.TimeoutException: if the browser has not
            reached HOME_URL within the wait period.
    """
    # Go straight to the login page.
    driver.get(LOGIN_URL)
    time.sleep(20)  # time for the user to scan the QR code

    # After login the site redirects to the home page; wait for an exact
    # URL match before reading the cookies.
    WebDriverWait(driver, 20).until(EC.url_to_be(HOME_URL))
    time.sleep(2)  # short pause after login before reading cookies

    # get_cookies() returns a list of dicts; store it as JSON text.
    cookie_list = driver.get_cookies()
    with open(COOKIE_FILE, 'w', encoding='utf-8') as f:
        json.dump(cookie_list, f)
    print('cookie已写入')
    print(driver.current_url)

    if close_driver:
        driver.close()


def _restore_session():
    """Open the home page and replace its cookies with the saved ones."""
    driver.get(HOME_URL)
    with open(COOKIE_FILE, mode='r', encoding='utf-8') as f:
        # The file holds a JSON list of cookie dicts.
        cookies = json.load(f)

    # Drop whatever cookies the fresh page set before restoring ours.
    driver.delete_all_cookies()
    for item in cookies:
        print(type(item))
        print(item)
        driver.add_cookie(item)
    driver.refresh()


def _scroll_page():
    """Scroll down in 1000px steps so lazily loaded products get rendered."""
    offsets = range(1000, 7001, 1000)
    for y in offsets:
        # x stays 0: only vertical scrolling is wanted.
        driver.execute_script(f'window.scrollTo(0,{y})')
        # Short pause between steps, longer one after the final step so
        # the remaining items can finish loading.
        sleep(10 if y == offsets[-1] else 1)


def _text_or_empty(parent, css_selector):
    """Return the stripped text of a child element, or '' if it is absent.

    NOTE: with the implicit wait enabled, a missing element is presumably
    only reported after that wait has elapsed.
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except NoSuchElementException:
        return ''


def _scrape_goods():
    """Yield [title, price, shop, comment] for each product on the page."""
    for good in driver.find_elements(By.CLASS_NAME, 'gl-i-wrap'):
        title = _text_or_empty(good, '.p-name em')
        price = _text_or_empty(good, '.p-price strong')
        shop = _text_or_empty(good, '.p-shop span a')
        comment = _text_or_empty(good, '.p-commit strong a')
        print('title: ' + title)
        print('price: ' + price)
        print('shop: ' + shop)
        print('comment: ' + comment)
        yield [title, price, shop, comment]


def main():
    """Restore the login session, run the search and export all pages."""
    try:
        # getcookie(close_driver=False)  # uncomment on the first run to log in
        _restore_session()

        # Set the implicit wait before any lookup so slow page loads do not
        # make the search box lookup fail immediately.
        driver.implicitly_wait(10)
        # Find the search box by id and type the keyword.
        driver.find_element(By.ID, 'key').send_keys("口红")
        # Find the submit button by class name and click it.
        driver.find_element(By.CLASS_NAME, 'button').click()

        # Open the CSV once; newline='' is required by the csv module so
        # that no extra blank lines are produced on Windows.
        with open(OUTPUT_FILE, mode='w', encoding='UTF-8', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['商品', '价格','店铺','评论数'])

            for page in range(MAX_PAGES):
                sleep(10)
                _scroll_page()
                for row in _scrape_goods():
                    writer.writerow(row)
                # Persist each finished page in case a later page fails.
                file.flush()
                print(f'第{page+1}页爬取完毕!')

                try:
                    next_button = driver.find_element(By.CLASS_NAME, 'pn-next')
                except NoSuchElementException:
                    # No "next page" control: nothing left to crawl.
                    break
                next_button.click()
                print('下一页加载中……')
    finally:
        # quit() ends the whole WebDriver session (all windows and the
        # driver process), also when the crawl stops on an error.
        driver.quit()


if __name__ == '__main__':
    main()