"""jingdong.py -- scrape JD.com search results into ``JD.csv`` with Selenium.

Workflow:

1. On the very first run, call :func:`getcookie` (see the commented-out call
   in :func:`main`) and log in manually; the session cookies are saved to
   ``Jdcookie.txt``.
2. On later runs the saved cookies are injected into the browser, the
   keyword is searched, and up to ``MAX_PAGES`` result pages are written to
   ``JD.csv`` as (title, price, shop, comment count) rows.
"""
import csv
import json
import time
from multiprocessing import Process
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

LOGIN_URL = 'https://passport.jd.com/new/login.aspx?/'
HOME_URL = 'https://www.jd.com/'
COOKIE_FILE = 'Jdcookie.txt'
CSV_FILE = 'JD.csv'
SEARCH_KEYWORD = "口红"
MAX_PAGES = 50

# Shared browser session used by every function in this script.
driver = webdriver.Chrome()


def getcookie():
    """Let the user log in by hand and save the session cookies to disk.

    Opens the JD login page, pauses 20 seconds so the user can scan the QR
    code, then waits up to 20 more seconds for the browser URL to become
    exactly ``https://www.jd.com/``.  The cookies of that session are
    written to ``Jdcookie.txt`` as a JSON list of dicts.

    The current browser window is closed at the end, so the shared
    ``driver`` should not be used for further navigation afterwards: run
    the script again with the call to this function commented out.

    Raises:
        selenium.common.exceptions.TimeoutException: the post-login
            redirect did not happen within the wait window.
    """
    # Go straight to the login page (passport.jd.com).
    driver.get(LOGIN_URL)
    # Time for the user to scan the QR code.
    time.sleep(20)
    # After login the site redirects to the home page; block until the URL
    # matches exactly.
    WebDriverWait(driver, 20).until(EC.url_to_be(HOME_URL))
    # Short pause after the redirect before reading the cookies.
    time.sleep(2)

    # get_cookies() returns a list of dicts; store it as JSON text.  The
    # file is written as UTF-8 because main() reads it back as UTF-8.
    cookies = driver.get_cookies()
    with open(COOKIE_FILE, 'w', encoding='utf-8') as f:
        f.write(json.dumps(cookies))

    print('cookie已写入')
    print(driver.current_url)
    driver.close()


def _text_or_empty(parent, css_selector):
    """Return the stripped text of the first match under *parent*, or ''.

    A missing element is reported as an empty string so that one incomplete
    product card does not abort the whole crawl.  Note that with an implicit
    wait configured, each missing element costs up to that wait.
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except NoSuchElementException:
        return ''


def _restore_login():
    """Load the cookies saved by getcookie() into the current session."""
    # Cookies can only be added for the domain that is currently loaded.
    driver.get(HOME_URL)
    with open(COOKIE_FILE, mode='r', encoding='utf-8') as f:
        cookies = json.load(f)  # a list of cookie dicts

    # Start from a clean slate, then inject the saved cookies.
    driver.delete_all_cookies()
    for item in cookies:
        print(type(item))
        print(item)
        driver.add_cookie(item)

    # Reload so the page is rendered with the restored cookies.
    driver.refresh()


def _scroll_down_page():
    """Scroll down in 1000px steps (1s apart) to trigger lazy loading."""
    for offset in range(1000, 7001, 1000):
        # scrollTo takes (x, y): keep x at 0 and move only vertically.
        driver.execute_script('window.scrollTo(0, arguments[0])', offset)
        sleep(1)


def _scrape_current_page(writer):
    """Print every product card on the current page and write it to *writer*."""
    for good in driver.find_elements(By.CLASS_NAME, 'gl-i-wrap'):
        title = _text_or_empty(good, '.p-name em')
        price = _text_or_empty(good, '.p-price strong')
        shop = _text_or_empty(good, '.p-shop span a')
        comment = _text_or_empty(good, '.p-commit strong a')
        print('title: ' + title)
        print('price: ' + price)
        print('shop: ' + shop)
        print('comment: ' + comment)
        writer.writerow([title, price, shop, comment])


def main():
    """Search JD for ``SEARCH_KEYWORD`` and dump the result pages to CSV."""
    # getcookie()  # uncomment for the first login

    try:
        _restore_login()

        # Find the search box by id and type the keyword, then submit via
        # the element with class "button".
        driver.find_element(By.ID, 'key').send_keys(SEARCH_KEYWORD)
        driver.find_element(By.CLASS_NAME, 'button').click()
        driver.implicitly_wait(10)

        # One handle for the whole crawl.  newline='' is required by the csv
        # module; without it extra blank rows appear on Windows.
        with open(CSV_FILE, mode='w', encoding='UTF-8', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['商品', '价格', '店铺', '评论数'])

            for page in range(0, MAX_PAGES):
                sleep(10)
                _scroll_down_page()
                sleep(10)

                _scrape_current_page(writer)
                # Persist each finished page so an interrupted run keeps it.
                file.flush()

                try:
                    driver.find_element(By.CLASS_NAME, 'pn-next').click()
                except NoSuchElementException:
                    # No "next page" control: this was the last page.
                    print(f'第{page+1}页爬取完毕!')
                    break
                print(f'第{page+1}页爬取完毕!')
                print('下一页加载中……')
    finally:
        # quit() ends the whole WebDriver session, even after an error.
        driver.quit()


if __name__ == '__main__':
    main()