# -*- coding: utf-8 -*-
"""
Created on Wed Dec 4 22:19:53 2024

@author: LENOVO

Scrape the Douban Top 250 movie list (ten pages of 25 entries), append the
ranking to ``phb.txt`` and print the resulting file.
"""

import re

# Pagination of https://movie.douban.com/top250: start=0, 25, ..., 225.
PAGE_SIZE = 25
PAGE_STARTS = range(0, 226, PAGE_SIZE)

OUTPUT_PATH = "phb.txt"
SEPARATOR = "-----------------------------------"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever

# NOTE(review): both patterns assume the current Douban list markup
# (<span class="title">...</span> and <span class="rating_num" ...>...</span>).
# Confirm against the live page. Without the surrounding tags the lazy groups
# match one character at a time, which is never what is wanted here.
# "[^&]" skips the alternate-title spans, which start with "&nbsp;".
TITLE_RE = re.compile(r'<span class="title">([^&]+?)</span>')
SCORE_RE = re.compile(
    r'<span class="rating_num" property="v:average">(.+?)</span>'
)

# SECURITY(review): this is a captured, logged-in browser session cookie
# (see the dbcl2 / ck fields). It is kept verbatim so the request is
# unchanged, but it should be revoked and loaded from the environment or a
# local config file instead of living in source control.
HEADERS = {
    "cookie": (
        'bid=Kfv1pbVpYhk; '
        '_pk_id.100001.4cf6=92d1dcb81583850f.1734496590.; '
        '__utmz=30149280.1734496590.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); '
        '__yadk_uid=wdgkM1hFqbfrQbJwsMetkoJwq2CRPqY2; '
        'll="118267"; '
        '__utma=30149280.1833711220.1734496590.1734496590.1734597660.2; '
        '__utmc=30149280; '
        '__utmb=30149280.2.10.1734597660; '
        'dbcl2="285464845:OOitQZaXAZ8"; '
        'ck=AARY; '
        '__utma=223695111.624436283.1734496590.1734496590.1734597905.2; '
        '__utmb=223695111.0.10.1734597905; '
        '__utmc=223695111; '
        '__utmz=223695111.1734597905.2.2.utmcsr=accounts.douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; '
        '_pk_ref.100001.4cf6=["","",1734597905,"https://accounts.douban.com/"]; '
        '_pk_ses.100001.4cf6=1; '
        'push_noty_num=0; '
        'push_doumail_num=0'
    ),
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
    ),
}


def fetch_page(start):
    """Download one page of the Top 250 list and return its HTML text.

    ``start`` is the zero-based offset of the first movie on the page.
    Raises ``requests.HTTPError`` on a non-2xx response and
    ``requests.Timeout`` if the server does not answer in time.
    """
    # Imported here so the pure parsing/formatting helpers stay usable
    # without the third-party HTTP dependency installed.
    import requests

    url = f"https://movie.douban.com/top250?start={start}"
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error/anti-bot page.
    response.raise_for_status()
    return response.text


def parse_page(html):
    """Return ``[(title, score), ...]`` for one page of list HTML.

    Titles and scores are extracted independently and paired by position,
    so the result is truncated to the shorter of the two match lists.
    """
    titles = TITLE_RE.findall(html)
    scores = SCORE_RE.findall(html)
    return list(zip(titles, scores))


def format_ranking(pages):
    """Render the scraped pages as the text written to the output file.

    ``pages`` is a sequence of per-page ``(title, score)`` lists. Ranks are
    derived from the page index (page ``k`` starts at ``k * 25 + 1``), so a
    short page leaves a gap rather than shifting later ranks.
    """
    chunks = []
    for page_index, entries in enumerate(pages):
        first_rank = page_index * PAGE_SIZE + 1
        for rank, (title, score) in enumerate(entries, start=first_rank):
            chunks.append(f"No.{rank}:{title}\n")
            chunks.append(f"豆瓣评分:{score}\n")
            chunks.append(f"{SEPARATOR}\n")
    return "".join(chunks)


def main():
    """Scrape every page, append the ranking to the file and echo the file."""
    pages = [parse_page(fetch_page(start)) for start in PAGE_STARTS]
    print(pages)

    # Append mode is kept on purpose: earlier runs stay in the file.
    # One context-managed open replaces an open/close pair per movie.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
        out.write(format_ranking(pages))

    with open(OUTPUT_PATH, "r", encoding="utf-8") as f:
        print(f.read())


if __name__ == "__main__":
    main()