"""Scrape one page of Douban short comments and save them to a CSV file.

The script downloads the comments page at ``url``, and for every
``div.comment-item`` element extracts the text matched by three CSS
selectors (``.comment-info>a``, ``.comment-time`` and ``p>span``). Each
extracted value is echoed to stdout and stored as its own single-column
row in ``生化危机.csv``.
"""
import csv

import requests
from bs4 import BeautifulSoup

url = 'https://movie.douban.com/subject/1306809/comments?status=P'
# The request is sent with a desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

# Bound the wait so a stalled connection cannot hang the script forever, and
# fail loudly on an HTTP error status instead of parsing an error page and
# still reporting success.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')
comments = soup.find_all('div', class_='comment-item')

# Selectors are applied to each comment in this order; the order determines
# the order of rows in the output file.
field_selectors = ('.comment-info>a', '.comment-time', 'p>span')

# Every extracted value becomes its own one-column row.
# NOTE(review): a single row per comment (one column per selector) may have
# been the intent -- confirm before changing the file layout.
run = []

for comment in comments:
    for selector in field_selectors:
        for node in comment.select(selector):
            # Echo the raw text; store the whitespace-stripped text.
            print(node.get_text())
            run.append([node.get_text(strip=True)])

# Write the file once, after all comments have been collected, rather than
# reopening and rewriting it for every comment. The file is also written
# when no comment was found, so it always reflects the current run.
with open('生化危机.csv', mode='w', encoding='utf-8', newline='') as f:
    csv.writer(f).writerows(run)
print("下载完成")