parent
dfa9d8e4e1
commit
652dda759a
@ -0,0 +1,42 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import csv
|
||||
|
||||
# Scrape the first page of Douban short comments for one movie and dump the
# extracted text fragments into a CSV file.
url = 'https://movie.douban.com/subject/1306809/comments?status=P'
headers = {
    # A browser-like User-Agent is sent with the request.
    # NOTE(review): presumably required to avoid being rejected as a bot -- confirm.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    ),
}

# Without a timeout, requests waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on 4xx/5xx instead of silently writing an empty CSV and
# reporting success.
response.raise_for_status()
source = response.text

soup = BeautifulSoup(source, 'lxml')
comments = soup.find_all('div', class_='comment-item')

# CSS selectors applied to every comment item, in output order.
# NOTE(review): these look like commenter name, comment time and comment
# body respectively -- verify against the live page markup.
FIELD_SELECTORS = (
    ".comment-info>a",
    ".comment-time",
    'p>span',
)

# Each extracted value becomes its own single-cell CSV row, in the order:
# for every comment, all matches of selector 1, then 2, then 3.
run = []
for comment in comments:
    for selector in FIELD_SELECTORS:
        for node in comment.select(selector):
            # Echo the raw (unstripped) text to the console ...
            print(node.get_text())
            # ... but store the whitespace-stripped text.
            run.append([node.get_text(strip=True)])

# newline='' lets the csv module control line endings itself.
with open('生化危机.csv', mode='w', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerows(run)

print("下载完成")
|
||||
|
||||
|
Loading…
Reference in new issue