parent
817e071891
commit
e609e4a9c2
@ -0,0 +1,44 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Target URL: Douban Top 250 movie list; pagination is driven by the
# `start` query parameter (0, 25, 50, ... appended by the caller)
base_url = 'https://movie.douban.com/top250?start='

# Request headers: browser-like User-Agent string (presumably so the site
# does not reject the scraper as a bot — verify against Douban's behavior)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
|
||||||
|
|
||||||
|
|
||||||
|
# 解析页面函数
|
||||||
|
def parse_html(html):
    """Parse one Top-250 listing page into per-movie field triples.

    Expects the HTML of a single Douban Top 250 page and returns a list of
    ``[title, rating, comment_count]`` string triples, one per ``<li>``
    entry inside the ``ol.grid_view`` list.
    """
    soup = BeautifulSoup(html, 'lxml')
    entries = soup.find('ol', class_='grid_view').find_all('li')
    rows = []
    for entry in entries:
        # The rating block holds both the numeric score and, as its last
        # <span>, the "N人评价" comment-count text.
        star = entry.find('div', class_='star')
        rows.append([
            entry.find('div', class_='hd').find('span', class_='title').get_text(),
            star.find('span', class_='rating_num').get_text(),
            star.find_all('span')[-1].get_text(),
        ])
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
# 保存数据函数
|
||||||
|
def save_data():
    """Scrape all 10 pages of the Douban Top 250 and save them to CSV.

    Writes ``douban_movie_top250.csv`` in the current directory with a
    header row followed by one ``[title, rating, comment_count]`` row per
    movie. Failed page requests are reported and skipped, so a partial
    file is still produced.
    """
    # 'w' (not 'a'): append mode duplicated the header row and all 250
    # data rows on every re-run; a full-snapshot scrape should overwrite.
    with open('douban_movie_top250.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['电影名称', '评分', '评价人数'])
        # 10 pages x 25 movies per page = 250 entries; `start` is the offset.
        for i in range(10):
            url = base_url + str(i * 25)
            try:
                # timeout so a stalled connection cannot hang the scrape forever
                response = requests.get(url, headers=headers, timeout=10)
                response.raise_for_status()  # raises on HTTP 4xx/5xx
                for movie in parse_html(response.text):
                    writer.writerow(movie)
            except requests.exceptions.RequestException as e:
                # Best-effort: report the failed page and continue with the rest.
                print(f"请求失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# Entry point: run the scrape only when executed as a script, not on import.
if __name__ == '__main__':
    save_data()
|
Loading…
Reference in new issue