"""Scrape the Douban Movie Top 250 chart into a CSV file.

Each chart page is fetched over HTTP, the movie title, rating and
review-count text are extracted with BeautifulSoup, and one CSV row is
appended per movie.
"""
import csv
import os

import requests
from bs4 import BeautifulSoup

# Base URL of the chart; a page is selected with the ``start`` query offset.
url = 'https://movie.douban.com/top250'
# Request headers: a browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}

# Column titles (movie title, rating, number of reviews) for a fresh file.
CSV_HEADER = ['电影名称', '评分', '评价人数']
# The ``start`` offset advances by this many entries per page.
PAGE_SIZE = 25
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def parse_html(html):
    """Extract the movie rows from one chart page.

    Args:
        html: Markup of a single Top 250 page.

    Returns:
        A list of ``[title, rating, review_count_text]`` rows, one per
        ``<li>`` entry that contains all three fields. Entries missing any
        field are skipped.

    Raises:
        ValueError: If the page has no ``<ol class="grid_view">`` element,
            i.e. it is not a chart page (for example an error page).
    """
    soup = BeautifulSoup(html, 'lxml')
    grid = soup.find('ol', class_='grid_view')
    if grid is None:
        raise ValueError("page does not contain the 'grid_view' movie list")

    rows = []
    for movie in grid.find_all('li'):
        heading = movie.find('div', class_='hd')
        star = movie.find('div', class_='star')
        if heading is None or star is None:
            continue
        title = heading.find('span', class_='title')
        rating = star.find('span', class_='rating_num')
        spans = star.find_all('span')
        if title is None or rating is None or not spans:
            continue
        # The last <span> inside the star block holds the review-count text.
        rows.append([title.get_text(), rating.get_text(), spans[-1].get_text()])
    return rows


def save_data(path='douban_movie_top250.csv', pages=10):
    """Download the chart pages and append their rows to a CSV file.

    The file is opened in append mode; the header row is written only when
    the file does not exist yet or is empty, so repeated runs do not insert
    extra header lines in the middle of the data.

    Args:
        path: Destination CSV file.
        pages: Number of consecutive chart pages to fetch, starting at
            offset 0.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-success HTTP status.
        ValueError: If a fetched page is not a chart page (see
            ``parse_html``).
    """
    write_header = not os.path.exists(path) or os.path.getsize(path) == 0
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(CSV_HEADER)
        for page in range(pages):
            page_url = f'{url}?start={page * PAGE_SIZE}&filter='
            response = requests.get(
                page_url, headers=headers, timeout=REQUEST_TIMEOUT
            )
            # Fail loudly on 4xx/5xx instead of parsing an error page.
            response.raise_for_status()
            writer.writerows(parse_html(response.text))


if __name__ == '__main__':
    save_data()