"""Scrape comment text from a list of Bilibili request URLs.

Reads one URL per line from ``root.txt``, fetches each URL, and appends the
``content.message`` string of every entry in the response's ``data.replies``
list to ``评论.txt`` (one write per message, newline-terminated).

Provenance: reconstructed from a whitespace-collapsed git patch (commit
150b57a, subject "Extra tasks") that adds this script as
``Scraping Bilibili comment section comments.py``.

SECURITY NOTE: the patch embedded a logged-in session cookie directly in the
source.  Credentials must not live in version control, so the cookie is now
read from the ``BILIBILI_COOKIE`` environment variable.  The cookie that was
committed should be treated as leaked and invalidated.
"""

import os
import sys

URL_LIST_PATH = 'root.txt'
OUTPUT_PATH = '评论.txt'
COOKIE_ENV_VAR = 'BILIBILI_COOKIE'
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the run

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0'
)


def build_headers(cookie=None):
    """Return the HTTP request headers used for every fetch.

    Args:
        cookie: Raw ``Cookie`` header value.  When ``None`` the value is taken
            from the ``BILIBILI_COOKIE`` environment variable.  An empty or
            missing cookie means no ``cookie`` header is sent at all.

    Returns:
        A new ``dict`` of header names to values.
    """
    if cookie is None:
        cookie = os.environ.get(COOKIE_ENV_VAR)

    headers = {
        'user-agent': USER_AGENT,
        'origin': 'https://www.bilibili.com',
    }
    if cookie:
        headers['cookie'] = cookie
    return headers


def read_urls(path=URL_LIST_PATH):
    """Return the non-blank, whitespace-stripped lines of *path*.

    Blank lines are dropped because an empty string is not a fetchable URL.

    Raises:
        OSError: If *path* cannot be opened.
    """
    with open(path, encoding='utf-8') as file:
        return [url for url in map(str.strip, file) if url]


def extract_messages(payload):
    """Return the comment strings found in a decoded JSON response.

    Looks up ``payload['data']['replies'][i]['content']['message']``.

    A ``.get(key, default)`` only substitutes the default when the key is
    absent, not when it is present with a ``null`` value, so every level is
    checked explicitly.  Entries that do not have the expected shape are
    skipped instead of raising.

    Args:
        payload: The decoded JSON document (normally a ``dict``).

    Returns:
        A list of message strings, possibly empty.
    """
    if not isinstance(payload, dict):
        return []
    data = payload.get('data')
    if not isinstance(data, dict):
        return []
    replies = data.get('replies')
    if not isinstance(replies, list):
        return []

    messages = []
    for reply in replies:
        content = reply.get('content') if isinstance(reply, dict) else None
        message = content.get('message') if isinstance(content, dict) else None
        if isinstance(message, str):
            messages.append(message)
    return messages


def append_messages(messages, path=OUTPUT_PATH):
    """Append each message in *messages* to *path*, one per write, UTF-8."""
    with open(path, 'a', encoding='utf-8') as out:
        out.writelines(f'{message}\n' for message in messages)


def main():
    """Fetch every URL listed in ``root.txt`` and store the comments found.

    A URL that fails (network error, non-200 status, non-JSON body) is
    reported and skipped so one bad entry does not abort the whole run.
    """
    # Imported here rather than at module level so the pure helpers above can
    # be imported (and unit-tested) without the third-party dependency.
    import requests

    headers = build_headers()
    if 'cookie' not in headers:
        print(
            f'{COOKIE_ENV_VAR} is not set; sending requests without a cookie',
            file=sys.stderr,
        )

    success_count = 0
    for url in read_urls():
        try:
            response = requests.get(
                url, headers=headers, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch data from {url}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data from {url}")
            continue

        try:
            payload = response.json()
        except ValueError:
            # requests raises a ValueError subclass when the body is not JSON.
            print(f"Failed to fetch data from {url}: response is not JSON")
            continue

        append_messages(extract_messages(payload))
        success_count += 1
        print(f'爬取成功{success_count}次')


if __name__ == '__main__':
    main()