You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
2024-Summer-Olympics/Scraping Bilibili comment s...

32 lines
2.2 KiB

import requests
# Scrape comment text from a list of API URLs.
#
# Input:  root.txt  - one URL per line.
# Output: 评论.txt   - every comment message found, one per line (appended).

# Seconds to wait for the server before giving up on a single URL; without a
# timeout a stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Request headers sent with every fetch.
# NOTE(security): the cookie below contains what look like account session
# credentials (the SESSDATA and bili_jct fields). Secrets committed to a
# repository should be rotated and loaded from outside the source tree. The
# literal is intentionally left unchanged so the request stays identical.
headers = {
    'cookie': 'buvid4=686CE350-75FA-4921-C069-8D0E582FF02993159-024082507-y91msXDi8JTSAtvVtdhJkQ%3D%3D; buvid3=313C6A34-4C14-0939-EBE8-332F809D2EF655028infoc; b_nut=1725087454; CURRENT_FNVAL=4048; _uuid=10E7EC991-7B18-9A8B-78AA-C95F55102347103610infoc; rpdid=|(JlklRl)~Y|0J\'u~kl|)~l|l; header_theme_version=CLOSE; enable_web_push=DISABLE; is-2022-channel=1; fingerprint=f90b71618c196fb8806f458403d943fb; buvid_fp_plain=undefined; bp_t_offset_1074062089=974427929414991872; buvid_fp=f90b71618c196fb8806f458403d943fb; SESSDATA=e74a05df%2C1741267229%2Ce876a%2A91CjDqLgub8fAVML6ADiSzb56IvMh3z61KnSnawN0g_c1h5emTp3cU9qrpFxgDEzzpawASVkpfc01rblFpaUxDRHViNXpJdGhweEdNY2VDdEJ0N1hvMU92SWdLcG5Dclg5dlZmV29aMWZfX2ZSWHJ5VVN3ZHRkc0ZaLU9COHdmeDR2T0tmSXlvdmt3IIEC; bili_jct=addb604342937a4322aa12322c11bc2c; DedeUserID=3546758143544046; DedeUserID__ckMd5=65316417021aa6ed; sid=7yti0jp9; bsource=search_bing; CURRENT_QUALITY=80; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjYxMjc3NjcsImlhdCI6MTcyNTg2ODUwNywicGx0IjotMX0.Cj1LdQNDygY5bgVW9GxDgJ6gHnTNWtS3p7qi2yTlrpk; bili_ticket_expires=1726127707; home_feed_column=5; browser_resolution=1769-874; b_lsid=562310C410_191D6AB826C; bp_t_offset_3546758143544046=975156832609763328',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
    'origin': 'https://www.bilibili.com',
}


def extract_messages(payload):
    """Return the comment texts found in one decoded JSON response.

    Reads ``payload['data']['replies']`` and collects
    ``reply['content']['message']`` from each entry.

    Args:
        payload: The object produced by decoding the response body as JSON.

    Returns:
        A list of message values, in response order. The list is empty when
        ``data`` or ``replies`` is missing, ``None``, or not of the expected
        container type.

    Raises:
        KeyError: If a reply entry lacks ``content`` or ``message``.
    """
    if not isinstance(payload, dict):
        return []
    # dict.get(key, default) only uses the default when the key is absent; a
    # JSON null arrives as None, so fall back explicitly with `or`.
    data = payload.get('data') or {}
    if not isinstance(data, dict):
        return []
    replies = data.get('replies') or []
    return [reply['content']['message'] for reply in replies]


def main():
    """Fetch every URL listed in root.txt and append its comments to 评论.txt.

    A URL that cannot be fetched (network error, non-200 status, or a body
    that is not valid JSON) is reported and skipped so the remaining URLs are
    still processed. A running number is printed after each successful URL.
    """
    # Read the URLs, dropping surrounding whitespace and blank lines (an
    # empty string is not a valid URL and would make the request raise).
    with open('root.txt', 'r') as file:
        urls = [line.strip() for line in file if line.strip()]

    count = 1
    for url in urls:
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            print(f"Failed to fetch data from {url}")
            continue

        # Only a 200 response is treated as success.
        if response.status_code != 200:
            print(f"Failed to fetch data from {url}")
            continue

        try:
            payload = response.json()
        except ValueError:
            # The body was not valid JSON.
            print(f"Failed to fetch data from {url}")
            continue

        content_list = extract_messages(payload)

        # Append the comments to the output file, one per line.
        with open('评论.txt', 'a', encoding='utf-8') as f:
            f.writelines(content + '\n' for content in content_list)

        print(f'爬取成功{count}')
        count += 1


if __name__ == "__main__":
    main()