You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

39 lines
2.4 KiB

import requests
from bs4 import BeautifulSoup
# Read every comment-pool id (oid) from cid.txt, one id per line.
with open('cid.txt', 'r', encoding='utf-8') as f:
    oids = f.read().splitlines()

# Request headers sent with every fetch.
# SECURITY NOTE(review): the Cookie below embeds live session credentials
# (SESSDATA / bili_jct). Do not commit real tokens to source control —
# load them from an environment variable or a local config file instead.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
    'Cookie': "buvid3=A208D8C8-62E0-8A2C-49DE-97075EC9AFF347900infoc; b_nut=1709605247; CURRENT_FNVAL=4048; _uuid=6B1712B5-E56D-39CF-2F4F-1109679B2275450779infoc; buvid4=56D339AE-65A3-9EB3-637F-1C6844E511FF48608-024030502-XZT8dGJQQGvpxEW%2FgPNaOA%3D%3D; rpdid=|(u))kkYu|Yk0J'u~|mY|lklu; DedeUserID=506426443; DedeUserID__ckMd5=3fc44628b47de972; enable_web_push=DISABLE; FEED_LIVE_VERSION=V8; header_theme_version=CLOSE; fingerprint=0eaa6f4de3c8e4fbb7ac2f6b2dbeeaf2; buvid_fp_plain=undefined; buvid_fp=0eaa6f4de3c8e4fbb7ac2f6b2dbeeaf2; LIVE_BUVID=AUTO2817161348895068; CURRENT_QUALITY=80; SESSDATA=c20b1ba2%2C1741679244%2C9fa32%2A91CjDmAy9_jeDRbsRQaSuk5MkfKaRAaVej6SEgP7hWsXGVhV2wJNMxhHCTMRpuUFBL-48SVlV2TDFTZk5Md2NCR2Fxc3doVDFtTGIycU1pd0dGcGt0UC1kLUZoVURHQTJmNGZVb1IxVlk5Zk4yRTl5cFdOMUpJN1JFNy1UZXpMWmQ5bzZjcWdQRzRBIIEC; bili_jct=cc779de17c4fba7ff039ca3e8573ad3d; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjYzODY0NzIsImlhdCI6MTcyNjEyNzIxMiwicGx0IjotMX0.X7nq8XaGsaZS0lUB42-EyN2Q4hPwUkSc4a9pGNkrqZY; bili_ticket_expires=1726386412; PVID=1; home_feed_column=5; browser_resolution=1699-943; sid=4mrblyq6; bp_t_offset_506426443=976262979962011648; b_lsid=691F191E_191E6B279BF"
}

# Open the output file once; all danmaku from every oid are appended to it.
with open('弹幕.txt', mode='w', encoding='utf-8') as f:
    # Fetch and parse the danmaku XML for each oid.
    for oid in oids:
        oid = oid.strip()  # tolerate surrounding whitespace
        if not oid:
            continue  # skip blank lines so we don't request an invalid URL
        url = f'https://comment.bilibili.com/{oid}.xml'
        # A timeout keeps the script from hanging forever on a dead connection.
        response = requests.get(url=url, headers=headers, timeout=10)
        response.encoding = 'utf-8'
        # Diagnostics: show what was requested and what came back.
        print(f"请求 URL: {url}")
        print(f"响应状态码: {response.status_code}")
        print(f"响应内容: {response.text[:500]}")  # first 500 characters only
        if response.status_code != 200:
            continue  # don't try to parse an error page
        # Parse the danmaku XML. Use the XML parser rather than the HTML
        # one ('lxml') so the document is handled under XML rules.
        soup = BeautifulSoup(response.text, 'xml')
        # Each <d> element carries one danmaku message as its text.
        d_tags = soup.find_all('d')
        if not d_tags:
            print("没有找到 <d> 标签")
        # Write every danmaku to the file, one per line, and echo it.
        for d_tag in d_tags:
            content = d_tag.get_text()
            f.write(content)
            f.write('\n')
            print(content)