You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

49 lines
1.6 KiB

"""
从B站获取视频的弹幕并保存到文件中
"""
import re
import json
import requests
from common_headers import HEADERS  # assumes a shared request-headers module exists
def load_bv_numbers(file_path):
    """Read BV ids from a text file, one id per line.

    Args:
        file_path: Path to a UTF-8 text file containing one BV id per line.

    Returns:
        List of BV id strings, stripped of surrounding whitespace.
        Blank lines are skipped so they cannot turn into empty BV ids
        (which would produce broken API URLs downstream).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly; readlines() was redundant.
        return [line.strip() for line in f if line.strip()]
def fetch_video_cids(bv_list):
    """Resolve each BV id to the CID of its first page via the pagelist API.

    Args:
        bv_list: Iterable of BV id strings.

    Returns:
        List of CIDs (one per BV id, first page only), in input order.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        KeyError/IndexError: If the API payload lacks the expected
            ``data[0]['cid']`` structure.
    """
    cid_list = []
    for bv in bv_list:
        url = f'https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp'
        response = requests.get(url=url, headers=HEADERS, timeout=10)
        # Fail fast with a clear error instead of a KeyError on an error page.
        response.raise_for_status()
        # response.json() replaces the hand-rolled json.loads(response.text).
        cid_list.append(response.json()['data'][0]['cid'])
    return cid_list
def fetch_and_save_danmu(cid_list, danmu_file):
    """Download danmu (bullet comments) for each CID and append them to a file.

    Args:
        cid_list: Iterable of CIDs as returned by ``fetch_video_cids``.
        danmu_file: Path of the output file; danmu are appended one per line.

    Raises:
        requests.HTTPError: If the danmu API responds with a 4xx/5xx status.
    """
    # Hoist the pattern compile out of the loop; raw string for the regex.
    danmu_re = re.compile(r'<d p=".*?">(.*?)</d>')
    # Open the output file once instead of reopening it for every CID.
    with open(danmu_file, mode='a', encoding='utf-8') as f:
        for cid in cid_list:
            url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
            response = requests.get(url=url, headers=HEADERS, timeout=10)
            response.raise_for_status()
            # The danmu XML declares no charset requests understands; guess it.
            response.encoding = response.apparent_encoding
            for danmu in danmu_re.findall(response.text):
                f.write(danmu + '\n')
def main():
    """Entry point: read BV ids, resolve their CIDs, then scrape the danmu."""
    bv_source = 'E:/Crawler/output/bv_numbers.txt'
    danmu_target = 'E:/Crawler/output/danmu.txt'
    cids = fetch_video_cids(load_bv_numbers(bv_source))
    fetch_and_save_danmu(cids, danmu_target)
    print("弹幕数据爬取完成")


if __name__ == '__main__':
    main()