""" 从B站获取视频的弹幕并保存到文件中 """ import re import json import requests from common_headers import HEADERS from concurrent.futures import ThreadPoolExecutor def load_bv_numbers(file_path): """从文件中读取BV号""" with open(file_path, 'r', encoding='utf-8') as f: return [line.strip() for line in f.readlines()] def fetch_video_cids(bv_list): """获取视频的CID号""" cid_list = [] with ThreadPoolExecutor() as executor: results = list(executor.map(lambda bv: requests.get(f'https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp', headers=HEADERS, timeout=10), bv_list)) for response in results: cid = json.loads(response.text)['data'][0]['cid'] cid_list.append(cid) return cid_list def fetch_and_save_danmu(cid_list, danmu_file): """爬取视频弹幕并保存到文件""" with ThreadPoolExecutor() as executor: results = list(executor.map(lambda cid: requests.get(f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}', headers=HEADERS, timeout=10), cid_list)) for response in results: response.encoding = response.apparent_encoding data_list = re.findall('(.*?)', response.text) with open(danmu_file, mode='a', encoding='utf-8') as f: for danmu in data_list: f.write(danmu + '\n') def main(): """主函数:从BV号中获取CID并爬取弹幕""" bv_file_path = '/output/bv_numbers.txt' danmu_output_file = '/output/danmu.txt' bv_numbers = load_bv_numbers(bv_file_path) cids = fetch_video_cids(bv_numbers) fetch_and_save_danmu(cids, danmu_output_file) print("弹幕数据爬取完成") if __name__ == '__main__': main()