You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
"""
从B站获取视频的弹幕并保存到文件中
"""
import re
import json
import requests
from common_headers import HEADERS # 假设你有一个公共的header文件
def load_bv_numbers(file_path):
    """Read BV ids from a text file, one id per line.

    Args:
        file_path: Path of a UTF-8 encoded text file with one BV id per line.

    Returns:
        A list of BV id strings with surrounding whitespace removed, in file
        order. Blank (or whitespace-only) lines are skipped.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly instead of materialising every
        # line with readlines(); drop blank lines so callers never build a
        # request URL around an empty id.
        stripped_lines = (line.strip() for line in f)
        return [bv for bv in stripped_lines if bv]
def fetch_video_cids(bv_list):
    """Look up the CID of the first page of every video in ``bv_list``.

    One GET request is sent per BV id to the Bilibili ``pagelist`` endpoint
    and the ``cid`` of the first entry in the returned ``data`` list is
    collected.

    Args:
        bv_list: Iterable of BV id strings.

    Returns:
        A list of CIDs, one per BV id, in input order.

    Raises:
        requests.RequestException: If a request fails or returns an HTTP
            error status.
        ValueError: If the response carries no page list for a BV id.
    """
    pagelist_url = 'https://api.bilibili.com/x/player/pagelist'
    cid_list = []
    for bv in bv_list:
        # Let requests build the query string so the id is URL-encoded.
        response = requests.get(
            url=pagelist_url,
            params={'bvid': bv, 'jsonp': 'jsonp'},
            headers=HEADERS,
            timeout=10,
        )
        response.raise_for_status()
        pages = response.json().get('data')
        if not pages:
            # Fail with the offending id instead of an opaque TypeError or
            # IndexError from subscripting a missing/empty page list.
            raise ValueError(f'no page list returned for BV id {bv!r}')
        cid_list.append(pages[0]['cid'])
    return cid_list
def fetch_and_save_danmu(cid_list, danmu_file):
    """Download the danmaku of each CID and append them to ``danmu_file``.

    For every CID the ``dm/list.so`` endpoint is requested, the text of each
    ``<d p="...">`` element is extracted with a regular expression, and the
    extracted strings are appended to the output file, one per line.

    Args:
        cid_list: Iterable of CIDs to fetch.
        danmu_file: Path of the output text file. It is opened in append
            mode with UTF-8 encoding once per CID, so nothing is created
            when ``cid_list`` is empty.

    Raises:
        requests.RequestException: If a request fails or returns an HTTP
            error status.
        OSError: If the output file cannot be opened for appending.
    """
    # Compile once; the same pattern is applied to every response.
    danmu_pattern = re.compile(r'<d p=".*?">(.*?)</d>')
    for cid in cid_list:
        url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
        response = requests.get(url=url, headers=HEADERS, timeout=10)
        # An error page would otherwise be parsed as "zero danmaku" silently.
        response.raise_for_status()
        # Decode using the encoding detected from the body rather than the
        # one guessed from the HTTP headers.
        response.encoding = response.apparent_encoding
        data_list = danmu_pattern.findall(response.text)
        with open(danmu_file, mode='a', encoding='utf-8') as f:
            f.writelines(danmu + '\n' for danmu in data_list)
def main(bv_file_path='E:/Crawler/output/bv_numbers.txt',
         danmu_output_file='E:/Crawler/output/danmu.txt'):
    """Resolve the CIDs for the stored BV ids and crawl their danmaku.

    Args:
        bv_file_path: Text file holding one BV id per line.
        danmu_output_file: Text file the danmaku are appended to.

    The defaults are the paths the script was originally hard-wired to, so
    calling ``main()`` with no arguments behaves as before.
    """
    bv_numbers = load_bv_numbers(bv_file_path)
    cids = fetch_video_cids(bv_numbers)
    fetch_and_save_danmu(cids, danmu_output_file)
    print("弹幕数据爬取完成")
# Run the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()