You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
102201518/获取视频的cid,并进行提取.py

20 lines
897 B

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
import json
import re
# Use get_barrage to fetch the barrage (bullet comments) of a video.
def get_barrage(bvid: str, headers: dict, timeout: float = 10) -> list:
    """Fetch a Bilibili video's barrage comments, save them, and return them.

    The video's cid is looked up through the view API, the barrage payload
    for that cid is downloaded, and every comment matched by the ``<d p=...>``
    pattern is appended to ``barrage.txt`` (one per line) and printed.

    Args:
        bvid: The video's BV id, sent as the ``bvid`` query parameter.
        headers: HTTP headers passed to both requests.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The list of comment strings extracted from the barrage response
        (empty when nothing matches; in that case no file is written).

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.Timeout: If either request exceeds ``timeout``.
        KeyError: If the view response JSON has no ``data``/``cid`` entry.
    """
    # Resolve the cid that identifies this video's barrage pool.
    view_resp = requests.get(
        "https://api.bilibili.com/x/web-interface/view",
        params={"bvid": bvid},
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of a confusing JSON/KeyError later.
    view_resp.raise_for_status()
    cid = json.loads(view_resp.text)['data']['cid']

    # NOTE(review): the pattern below expects XML-style <d p="..."> elements;
    # confirm that this seg.so endpoint really returns XML (it may return a
    # binary/protobuf payload, in which case nothing will ever match).
    barrage_resp = requests.get(
        "https://api.bilibili.com/x/v2/dm/web/seg.so",
        params={"type": 1, "oid": cid, "segment_index": 1},
        headers=headers,
        timeout=timeout,
    )
    barrage_resp.raise_for_status()
    barrage_resp.encoding = 'utf-8'
    barrage_list = re.findall(r'<d p=".*?">(.*?)</d>', barrage_resp.text)

    # Open the output file once rather than once per comment; skip it entirely
    # when there is nothing to write so no empty file is created.
    if barrage_list:
        with open('barrage.txt', mode='a', encoding='utf-8') as f:
            f.writelines(f"{text}\n" for text in barrage_list)
    for text in barrage_list:
        print(text)
    return barrage_list