|
|
@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# path = Path('cid.json')
|
|
|
|
|
|
|
|
# contents = path.read_text()
|
|
|
|
|
|
|
|
# video_ids = json.loads(contents)
|
|
|
|
|
|
|
|
# print(len(video_ids))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_danmaku(cids, *, timeout=10):
    """Download and collect the danmaku (bullet comments) for the given cids.

    For every cid, the XML document at
    ``https://api.bilibili.com/x/v1/dm/list.so?oid=<cid>`` is fetched and the
    text between each ``<d p="...">`` and ``</d>`` pair is extracted with a
    regular expression.

    Args:
        cids: Iterable of cid values; each one is substituted into the
            ``oid`` query parameter of the danmaku URL.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection cannot block forever.

    Returns:
        A flat list of danmaku strings, in the order the cids were given and,
        within each cid, in the order they appear in the XML. The strings are
        returned exactly as they appear in the XML (no entity unescaping).
        An empty ``cids`` yields an empty list.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
    }
    # Danmaku endpoint; the cid is substituted into the ``oid`` parameter.
    danmaku_url = "https://api.bilibili.com/x/v1/dm/list.so?oid={}"
    # Compile once, outside the loop: captures the text of each <d> element.
    danmaku_pattern = re.compile(r'<d p=".*?">(.*?)</d>')
    # Accumulates the danmaku of every requested cid.
    all_danmaku = []
    for cid in cids:
        # Request the danmaku XML for this cid.
        response = requests.get(
            danmaku_url.format(cid),
            headers=headers,
            timeout=timeout,
        )
        # Fail loudly on an HTTP error instead of regex-scanning an error
        # page and silently contributing nothing for this cid.
        response.raise_for_status()
        # Decode the raw bytes explicitly as UTF-8.
        xml_data = response.content.decode('utf-8')
        # Extract every danmaku text and append it to the overall result.
        all_danmaku.extend(danmaku_pattern.findall(xml_data))
    return all_danmaku
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
# cids=[1645189127, 1625903793]
|
|
|
|
|
|
|
|
# alldanmaku = get_danmaku(cids)
|
|
|
|
|
|
|
|
# print(alldanmaku)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# with open('paris_olympics_danmaku.txt', 'w', encoding='utf-8') as f:
|
|
|
|
|
|
|
|
# for danmaku in all_danmaku:
|
|
|
|
|
|
|
|
# f.write(danmaku + '\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#https://api.bilibili.com/x/v1/dm/list.so?oid=1628991276
|