From 2f0dfe011e070bb98e5558d06358bb547e42d2b5 Mon Sep 17 00:00:00 2001 From: p4ut7fwj5 <3346195219@qq.com> Date: Sun, 15 Sep 2024 18:47:48 +0800 Subject: [PATCH] ADD file via upload --- bullet_screen.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 bullet_screen.py diff --git a/bullet_screen.py b/bullet_screen.py new file mode 100644 index 0000000..fbda8a7 --- /dev/null +++ b/bullet_screen.py @@ -0,0 +1,48 @@ +# bullet screen为弹幕 +""" + 说明:爬取b站弹幕的模块 +""" +import re +import requests +from bs4 import BeautifulSoup +from bv_maker import get_bv, BV_NUM + + +"https://api.bilibili.com/x/v1/dm/list.so?oid=" # 获取b站弹幕api,需要提供bv号获取cid +"https://comment.bilibili.com/{cid}.xml" # 提供bv号获取cid + + +header = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0", #UA + "referer": "https://www.bilibili.com/" #溯源 反反爬 +} + +def get_bullet_screen(bv_list): + """ + 通过bv号获取视频cid,进一步获取弹幕内容 + :param bv_list: + :return: bullet_screen_list + """ + bullet_screen_list = [] #存放弹幕 + for bv in bv_list: + cid_url = f"https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp" + resp1 = requests.get(cid_url, headers=header) + # print(resp1.json()) + dict = resp1.json() + cid = dict["data"][0]["cid"] #获取字典中的cid + api_url = f"https://comment.bilibili.com/{cid}.xml" + resp2 = requests.get(api_url, headers=header) + resp2.encoding = "utf-8" #设置对应的字符集 + # print(resp2.text) + xml = BeautifulSoup(resp2.text, "xml") #使用xml需要安装lxml库 + ds = xml.find_all("d") #爬取所有的弹幕 + for d in ds: + bullet_screen_list.append(d.text) #将弹幕写入列表 + print(f"爬取{bv}的弹幕成功") + return bullet_screen_list + + +if __name__ == '__main__': + bv_list = get_bv(BV_NUM) + print(bv_list) + print(get_bullet_screen(bv_list))