From 2f0dfe011e070bb98e5558d06358bb547e42d2b5 Mon Sep 17 00:00:00 2001
From: p4ut7fwj5 <3346195219@qq.com>
Date: Sun, 15 Sep 2024 18:47:48 +0800
Subject: [PATCH] ADD file via upload

---
 bullet_screen.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 bullet_screen.py

diff --git a/bullet_screen.py b/bullet_screen.py
new file mode 100644
index 0000000..fbda8a7
--- /dev/null
+++ b/bullet_screen.py
@@ -0,0 +1,48 @@
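+# "bullet screen" refers to danmaku (comments that scroll across the video)
+"""
+    Module for scraping danmaku (bullet comments) from Bilibili.
+"""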
+import re
+import requests
+from bs4 import BeautifulSoup
+from bv_maker import get_bv, BV_NUM
+
+
+"https://api.bilibili.com/x/v1/dm/list.so?oid="  # Bilibili danmaku API; needs the cid looked up from the BV id (bare string kept as a note, unused below)
+"https://comment.bilibili.com/{cid}.xml"  # danmaku XML endpoint keyed by cid; the cid is looked up from the BV id (bare string kept as a note)
+
+
+header = {
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",  # browser User-Agent string
+    "referer": "https://www.bilibili.com/"  # Referer header, sent to get past anti-scraping checks
+}
+
+def get_bullet_screen(bv_list):
+    """
+    Look up each video's cid from its BV id, then download and collect its danmaku text.
+    :param bv_list: iterable of BV ids; each one is interpolated into the pagelist URL
+    :return: bullet_screen_list, the text of every <d> element across all videos, in fetch order
+    """
+    bullet_screen_list = []     # accumulates danmaku text across all videos
+    for bv in bv_list:
+        cid_url = f"https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp"
+        resp1 = requests.get(cid_url, headers=header)  # NOTE(review): no timeout or HTTP status check
+        # print(resp1.json())
+        dict = resp1.json()  # NOTE(review): this local name shadows the builtin dict
+        cid = dict["data"][0]["cid"]    # cid of the first entry in "data"; raises if "data" is missing or empty
+        api_url = f"https://comment.bilibili.com/{cid}.xml"
+        resp2 = requests.get(api_url, headers=header)
+        resp2.encoding = "utf-8"    # decode the response body as UTF-8
+        # print(resp2.text)
+        xml = BeautifulSoup(resp2.text, "xml")  # the "xml" parser requires the lxml package
+        ds = xml.find_all("d")  # all <d> elements, i.e. the danmaku entries
+        for d in ds:
+            bullet_screen_list.append(d.text)   # append the danmaku text to the result list
+        print(f"爬取{bv}的弹幕成功")
+    return bullet_screen_list
+
+
+if __name__ == '__main__':  # manual run: build the BV list, then print it and the scraped danmaku
+    bv_list = get_bv(BV_NUM)  # get_bv and BV_NUM come from bv_maker; presumably returns a list of BV ids - confirm there
+    print(bv_list)
+    print(get_bullet_screen(bv_list))