From 654b0fcca8d29ae6e55299e9d49ba260d344bdd1 Mon Sep 17 00:00:00 2001
From: p4ut7fwj5 <3346195219@qq.com>
Date: Fri, 13 Sep 2024 19:17:37 +0800
Subject: [PATCH] ADD file via upload

---
 bullet_screen.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 bullet_screen.py

diff --git a/bullet_screen.py b/bullet_screen.py
new file mode 100644
index 0000000..1715e32
--- /dev/null
+++ b/bullet_screen.py
@@ -0,0 +1,44 @@
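+# "bullet screen" is used here as the English name for danmaku (on-screen bullet comments)
+"""
+    Module for crawling bullet comments (danmaku) from Bilibili.
+"""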
+import re
+import requests
+from bs4 import BeautifulSoup
+from bv_maker import get_bv, BV_NUM
+
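+
+"https://api.bilibili.com/x/v1/dm/list.so?oid="  # Bilibili bullet-comment API; the cid must first be looked up from the BV id
+"https://comment.bilibili.com/{cid}.xml"  # bullet-comment XML for a given cid (the cid is looked up from the BV id)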
+
+
+header = {  # HTTP headers passed to each requests.get call in get_bullet_screen
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0",
+    "referer": "https://www.bilibili.com/"
+}
+
+
+# print(bv_list)
+def get_bullet_screen(bv_list):
+    """Return a flat list with the text of every <d> element fetched for each BV id in bv_list."""
+    bullet_screen_list = []  # accumulated bullet-comment texts across all videos
+    for bv in bv_list:
+        # The first entry of the pagelist response supplies the cid for this BV id.
+        cid_url = f"https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp"
+        # A timeout keeps one stalled connection from blocking the whole crawl forever.
+        page_resp = requests.get(cid_url, headers=header, timeout=10)
+        cid = page_resp.json()["data"][0]["cid"]
+        api_url = f"https://comment.bilibili.com/{cid}.xml"
+        xml_resp = requests.get(api_url, headers=header, timeout=10)
+        # Force UTF-8 so the body is not decoded with a guessed charset.
+        xml_resp.encoding = "utf-8"
+        xml = BeautifulSoup(xml_resp.text, "xml")
+        # The text of each <d> element is collected as one bullet comment.
+        bullet_screen_list.extend(d.text for d in xml.find_all("d"))
+        print(f"爬取{bv}的弹幕成功")
+    return bullet_screen_list
+
+
+if __name__ == '__main__':
+    # Script entry: build the BV id list with get_bv(BV_NUM), crawl it, and print the result.
+    print(get_bullet_screen(get_bv(BV_NUM)))