import requests
import re

count = 0  # running total of videos whose cids have been collected (capped at 300)


def get_page_url(n):
    """
    Build the URLs of the search-result pages.

    n is the number of pages to fetch.
    Returns a list storing each page's URL.
    """
    page_url_list = []  # list of search-result page URLs
    for i in range(n):
        if i == 0:
            # The first results page carries no paging parameters.
            page_url = "https://search.bilibili.com/all?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=5"
        else:
            # Later pages add a page number and an offset of 36 results per page.
            page_url = f"https://search.bilibili.com/all?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page={i + 1}&o={i * 36}"
        page_url_list.append(page_url)
    return page_url_list
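
# For example, get_page_url(2) should yield two URLs, the second ending in
# "&page=2&o=36", since Bilibili search appears to list 36 results per page.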

# Set the request headers: to cope with Bilibili's anti-crawler measures, we
# disguise the request as coming from a browser.
header = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"}
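
# Without a browser-like user-agent, the search pages tend to return an
# anti-crawler placeholder instead of the real result HTML.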


def get_cid(page_url_list):
    """
    Collect the cid of each video on the given search pages.

    Returns a list storing the cids of the top 300 videos under the
    default (comprehensive) ranking.
    """
    global count
    cid_list = []  # list of collected cids
    for page_url in page_url_list:
        if count >= 300:
            break
        response = requests.get(url=page_url, headers=header)
        response.encoding = 'utf-8'
        data = response.text
        # Extract the BV ids from the result page HTML and deduplicate them.
        bvid_list = list(set(re.findall('"//www.bilibili.com/video/(.*?)/"', data)))
        for bvid in bvid_list:
            # The pagelist API maps a BV id to the cids of the video's parts.
            url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
            response = requests.get(url=url, headers=header)
            response.encoding = 'utf-8'
            data = response.text
            cids = re.findall('{"cid":(.*?),', data)
            if cids:  # skip videos whose pagelist lookup returned nothing
                cid_list.append(cids[0])  # keep only the first part's cid
                count += 1
            if count >= 300:
                break
    return cid_list
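
# A BV id names a whole video, while a cid names one of its parts (pages); the
# pagelist response looks roughly like {"data": [{"cid": ..., "page": 1, ...}]},
# which is why the regex above grabs the first "cid" field.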


def get_danmu(cid_list):
    """
    Fetch the danmu (bullet comments) for each cid.
    """
    danmu_list = []
    for cid in cid_list:
        # The XML endpoint returns the current danmu pool for a given cid.
        url = f"https://comment.bilibili.com/{cid}.xml"
        response = requests.get(url=url, headers=header)
        response.encoding = 'utf-8'
        data = response.text
        # Each danmu sits in a <d p="..."> element; capture only the text.
        content = re.findall('<d p=".*?">(.*?)</d>', data)
        danmu_list.extend(content)
    return danmu_list
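
# Each <d> element's "p" attribute packs the danmu metadata (appearance time,
# mode, font size, color, send timestamp, ...); here only the comment text
# between the tags is kept.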


cid_list = get_cid(get_page_url(10))  # 10 pages of results, capped at 300 cids
danmu_list = get_danmu(cid_list)

# Open the file once for appending, then write each danmu on its own line.
with open('弹幕.txt', 'a', encoding='utf-8') as f:
    for danmu in danmu_list:  # iterate over the collected danmu
        f.write(danmu + '\n')  # write one danmu per line