"""Scrape Bilibili danmaku (bullet comments) for a fixed search keyword.

Recovered from the patch "ADD file via upload" (commit b21a6a75052b32bb26bbde53cd201f5f98a1ba48),
which creates ``get_danmu.py``.

Pipeline: build the search-result page URLs, collect the ``cid`` of every
video listed on them, download each video's danmaku XML, and append the
comment texts to ``弹幕.txt``.
"""
import re

# Number of cids collected by the most recent get_cid() call.
# Kept as a module-level name for backward compatibility.
count = 0

# Search URL of the first result page; later pages append "&page=" and "&o=".
_SEARCH_URL = "https://search.bilibili.com/all?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=5"

# Offset step used by the search page's "o" query parameter.
_RESULTS_PER_PAGE = 36

# Seconds to wait for a response before giving up on a request.
_REQUEST_TIMEOUT = 10

# Patterns are compiled once because they are applied inside loops.
_BVID_RE = re.compile(r'"//www\.bilibili\.com/video/(.*?)/"')
_CID_RE = re.compile(r'\{"cid":(.*?),')
# Each comment in the danmaku XML is an element of the form <d p="...">text</d>.
_DANMU_RE = re.compile(r'<d p=".*?">(.*?)</d>')


def get_page_url(n):
    """Return the URLs of the first *n* search-result pages.

    The first page is the bare search URL; page ``k`` (k >= 2) adds
    ``&page=k&o=(k - 1) * 36``. An *n* of zero or less yields an empty list.
    """
    page_url_list = []
    for page in range(1, n + 1):
        if page == 1:
            page_url_list.append(_SEARCH_URL)
        else:
            offset = (page - 1) * _RESULTS_PER_PAGE
            page_url_list.append(f"{_SEARCH_URL}&page={page}&o={offset}")
    return page_url_list


# Request headers. To get past Bilibili's anti-scraping checks the requests
# present themselves as coming from a desktop browser.
header = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"}


def _fetch_text(url):
    """GET *url* with the browser-like headers and return the body decoded as UTF-8."""
    # Imported here so the URL-building and parsing code stays importable
    # in environments where the third-party ``requests`` package is absent.
    import requests

    # Without a timeout a single stalled connection would hang the whole run.
    response = requests.get(url=url, headers=header, timeout=_REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    return response.text


def get_cid(page_url_list, limit=300, fetch=None):
    """Collect the cid of each video listed on the given search pages.

    Args:
        page_url_list: search-result page URLs, visited in order.
        limit: maximum number of cids to collect (default 300).
        fetch: callable taking a URL and returning the response text;
            defaults to an HTTP GET that sends ``header``.

    Returns:
        A list of cid strings in the order the videos appear on the pages.
        A video is looked up once even if it is listed several times, and a
        video whose page-list response contains no cid is skipped.

    Side effects:
        Sets the module-level ``count`` to the number of cids returned.
    """
    global count
    if fetch is None:
        fetch = _fetch_text

    cid_list = []
    seen_bvids = set()
    for page_url in page_url_list:
        if len(cid_list) >= limit:
            break
        # findall() yields matches in document order, which is the ranking
        # order; de-duplicating through a set() of the matches would lose it.
        for bvid in _BVID_RE.findall(fetch(page_url)):
            if len(cid_list) >= limit:
                break
            if bvid in seen_bvids:
                continue
            seen_bvids.add(bvid)

            url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
            match = _CID_RE.search(fetch(url))
            if match is None:
                # No cid in the response (e.g. an error payload): skip the
                # video instead of failing on an empty match list.
                continue
            cid_list.append(match.group(1))

    # Counting locally keeps repeated calls independent of each other.
    count = len(cid_list)
    return cid_list


def get_danmu(cid_list, fetch=None):
    """Download the danmaku XML of every cid and return the comment texts.

    Args:
        cid_list: cids whose danmaku should be fetched, in order.
        fetch: callable taking a URL and returning the response text;
            defaults to an HTTP GET that sends ``header``.

    Returns:
        A flat list with the text of every ``<d>`` element, grouped by cid
        in input order.
    """
    if fetch is None:
        fetch = _fetch_text

    danmu_list = []
    for cid in cid_list:
        url = f"https://comment.bilibili.com/{cid}.xml"
        danmu_list.extend(_DANMU_RE.findall(fetch(url)))
    return danmu_list


def main():
    """Scrape the first 10 search pages and append every comment to 弹幕.txt."""
    cid_list = get_cid(get_page_url(10))
    danmu_list = get_danmu(cid_list)
    # Open the output once rather than once per comment.
    with open('弹幕.txt', 'a', encoding='utf-8') as f:
        f.writelines(danmu + '\n' for danmu in danmu_list)


if __name__ == "__main__":
    main()