# get_danmu.py -- recovered from git patch
# ef50cc4aa0a032cd8013fd5813888419763abcb9 ("ADD file via upload",
# Thu, 12 Sep 2024), which created this file.
"""Download Bilibili danmaku (bullet comments) for a list of video URLs."""
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every API request.
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}
# Pause (seconds) after every request, to throttle the scrape.
_REQUEST_DELAY = 0.5
# Upper bound (seconds) on a single request so one stalled connection
# cannot hang the whole run.
_REQUEST_TIMEOUT = 10


def _extract_bvids(urls):
    """Return every 'BV...' path segment found in *urls*, in input order."""
    bvids = []
    for url in urls:
        for part in url.split('/'):
            if part.startswith('BV'):
                # Drop a trailing query string / fragment such as
                # "BV1xx?spm_id_from=..." so only the bare id is kept.
                bvids.append(part.partition('?')[0].partition('#')[0])
    return bvids


def _fetch_cid(bvid):
    """Return the cid of the first page of video *bvid*, or None on failure."""
    url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
    try:
        response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network error, timeout, or a body that is not valid JSON.
        return None
    if not isinstance(data, dict) or data.get('code') != 0 or not data.get('data'):
        return None
    try:
        return data['data'][0]['cid']
    except (KeyError, IndexError, TypeError):
        return None


def _fetch_danmu_xml(cid):
    """Return the raw danmaku document for *cid*, or None on failure."""
    url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
    try:
        response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    # Force UTF-8 before reading .text instead of relying on requests'
    # own encoding detection.
    response.encoding = 'utf-8'
    return response.text


def _parse_danmu(document):
    """Return the text of every <d> element in a danmaku document."""
    soup = BeautifulSoup(document, 'html.parser')
    return [d.get_text() for d in soup.find_all('d')]


def get_danmu(urls):
    """Collect the danmaku of every Bilibili video referenced in *urls*.

    For each URL the 'BV...' id is extracted, resolved to the cid of the
    video's first page through the pagelist API, and the danmaku document
    for that cid is downloaded and parsed. Videos whose requests fail are
    skipped; progress counters are printed along the way.

    Side effect: all collected comments are written to
    "all_danmu_data.xlsx" (single column 'danmu') in the current working
    directory.

    Args:
        urls: iterable of video URL strings containing a 'BV...' segment.

    Returns:
        A list with the text of every danmaku comment that was retrieved.
    """
    # Step 1: BV ids from the URLs.
    bvids = _extract_bvids(urls)

    # Step 2: resolve each BV id to a cid.
    cids = []
    for bvid in bvids:
        cid = _fetch_cid(bvid)
        time.sleep(_REQUEST_DELAY)
        if cid is not None:
            cids.append(cid)
    print("cid", len(cids))

    # Step 3: download the danmaku document of each cid.
    danmu_data = []
    fail_count = 0
    for cid in cids:
        document = _fetch_danmu_xml(cid)
        time.sleep(_REQUEST_DELAY)
        if document is None:
            # A failed response must not be parsed as if it were danmaku.
            fail_count += 1
            continue
        danmu_data.append(document)
    print("danmu_data_html", len(danmu_data))
    print("fail_count", fail_count)

    # Step 4: flatten every document into one list of comment strings.
    all_danmu = []
    for document in danmu_data:
        all_danmu.extend(_parse_danmu(document))
    print("all_danmu", len(all_danmu))

    df = pd.DataFrame({'danmu': all_danmu})
    df.to_excel("all_danmu_data.xlsx", index=False, engine='openpyxl')
    return all_danmu