From c1845db3e586677ada1a3fdb66a40eed144cfe7d Mon Sep 17 00:00:00 2001 From: pjmw9izve <2308014474@qq.com> Date: Tue, 17 Sep 2024 12:42:33 +0800 Subject: [PATCH] Delete 'get_danmu.py' --- get_danmu.py | 54 ---------------------------------------------------- 1 file changed, 54 deletions(-) delete mode 100644 get_danmu.py diff --git a/get_danmu.py b/get_danmu.py deleted file mode 100644 index 2ac80de..0000000 --- a/get_danmu.py +++ /dev/null @@ -1,54 +0,0 @@ -import requests -from bs4 import BeautifulSoup -import pandas as pd -import time -def get_danmu(urls): - # 获取BV号 - bv_id = [] - for url in urls: - parts = url.split('/') - for part in parts: - if part.startswith('BV'): - bv_id.append(part) - #获取cid号 - cids = [] - for id in bv_id: - url = f"https://api.bilibili.com/x/player/pagelist?bvid={id}&jsonp=jsonp" - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0" - } - response = requests.get(url,headers=headers) - time.sleep(0.5) - if response.status_code == 200: - data = response.json() - if data['code'] == 0 and data['data']: - cid = data['data'][0]['cid'] - cids.append(cid) - print("cid",len(cids)) - #获取弹幕数据 - danmu_data = [] - fail_count = 0 - for id in cids: - url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={id}" - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0" - } - response = requests.get(url,headers=headers) - time.sleep(0.5) - if response.status_code != 200: - fail_count += 1 - response.encoding = 'utf-8' - danmu_data.append(response.text) - print("danmu_data_html",len(danmu_data)) - print("fail_count",fail_count) - #返回一个所有弹幕的list - all_danmu = [] - for html in danmu_data: - soup = BeautifulSoup(html, 'html.parser') - for d in soup.find_all('d'): - all_danmu.append(d.get_text()) - - print("all_danmu",len(all_danmu)) - df = pd.DataFrame({'danmu': all_danmu}) - df.to_excel("all_danmu_data.xlsx", index=False,engine='openpyxl') - return all_danmu