Delete 'get_danmu.py'

main
luoyonghuang 2 months ago
parent 9e7d1e7360
commit c1845db3e5

@@ -1,54 +0,0 @@
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def get_danmu(urls):
    # Extract the BV id from each video URL
    bv_ids = []
    for url in urls:
        for part in url.split('/'):
            if part.startswith('BV'):
                bv_ids.append(part)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }
    # Look up the cid for each BV id via the pagelist API
    cids = []
    for bv_id in bv_ids:
        url = f"https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp"
        response = requests.get(url, headers=headers)
        time.sleep(0.5)
        if response.status_code == 200:
            data = response.json()
            if data['code'] == 0 and data['data']:
                cids.append(data['data'][0]['cid'])
    print("cid", len(cids))
    # Fetch the danmaku XML for each cid
    danmu_data = []
    fail_count = 0
    for cid in cids:
        url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
        response = requests.get(url, headers=headers)
        time.sleep(0.5)
        if response.status_code != 200:
            fail_count += 1
            continue  # skip failed responses instead of parsing them
        response.encoding = 'utf-8'
        danmu_data.append(response.text)
    print("danmu_data_html", len(danmu_data))
    print("fail_count", fail_count)
    # Collect every danmaku comment (<d> elements) into one list
    all_danmu = []
    for html in danmu_data:
        soup = BeautifulSoup(html, 'html.parser')
        for d in soup.find_all('d'):
            all_danmu.append(d.get_text())
    print("all_danmu", len(all_danmu))
    df = pd.DataFrame({'danmu': all_danmu})
    df.to_excel("all_danmu_data.xlsx", index=False, engine='openpyxl')
    return all_danmu
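
For reference, a minimal usage sketch of the deleted function. The URL below is a placeholder, not a real video from this project; get_danmu writes all_danmu_data.xlsx to the working directory as a side effect and also returns the comments as a list:

if __name__ == "__main__":
    # Hypothetical example URL; replace with real bilibili video links
    urls = ["https://www.bilibili.com/video/BV1xx411c7mD"]
    danmu = get_danmu(urls)
    print(f"Collected {len(danmu)} danmaku comments")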