parent
9e7d1e7360
commit
c1845db3e5
@ -1,54 +0,0 @@
|
|||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import pandas as pd
|
|
||||||
import time
|
|
||||||
# Browser-like User-Agent sent with every Bilibili API request.
_DANMU_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}
# Seconds to wait for a response before giving up on a single request.
_DANMU_REQUEST_TIMEOUT = 10
# Pause between consecutive requests, in seconds.
_DANMU_REQUEST_DELAY = 0.5


def _extract_bvids(urls):
    """Return every path segment of *urls* that starts with ``BV``."""
    bvids = []
    for url in urls:
        for part in url.split('/'):
            if part.startswith('BV'):
                # A URL without a trailing slash leaves the query string or
                # fragment glued to the id ("BVxxx?spm_id_from=..."); strip
                # it so the id can be used as a query parameter value.
                bvids.append(part.split('?', 1)[0].split('#', 1)[0])
    return bvids


def _fetch_cid(bvid):
    """Return the cid of the first page-list entry for *bvid*, or None."""
    url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
    try:
        response = requests.get(
            url, headers=_DANMU_HEADERS, timeout=_DANMU_REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print("cid request failed", bvid, exc)
        return None
    if response.status_code != 200:
        return None
    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON; treat it like any other failed lookup.
        return None
    if data.get('code') == 0 and data.get('data'):
        return data['data'][0]['cid']
    return None


def _fetch_danmu_xml(cid):
    """Return the danmaku document for *cid* as text, or None on failure."""
    url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
    try:
        response = requests.get(
            url, headers=_DANMU_HEADERS, timeout=_DANMU_REQUEST_TIMEOUT
        )
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    # Force UTF-8 decoding of the body before reading ``.text``.
    response.encoding = 'utf-8'
    return response.text


def get_danmu(urls):
    """Download the danmaku (bullet comments) for a list of video URLs.

    For each URL the BV id is taken from its path, resolved to a cid through
    the page-list endpoint (only the first entry of the list is used), and
    the danmaku document for that cid is downloaded. The text of every
    ``<d>`` element is collected.

    Requests that time out, raise a network error, or return a non-200
    status are skipped instead of aborting the run; failed danmaku
    downloads are reported in the printed ``fail_count``.

    Side effects: prints progress counters and writes the collected
    comments to ``all_danmu_data.xlsx`` (relative path, column ``danmu``).

    Args:
        urls: Iterable of video URL strings containing a ``BV...`` segment.

    Returns:
        A list of comment strings, in download order.
    """
    # Resolve BV ids to cids.
    cids = []
    for bvid in _extract_bvids(urls):
        cid = _fetch_cid(bvid)
        time.sleep(_DANMU_REQUEST_DELAY)
        if cid is not None:
            cids.append(cid)
    print("cid", len(cids))

    # Download the raw danmaku documents.
    danmu_data = []
    fail_count = 0
    for cid in cids:
        document = _fetch_danmu_xml(cid)
        time.sleep(_DANMU_REQUEST_DELAY)
        if document is None:
            # Do not parse error pages as if they were danmaku data.
            fail_count += 1
            continue
        danmu_data.append(document)
    print("danmu_data_html", len(danmu_data))
    print("fail_count", fail_count)

    # Flatten every <d> element of every document into one list.
    all_danmu = [
        tag.get_text()
        for document in danmu_data
        for tag in BeautifulSoup(document, 'html.parser').find_all('d')
    ]
    print("all_danmu", len(all_danmu))

    df = pd.DataFrame({'danmu': all_danmu})
    df.to_excel("all_danmu_data.xlsx", index=False, engine='openpyxl')
    return all_danmu
|
|
Loading…
Reference in new issue