Python/all.py

import time
from collections import Counter
from xml.etree import ElementTree as ET

import matplotlib.pyplot as plt
import pandas as pd
import requests
from wordcloud import WordCloud

# Fetch the danmaku (bullet comments) of a Bilibili video
def get_danmu(video_id, timeout=10):
    """Return the danmaku texts of one Bilibili video.

    Requests the XML danmaku list with ``video_id`` as the ``oid`` query
    parameter and collects the text of every non-empty ``<d>`` element that
    is a direct child of the XML root.

    Failures are best-effort: a message is printed and an empty list is
    returned, so one bad video does not abort a batch run.

    Args:
        video_id: Identifier sent as the ``oid`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of danmaku strings. Empty when the request fails, the server
        answers with a non-200 status, or the body is not well-formed XML.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'

    # Without a timeout a stalled connection would block the script forever.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"获取视频 {video_id} 弹幕失败: {exc}")
        return []

    if response.status_code != 200:
        print(f"获取视频 {video_id} 弹幕失败")
        return []

    # A 200 response is not guaranteed to carry well-formed XML.
    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        print(f"获取视频 {video_id} 弹幕失败: {exc}")
        return []

    # Skip <d> elements whose text is missing or empty.
    return [d.text for d in root.findall('d') if d.text]

# Search Bilibili for videos matching the keyword (percent-encoded in the
# query string below).
search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Optional path to a font file used for the word cloud; None keeps
# WordCloud's built-in default font.
# NOTE(review): the built-in font may not contain Chinese glyphs, in which
# case the danmaku render as empty boxes -- point this at a CJK-capable font
# file if that happens.
WORDCLOUD_FONT_PATH = None

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error before JSON decoding is attempted.
response = requests.get(search_url, headers=headers, timeout=10)
response.raise_for_status()
data = response.json()

# Guard against "data" or "result" being absent or null in the payload.
search_results = (data.get('data') or {}).get('result') or []

# Collect the "aid" of every search hit, skipping hits without one.
# NOTE(review): these aids are later sent as the "oid" parameter of the
# danmaku endpoint; that endpoint may expect the video's cid instead --
# confirm against the API documentation.
video_ids = [
    item.get('aid') for item in search_results if item.get('aid') is not None
]

all_danmu = []
for video_id in video_ids:
    all_danmu.extend(get_danmu(video_id))
    time.sleep(2)  # wait 2 seconds after fetching each video's danmaku

# Count how often each distinct danmaku containing "AI" (case-sensitive)
# occurs.
ai_related_danmu = Counter(text for text in all_danmu if 'AI' in text)

# Keep the 8 most frequent danmaku as (text, count) pairs.
sorted_danmu = ai_related_danmu.most_common(8)

# Write the top entries to an Excel file.
df = pd.DataFrame(sorted_danmu, columns=['弹幕', '数量'])
df.to_excel('bilibili_danmu.xlsx', index=False)

# Render the word cloud. generate_from_frequencies() rejects an empty
# mapping, so skip the plot when nothing matched.
if ai_related_danmu:
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        font_path=WORDCLOUD_FONT_PATH,
    ).generate_from_frequencies(ai_related_danmu)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
else:
    print("没有找到包含 AI 的弹幕，跳过词云生成")
ADD file via upload 2 months ago			`import requests`
			`import time`
			`import pandas as pd`
			`from wordcloud import WordCloud`
			`import matplotlib.pyplot as plt`

			`# 获取 B 站视频弹幕`
			`def get_danmu(video_id):`
			`url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}'`
			`response = requests.get(url)`
			`if response.status_code == 200:`
			`from xml.etree import ElementTree as ET`
			`root = ET.fromstring(response.content)`
			`danmu_list = []`
			`for d in root.findall('d'):`
			`danmu_text = d.text`
			`if danmu_text:`
			`danmu_list.append(danmu_text)`
			`return danmu_list`
			`else:`
			`print(f"获取视频 {video_id} 弹幕失败")`
			`return []`

			`# 搜索关键词相关视频`
			`search_url = 'https://api.bilibili.com/x/web-interface/search/type?&page_size=30&order=totalrank&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&search_type=video'`

			`headers = {`
			`"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"`
			`}`

			`response = requests.get(search_url, headers=headers)`
			`data = response.json()`

			`video_ids = []`
			`for item in data.get('data', {}).get('result', []):`
			`video_ids.append(item.get('aid'))`

			`all_danmu = []`
			`for video_id in video_ids:`
			`danmu = get_danmu(video_id)`
			`all_danmu.extend(danmu)`
			`time.sleep(2) # 每获取一个视频的弹幕后等待 2 秒`

			`# 统计 AI 相关弹幕数量`
			`ai_related_danmu = {}`
			`for danmu in all_danmu:`
			`if 'AI' in danmu:`
			`if danmu in ai_related_danmu:`
			`ai_related_danmu[danmu] += 1`
			`else:`
			`ai_related_danmu[danmu] = 1`

			`# 排序并输出前 8`
			`sorted_danmu = sorted(ai_related_danmu.items(), key=lambda x: x[1], reverse=True)[:8]`

			`# 写入 Excel`
			`df = pd.DataFrame(sorted_danmu, columns=['弹幕', '数量'])`
			`df.to_excel('bilibili_danmu.xlsx', index=False)`

			`# 生成词云图`
			`wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(ai_related_danmu)`
			`plt.figure(figsize=(10, 5))`
			`plt.imshow(wordcloud, interpolation='bilinear')`
			`plt.axis('off')`
			`plt.show()`