ADD file via upload

main
fzu062200129 2 months ago
parent caefda24ca
commit bcf3d9fe4c

@ -0,0 +1,195 @@
import requests
import re
import jieba
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import pandas as pd
class SimpleDanmuAnalyzer:
    """Fetch, filter, and analyze Bilibili video danmu (bullet comments)."""

    def __init__(self):
        # Accumulated danmu text across every fetched video.
        self.danmu_list = []
        # Substrings treated as noise; any danmu containing one is dropped.
        self.noise_words = ['666', '哈哈哈', '233', '点赞', '关注', '来了']

    def get_danmu(self, bvid):
        """Fetch the danmu of a single video identified by its BV id.

        Valid (non-noise) danmu are appended to ``self.danmu_list``.
        Network or API failures are reported on stdout and swallowed so
        one bad video does not abort a multi-video run.
        """
        try:
            print(f"正在获取视频 {bvid} 的弹幕...")
            # Step 1: resolve the video's internal cid via the view API.
            info_url = f"https://api.bilibili.com/x/web-interface/view?bvid={bvid}"
            # timeout added so a stalled connection cannot hang the program
            info_response = requests.get(info_url, timeout=10)
            info_data = info_response.json()
            if info_data['code'] != 0:
                print(f"获取视频信息失败: {info_data.get('message', '未知错误')}")
                return
            cid = info_data['data']['cid']
            # Step 2: download the danmu XML payload for that cid.
            # NOTE(review): bilibili endpoints may require a User-Agent
            # header — confirm if requests start being rejected.
            danmu_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
            response = requests.get(danmu_url, timeout=10)
            response.encoding = 'utf-8'
            # Each danmu is the text content of a <d> element in the XML.
            danmus = re.findall(r'<d p=".*?">(.*?)</d>', response.text)
            # Keep only danmu that contain none of the noise substrings.
            filtered_danmus = [
                danmu for danmu in danmus
                if not any(noise in danmu for noise in self.noise_words)
            ]
            self.danmu_list.extend(filtered_danmus)
            print(f"获取到 {len(filtered_danmus)} 条有效弹幕")
        except Exception as e:
            # Best-effort fetch: report and let the caller continue.
            print(f"获取弹幕失败: {e}")

    def analyze_words(self, top_n=8):
        """Return the ``top_n`` most common words as (word, count) pairs.

        Words of a single character are ignored (mostly punctuation and
        particles). Returns an empty list when no danmu were collected.
        """
        if not self.danmu_list:
            print("没有弹幕数据可供分析")
            return []
        # Merge all danmu into one text and segment it with jieba.
        text = ' '.join(self.danmu_list)
        words = jieba.cut(text)
        filtered_words = [word for word in words if len(word) > 1]
        return Counter(filtered_words).most_common(top_n)

    def make_wordcloud(self, filename='wordcloud.png'):
        """Render a word cloud of all collected danmu and save it to ``filename``."""
        if not self.danmu_list:
            print("没有弹幕数据生成词云")
            return
        text = ' '.join(self.danmu_list)
        try:
            wc = WordCloud(
                font_path='simhei.ttf',  # a Chinese system font is required
                width=800,
                height=600,
                background_color='white',
                max_words=100
            ).generate(text)
            plt.figure(figsize=(10, 8))
            plt.imshow(wc)
            plt.axis('off')
            plt.title('弹幕词云图')
            plt.tight_layout()
            plt.savefig(filename, dpi=300, bbox_inches='tight')
            plt.show()
            # Bug fix: interpolate the actual output path instead of the
            # garbled hard-coded placeholder.
            print(f"词云图已保存为: {filename}")
        except Exception as e:
            print(f"生成词云失败: {e}")
            print("请确保系统中安装了中文字体")

    def save_to_excel(self, filename='result.xlsx'):
        """Write the raw danmu and the top-8 word counts to an Excel workbook."""
        if not self.danmu_list:
            print("没有数据可保存")
            return
        try:
            # Sheet 1: one row per danmu.
            df_danmu = pd.DataFrame(self.danmu_list, columns=['弹幕内容'])
            # Sheet 2: word-frequency table.
            top_words = self.analyze_words(8)
            df_words = pd.DataFrame(top_words, columns=['词语', '出现次数'])
            with pd.ExcelWriter(filename) as writer:
                df_danmu.to_excel(writer, sheet_name='弹幕数据', index=False)
                df_words.to_excel(writer, sheet_name='词频统计', index=False)
            # Bug fix: interpolate the actual output path instead of the
            # garbled hard-coded placeholder.
            print(f"数据已保存到: {filename}")
        except Exception as e:
            print(f"保存Excel失败: {e}")

    def get_conclusions(self):
        """Return a human-readable summary of the analysis as one string."""
        if not self.danmu_list:
            return "没有足够数据进行分析"
        total = len(self.danmu_list)
        top_words = self.analyze_words(5)
        conclusions = [f"共分析 {total} 条弹幕", "高频词TOP5:"]
        for word, count in top_words:
            conclusions.append(f" - {word}: {count}")
        return '\n'.join(conclusions)
def main():
    """Drive the full pipeline: fetch, analyze, visualize, export, summarize."""
    app = SimpleDanmuAnalyzer()
    # BV ids of videos about large language models; swap in any ids you
    # want to analyze.
    targets = (
        'BV1fp4y1q7E9',  # introduction to large language models
        'BV1nV41127AV',  # LLM application cases
        'BV1Ru41127XB',  # large-model technology deep dive
    )
    print("开始获取弹幕数据...")
    for video_id in targets:
        app.get_danmu(video_id)
    if not app.danmu_list:
        # Offline fallback so the rest of the demo still runs.
        print("没有获取到弹幕数据,使用示例数据演示")
        app.danmu_list = [
            '大语言模型很强大',
            'AI改变世界',
            '机器学习很有趣',
            '深度学习技术',
            '自然语言处理',
            '大模型应用广泛',
            '人工智能未来',
            'LLM发展很快',
            '智能助手很方便',
            '代码生成很实用',
        ]
    print("\n进行词频分析...")
    ranked = app.analyze_words(8)
    print("高频词TOP8:")
    for rank, (word, count) in enumerate(ranked, 1):
        print(f"{rank}. {word}: {count}")
    print("\n生成词云图...")
    app.make_wordcloud()
    print("\n保存数据到Excel...")
    app.save_to_excel()
    print("\n分析结论:")
    print(app.get_conclusions())
    print("\n任务完成!")


if __name__ == "__main__":
    main()
Loading…
Cancel
Save