ADD file via upload

main
pmgp6jfbh 3 months ago
parent 16d1fb9ad2
commit fd5063bf88

@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
import os # 新增导入os模块
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import pandas as pd
import jieba
# Select font families that ship Chinese glyphs for matplotlib text
# (titles, axis labels, tick labels); listed in order of preference.
plt.rcParams.update(
    {"font.family": ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]}
)
class DanmakuVisualizer:
    """Render a word cloud and a top-8 keyword bar chart from danmaku data.

    The constructor loads both inputs eagerly and never raises on a missing,
    empty, or unreadable file: it prints a warning and leaves the matching
    attribute at its empty default, so the plotting methods can bail out
    with a warning of their own.

    Attributes:
        danmu_path: Path of the UTF-8 text file that is read line by line.
        excel_path: Path of the spreadsheet holding the keyword statistics.
        danmu_text: All non-blank lines of ``danmu_path``, stripped and
            joined with single spaces ("" when loading failed).
        top8: DataFrame read from ``excel_path`` (empty when loading failed).
        stop_words: Tokens dropped before the word cloud is built.
    """

    # Directory that receives the generated images; created on demand.
    OUTPUT_DIR = "./data"

    # Candidate font files probed in order; the first one that exists is
    # handed to WordCloud.
    FONT_PATHS = (
        "C:/Windows/Fonts/msyh.ttc",  # Windows
        "/System/Library/Fonts/PingFang.ttc",  # macOS
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",  # Linux
    )

    # Column names the bar chart reads from ``top8``.
    KEYWORD_COLUMN = "关键词"
    COUNT_COLUMN = "词频"

    def __init__(self, danmu_path: str, excel_path: str) -> None:
        """Load the danmaku text and the keyword statistics.

        Args:
            danmu_path: UTF-8 text file with the danmaku comments.
            excel_path: Spreadsheet readable by ``pandas.read_excel``.
        """
        self.danmu_path = danmu_path
        self.excel_path = excel_path
        self.danmu_text = ""
        self.top8 = pd.DataFrame()
        self.stop_words = {
            "已三连", "求资料", "打卡", "不错", "很好", "牛逼", "卧槽",
            "学习", "分享", "感谢", "点赞", "三连", "教程", "B站", "老师",
        }

        # Loading is deliberately best-effort: any failure is reported and
        # the attribute keeps its empty default instead of aborting.
        try:
            if os.path.exists(danmu_path) and os.path.getsize(danmu_path) > 0:
                with open(danmu_path, "r", encoding="utf-8") as f:
                    self.danmu_text = " ".join(
                        line.strip() for line in f if line.strip()
                    )
            else:
                print(f"警告:弹幕文件 {danmu_path} 不存在或为空")
        except Exception as e:
            print(f"读取弹幕文件失败:{e}")

        try:
            if os.path.exists(excel_path) and os.path.getsize(excel_path) > 0:
                self.top8 = pd.read_excel(excel_path)
            else:
                print(f"警告:统计文件 {excel_path} 不存在或为空")
        except Exception as e:
            print(f"读取统计文件失败:{e}")

    def _find_font(self):
        """Return the first existing path in ``FONT_PATHS``, or ``None``."""
        for path in self.FONT_PATHS:
            if os.path.exists(path):
                return path
        return None

    def generate_wordcloud(self) -> None:
        """Build the word cloud, save it as ``wordcloud.png`` and show it.

        Tokens produced by ``jieba.cut`` are kept only when they are longer
        than one character and not in ``stop_words``. Prints a warning and
        returns early when there is no text or nothing survives filtering.
        Any error raised while rendering or saving is reported, not raised.
        """
        if not self.danmu_text.strip():
            print("警告:无有效弹幕数据,无法生成词云")
            return

        filtered_words = [
            w for w in jieba.cut(self.danmu_text)
            if len(w) > 1 and w not in self.stop_words
        ]
        filtered_text = " ".join(filtered_words)
        if not filtered_text:
            print("警告:过滤后无有效词汇,无法生成词云")
            return

        try:
            font_path = self._find_font()
            if font_path is None:
                # WordCloud then uses its own default font, which may lack
                # CJK glyphs; tell the user instead of failing silently.
                print("警告:未找到可用的中文字体,词云中的中文可能无法正常显示")

            wc = WordCloud(
                font_path=font_path,
                width=1200,
                height=800,
                background_color="white",
                max_words=200,
            ).generate(filtered_text)

            # The output directory may not exist on a fresh checkout.
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)
            out_path = f"{self.OUTPUT_DIR}/wordcloud.png"
            wc.to_file(out_path)

            fig = plt.figure(figsize=(12, 8))
            try:
                plt.imshow(wc)
                plt.axis("off")
                plt.title("弹幕高频词云图")
                plt.show()
            finally:
                # Release the figure so repeated calls do not pile them up.
                plt.close(fig)
            print(f"词云图已保存至{out_path}")
        except Exception as e:
            print(f"生成词云失败:{e}")

    def plot_top8_bar(self) -> None:
        """Draw the keyword bar chart, save it as ``top8_bar.png``, show it.

        Prints a warning and returns early when ``top8`` has fewer than
        eight rows or lacks the keyword / count columns. Any error raised
        while plotting or saving is reported, not raised.
        """
        if self.top8.empty or len(self.top8) < 8:
            print("警告:统计数据不足,无法绘制柱状图")
            return

        # Check the schema up front so the user gets a clear message rather
        # than a bare KeyError text from the generic handler below.
        missing = [
            col for col in (self.KEYWORD_COLUMN, self.COUNT_COLUMN)
            if col not in self.top8.columns
        ]
        if missing:
            print(f"警告:统计数据缺少列 {missing},无法绘制柱状图")
            return

        try:
            # The output directory may not exist on a fresh checkout.
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)
            out_path = f"{self.OUTPUT_DIR}/top8_bar.png"

            fig = plt.figure(figsize=(10, 6))
            try:
                plt.bar(
                    self.top8[self.KEYWORD_COLUMN],
                    self.top8[self.COUNT_COLUMN],
                    color="skyblue",
                )
                plt.title("弹幕Top8高频词")
                plt.xlabel("关键词")
                plt.ylabel("词频")
                plt.xticks(rotation=45)
                plt.tight_layout()
                plt.savefig(out_path)
                plt.show()
            finally:
                # Release the figure so repeated calls do not pile them up.
                plt.close(fig)
            print(f"Top8柱状图已保存至{out_path}")
        except Exception as e:
            print(f"绘制柱状图失败:{e}")
Loading…
Cancel
Save