# -*- coding: utf-8 -*-
"""Render danmaku text as a word cloud and keyword statistics as a bar chart."""
import os

import jieba
import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

# Font families tried in order so that Chinese titles and axis labels render.
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it is missing."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


class DanmakuVisualizer:
    """Load danmaku text and keyword statistics, then draw charts from them.

    Loading is best-effort: a missing, empty, or unreadable input file only
    prints a warning and leaves the corresponding attribute at its empty
    default, so the plotting methods can detect the lack of data themselves.

    Attributes:
        danmu_path: Path of the UTF-8 text file that was read for danmaku.
        excel_path: Path of the Excel file that was read for statistics.
        danmu_text: Non-blank lines of the danmaku file joined by spaces
            ("" when nothing could be read).
        top8: DataFrame loaded from ``excel_path`` (empty when nothing could
            be read). ``plot_top8_bar`` reads its "关键词" and "词频" columns.
        stop_words: Tokens dropped before the word cloud is built.
    """

    # Image files written by the plotting methods.
    WORDCLOUD_PATH = "./data/wordcloud.png"
    TOP8_BAR_PATH = "./data/top8_bar.png"

    # Candidate font files with CJK coverage; the first existing one is used.
    _FONT_CANDIDATES = (
        "C:/Windows/Fonts/msyh.ttc",  # Windows
        "/System/Library/Fonts/PingFang.ttc",  # macOS
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",  # Linux
    )

    def __init__(self, danmu_path, excel_path):
        """Read both input files, tolerating missing or broken ones.

        Args:
            danmu_path: Path to a UTF-8 text file, one danmaku per line.
            excel_path: Path to an Excel file readable by ``pandas.read_excel``.
        """
        self.danmu_path = danmu_path
        self.excel_path = excel_path
        self.danmu_text = ""
        self.top8 = pd.DataFrame()  # stays empty if the Excel file is unusable

        # Load the danmaku text; any failure is reported, never raised.
        try:
            if os.path.exists(danmu_path) and os.path.getsize(danmu_path) > 0:
                with open(danmu_path, "r", encoding="utf-8") as f:
                    self.danmu_text = " ".join(
                        line.strip() for line in f if line.strip()
                    )
            else:
                print(f"警告:弹幕文件 {danmu_path} 不存在或为空")
        except Exception as e:
            print(f"读取弹幕文件失败:{e}")

        # Load the keyword statistics; any failure is reported, never raised.
        try:
            if os.path.exists(excel_path) and os.path.getsize(excel_path) > 0:
                self.top8 = pd.read_excel(excel_path)
            else:
                print(f"警告:统计文件 {excel_path} 不存在或为空")
        except Exception as e:
            print(f"读取统计文件失败:{e}")

        # Stop words excluded from the word cloud.
        self.stop_words = {
            "已三连", "求资料", "打卡", "不错", "很好", "牛逼", "卧槽",
            "学习", "分享", "感谢", "点赞", "三连", "教程", "B站", "老师"
        }

    def generate_wordcloud(self):
        """Build a word cloud from the danmaku text, save it, and display it.

        The text is segmented with jieba; single-character tokens and stop
        words are dropped. The image is written to ``WORDCLOUD_PATH`` (its
        directory is created on demand) and then shown with matplotlib.
        Returns None; problems are printed rather than raised.
        """
        if not self.danmu_text.strip():
            print("警告:无有效弹幕数据,无法生成词云")
            return

        # Keep only multi-character tokens that are not stop words.
        filtered_words = [
            w for w in jieba.cut(self.danmu_text)
            if len(w) > 1 and w not in self.stop_words
        ]
        filtered_text = " ".join(filtered_words)

        if not filtered_text:
            print("警告:过滤后无有效词汇,无法生成词云")
            return

        try:
            # Pick the first candidate font file present on this machine.
            font_path = next(
                (p for p in self._FONT_CANDIDATES if os.path.exists(p)), None
            )
            if font_path is None:
                # WordCloud will fall back to its bundled default font;
                # tell the user instead of silently producing a bad image.
                print("警告:未找到中文字体,词云中的中文可能无法正常显示")

            wc = WordCloud(
                font_path=font_path,
                width=1200,
                height=800,
                background_color="white",
                max_words=200
            ).generate(filtered_text)

            # The output directory may not exist on a fresh checkout.
            _ensure_parent_dir(self.WORDCLOUD_PATH)
            wc.to_file(self.WORDCLOUD_PATH)

            fig = plt.figure(figsize=(12, 8))
            try:
                plt.imshow(wc)
                plt.axis("off")
                plt.title("弹幕高频词云图")
                plt.show()
            finally:
                # Release the figure so repeated calls do not accumulate them.
                plt.close(fig)
            print(f"词云图已保存至{self.WORDCLOUD_PATH}")
        except Exception as e:
            print(f"生成词云失败:{e}")

    def plot_top8_bar(self):
        """Draw the keyword-frequency bar chart, save it, and display it.

        Requires ``self.top8`` to hold at least 8 rows; otherwise a warning
        is printed and nothing is drawn. The chart plots the "关键词" column
        against the "词频" column and is written to ``TOP8_BAR_PATH`` (its
        directory is created on demand). Returns None; problems are printed
        rather than raised.
        """
        if self.top8.empty or len(self.top8) < 8:
            print("警告:统计数据不足,无法绘制柱状图")
            return

        try:
            fig = plt.figure(figsize=(10, 6))
            try:
                plt.bar(self.top8["关键词"], self.top8["词频"], color="skyblue")
                plt.title("弹幕Top8高频词")
                plt.xlabel("关键词")
                plt.ylabel("词频")
                plt.xticks(rotation=45)
                plt.tight_layout()
                # The output directory may not exist on a fresh checkout.
                _ensure_parent_dir(self.TOP8_BAR_PATH)
                plt.savefig(self.TOP8_BAR_PATH)
                plt.show()
            finally:
                # Also runs when a column is missing, so no figure leaks.
                plt.close(fig)
            print(f"Top8柱状图已保存至{self.TOP8_BAR_PATH}")
        except Exception as e:
            print(f"绘制柱状图失败:{e}")