# -*- coding: utf-8 -*-
"""Visualize danmaku (bullet-comment) data as a word cloud and a bar chart."""
import os

import jieba
import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

# Font families able to render Chinese glyphs; matplotlib tries them in order.
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]


class DanmakuVisualizer:
    """Load danmaku text and keyword statistics, then render charts from them.

    Loading is best-effort: a missing, empty or unreadable input file is
    reported on stdout and leaves the corresponding attribute empty, so the
    constructor itself does not raise for bad input files.

    Attributes set by ``__init__``:
        danmu_path: path of the plain-text danmaku file (UTF-8).
        excel_path: path of the Excel file holding keyword statistics.
        danmu_text: non-blank danmaku lines joined by single spaces.
        top8: DataFrame read from ``excel_path`` (empty if loading failed).
        stop_words: words excluded from the word cloud.
    """

    # Output locations of the generated images.
    WORDCLOUD_PATH = "./data/wordcloud.png"
    BAR_CHART_PATH = "./data/top8_bar.png"

    # Candidate CJK-capable font files; the first one that exists is used.
    FONT_CANDIDATES = (
        "C:/Windows/Fonts/msyh.ttc",  # Windows
        "/System/Library/Fonts/PingFang.ttc",  # macOS
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",  # Linux
    )

    # Columns that plot_top8_bar reads from the statistics table.
    KEYWORD_COLUMN = "关键词"
    COUNT_COLUMN = "词频"

    def __init__(self, danmu_path, excel_path):
        """Read both input files, tolerating missing or broken ones.

        Args:
            danmu_path: path to the UTF-8 text file with one danmaku per line.
            excel_path: path to the Excel file with the keyword statistics.
        """
        self.danmu_path = danmu_path
        self.excel_path = excel_path
        self.danmu_text = ""
        self.top8 = pd.DataFrame()  # stays empty unless the Excel file loads

        # Load the danmaku text. Any failure is reported rather than raised so
        # that the object can still be constructed.
        try:
            if os.path.exists(danmu_path) and os.path.getsize(danmu_path) > 0:
                with open(danmu_path, "r", encoding="utf-8") as f:
                    stripped = (line.strip() for line in f)
                    self.danmu_text = " ".join(s for s in stripped if s)
            else:
                print(f"警告:弹幕文件 {danmu_path} 不存在或为空")
        except Exception as e:
            print(f"读取弹幕文件失败:{e}")

        # Load the keyword statistics with the same best-effort policy.
        try:
            if os.path.exists(excel_path) and os.path.getsize(excel_path) > 0:
                self.top8 = pd.read_excel(excel_path)
            else:
                print(f"警告:统计文件 {excel_path} 不存在或为空")
        except Exception as e:
            print(f"读取统计文件失败:{e}")

        # Stop words removed before building the word cloud.
        self.stop_words = {
            "已三连", "求资料", "打卡", "不错", "很好",
            "牛逼", "卧槽", "学习", "分享", "感谢",
            "点赞", "三连", "教程", "B站", "老师",
        }

    @staticmethod
    def _ensure_parent_dir(file_path):
        """Create the directory that will contain *file_path* if it is missing."""
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def generate_wordcloud(self):
        """Build a word cloud from the danmaku text, save it and display it.

        The text is segmented with jieba; single-character tokens and stop
        words are dropped. The image is written to ``WORDCLOUD_PATH`` and then
        shown with matplotlib. Problems are reported on stdout; nothing is
        returned.
        """
        if not self.danmu_text.strip():
            print("警告:无有效弹幕数据,无法生成词云")
            return

        # Segment the text, then drop single-character tokens and stop words.
        filtered_words = [
            w for w in jieba.cut(self.danmu_text)
            if len(w) > 1 and w not in self.stop_words
        ]
        filtered_text = " ".join(filtered_words)

        if not filtered_text:
            print("警告:过滤后无有效词汇,无法生成词云")
            return

        try:
            # Pick the first CJK font file present on this machine.
            font_path = next(
                (p for p in self.FONT_CANDIDATES if os.path.exists(p)), None
            )
            if font_path is None:
                # With font_path=None WordCloud falls back to its own default
                # font, which may lack Chinese glyphs.
                print("警告:未找到中文字体,词云中的中文可能无法正常显示")

            wc = WordCloud(
                font_path=font_path,
                width=1200,
                height=800,
                background_color="white",
                max_words=200,
            ).generate(filtered_text)

            # Saving fails if the target directory does not exist yet.
            self._ensure_parent_dir(self.WORDCLOUD_PATH)
            wc.to_file(self.WORDCLOUD_PATH)

            plt.figure(figsize=(12, 8))
            plt.imshow(wc)
            plt.axis("off")
            plt.title("弹幕高频词云图")
            plt.show()
            print(f"词云图已保存至{self.WORDCLOUD_PATH}")
        except Exception as e:
            print(f"生成词云失败:{e}")

    def plot_top8_bar(self):
        """Draw a bar chart of keyword frequencies, save it and display it.

        Requires at least 8 rows in ``self.top8`` and the columns
        ``关键词`` (keyword) and ``词频`` (frequency). Every row of the table
        is plotted. The image is written to ``BAR_CHART_PATH``. Problems are
        reported on stdout; nothing is returned.
        """
        if self.top8.empty or len(self.top8) < 8:
            print("警告:统计数据不足,无法绘制柱状图")
            return

        # Validate the schema up front so that a malformed file produces a
        # clear message and no empty figure is left open.
        required = (self.KEYWORD_COLUMN, self.COUNT_COLUMN)
        missing = [col for col in required if col not in self.top8.columns]
        if missing:
            print(f"警告:统计文件缺少列 {missing},无法绘制柱状图")
            return

        try:
            plt.figure(figsize=(10, 6))
            plt.bar(
                self.top8[self.KEYWORD_COLUMN],
                self.top8[self.COUNT_COLUMN],
                color="skyblue",
            )
            plt.title("弹幕Top8高频词")
            plt.xlabel("关键词")
            plt.ylabel("词频")
            plt.xticks(rotation=45)
            plt.tight_layout()

            # Saving fails if the target directory does not exist yet.
            self._ensure_parent_dir(self.BAR_CHART_PATH)
            plt.savefig(self.BAR_CHART_PATH)
            plt.show()
            print(f"Top8柱状图已保存至{self.BAR_CHART_PATH}")
        except Exception as e:
            print(f"绘制柱状图失败:{e}")