"""Render charts (word cloud, bar chart, pie chart) for LLM danmu analysis data."""

import os
from collections import Counter
from typing import Optional

import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from wordcloud import WordCloud

# Workbook read by main() and the sheets it expects inside it.
DATA_FILE = 'data/processed/llm_analysis.xlsx'
DANMU_SHEET = '弹幕数据'
APPS_SHEET = '应用领域排名'
WORD_FREQ_SHEET = '词频统计'


class Visualizer:
    """Draw and save the charts that summarise the danmu analysis.

    Every plotting method saves the figure to ``save_path`` (creating the
    parent directory when needed), shows it, closes it, and prints a
    confirmation line.
    """

    # Font files probed in order; the first one that exists is handed to
    # WordCloud so that Chinese glyphs can be rendered.
    _FONT_CANDIDATES = (
        'C:/Windows/Fonts/simhei.ttf',  # Windows
        '/System/Library/Fonts/PingFang.ttc',  # macOS
        '/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf',  # Linux
    )

    # Keyword lists used by the naive substring-based sentiment classifier.
    _POSITIVE_WORDS = ('好', '强', '棒', '厉害', '方便', '高效', '智能', '强大', '优秀', '推荐')
    _NEGATIVE_WORDS = ('差', '弱', '问题', '担心', '风险', '贵', '难', '复杂', '取代', '改进')

    def __init__(self):
        """Configure matplotlib for Chinese text and locate a font file."""
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
        # Render the minus sign with an ASCII hyphen instead of U+2212.
        plt.rcParams['axes.unicode_minus'] = False
        self.font_path = self.find_chinese_font()

    def find_chinese_font(self) -> Optional[str]:
        """Return the first existing candidate font path, or ``None``.

        ``None`` means no candidate file exists; callers then leave the
        ``font_path`` option unset and WordCloud uses its own default.
        """
        # os.path.exists() returns False rather than raising for missing
        # paths, so no exception handling is needed here.
        for candidate in self._FONT_CANDIDATES:
            if os.path.exists(candidate):
                return candidate
        return None

    @staticmethod
    def _save_and_show(fig, save_path: str, **savefig_kwargs) -> None:
        """Save *fig* to *save_path*, display it, then release it."""
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        fig.savefig(save_path, dpi=300, bbox_inches='tight', **savefig_kwargs)
        plt.show()
        # Close explicitly so figures do not pile up across calls.
        plt.close(fig)

    def _classify_sentiment(self, text: str) -> str:
        """Return '积极', '消极' or '中性' based on keyword hits in *text*."""
        positive_hits = sum(word in text for word in self._POSITIVE_WORDS)
        negative_hits = sum(word in text for word in self._NEGATIVE_WORDS)
        if positive_hits > negative_hits:
            return '积极'
        if negative_hits > positive_hits:
            return '消极'
        return '中性'

    def create_wordcloud(self, word_freq_df: pd.DataFrame, save_path: str) -> None:
        """Draw a word cloud from a word-frequency table and save it.

        Args:
            word_freq_df: Table with a '词语' (word) column and a '频次'
                (count) column.
            save_path: Output image path; its parent directory is created
                if it does not exist.
        """
        word_freq = dict(zip(word_freq_df['词语'], word_freq_df['频次']))

        wc_config = {
            'width': 1200,
            'height': 800,
            'background_color': 'white',
            'colormap': 'viridis',
            'max_words': 100,
            'relative_scaling': 0.5,
        }
        if self.font_path:
            wc_config['font_path'] = self.font_path

        wordcloud = WordCloud(**wc_config).generate_from_frequencies(word_freq)

        fig = plt.figure(figsize=(15, 10))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title('大语言模型应用弹幕词云分析', fontsize=20, pad=20)
        plt.tight_layout()

        self._save_and_show(fig, save_path, facecolor='white', edgecolor='none')
        print(f"词云图已保存到: {save_path}")

    def plot_applications_bar(self, top_apps_df: pd.DataFrame, save_path: str) -> None:
        """Draw a horizontal bar chart of application-area counts and save it.

        Args:
            top_apps_df: Table with an '应用领域' (application area) column
                and an '出现次数' (occurrences) column.
            save_path: Output image path; its parent directory is created
                if it does not exist.
        """
        fig = plt.figure(figsize=(12, 8))

        # One distinct colour per bar, sampled evenly from the Set3 colormap.
        colors = plt.cm.Set3(np.linspace(0, 1, len(top_apps_df)))
        bars = plt.barh(top_apps_df['应用领域'], top_apps_df['出现次数'],
                        color=colors, edgecolor='black', alpha=0.8)

        # Print each bar's value just to the right of its end.
        for bar in bars:
            width = bar.get_width()
            plt.text(width + 0.1, bar.get_y() + bar.get_height() / 2,
                     f'{int(width)}', ha='left', va='center', fontsize=12)

        plt.xlabel('出现次数', fontsize=14)
        plt.title('大语言模型应用领域分布(Top 8)', fontsize=16, pad=20)
        plt.grid(axis='x', alpha=0.3)
        plt.tight_layout()

        self._save_and_show(fig, save_path)
        print(f"应用领域分布图已保存到: {save_path}")

    def plot_sentiment_analysis(self, processed_df: pd.DataFrame, save_path: str) -> None:
        """Draw a pie chart of keyword-based sentiment shares and save it.

        Each danmu is labelled positive, negative or neutral by comparing
        how many positive and negative keywords it contains.

        Args:
            processed_df: Table with an 'original_danmu' column.
            save_path: Output image path; its parent directory is created
                if it does not exist.
        """
        sentiment_counts = {'积极': 0, '消极': 0, '中性': 0}

        # Cells read back from Excel may be NaN (empty) or numeric (a danmu
        # such as "666"); drop the former and stringify the latter so the
        # substring test below never receives a non-string.
        danmu_texts = processed_df['original_danmu'].dropna().astype(str)
        for danmu in danmu_texts:
            sentiment_counts[self._classify_sentiment(danmu)] += 1

        # With no data every count is zero and the shares are undefined.
        if not any(sentiment_counts.values()):
            print("没有可用的弹幕数据,跳过情感分析图")
            return

        fig = plt.figure(figsize=(10, 8))
        colors = ['#ff9999', '#66b3ff', '#99ff99']
        plt.pie(list(sentiment_counts.values()),
                labels=list(sentiment_counts.keys()),
                autopct='%1.1f%%', colors=colors, startangle=90,
                explode=(0.1, 0, 0))  # pull out the positive slice
        plt.title('弹幕情感倾向分布', fontsize=16)

        self._save_and_show(fig, save_path)
        print(f"情感分析图已保存到: {save_path}")

    def create_comprehensive_visualization(self, processed_df: pd.DataFrame,
                                           top_apps_df: pd.DataFrame,
                                           word_freq_df: pd.DataFrame) -> None:
        """Generate all three charts under the ``visualization`` directory.

        Args:
            processed_df: Danmu table passed to plot_sentiment_analysis.
            top_apps_df: Application ranking passed to plot_applications_bar.
            word_freq_df: Word-frequency table passed to create_wordcloud.
        """
        # 1. Word cloud
        self.create_wordcloud(word_freq_df, 'visualization/wordcloud.png')
        # 2. Application-area distribution
        self.plot_applications_bar(top_apps_df, 'visualization/applications_distribution.png')
        # 3. Sentiment distribution
        self.plot_sentiment_analysis(processed_df, 'visualization/sentiment_analysis.png')


def main():
    """Load the processed workbook and generate every chart.

    Errors are reported on stdout instead of being raised, since this is
    the script's entry point.
    """
    visualizer = Visualizer()

    try:
        # One call with a list of sheet names parses the workbook once and
        # returns a dict keyed by sheet name.
        sheets = pd.read_excel(
            DATA_FILE, sheet_name=[DANMU_SHEET, APPS_SHEET, WORD_FREQ_SHEET])
    except Exception as e:
        # Only a loading failure suggests that the data has not been
        # generated yet, so the hint is limited to this branch.
        print(f"可视化过程中出现错误: {e}")
        print("请先运行 data_processor.py 生成数据")
        return

    try:
        visualizer.create_comprehensive_visualization(
            sheets[DANMU_SHEET], sheets[APPS_SHEET], sheets[WORD_FREQ_SHEET])
    except Exception as e:
        print(f"可视化过程中出现错误: {e}")
        return

    print("所有可视化图表生成完成!")


if __name__ == "__main__":
    main()