# 102301535/visualizer.py

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from wordcloud import WordCloud
import pandas as pd
import numpy as np
from collections import Counter
import os

class Visualizer:
    """Render charts (word cloud, bar chart, pie chart) for danmaku analysis.

    Constructing an instance configures matplotlib's global ``rcParams`` so
    that CJK text renders, and caches the path of a CJK-capable font file
    for use by the word cloud.
    """

    # Font files probed in order; the first one that exists is used.
    # The first three entries keep their original priority.
    _FONT_CANDIDATES = (
        'C:/Windows/Fonts/simhei.ttf',  # Windows (SimHei)
        '/System/Library/Fonts/PingFang.ttc',  # macOS
        '/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf',  # Linux
        'C:/Windows/Fonts/msyh.ttc',  # Windows (Microsoft YaHei)
        '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc',  # Linux (Noto CJK)
        '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',  # Linux (WenQuanYi)
    )

    # Keyword lists for the naive substring-based sentiment classification.
    _POSITIVE_WORDS = ('好', '强', '棒', '厉害', '方便', '高效', '智能', '强大', '优秀', '推荐')
    _NEGATIVE_WORDS = ('差', '弱', '问题', '担心', '风险', '贵', '难', '复杂', '取代', '改进')

    def __init__(self):
        # Configure fonts able to display Chinese text in matplotlib figures.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
        # Render the minus sign with an ASCII hyphen instead of U+2212.
        plt.rcParams['axes.unicode_minus'] = False
        self.font_path = self.find_chinese_font()

    def find_chinese_font(self):
        """Return the first existing CJK font file path, or ``None`` if none exists."""
        for font_path in self._FONT_CANDIDATES:
            if os.path.exists(font_path):
                return font_path
        # Nothing found: callers fall back to the library's default font.
        return None

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain *save_path*, if it has one."""
        parent = os.path.dirname(save_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _build_word_freq(word_freq_df):
        """Build a ``{word: frequency}`` dict from the '词语' / '频次' columns.

        Rows with a missing word or frequency are skipped, and words are
        coerced to ``str`` because spreadsheet round-trips can turn tokens
        such as ``666`` into numbers. Later duplicates overwrite earlier ones.
        """
        word_freq = {}
        for word, freq in zip(word_freq_df['词语'], word_freq_df['频次']):
            if pd.isna(word) or pd.isna(freq):
                continue
            word_freq[str(word)] = freq
        return word_freq

    @classmethod
    def _count_sentiments(cls, danmu_values):
        """Count positive / negative / neutral entries among *danmu_values*.

        An entry is positive when it contains more distinct positive keywords
        than negative ones, negative in the opposite case, neutral otherwise.
        Missing values are skipped; other non-string values are classified
        by their ``str()`` form.
        """
        counts = {'积极': 0, '消极': 0, '中性': 0}
        for danmu in danmu_values:
            if not isinstance(danmu, str):
                if pd.isna(danmu):
                    continue
                danmu = str(danmu)

            positive_count = sum(1 for word in cls._POSITIVE_WORDS if word in danmu)
            negative_count = sum(1 for word in cls._NEGATIVE_WORDS if word in danmu)

            if positive_count > negative_count:
                counts['积极'] += 1
            elif negative_count > positive_count:
                counts['消极'] += 1
            else:
                counts['中性'] += 1
        return counts

    def create_wordcloud(self, word_freq_df: pd.DataFrame, save_path: str) -> None:
        """Render a word cloud from a word-frequency table and save it.

        Args:
            word_freq_df: Table with '词语' (word) and '频次' (frequency) columns.
            save_path: Output image path; its parent directory is created.

        Any error WordCloud raises (e.g. for an empty frequency map) propagates.
        """
        word_freq = self._build_word_freq(word_freq_df)

        wc_config = {
            'width': 1200,
            'height': 800,
            'background_color': 'white',
            'colormap': 'viridis',
            'max_words': 100,
            'relative_scaling': 0.5,
        }
        # Only pass a font when one was found; otherwise WordCloud uses its default.
        if self.font_path:
            wc_config['font_path'] = self.font_path

        wordcloud = WordCloud(**wc_config).generate_from_frequencies(word_freq)

        fig = plt.figure(figsize=(15, 10))
        try:
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('大语言模型应用弹幕词云分析', fontsize=20, pad=20)
            plt.tight_layout()

            # Create the directory of the requested path, not a fixed one.
            self._ensure_parent_dir(save_path)
            plt.savefig(save_path, dpi=300, bbox_inches='tight',
                        facecolor='white', edgecolor='none')
            plt.show()
        finally:
            # Release the figure so repeated calls do not accumulate open figures.
            plt.close(fig)

        print(f"词云图已保存到: {save_path}")

    def plot_applications_bar(self, top_apps_df: pd.DataFrame, save_path: str) -> None:
        """Draw a horizontal bar chart of application-domain counts and save it.

        Args:
            top_apps_df: Table with '应用领域' (domain) and '出现次数' (count) columns.
            save_path: Output image path; its parent directory is created.
        """
        fig = plt.figure(figsize=(12, 8))
        try:
            # One distinct colour per bar, sampled evenly from the Set3 colormap.
            colors = plt.cm.Set3(np.linspace(0, 1, len(top_apps_df)))

            bars = plt.barh(top_apps_df['应用领域'], top_apps_df['出现次数'],
                            color=colors, edgecolor='black', alpha=0.8)

            # Annotate each bar with its value just past the bar's end.
            for bar in bars:
                width = bar.get_width()
                plt.text(width + 0.1, bar.get_y() + bar.get_height() / 2,
                         f'{int(width)}', ha='left', va='center', fontsize=12)

            plt.xlabel('出现次数', fontsize=14)
            # The title reflects the actual number of rows rather than a fixed 8.
            plt.title(f'大语言模型应用领域分布（Top {len(top_apps_df)}）',
                      fontsize=16, pad=20)
            plt.grid(axis='x', alpha=0.3)
            plt.tight_layout()

            self._ensure_parent_dir(save_path)
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            plt.close(fig)

        print(f"应用领域分布图已保存到: {save_path}")

    def plot_sentiment_analysis(self, processed_df: pd.DataFrame, save_path: str) -> None:
        """Draw a pie chart of keyword-based sentiment shares and save it.

        Args:
            processed_df: Table with an 'original_danmu' column of danmaku text.
            save_path: Output image path; its parent directory is created.

        When there is no classifiable danmaku the chart is skipped, since an
        all-zero pie has no meaningful percentages.
        """
        sentiment_counts = self._count_sentiments(processed_df['original_danmu'])

        if sum(sentiment_counts.values()) == 0:
            print("没有可用于情感分析的弹幕数据，已跳过情感分析图")
            return

        fig = plt.figure(figsize=(10, 8))
        try:
            colors = ['#ff9999', '#66b3ff', '#99ff99']
            plt.pie(list(sentiment_counts.values()),
                    labels=list(sentiment_counts.keys()),
                    autopct='%1.1f%%', colors=colors, startangle=90,
                    explode=(0.1, 0, 0))  # pull out the positive slice
            plt.title('弹幕情感倾向分布', fontsize=16)

            self._ensure_parent_dir(save_path)
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            plt.close(fig)

        print(f"情感分析图已保存到: {save_path}")

    def create_comprehensive_visualization(self, processed_df: pd.DataFrame,
                                           top_apps_df: pd.DataFrame,
                                           word_freq_df: pd.DataFrame) -> None:
        """Generate all three charts into the ``visualization`` directory.

        Args:
            processed_df: Danmaku table passed to the sentiment pie chart.
            top_apps_df: Application-domain ranking passed to the bar chart.
            word_freq_df: Word-frequency table passed to the word cloud.
        """
        # Make sure the output directory exists before any chart is written.
        os.makedirs('visualization', exist_ok=True)

        # 1. Word cloud
        self.create_wordcloud(word_freq_df, 'visualization/wordcloud.png')

        # 2. Application-domain distribution
        self.plot_applications_bar(top_apps_df, 'visualization/applications_distribution.png')

        # 3. Sentiment distribution
        self.plot_sentiment_analysis(processed_df, 'visualization/sentiment_analysis.png')

def main():
    """Load the processed workbook and generate every visualization.

    Loading failures print the error plus a hint to run the data processor
    first; failures while rendering print only the error, because
    regenerating the data would not help in that case.
    """
    visualizer = Visualizer()

    excel_path = 'data/processed/llm_analysis.xlsx'
    sheet_names = ['弹幕数据', '应用领域排名', '词频统计']

    try:
        # A list of sheet names parses the workbook once and returns a
        # dict of DataFrames keyed by sheet name.
        sheets = pd.read_excel(excel_path, sheet_name=sheet_names)
    except Exception as e:
        print(f"可视化过程中出现错误: {e}")
        print("请先运行 data_processor.py 生成数据")
        return

    try:
        visualizer.create_comprehensive_visualization(
            sheets['弹幕数据'], sheets['应用领域排名'], sheets['词频统计'])
        print("所有可视化图表生成完成！")
    except Exception as e:
        print(f"可视化过程中出现错误: {e}")

# Run the visualization pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()