You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
102301535/visualizer.py

164 lines
6.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from wordcloud import WordCloud
import pandas as pd
import numpy as np
from collections import Counter
import os
class Visualizer:
    """Render the charts for the danmaku (bullet-comment) analysis.

    Produces three figures: a word cloud, a horizontal bar chart of
    application areas, and a pie chart of a keyword-based sentiment tally.
    Every plotting method saves the figure to ``save_path``, shows it, and
    then closes it.
    """

    # Candidate CJK-capable font files, probed in order.
    _FONT_CANDIDATES = (
        'C:/Windows/Fonts/simhei.ttf',  # Windows
        '/System/Library/Fonts/PingFang.ttc',  # macOS
        '/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf',  # Linux
    )

    # Keyword lists for the naive sentiment tally. Blank entries have been
    # removed on purpose: the empty string is a substring of every string, so
    # each blank keyword counted as a hit on every comment and skewed results.
    _POSITIVE_WORDS = ('厉害', '方便', '高效', '智能', '强大', '优秀', '推荐')
    _NEGATIVE_WORDS = ('问题', '担心', '风险', '复杂', '取代', '改进')

    def __init__(self):
        """Configure matplotlib for Chinese text and locate a font file."""
        # Font families matplotlib should try for sans-serif text.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False
        self.font_path = self.find_chinese_font()

    def find_chinese_font(self):
        """Return the first existing candidate font path, or ``None``.

        ``None`` means no candidate file exists; the word cloud then falls
        back to the WordCloud library's default font.
        """
        for candidate in self._FONT_CANDIDATES:
            if os.path.exists(candidate):
                return candidate
        return None

    @staticmethod
    def _ensure_parent_dir(save_path):
        """Create the directory that will contain ``save_path`` if needed."""
        parent = os.path.dirname(save_path)
        # A bare file name has no parent component; nothing to create then.
        if parent:
            os.makedirs(parent, exist_ok=True)

    @classmethod
    def _count_sentiments(cls, danmu_values):
        """Tally comments as positive / negative / neutral by keyword hits.

        Args:
            danmu_values: Iterable of comment texts. Missing values
                (``None`` / NaN) are skipped; other non-string values are
                converted with ``str()``.

        Returns:
            dict with keys '积极', '消极', '中性' mapped to counts.
        """
        # Defensive filter: never let a blank keyword match everything.
        positive = [word for word in cls._POSITIVE_WORDS if word]
        negative = [word for word in cls._NEGATIVE_WORDS if word]

        counts = {'积极': 0, '消极': 0, '中性': 0}
        for danmu in danmu_values:
            if not isinstance(danmu, str):
                if pd.isna(danmu):
                    continue  # empty spreadsheet cell, not a comment
                danmu = str(danmu)
            positive_hits = sum(1 for word in positive if word in danmu)
            negative_hits = sum(1 for word in negative if word in danmu)
            if positive_hits > negative_hits:
                counts['积极'] += 1
            elif negative_hits > positive_hits:
                counts['消极'] += 1
            else:
                counts['中性'] += 1
        return counts

    def create_wordcloud(self, word_freq_df: pd.DataFrame, save_path: str):
        """Draw a word cloud from word frequencies and save it.

        Args:
            word_freq_df: DataFrame with a '词语' (word) column and a
                '频次' (frequency) column.
            save_path: Output image path; its parent directory is created
                when missing.
        """
        word_freq = dict(zip(word_freq_df['词语'], word_freq_df['频次']))

        wc_config = {
            'width': 1200,
            'height': 800,
            'background_color': 'white',
            'colormap': 'viridis',
            'max_words': 100,
            'relative_scaling': 0.5,
        }
        # Only pass a font when one was found on this machine.
        if self.font_path:
            wc_config['font_path'] = self.font_path
        wordcloud = WordCloud(**wc_config).generate_from_frequencies(word_freq)

        self._ensure_parent_dir(save_path)
        fig = plt.figure(figsize=(15, 10))
        try:
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('大语言模型应用弹幕词云分析', fontsize=20, pad=20)
            plt.tight_layout()
            plt.savefig(save_path, dpi=300, bbox_inches='tight',
                        facecolor='white', edgecolor='none')
            plt.show()
        finally:
            # Release the figure so repeated calls do not accumulate figures.
            plt.close(fig)
        print(f"词云图已保存到: {save_path}")

    def plot_applications_bar(self, top_apps_df: pd.DataFrame, save_path: str):
        """Draw a horizontal bar chart of application areas and save it.

        Args:
            top_apps_df: DataFrame with an '应用领域' (application area)
                column and an '出现次数' (occurrence count) column.
            save_path: Output image path; its parent directory is created
                when missing.
        """
        self._ensure_parent_dir(save_path)
        fig = plt.figure(figsize=(12, 8))
        try:
            # One colour per bar, spread evenly over the Set3 colormap.
            colors = plt.cm.Set3(np.linspace(0, 1, len(top_apps_df)))
            bars = plt.barh(top_apps_df['应用领域'], top_apps_df['出现次数'],
                            color=colors, edgecolor='black', alpha=0.8)
            # Label each bar with its count, just past the bar's end.
            for bar in bars:
                width = bar.get_width()
                plt.text(width + 0.1, bar.get_y() + bar.get_height() / 2,
                         f'{int(width)}', ha='left', va='center', fontsize=12)
            plt.xlabel('出现次数', fontsize=14)
            plt.title('大语言模型应用领域分布Top 8', fontsize=16, pad=20)
            plt.grid(axis='x', alpha=0.3)
            plt.tight_layout()
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            plt.close(fig)
        print(f"应用领域分布图已保存到: {save_path}")

    def plot_sentiment_analysis(self, processed_df: pd.DataFrame, save_path: str):
        """Draw a pie chart of the keyword-based sentiment tally and save it.

        Args:
            processed_df: DataFrame with an 'original_danmu' column holding
                the comment texts.
            save_path: Output image path; its parent directory is created
                when missing.
        """
        sentiment_counts = self._count_sentiments(processed_df['original_danmu'])
        if not any(sentiment_counts.values()):
            # A pie chart cannot be normalised when every slice is zero.
            print("没有可用的弹幕数据，跳过情感分析图")
            return

        self._ensure_parent_dir(save_path)
        fig = plt.figure(figsize=(10, 8))
        try:
            colors = ['#ff9999', '#66b3ff', '#99ff99']
            # Pass plain lists rather than dict views for array conversion.
            plt.pie(list(sentiment_counts.values()),
                    labels=list(sentiment_counts.keys()),
                    autopct='%1.1f%%', colors=colors, startangle=90,
                    explode=(0.1, 0, 0))  # pull out the positive slice
            plt.title('弹幕情感倾向分布', fontsize=16)
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            plt.close(fig)
        print(f"情感分析图已保存到: {save_path}")

    def create_comprehensive_visualization(self, processed_df: pd.DataFrame,
                                           top_apps_df: pd.DataFrame,
                                           word_freq_df: pd.DataFrame):
        """Generate all three charts under the ``visualization`` directory.

        Args:
            processed_df: Comment data passed to the sentiment chart.
            top_apps_df: Application-area ranking passed to the bar chart.
            word_freq_df: Word frequencies passed to the word cloud.
        """
        os.makedirs('visualization', exist_ok=True)
        # 1. Word cloud
        self.create_wordcloud(word_freq_df, 'visualization/wordcloud.png')
        # 2. Application-area distribution
        self.plot_applications_bar(top_apps_df, 'visualization/applications_distribution.png')
        # 3. Sentiment distribution
        self.plot_sentiment_analysis(processed_df, 'visualization/sentiment_analysis.png')
def main():
    """Load the processed analysis workbook and generate every chart.

    Reads three sheets from ``data/processed/llm_analysis.xlsx`` and passes
    them to ``Visualizer.create_comprehensive_visualization``. Errors are
    reported on stdout rather than raised; the hint to run
    ``data_processor.py`` is printed only when loading the data fails.
    """
    workbook_path = 'data/processed/llm_analysis.xlsx'
    danmu_sheet, apps_sheet, freq_sheet = '弹幕数据', '应用领域排名', '词频统计'

    try:
        # A list of sheet names makes pandas open the workbook once and
        # return a dict of DataFrames keyed by sheet name.
        sheets = pd.read_excel(workbook_path,
                               sheet_name=[danmu_sheet, apps_sheet, freq_sheet])
    except (FileNotFoundError, ValueError) as e:
        # Missing file, or (presumably) a missing worksheet reported by
        # pandas as ValueError: the processed data has not been generated.
        print(f"可视化过程中出现错误: {e}")
        print("请先运行 data_processor.py 生成数据")
        return

    visualizer = Visualizer()
    try:
        visualizer.create_comprehensive_visualization(
            sheets[danmu_sheet], sheets[apps_sheet], sheets[freq_sheet])
        print("所有可视化图表生成完成!")
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print(f"可视化过程中出现错误: {e}")
# Run the visualization pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()