You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

155 lines
5.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import matplotlib
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from collections import Counter
# ---------------- 中文字体自动检测 ----------------
def find_chinese_font():
    """Return the path of the first installed CJK-capable font, or None.

    A short, platform-specific list of well-known font locations is probed
    in priority order (Windows, macOS, otherwise Linux-style paths).

    Returns:
        The first candidate path that is an existing regular file, or
        ``None`` when none of the candidates is present.
    """
    if sys.platform.startswith("win"):
        candidates = [
            r"C:\Windows\Fonts\msyh.ttc",    # Microsoft YaHei
            r"C:\Windows\Fonts\simhei.ttf",  # SimHei
            r"C:\Windows\Fonts\msyh.ttf",
        ]
    elif sys.platform == "darwin":  # macOS
        candidates = [
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Medium.ttc",
            "/Library/Fonts/Arial Unicode.ttf",
        ]
    else:  # Linux and other Unix-like systems
        candidates = [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/noto/NotoSansCJK-sc-Regular.otf",
            "/usr/share/fonts/truetype/arphic/ukai.ttf",
            "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
            "/usr/share/fonts/truetype/simhei.ttf",
            # Debian/Ubuntu's fonts-noto-cjk package installs under
            # opentype/, not truetype/. Appended last so the original
            # candidates keep their priority.
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            # Debian/Ubuntu's fonts-wqy-microhei package.
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        ]

    # isfile (rather than exists) so a directory with a matching name is
    # never returned as a font file.
    return next((path for path in candidates if os.path.isfile(path)), None)
# ---------------- 中文字体配置修正版 ----------------
# Module-level font setup: runs once at import time.
# _ch_font  - path of the detected CJK font file, or None.
# font_prop - matplotlib FontProperties for that file, or None.
_ch_font = find_chinese_font()
if _ch_font:
    from matplotlib import font_manager

    # Register the file with matplotlib's font manager so that the family
    # name set in rcParams below can be resolved even if this font is not
    # in matplotlib's font cache. addfont() exists since Matplotlib 3.2;
    # older releases skip this step and behave as before.
    if hasattr(font_manager.fontManager, "addfont"):
        font_manager.fontManager.addfont(_ch_font)

    font_prop = font_manager.FontProperties(fname=_ch_font)
    # rcParams font lists hold family names, not file paths, so extract the
    # family name from the font file.
    font_name = font_prop.get_name()
    # Put the CJK font first but keep the previously configured families
    # after it instead of discarding them.
    matplotlib.rcParams['font.sans-serif'] = [font_name] + [
        family
        for family in matplotlib.rcParams['font.sans-serif']
        if family != font_name
    ]
    # Render the minus sign with the ASCII hyphen instead of U+2212.
    matplotlib.rcParams['axes.unicode_minus'] = False
    print(f"✅ 已加载中文字体: {font_name}")
else:
    print("⚠️ 未检测到系统中文字体。请安装 Noto Sans CJK / Microsoft YaHei / SimHei 等字体。")
    font_prop = None
# ---------------- 可视化类 ----------------
class Visualizer:
    """Render keyword-frequency data as PNG images.

    Every public method takes a mapping of keyword -> count (a
    ``collections.Counter`` or a plain ``dict``) and writes one image into
    ``output_dir``. When there is nothing to draw, the method prints a
    warning and returns without creating a file.
    """

    def __init__(self, output_dir="output"):
        """Create the output directory if needed and remember the font path.

        Args:
            output_dir: Directory that receives the generated images.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        # WordCloud is given the font file path (may be None when no CJK
        # font was detected at import time).
        self.wordcloud_font = _ch_font
        print(f"📊 可视化器初始化完成,输出目录: {self.output_dir}")

    @staticmethod
    def _top_items(freq_counter, limit):
        """Return up to *limit* ``(keyword, count)`` pairs, highest count first."""
        # Going through Counter lets callers pass a plain dict as well as a
        # Counter; an empty or None input yields an empty list.
        return Counter(freq_counter).most_common(limit)

    def generate_wordcloud(self, freq_counter):
        """Save a word cloud of all keywords to ``wordcloud.png``.

        Args:
            freq_counter: Mapping of keyword -> count.
        """
        # WordCloud.generate_from_frequencies raises ValueError on an empty
        # mapping, so bail out early like the other chart methods do.
        if not freq_counter:
            print("⚠️ 无数据可生成词云图")
            return
        print(f"🎨 生成词云图,关键词数量: {len(freq_counter)}")
        if not self.wordcloud_font:
            print("⚠️ 未找到中文字体,词云可能会乱码。")
        wc = WordCloud(
            font_path=self.wordcloud_font or None,
            width=1200, height=800,
            background_color="white",
            max_words=200,
            colormap="viridis",
        )
        wc.generate_from_frequencies(freq_counter)
        path = os.path.join(self.output_dir, "wordcloud.png")
        wc.to_file(path)
        print(f"✅ 词云图已保存至: {path}")

    def plot_top_applications(self, freq_counter, top_n=8):
        """Save a bar chart of the ``top_n`` most frequent keywords.

        Args:
            freq_counter: Mapping of keyword -> count.
            top_n: Number of keywords to plot.
        """
        top_items = self._top_items(freq_counter, top_n)
        if not top_items:
            print("⚠️ 无数据可绘制柱状图")
            return
        words, counts = zip(*top_items)
        fig = plt.figure(figsize=(10, 6))
        try:
            plt.bar(words, counts, color="steelblue")
            plt.xticks(rotation=30, fontsize=10)
            plt.title(f"🏆 前{top_n}大AI相关高频词", fontsize=14)
            plt.xlabel("关键词", fontsize=12)
            plt.ylabel("出现次数", fontsize=12)
            plt.tight_layout()
            # The file name is fixed regardless of top_n; kept unchanged so
            # existing consumers of the output still find it.
            path = os.path.join(self.output_dir, "top8_apps.png")
            fig.savefig(path, bbox_inches="tight", dpi=150)
        finally:
            # Close this specific figure even if drawing or saving failed.
            plt.close(fig)
        print(f"✅ 柱状图已保存至: {path}")

    def plot_pie_chart(self, freq_counter, top_n=6):
        """Save a pie chart of the ``top_n`` most frequent keywords.

        Args:
            freq_counter: Mapping of keyword -> count.
            top_n: Number of keywords to include as slices.
        """
        top_items = self._top_items(freq_counter, top_n)
        if not top_items:
            print("⚠️ 无数据生成饼图")
            return
        labels, values = zip(*top_items)
        fig = plt.figure(figsize=(7, 7))
        try:
            plt.pie(values, labels=labels, autopct="%1.1f%%", startangle=140)
            plt.title("🎯 热门关键词占比", fontsize=14)
            path = os.path.join(self.output_dir, "pie_chart.png")
            fig.savefig(path, bbox_inches="tight", dpi=150)
        finally:
            plt.close(fig)
        print(f"✅ 饼图已保存至: {path}")

    def generate_dashboard(self, freq_counter):
        """Save a three-panel dashboard (word cloud, bar chart, pie chart).

        The word cloud uses the 15 most frequent keywords, the bar chart the
        first 8 of those and the pie chart the first 6.

        Args:
            freq_counter: Mapping of keyword -> count.
        """
        print("🖼️ 生成整合数据可视化大屏...")
        top_items = self._top_items(freq_counter, 15)
        if not top_items:
            print("⚠️ 无有效数据生成大屏")
            return
        words, counts = zip(*top_items)
        wc = WordCloud(
            font_path=self.wordcloud_font or None,
            width=800, height=400,
            background_color="white",
            colormap="plasma",
        ).generate_from_frequencies(dict(top_items))

        fig, axs = plt.subplots(1, 3, figsize=(18, 6))
        try:
            # Panel 1: word cloud image.
            axs[0].imshow(wc, interpolation="bilinear")
            axs[0].set_title("🌈 热门关键词词云")
            axs[0].axis("off")
            # Panel 2: bar chart of the first 8 keywords.
            axs[1].bar(words[:8], counts[:8], color="skyblue")
            axs[1].set_title("🏆 Top 8 高频关键词")
            axs[1].tick_params(axis='x', rotation=30)
            # Panel 3: pie chart of the first 6 keywords.
            axs[2].pie(counts[:6], labels=words[:6], autopct="%1.1f%%", startangle=150)
            axs[2].set_title("🎯 热词占比")

            dashboard_path = os.path.join(self.output_dir, "dashboard.png")
            fig.tight_layout()
            fig.savefig(dashboard_path, bbox_inches="tight", dpi=200)
        finally:
            plt.close(fig)
        print(f"✅ 可视化大屏已保存至: {dashboard_path}")