|
|
#!/usr/bin/env python3
|
|
|
# -*- coding: utf-8 -*-
|
|
|
import os
|
|
|
import sys
|
|
|
import matplotlib
|
|
|
import matplotlib.pyplot as plt
|
|
|
from wordcloud import WordCloud
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
# ---------------- 中文字体自动检测 ----------------
|
|
|
def find_chinese_font(extra_candidates=None):
    """Return the path of the first CJK-capable font file found, or ``None``.

    A fixed list of well-known font locations is probed for the current
    platform (Windows, macOS, or anything else, which is treated as Linux).

    Args:
        extra_candidates: Optional path, or iterable of paths, that are
            checked *before* the built-in platform list. Accepts ``str`` or
            ``os.PathLike`` entries.

    Returns:
        The first candidate that exists as a regular file, as a ``str``;
        ``None`` when no candidate is present.
    """
    if sys.platform.startswith("win"):
        platform_candidates = [
            r"C:\Windows\Fonts\msyh.ttc",  # Microsoft YaHei
            r"C:\Windows\Fonts\simhei.ttf",  # SimHei
            r"C:\Windows\Fonts\msyh.ttf",
        ]
    elif sys.platform == "darwin":  # macOS
        platform_candidates = [
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Medium.ttc",
            "/Library/Fonts/Arial Unicode.ttf",
            # Newer macOS releases keep Arial Unicode in the Supplemental folder.
            "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",
        ]
    else:  # Linux and other Unix-like systems
        platform_candidates = [
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/noto/NotoSansCJK-sc-Regular.otf",
            "/usr/share/fonts/truetype/arphic/ukai.ttf",
            "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
            "/usr/share/fonts/truetype/simhei.ttf",
            # Debian/Ubuntu install fonts-noto-cjk under opentype/, not truetype/.
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        ]

    # Normalise the optional argument: a single path is treated as a
    # one-element list so callers do not accidentally iterate its characters.
    if extra_candidates is None:
        preferred = []
    elif isinstance(extra_candidates, (str, os.PathLike)):
        preferred = [extra_candidates]
    else:
        preferred = list(extra_candidates)

    for candidate in (*preferred, *platform_candidates):
        path = os.fspath(candidate)
        # isfile (not exists): a directory with a matching name is not a font.
        if os.path.isfile(path):
            return path
    return None
|
|
|
|
|
|
|
|
|
# ---------------- 中文字体配置修正版 ----------------
|
|
|
# Resolve a CJK font once at import time. The file path is kept in _ch_font
# (WordCloud takes a path), while matplotlib is configured by family name.
_ch_font = find_chinese_font()

if _ch_font:
    from matplotlib import font_manager

    # Register the file explicitly so the family name set below resolves even
    # when matplotlib's font cache has not indexed this file. addfont() is
    # absent on old matplotlib releases, hence the guard.
    if hasattr(font_manager.fontManager, "addfont"):
        font_manager.fontManager.addfont(_ch_font)

    font_prop = font_manager.FontProperties(fname=_ch_font)
    # rcParams expects a font family name, not a file path.
    font_name = font_prop.get_name()

    # Put the CJK font first but keep the previously configured families as
    # fallbacks instead of discarding them.
    _fallback_fonts = [
        family
        for family in matplotlib.rcParams['font.sans-serif']
        if family != font_name
    ]
    matplotlib.rcParams['font.sans-serif'] = [font_name] + _fallback_fonts
    # Render the minus sign with an ASCII hyphen; many CJK fonts lack U+2212.
    matplotlib.rcParams['axes.unicode_minus'] = False
    print(f"✅ 已加载中文字体: {font_name}")
else:
    print("⚠️ 未检测到系统中文字体。请安装 Noto Sans CJK / Microsoft YaHei / SimHei 等字体。")
    font_prop = None
    # Defined in both branches so importers can test it without a NameError.
    font_name = None
|
|
|
|
|
|
|
|
|
# ---------------- 可视化类 ----------------
|
|
|
class Visualizer:
    """Render keyword-frequency data as PNG charts.

    Produces a word cloud, a bar chart, a pie chart and a combined
    three-panel dashboard. Every image is written into ``output_dir``.

    NOTE(review): several chart titles contain emoji; these may render as
    missing-glyph boxes with CJK-only fonts — confirm on target systems.
    """

    def __init__(self, output_dir="output"):
        """Create the output directory and record the font used for word clouds.

        Args:
            output_dir: Directory that receives the generated images. It is
                created if it does not exist.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        # WordCloud takes a font file path, unlike matplotlib's rcParams.
        self.wordcloud_font = _ch_font
        print(f"📊 可视化器初始化完成,输出目录: {self.output_dir}")

    @staticmethod
    def _as_counter(freq_counter):
        """Return *freq_counter* as a ``Counter`` so ``most_common`` is available.

        A ``Counter`` is returned unchanged; any other mapping of word to
        count is copied into a new ``Counter``. ``None`` yields an empty one.
        """
        if isinstance(freq_counter, Counter):
            return freq_counter
        return Counter(freq_counter)

    def generate_wordcloud(self, freq_counter):
        """Write ``wordcloud.png`` built from all entries of *freq_counter*.

        Args:
            freq_counter: Mapping of word to frequency.

        Returns:
            None. Nothing is written when *freq_counter* is empty.
        """
        freqs = self._as_counter(freq_counter)
        print(f"🎨 生成词云图,关键词数量: {len(freqs)}")
        # WordCloud raises on empty input, so bail out early like the
        # other chart methods do.
        if not freqs:
            print("⚠️ 无数据可生成词云图")
            return
        if not self.wordcloud_font:
            print("⚠️ 未找到中文字体,词云可能会乱码。")

        cloud = WordCloud(
            font_path=self.wordcloud_font,
            width=1200,
            height=800,
            background_color="white",
            max_words=200,
            colormap="viridis",
        )
        cloud.generate_from_frequencies(freqs)
        path = os.path.join(self.output_dir, "wordcloud.png")
        cloud.to_file(path)
        print(f"✅ 词云图已保存至: {path}")

    def plot_top_applications(self, freq_counter, top_n=8):
        """Write a bar chart of the *top_n* most frequent words.

        Args:
            freq_counter: Mapping of word to frequency.
            top_n: Number of bars to draw.

        Returns:
            None. Nothing is written when there is no data.
        """
        top_items = self._as_counter(freq_counter).most_common(top_n)
        if not top_items:
            print("⚠️ 无数据可绘制柱状图")
            return

        words, counts = zip(*top_items)
        fig = plt.figure(figsize=(10, 6))
        try:
            plt.bar(words, counts, color="steelblue")
            plt.xticks(rotation=30, fontsize=10)
            plt.title(f"🏆 前{top_n}大AI相关高频词", fontsize=14)
            plt.xlabel("关键词", fontsize=12)
            plt.ylabel("出现次数", fontsize=12)
            plt.tight_layout()
            # The file name is fixed and does not follow top_n; kept as-is
            # because other code may look for this exact name.
            path = os.path.join(self.output_dir, "top8_apps.png")
            fig.savefig(path, bbox_inches="tight", dpi=150)
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)
        print(f"✅ 柱状图已保存至: {path}")

    def plot_pie_chart(self, freq_counter, top_n=6):
        """Write a pie chart of the *top_n* most frequent words.

        Args:
            freq_counter: Mapping of word to frequency.
            top_n: Number of slices to draw.

        Returns:
            None. Nothing is written when there is no data.
        """
        top_items = self._as_counter(freq_counter).most_common(top_n)
        if not top_items:
            print("⚠️ 无数据生成饼图")
            return

        labels, values = zip(*top_items)
        fig = plt.figure(figsize=(7, 7))
        try:
            plt.pie(values, labels=labels, autopct="%1.1f%%", startangle=140)
            plt.title("🎯 热门关键词占比", fontsize=14)
            path = os.path.join(self.output_dir, "pie_chart.png")
            fig.savefig(path, bbox_inches="tight", dpi=150)
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)
        print(f"✅ 饼图已保存至: {path}")

    def generate_dashboard(self, freq_counter):
        """Write ``dashboard.png``: word cloud, bar chart and pie chart side by side.

        The word cloud uses the 15 most frequent words, the bar chart the
        first 8 of those and the pie chart the first 6.

        Args:
            freq_counter: Mapping of word to frequency.

        Returns:
            None. Nothing is written when there is no data.
        """
        print("🖼️ 生成整合数据可视化大屏...")
        top_items = self._as_counter(freq_counter).most_common(15)
        if not top_items:
            print("⚠️ 无有效数据生成大屏")
            return

        words, counts = zip(*top_items)
        cloud = WordCloud(
            font_path=self.wordcloud_font,
            width=800,
            height=400,
            background_color="white",
            colormap="plasma",
        ).generate_from_frequencies(dict(top_items))

        fig, axs = plt.subplots(1, 3, figsize=(18, 6))
        try:
            cloud_ax, bar_ax, pie_ax = axs

            cloud_ax.imshow(cloud, interpolation="bilinear")
            cloud_ax.set_title("🌈 热门关键词词云")
            cloud_ax.axis("off")

            bar_ax.bar(words[:8], counts[:8], color="skyblue")
            bar_ax.set_title("🏆 Top 8 高频关键词")
            bar_ax.tick_params(axis='x', rotation=30)

            pie_ax.pie(counts[:6], labels=words[:6], autopct="%1.1f%%", startangle=150)
            pie_ax.set_title("🎯 热词占比")

            dashboard_path = os.path.join(self.output_dir, "dashboard.png")
            plt.tight_layout()
            fig.savefig(dashboard_path, bbox_inches="tight", dpi=200)
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)
        print(f"✅ 可视化大屏已保存至: {dashboard_path}")
|