You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
4.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import os # 新增导入os模块
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import pandas as pd
import jieba
# Configure Chinese fonts: matplotlib is given a list of Chinese font family
# names as its font family, so that the Chinese titles and axis labels used
# below can be rendered.
plt.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]
class DanmakuVisualizer:
    """Render a word cloud and a Top-8 keyword bar chart from danmaku data.

    The constructor loads both inputs eagerly and never raises on I/O
    problems: a missing, empty, or unreadable file only prints a warning and
    leaves the corresponding attribute at its empty default.

    Attributes:
        danmu_path: Path of the text file holding one danmaku per line.
        excel_path: Path of the Excel sheet with the keyword statistics.
        danmu_text: All non-blank danmaku lines, stripped and joined by
            single spaces ("" when nothing could be loaded).
        top8: DataFrame read from ``excel_path`` (empty when loading failed).
        stop_words: Words dropped before the word cloud is built.
    """

    # Candidate font files, probed in order; the first one that exists wins.
    _FONT_CANDIDATES = (
        "C:/Windows/Fonts/msyh.ttc",  # Windows
        "/System/Library/Fonts/PingFang.ttc",  # macOS
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",  # Linux
    )

    # Directory both images are written to.
    _OUTPUT_DIR = "./data"

    def __init__(self, danmu_path, excel_path):
        """Load the danmaku text and the keyword statistics.

        Args:
            danmu_path: Path of the UTF-8 text file with one danmaku per line.
            excel_path: Path of the Excel file passed to ``pd.read_excel``.
        """
        self.danmu_path = danmu_path
        self.excel_path = excel_path
        self.danmu_text = ""
        self.top8 = pd.DataFrame()  # stays empty unless the Excel file loads

        # Load the danmaku text; any failure is reported, never raised.
        try:
            if os.path.exists(danmu_path) and os.path.getsize(danmu_path) > 0:
                # "utf-8-sig" also reads plain UTF-8 but drops a leading BOM,
                # which would otherwise stick to the first word as "\ufeff".
                with open(danmu_path, "r", encoding="utf-8-sig") as f:
                    stripped = (line.strip() for line in f)
                    self.danmu_text = " ".join(s for s in stripped if s)
            else:
                print(f"警告:弹幕文件 {danmu_path} 不存在或为空")
        except Exception as e:
            print(f"读取弹幕文件失败:{e}")

        # Load the keyword statistics with the same best-effort policy.
        try:
            if os.path.exists(excel_path) and os.path.getsize(excel_path) > 0:
                self.top8 = pd.read_excel(excel_path)
            else:
                print(f"警告:统计文件 {excel_path} 不存在或为空")
        except Exception as e:
            print(f"读取统计文件失败:{e}")

        # Stop words removed from the token stream before the cloud is drawn.
        self.stop_words = {
            "已三连", "求资料", "打卡", "不错", "很好", "牛逼", "卧槽",
            "学习", "分享", "感谢", "点赞", "三连", "教程", "B站", "老师"
        }

    @classmethod
    def _find_font_path(cls):
        """Return the first candidate font file that exists, or None."""
        return next(
            (path for path in cls._FONT_CANDIDATES if os.path.exists(path)),
            None,
        )

    def generate_wordcloud(self):
        """Build the word cloud, save it to ./data/wordcloud.png and show it.

        Returns None in every case. Prints a warning and returns early when
        there is no danmaku text or when nothing survives stop-word
        filtering; any error raised while rendering or saving is printed
        rather than propagated.
        """
        if not self.danmu_text.strip():
            print("警告:无有效弹幕数据,无法生成词云")
            return

        # Tokenize with jieba, keeping only multi-character non-stop words.
        tokens = [
            w for w in jieba.cut(self.danmu_text)
            if len(w) > 1 and w not in self.stop_words
        ]
        filtered_text = " ".join(tokens)
        if not filtered_text:
            print("警告:过滤后无有效词汇,无法生成词云")
            return

        try:
            # font_path is None when no candidate exists; WordCloud then uses
            # its own default font (NOTE(review): that font presumably lacks
            # CJK glyphs - confirm).
            wc = WordCloud(
                font_path=self._find_font_path(),
                width=1200, height=800,
                background_color="white",
                max_words=200
            ).generate(filtered_text)

            # The output directory may not exist yet on a fresh checkout.
            os.makedirs(self._OUTPUT_DIR, exist_ok=True)
            wc.to_file("./data/wordcloud.png")

            fig = plt.figure(figsize=(12, 8))
            try:
                plt.imshow(wc)
                plt.axis("off")
                plt.title("弹幕高频词云图")
                plt.show()
            finally:
                # Release the figure so repeated calls do not pile up figures.
                plt.close(fig)
            print("词云图已保存至./data/wordcloud.png")
        except Exception as e:
            print(f"生成词云失败:{e}")

    def plot_top8_bar(self):
        """Draw the Top-8 keyword bar chart, save it and show it.

        Reads the "关键词" (keyword) and "词频" (frequency) columns of
        ``self.top8`` and writes ./data/top8_bar.png. Returns None in every
        case. Prints a warning and returns early when fewer than 8 rows are
        available; any error raised while plotting or saving is printed
        rather than propagated.
        """
        if self.top8.empty or len(self.top8) < 8:
            print("警告:统计数据不足,无法绘制柱状图")
            return

        try:
            fig = plt.figure(figsize=(10, 6))
            try:
                plt.bar(self.top8["关键词"], self.top8["词频"], color="skyblue")
                plt.title("弹幕Top8高频词")
                plt.xlabel("关键词")
                plt.ylabel("词频")
                plt.xticks(rotation=45)
                plt.tight_layout()

                # The output directory may not exist yet on a fresh checkout.
                os.makedirs(self._OUTPUT_DIR, exist_ok=True)
                plt.savefig("./data/top8_bar.png")
                plt.show()
            finally:
                # Release the figure so repeated calls do not pile up figures.
                plt.close(fig)
            print("Top8柱状图已保存至./data/top8_bar.png")
        except Exception as e:
            print(f"绘制柱状图失败:{e}")