Delete 'wordcloud_generator.py'

main
fzu102301136 5 months ago
parent e2b102b47f
commit 8c2c9499c7

@ -1,79 +0,0 @@
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Set the matplotlib font family to SimHei so Chinese text can be rendered.
# This runs at import time and changes matplotlib's global rcParams.
plt.rcParams["font.family"] = ["SimHei"]
class WordCloudGenerator:
    """Generate word-cloud images from Chinese text.

    Text is segmented with ``jieba``; stopwords and tokens of length one are
    dropped; the remaining words are rendered with ``WordCloud`` and then
    saved and displayed through matplotlib.
    """

    def __init__(self, stopwords=None):
        """Initialise the generator.

        :param stopwords: collection of words to exclude. When ``None`` the
            default stopword set is used; an explicitly empty collection is
            kept as-is.
        """
        if stopwords is None:
            stopwords = self._get_default_stopwords()
        self.stopwords = stopwords

    def _get_default_stopwords(self):
        """Return a new set holding the default stopwords."""
        # NOTE(review): the original literal list consisted mostly of
        # empty-string entries, which collapse to the single "" kept here.
        # They look like single-character stopwords that were lost before
        # review -- confirm against the author's list. Tokens of length one
        # are dropped by generate_from_texts regardless of this set.
        return {"", "一个", "没有", "自己"}

    def load_stopwords_from_file(self, file_path="stopwords.txt"):
        """Replace the stopword set with the entries of a text file.

        The file is expected to contain one stopword per line. Surrounding
        whitespace is stripped and blank lines are ignored. If the file
        cannot be read, the failure is printed and the default stopwords are
        used instead.

        :param file_path: path of the stopword file
        :return: ``self``, so that calls can be chained
        """
        try:
            # "utf-8-sig" reads plain UTF-8 and additionally drops a leading
            # byte-order mark; with plain "utf-8" the BOM would stay attached
            # to the first stopword, which would then never match a token.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                loaded = {line.strip() for line in f}
        except Exception as e:
            # Deliberately best-effort: any read failure is reported and the
            # defaults are used rather than propagating the error.
            print(f"加载停用词文件失败: {e},将使用默认停用词")
            self.stopwords = self._get_default_stopwords()
        else:
            # Blank lines carry no stopword.
            loaded.discard("")
            self.stopwords = loaded
            print(f"成功从{file_path}加载停用词")
        return self

    def generate_from_texts(self, texts, filename="词云图.png", font_path="simhei.ttf",
                            width=1200, height=800, max_words=200):
        """Generate, save and show a word cloud built from ``texts``.

        :param texts: iterable of text strings; a single string is treated as
            one document
        :param filename: path the image is saved to
        :param font_path: path of the font file passed to ``WordCloud``
        :param width: word-cloud width
        :param height: word-cloud height
        :param max_words: maximum number of words in the cloud
        :return: the generated ``WordCloud`` object, or ``None`` when there is
            no input or nothing remains after filtering
        """
        if not texts:
            print("没有可用于生成词云的文本数据")
            return None

        # Joining a bare string would put a space between every character,
        # and every resulting token would then be dropped by the length
        # filter below, so wrap a lone string as a one-item list.
        if isinstance(texts, str):
            texts = [texts]

        # Merge all texts and segment them into words.
        merged = " ".join(texts)
        tokens = jieba.cut(merged)

        # Keep only words that are not stopwords and are longer than one
        # character.
        kept = [
            token for token in tokens
            if token not in self.stopwords and len(token) > 1
        ]
        processed_text = " ".join(kept)
        if not processed_text:
            print("处理后的文本为空,无法生成词云")
            return None

        # collocations=False is forwarded to WordCloud unchanged.
        wc = WordCloud(
            font_path=font_path,
            background_color="white",
            width=width,
            height=height,
            max_words=max_words,
            collocations=False,
        ).generate(processed_text)

        # Figure size is given in inches (width/100 x height/100); the file
        # is written at dpi=300 before the figure is displayed.
        plt.figure(figsize=(width / 100, height / 100))
        plt.imshow(wc, interpolation="bilinear")
        plt.axis("off")
        plt.tight_layout()
        plt.savefig(filename, dpi=300)
        plt.show()
        print(f"词云图已保存到{filename}")
        return wc
Loading…
Cancel
Save