import os

import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
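
# Build word clouds from the .txt files in a directory: generate_wordcloud()
# draws a plain rectangular cloud, generate_trophy_wordcloud() draws one shaped
# by a trophy mask image.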


def generate_wordcloud(directory, output_file):
    """
    Generate a plain word cloud image from the .txt files in a directory.
    """
    # Concatenate the contents of every .txt file in the directory.
    text = ""
    for filename in os.listdir(directory):
        if filename.endswith('.txt'):
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
                text += file.read()

    # Segment the Chinese text into words.
    words = jieba.cut(text)

    # Common function words and punctuation to exclude from the cloud.
    stop_words = set([
        "我", "你", "他", "她", "它", "是", "的", "了", "在", "吗", "啊", "吧",
        "也", "有", "这", "那", "从", "为", "上", "下", "和", "与", "就", "不",
        "中", "还", "要", "会", "能", "对", "着", "个", "把", "所以", "但", "也",
        "所以", "从", "如", "她", "他", "它", "还", "也", "吗", "啊", "哦", "?", "!", ",", "。"
    ])

    # Drop whitespace-only tokens and stop words, then count word frequencies.
    filtered_words = [word for word in words if word.strip() and word not in stop_words]

    word_freq = {}
    for word in filtered_words:
        word_freq[word] = word_freq.get(word, 0) + 1

    # Render the cloud with a Chinese-capable font and save it to disk.
    wordcloud = WordCloud(font_path='simsun.ttc', width=800, height=400,
                          background_color='white').generate_from_frequencies(word_freq)

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.savefig(output_file)
    plt.close()


def generate_trophy_wordcloud(directory, output_file):
    """
    Generate a trophy-shaped word cloud image from the .txt files in a directory.
    """
    from wordcloud import WordCloud, STOPWORDS
    import matplotlib.pyplot as plt
    import numpy as np
    import jieba.posseg as pseg
    from collections import Counter
    from PIL import Image
    from matplotlib import colors

    # Concatenate the contents of every .txt file in the directory.
    text = ""
    for filename in os.listdir(directory):
        if filename.endswith('.txt'):
            with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
                text += file.read()

    # Part-of-speech tagging: keep noun-like words that are at least two characters long.
    words = pseg.cut(text)
    report_words = [word for word, flag in words if (len(word) >= 2) and ('n' in flag)]

    # Keep the 300 most frequent words.
    result = Counter(report_words).most_common(300)
    content = dict(result)

    stopwords = set(STOPWORDS)
    stopwords.update(["我", "你", "他", "她", "它", "是", "的", "了", "在", "吗", "啊", "吧",
                      "也", "有", "这", "那", "从", "为", "上", "下", "和", "与", "就", "不",
                      "中", "还", "要", "会", "能", "对", "着", "个", "把", "所以", "但", "也",
                      "所以", "从", "如", "她", "他", "它", "还", "也", "吗", "啊", "哦", "?", "!", ",", "。"])

    # generate_from_frequencies() does not apply the stopwords argument (it is only
    # used by generate()), so filter the frequency dict explicitly.
    content = {word: freq for word, freq in content.items() if word not in stopwords}

    # Trophy-shaped mask image: white pixels are masked out, the rest is drawable.
    background = Image.open("E:/前端/奖杯4.png").convert('RGB')
    mask = np.array(background)

    font_path = r"C:\Windows\Fonts\STLITI.TTF"

    max_font_size = 100
    min_font_size = 10
    # Single-color palette so every word is drawn in the same red.
    color_list = ['#FF274B']
    colormap = colors.ListedColormap(color_list)

    wordcloud = WordCloud(scale=4, font_path=font_path, colormap=colormap, width=1600, height=900,
                          background_color='white', stopwords=stopwords, mask=mask,
                          max_font_size=max_font_size,
                          min_font_size=min_font_size).generate_from_frequencies(content)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(output_file)
    plt.close()
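

# A minimal usage sketch; the input directory and output filenames below are
# assumptions, adjust them to your environment.
if __name__ == "__main__":
    generate_wordcloud("texts", "wordcloud.png")
    generate_trophy_wordcloud("texts", "trophy_wordcloud.png")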