You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

67 lines
2.0 KiB

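'''
Generate a word-cloud image.

filePath:    path to the text file containing the words for the word cloud
maskImgPath: path to the mask image; set to '' if there is no mask image
saveImgPath: path where the generated image is saved
width:       horizontal size of the generated image
height:      vertical size of the generated image
save:        whether to save the image
'''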
import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import TfidfVectorizer
def ReadAndCutWords(filePath):
    """Read a UTF-8 text file and segment its contents with jieba.

    Args:
        filePath: Path of the text file to read.

    Returns:
        One string containing the segmented tokens joined by single spaces.
    """
    with open(filePath, mode='r', encoding='utf-8') as src:
        raw_text = src.read()
    # cut_all=False asks jieba for its non-"full" cut mode.
    tokens = jieba.cut(raw_text, cut_all=False)
    return ' '.join(tokens)
def ChangeToFreq(word_list):
    """Turn a space-separated token string into a word -> TF-IDF weight dict.

    The whole input is treated as a single document, so the fitted matrix
    has exactly one row; summing over axis 0 flattens it to one weight per
    vocabulary term.

    Args:
        word_list: Space-separated tokens (as produced by ReadAndCutWords).

    Returns:
        dict mapping each vocabulary term to its summed TF-IDF weight.
    """
    # NOTE(review): TfidfVectorizer's default token_pattern keeps only tokens
    # of two or more word characters, so single-character words are dropped
    # -- confirm that is intended for this corpus.
    tfidf = TfidfVectorizer()
    weights = tfidf.fit_transform([word_list]).toarray().sum(axis=0)
    terms = tfidf.get_feature_names_out()
    return {term: weight for term, weight in zip(terms, weights)}
def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, save=False):
    """Render a word cloud from word weights, display it, and optionally save it.

    Args:
        word_freq: Mapping of word -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        width: Width of the generated image.
        height: Height of the generated image.
        maskImgPath: Path of the mask image. Pass '' (or None) for no mask.
        saveImgPath: Destination path; only used when ``save`` is true.
        save: When true, write the rendered cloud to ``saveImgPath``.
    """
    # Any falsy path ('' or None) means "no mask"; decide this once so the
    # recolor step below cannot disagree with the loading step.
    mask = None
    if maskImgPath:
        # Read the pixels inside a context manager so the image file is
        # closed deterministically once the array has been built.
        with Image.open(maskImgPath) as mask_img:
            mask = np.array(mask_img)

    # NOTE(review): per the wordcloud docs, width/height are ignored when a
    # mask is supplied (the mask's shape is used) -- confirm against the
    # installed version.
    # NOTE(review): 'simhei.ttf' must be resolvable at runtime; presumably
    # chosen so CJK glyphs render -- verify on the target machine.
    wordcloud = WordCloud(
        font_path='simhei.ttf',
        mask=mask,
        width=width,
        height=height,
        background_color='white',
    ).generate_from_frequencies(word_freq)

    if mask is not None:
        # Recolor the words using colors taken from the mask image.
        image_colors = ImageColorGenerator(mask)
        wordcloud.recolor(color_func=image_colors)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    if save:
        wordcloud.to_file(saveImgPath)
def main():
    """Build a word cloud from ./docs/allBarrage.txt and save it under ./docs."""
    source_txt = './docs/allBarrage.txt'
    mask_png = './docs/maskImg.png'
    output_png = './docs/wordCloud.png'
    canvas_w, canvas_h = 1920, 1440

    # Pipeline: segment the text -> weight the words -> render, show and save.
    segmented = ReadAndCutWords(source_txt)
    weights = ChangeToFreq(segmented)
    CreateWordCloud(weights, canvas_w, canvas_h, mask_png, output_png, True)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()