"""Generate a word-cloud image from a text file of barrage (danmaku) comments.

Pipeline: read the text and segment it with jieba, weight the resulting
terms with TF-IDF, then render the weights as a word cloud that can
optionally be shaped and coloured by a mask image.
"""
import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import TfidfVectorizer


def ReadAndCutWords(filePath):
    """Read a UTF-8 text file and segment it into space-separated words.

    Args:
        filePath: Path of the text file holding the words for the cloud.

    Returns:
        A single string in which the jieba tokens are joined by one space.
    """
    with open(filePath, 'r', encoding='utf-8') as file:
        text = file.read()
    # cut_all=False selects jieba's precise mode rather than full mode.
    return ' '.join(jieba.cut(text, cut_all=False))


def ChangeToFreq(word_list):
    """Score every term of the segmented text with TF-IDF.

    The whole text is treated as a single document. With only one document
    every term has the same IDF, so the scores are effectively L2-normalised
    term counts.

    NOTE: TfidfVectorizer re-tokenises the string with its default token
    pattern, which only keeps tokens of two or more word characters, so
    single-character words do not appear in the result.

    Args:
        word_list: Space-separated words, as returned by ReadAndCutWords.

    Returns:
        A dict mapping each term to its summed TF-IDF weight.
    """
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([word_list])
    feature_names = vectorizer.get_feature_names_out()
    weights = tfidf_matrix.toarray().sum(axis=0)
    return dict(zip(feature_names, weights))


def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath,
                    save=False, fontPath='simhei.ttf'):
    """Render a word cloud from term weights, display it and optionally save it.

    Args:
        word_freq: Mapping of word -> weight used to size the words.
        width: Horizontal size of the generated image. Per the wordcloud
            documentation this is ignored when a mask is supplied.
        height: Vertical size of the generated image. Ignored when a mask
            is supplied.
        maskImgPath: Path of the mask image; pass '' (or None) for no mask.
        saveImgPath: Path the image is written to when ``save`` is true.
        save: Whether to write the rendered image to ``saveImgPath``.
        fontPath: Font file used to draw the words. The default SimHei font
            can render Chinese glyphs.
    """
    mask = None
    if maskImgPath:
        # Close the file handle deterministically, and normalise the mask to
        # three channels: ImageColorGenerator cannot colour from greyscale or
        # palette arrays. For RGB/RGBA input the first three channels (the
        # only ones wordcloud inspects) are unchanged by the conversion.
        with Image.open(maskImgPath) as mask_image:
            mask = np.array(mask_image.convert('RGB'))

    wordcloud = WordCloud(
        font_path=fontPath,
        mask=mask,
        width=width,
        height=height,
        background_color='white',
    ).generate_from_frequencies(word_freq)

    if mask is not None:
        # Colour each word from the mask pixels underneath it.
        wordcloud.recolor(color_func=ImageColorGenerator(mask))

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    if save:
        wordcloud.to_file(saveImgPath)


def main():
    """Build the word cloud for the bundled barrage text and save it."""
    filePath = './docs/allBarrage.txt'
    maskImgPath = './docs/maskImg.png'
    saveImgPath = './docs/wordCloud.png'
    width = 1920
    height = 1440
    isSave = True

    word_list = ReadAndCutWords(filePath)
    word_freq = ChangeToFreq(word_list)
    CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, isSave)


if __name__ == '__main__':
    main()