You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

70 lines
2.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

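'''
Generate a word-cloud image.

filePath: path of the text file that supplies the words for the cloud
maskImgPath: path of the mask image; set it to '' if there is no mask image
saveImgPath: path where the generated image is saved
width: horizontal size of the generated image
height: vertical size of the generated image
save: whether to save the image
'''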
import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import TfidfVectorizer
def ReadAndCutWords(filePath):
    """Split the barrage text into words that are easy to process.

    Args:
        filePath: Path of the UTF-8 text file to read.

    Returns:
        One string holding the segments produced by ``jieba.cut`` (called
        with ``cut_all=False``), joined by single spaces.
    """
    with open(filePath, encoding='utf-8') as source:
        raw_text = source.read()
    # jieba.cut hands back an iterable of segments; join consumes it directly.
    return ' '.join(jieba.cut(raw_text, cut_all=False))
def ChangeToFreq(word_list):
    """Weight the segmented words with TF-IDF.

    Args:
        word_list: Space-separated words; the whole string is handed to the
            vectorizer as a single document.

    Returns:
        A dict mapping each feature name reported by ``TfidfVectorizer`` to
        its TF-IDF value summed over the documents (only one here).
    """
    tfidf = TfidfVectorizer()
    # Fit on a one-document corpus, then collapse the matrix column-wise.
    scores = tfidf.fit_transform([word_list]).toarray().sum(axis=0)
    vocabulary = tfidf.get_feature_names_out()
    return dict(zip(vocabulary, scores))
def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath,
                    save=False, fontPath='simhei.ttf'):
    """Render a word cloud from word weights, show it and optionally save it.

    Args:
        word_freq: Mapping of word -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        width: Canvas width passed to ``WordCloud``.
        height: Canvas height passed to ``WordCloud``.
        maskImgPath: Path of the mask image, or ``''``/``None`` for no mask.
            When a mask is given, the words are also recoloured from it.
        saveImgPath: Destination file for the image; only used when ``save``
            is true.
        save: Whether to write the rendered image to ``saveImgPath``.
        fontPath: Font file handed to ``WordCloud``. Defaults to
            ``'simhei.ttf'``, the value that used to be hard-coded.

    NOTE(review): the wordcloud documentation states that ``width`` and
    ``height`` are ignored when a mask is supplied (the mask's shape is used
    instead) -- confirm against the installed version.
    """
    mask = None
    if maskImgPath:
        # Use a context manager so the image file handle is closed, and
        # normalise to RGB so grayscale/palette masks still give the
        # 3-channel array that image-based recolouring needs.
        with Image.open(maskImgPath) as mask_img:
            mask = np.array(mask_img.convert('RGB'))

    wordcloud = WordCloud(
        font_path=fontPath,
        mask=mask,
        width=width,
        height=height,
        background_color='white',
    ).generate_from_frequencies(word_freq)

    if mask is not None:
        # Colour each word from the mask image region it covers.
        wordcloud.recolor(color_func=ImageColorGenerator(mask))

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    if save:
        wordcloud.to_file(saveImgPath)
def main():
    """Build, show and save the word cloud for the barrage text in ./docs."""
    source_text = './docs/allBarrage.txt'
    frequencies = ChangeToFreq(ReadAndCutWords(source_text))
    CreateWordCloud(
        frequencies,
        width=1920,
        height=1440,
        maskImgPath='./docs/maskImg.png',
        saveImgPath='./docs/wordCloud.png',
        save=True,
    )
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()