You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

70 lines
2.1 KiB

'''
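Generate a word-cloud image.

filePath: path to the text file containing the words for the word cloud
maskImgPath: path to the mask image; set to '' if there is no mask image
saveImgPath: path where the generated image is saved
width: horizontal size of the generated image
height: vertical size of the generated image
save: whether to save the image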
'''
import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import TfidfVectorizer
2 months ago
# Split the barrage (bullet-comment) text into words that are easy to process.
def ReadAndCutWords(filePath):
    """Read a UTF-8 text file and return its jieba tokens as one string.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A single string in which the tokens produced by ``jieba.cut`` are
        joined by one space each.
    """
    with open(filePath, 'r', encoding='utf-8') as source:
        raw_text = source.read()

    # cut_all=False: segment without jieba's "cut all" mode.
    tokens = jieba.cut(raw_text, cut_all=False)
    return ' '.join(tokens)
2 months ago
# Weight the segmented words with TF-IDF.
def ChangeToFreq(word_list):
    """Map every term in the segmented text to its TF-IDF weight.

    The whole text is handed to the vectorizer as a one-document corpus.

    Args:
        word_list: String of space-separated tokens.

    Returns:
        A dict mapping each feature name reported by the vectorizer to the
        sum of its column in the TF-IDF matrix.
    """
    tfidf = TfidfVectorizer()
    # One-element corpus: the entire text is the only document.
    weights = tfidf.fit_transform([word_list])
    terms = tfidf.get_feature_names_out()
    # Collapse the document axis so there is exactly one weight per term.
    totals = weights.toarray().sum(axis=0)
    return dict(zip(terms, totals))
2 months ago
# Render the word cloud from the word weights.
def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, save=False):
    """Render a word cloud from term weights, display it, and optionally save it.

    Args:
        word_freq: Mapping of term -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        width: Width passed to ``WordCloud``.
        height: Height passed to ``WordCloud``.
        maskImgPath: Path of the mask image, or ``''`` for no mask. When a
            mask is given, the cloud is also recolored from that image.
        saveImgPath: Destination path used when ``save`` is true.
        save: When true, write the rendered cloud to ``saveImgPath`` after
            ``plt.show()`` returns.

    NOTE(review): the wordcloud documentation states that a mask overrides
    ``width``/``height`` -- confirm if exact output size matters.
    """
    has_mask = maskImgPath != ''
    mask_pixels = np.array(Image.open(maskImgPath)) if has_mask else None

    builder = WordCloud(
        font_path='simhei.ttf',
        mask=mask_pixels,
        width=width,
        height=height,
        background_color='white',
    )
    cloud = builder.generate_from_frequencies(word_freq)

    if has_mask:
        # Take the word colors from the mask image itself.
        palette = ImageColorGenerator(mask_pixels)
        cloud.recolor(color_func=palette)

    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

    if not save:
        return
    cloud.to_file(saveImgPath)
def main():
    """Run the full pipeline: read the text, weight the words, draw the cloud.

    Reads ``./docs/allBarrage.txt``, uses ``./docs/maskImg.png`` as the mask,
    and saves the result to ``./docs/wordCloud.png``.
    """
    text_path = './docs/allBarrage.txt'
    mask_path = './docs/maskImg.png'
    output_path = './docs/wordCloud.png'
    canvas_width, canvas_height = 1920, 1440

    frequencies = ChangeToFreq(ReadAndCutWords(text_path))
    CreateWordCloud(
        frequencies,
        canvas_width,
        canvas_height,
        mask_path,
        output_path,
        save=True,
    )


# Only run the pipeline when executed as a script, not when imported.
if __name__ == '__main__':
    main()