You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

36 lines
1.1 KiB

import jieba
import nltk
import numpy as np
import PIL.Image as image
from nltk.corpus import stopwords
from wordcloud import wordcloud
def get_wordcloud(
    file_name,
    *,
    mask_image="2.png",
    font_path='C:/Windows/Fonts/STLITI.TTF',
    output_file='wordcloud.jpg',
):
    """Render a masked word cloud from a UTF-8 text file and save it to disk.

    The text is segmented with jieba, filtered through the NLTK Chinese
    stopword list (plus a few extra tokens), and drawn with ``WordCloud``
    on a white background using the given mask image.

    Args:
        file_name: Path of the text file to read (decoded as UTF-8).
        mask_image: Path of the image used as the word cloud mask.
        font_path: Path of the font file passed to ``WordCloud``.
        output_file: Path the rendered word cloud image is written to.

    Returns:
        None. The result is written to ``output_file``.

    Raises:
        OSError: If ``file_name`` or ``mask_image`` cannot be opened.
    """
    # The stopwords corpus must be available before stopwords.words() is used.
    nltk.download('stopwords')

    # Context manager guarantees the handle is closed even if read() fails.
    with open(file_name, encoding='utf-8') as f:
        txt = f.read()

    # jieba yields a list of tokens; they are joined with spaces so that
    # WordCloud.generate receives a single whitespace-separated string.
    string = ' '.join(jieba.lcut(txt))

    # np.array copies the pixel data, so the image file can be closed right away.
    with image.open(mask_image) as mask_file:
        mask = np.array(mask_file)

    stopwords_list = set(stopwords.words('chinese'))
    # Extra tokens to exclude on top of the NLTK list.
    stopwords_list.update(['', '', '', '哈哈哈', '', '', '', ''])

    w = wordcloud.WordCloud(
        mask=mask,
        # Canvas size follows the mask: shape is (rows, columns, ...).
        width=mask.shape[1],
        height=mask.shape[0],
        background_color='white',
        font_path=font_path,
        stopwords=stopwords_list,
    )
    w.generate(string)
    # Save the rendered word cloud image.
    w.to_file(output_file)
if __name__ == '__main__':
    # Script entry point: build the word cloud from the barrage export.
    barrage_file = 'barrage.csv'
    get_wordcloud(barrage_file)