You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
1.1 KiB
36 lines
1.1 KiB
import jieba
|
|
import nltk
|
|
import numpy as np
|
|
import PIL.Image as image
|
|
from nltk.corpus import stopwords
|
|
from wordcloud import wordcloud
|
|
|
|
|
|
def get_wordcloud(file_name, *, mask_image="2.png",
                  font_path='C:/Windows/Fonts/STLITI.TTF',
                  output_path='wordcloud.jpg'):
    """Render a masked word cloud image from a UTF-8 text file.

    The text is segmented with ``jieba.lcut``, the tokens are joined with
    single spaces, and the result is handed to ``WordCloud.generate``.
    The canvas takes its width and height from the mask image.

    Args:
        file_name: Path of the UTF-8 encoded text file to read.
        mask_image: Path of the image used as the cloud mask.
        font_path: Path of the font file passed to ``WordCloud``. The
            default is a Windows system font path, so callers on other
            platforms need to pass their own font.
        output_path: Path the rendered image is written to.

    Returns:
        None. The image is written to ``output_path``.
    """
    # Kept from the original: make sure the NLTK stopword corpus is present.
    nltk.download('stopwords')

    # Context manager closes the input file even if reading fails.
    with open(file_name, encoding='utf-8') as f:
        txt = f.read()

    # Join the segmented tokens with spaces before handing them to WordCloud.
    text = ' '.join(jieba.lcut(txt))

    # Materialise the mask as an array, then release the image file handle.
    with image.open(mask_image) as mask_img:
        mask = np.array(mask_img)

    # NLTK's Chinese stopwords plus a few extra high-frequency filler words.
    stopwords_list = set(stopwords.words('chinese'))
    stopwords_list.update(['都', '不', '好', '哈哈哈', '说', '还', '很', '没'])

    w = wordcloud.WordCloud(
        mask=mask,
        width=mask.shape[1],
        height=mask.shape[0],
        background_color='white',
        font_path=font_path,
        stopwords=stopwords_list,
    )

    w.generate(text)
    # Save the word cloud image.
    w.to_file(output_path)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the word cloud from the default input file.
    get_wordcloud('barrage.csv')