diff --git a/词云图.py b/词云图.py
new file mode 100644
index 0000000..60a743b
--- /dev/null
+++ b/词云图.py
@@ -0,0 +1,38 @@
+import jieba
+from PIL import Image
+from wordcloud import WordCloud
+import numpy as np
+
+# Load the text, then strip newlines and full-width spaces
+with open(r"弹幕.txt", encoding='utf-8') as f:
+    text = f.read()
+text = text.replace('\n', "").replace("\u3000", "")
+
+# Segment the text into individual words
+text_cut = jieba.lcut(text)
+
+# Drop single-character tokens and join the rest with spaces
+text_cut = [word for word in text_cut if len(word) > 1]
+text_cut = ' '.join(text_cut)
+
+# Stop words (uncomment together with the stopwords argument below)
+#stop_words = set(["的", "了", "是", "和", "这", "吧", "不", "看", "啊", "我", "就", "也", "那", "还", "吗", "你", "都"])
+
+# Read the background image used as the word-cloud mask
+background_image = np.array(Image.open('trophy.png'))
+
+word_cloud = WordCloud(font_path="simsun.ttc",
+                       background_color="white",
+                       mask=background_image,
+                       collocations=False,  # avoid repeated words in the cloud
+                       #stopwords=stop_words,
+                       max_font_size=100,
+                       max_words=200,
+                       min_font_size=10,
+                       colormap='viridis'
+                       )
+
+word_cloud.generate(text_cut)
+image = word_cloud.to_image()
+image.show()
+word_cloud.to_file("词云图.png")