"""Render a word cloud of the most frequent words in a UTF-8 Chinese text.

The script tokenizes the text with jieba, counts every token that is not a
single character (merging a few aliases into one canonical name), prints the
ranking, displays the cloud with matplotlib and saves it as a PNG file.
"""
from collections import Counter

# Raw string: same value as a plain literal here, but it does not rely on
# unrecognized backslash escapes, which newer Python versions warn about.
TEXT_PATH = r"D:\文本素材\西游记.txt"

# NOTE(review): this file name does not match the title of the input text;
# it is kept unchanged so anything that expects this output still finds it.
OUTPUT_PATH = "threekingdoms.png"

# Alternative names that are counted under one canonical name.
ALIASES = {
    "行者": "孙悟空",
    "悟空": "孙悟空",
    "三藏": "唐僧",
    "八戒": "猪八戒",
}


def count_words(tokens):
    """Count tokens, skipping single characters and merging aliases.

    Args:
        tokens: Iterable of strings (e.g. the list returned by ``jieba.lcut``).

    Returns:
        A ``collections.Counter`` mapping each canonical word to its number
        of occurrences. Keys keep first-seen order, so ``most_common()``
        breaks ties in the order the words first appeared.
    """
    counts = Counter()
    for token in tokens:
        # Single-character tokens are skipped, exactly as before.
        if len(token) == 1:
            continue
        counts[ALIASES.get(token, token)] += 1
    return counts


def main():
    """Read the text, print the word ranking, then show and save the cloud."""
    # Third-party imports are local so that count_words stays importable
    # without the segmentation and plotting packages installed.
    import jieba
    import matplotlib.pyplot as plt
    import wordcloud

    # The context manager closes the file even if reading fails.
    with open(TEXT_PATH, "r", encoding="utf-8") as source:
        txt = source.read()

    counts = count_words(jieba.lcut(txt))

    # Most frequent first; equivalent to sorting items by count, descending.
    ranking = counts.most_common()
    print(ranking)

    cloud = wordcloud.WordCloud(
        font_path="msyh.ttc",
        width=400,
        height=200,
        background_color="white",
        max_words=50,
    )
    # Feed the real counts to the cloud. Generating from a string in which
    # every word appears exactly once would discard the frequencies.
    cloud.generate_from_frequencies(counts)

    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
    cloud.to_file(OUTPUT_PATH)


if __name__ == "__main__":
    main()