# -*- coding: utf-8 -*-
"""Build a word cloud from a UTF-8 text file chosen interactively.

Pipeline:
    1. ``get_txt``   - list ``*.txt`` files in the working directory and let
                       the user pick one by its 1-based position.
    2. ``get_words`` - segment the text with jieba, count the words and write
                       the counts to ``cut_result.txt``.
    3. ``ciyun``     - render the counts as a masked word cloud, show it and
                       save it as a PNG.
"""
import codecs
import glob
from collections import Counter  # word counting

import jieba  # word segmentation
import matplotlib.colors as colors  # custom colour map
import matplotlib.pyplot as plt
import numpy
from PIL import Image
from wordcloud import WordCloud  # word-cloud rendering


def get_txt():
    """Ask the user which ``*.txt`` file to process and run the pipeline.

    The list of candidate files is printed, then the user is prompted for a
    1-based position in that list until a valid, readable file is chosen.
    The chosen file's content is handed to ``get_words``.

    Only input and file-reading problems cause a re-prompt.  Errors raised
    by the processing steps (for example a missing mask image or font)
    propagate to the caller instead of being reported as bad input, and
    Ctrl-C / end-of-input terminate the prompt as usual.
    """
    result = glob.glob('*.txt')
    print(result)

    if not result:
        # Without candidates no number could ever be valid; prompting would
        # loop forever.
        print('当前目录下没有找到txt文件!')
        return

    while True:
        try:
            i = int(input('请输入文件序号:'))
        except ValueError:
            # Not an integer.
            print('输入错误,请重新输入!')
            continue

        if not 1 <= i <= len(result):
            # Out of range (zero, negative, or past the end of the list).
            print("输入错误,请重新输入!")
            continue

        try:
            with open(result[i - 1], 'r', encoding='utf-8') as f:
                txt_content = f.read()
        except (OSError, UnicodeDecodeError):
            # Unreadable or not valid UTF-8: let the user pick another file.
            print('输入错误,请重新输入!')
            continue

        break

    # Deliberately outside the try blocks above so that real processing
    # failures surface with their own traceback.
    get_words(txt_content)


def get_words(txt):
    """Count the words of ``txt`` and pass the result on to ``ciyun``.

    ``txt`` is segmented with ``jieba.cut``.  Tokens of length 1 and the
    ``'\\r\\n'`` token are ignored.  The remaining words are written to
    ``cut_result.txt`` (UTF-8), one ``"<word> <count>"`` pair per line in
    descending order of frequency, and ``ciyun`` is called on that file.

    Args:
        txt: The text to analyse.
    """
    # jieba.cut yields tokens lazily, so they are counted in one pass
    # without building an intermediate list.
    counts = Counter(
        token
        for token in jieba.cut(txt)
        if len(token) > 1 and token != '\r\n'
    )

    with open('cut_result.txt', 'w', encoding='utf-8') as fw:
        fw.writelines(
            word + ' ' + str(count) + '\n'
            for word, count in counts.most_common()
        )

    ciyun("cut_result.txt")


def ciyun(txt_name, mask_path='poop.png', font_path='msyh.ttc',
          output_path='wordcloud.png'):
    """Render the word counts stored in ``txt_name`` as a word cloud.

    The cloud is displayed with matplotlib and then written to
    ``output_path``.

    Args:
        txt_name: Path of a UTF-8 file with one ``"<word> <count>"`` pair per
            line, as written by ``get_words``.  Lines that do not match this
            shape are skipped.
        mask_path: Image whose shape is used as the cloud's mask.
        font_path: Font file used to draw the words.
        output_path: Where the rendered PNG is saved.
    """
    # Read the counts back as numbers.  Feeding the raw file text to
    # WordCloud.generate() would re-tokenize it as ordinary prose and throw
    # the counts away, so every word would end up with the same weight.
    frequencies = {}
    with open(txt_name, 'r', encoding='utf-8') as f:
        for line in f:
            word, sep, count = line.rstrip('\n').rpartition(' ')
            if sep and word and count.isdigit():
                frequencies[word] = int(count)

    # Load the mask image; the context manager releases the file handle as
    # soon as the pixel data has been copied into the array.
    with Image.open(mask_path) as mask_image:
        color_mask = numpy.array(mask_image)

    # Custom text colours.
    colormaps = colors.ListedColormap(
        ["#FFF0F5", "#E1FFFF", "#87CEEB", "#F0E68C", "#E1FFFF"]
    )

    # Build the word cloud with the custom styling.
    w = WordCloud(
        mask=color_mask,            # shape of the cloud
        colormap=colormaps,         # word colours
        font_path=font_path,        # font used to draw the words
        background_color='black',   # background colour
        width=800,                  # canvas width
        height=800,                 # canvas height
        contour_width=3,            # outline thickness
        contour_color='steelblue',  # outline colour
    ).generate_from_frequencies(frequencies)

    # Show the word cloud.
    plt.imshow(w)
    plt.axis('off')
    plt.show()

    # Save the word cloud image.
    w.to_file(output_path)


if __name__ == '__main__':
    get_txt()