|
|
|
@ -0,0 +1,85 @@
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
import codecs
|
|
|
|
|
import jieba#分词
|
|
|
|
|
from collections import Counter#计数
|
|
|
|
|
import glob
|
|
|
|
|
import numpy
|
|
|
|
|
#对于词云背景图案设置
|
|
|
|
|
from wordcloud import WordCloud # 词云图相关
|
|
|
|
|
from PIL import Image
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import matplotlib.colors as colors # 处理图片相关内容:自定义颜色
|
|
|
|
|
#import imageio
|
|
|
|
|
|
|
|
|
|
# Open the text file selected by the index the user enters
|
|
|
|
|
def get_txt():
    """Ask the user to pick a ``*.txt`` file by index and process it.

    Prints the list of ``*.txt`` files found in the current working
    directory, then prompts for a 1-based index until the chosen file can be
    read as UTF-8. The file's text is then passed to ``get_words``.

    Returns:
        None. When no ``*.txt`` file exists the function reports that and
        returns without prompting.

    Raises:
        EOFError, KeyboardInterrupt: propagated from ``input`` so that a
            closed stdin or Ctrl-C ends the prompt instead of looping.
        Exception: anything raised by ``get_words`` is propagated rather
            than being reported as an input error.
    """
    result = glob.glob('*.txt')
    print(result)

    if not result:
        # No index can ever be valid here; prompting would never terminate.
        print('当前目录下没有找到txt文件!')
        return

    while True:
        try:
            i = int(input('请输入文件序号:'))
        except ValueError:
            # Only a non-numeric answer is an input error; EOF and Ctrl-C
            # are deliberately left to propagate.
            print('输入错误,请重新输入!')
            continue

        # Indices are 1-based; reject anything outside 1..len(result).
        if i <= 0 or i > len(result):
            print("输入错误,请重新输入!")
            continue

        try:
            with open(result[i - 1], 'r', encoding='utf-8') as f:
                txt_content = f.read()
        except (OSError, UnicodeDecodeError):
            # Unreadable or non-UTF-8 file: let the user pick another one.
            print('输入错误,请重新输入!')
            continue

        break

    # Run the processing outside the retry loop so that its own failures
    # surface with a real traceback instead of an "input error" prompt.
    get_words(txt_content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Count word frequencies
|
|
|
|
|
def get_words(txt):
    """Count the words of *txt*, save the counts, and render the word cloud.

    The text is segmented with ``jieba.cut``. Tokens of length 1 and the
    token ``'\\r\\n'`` are ignored. The remaining tokens are counted and
    written to ``cut_result.txt`` (UTF-8), one ``word count`` pair per line
    in ``Counter.most_common()`` order. That file name is then passed to
    ``ciyun``.

    Args:
        txt: the text to segment.
    """
    # jieba.cut yields tokens lazily, so they are counted in a single pass.
    word_counts = Counter(
        word
        for word in jieba.cut(txt)
        if len(word) > 1 and word != '\r\n'
    )

    # Store the counted words in 'cut_result.txt'; the with-block closes the
    # file, so no explicit close() is needed.
    with open('cut_result.txt', 'w', encoding='utf-8') as fw:
        fw.writelines(
            word + ' ' + str(count) + '\n'
            for word, count in word_counts.most_common()
        )

    ciyun("cut_result.txt")
|
|
|
|
|
|
|
|
|
|
# Generate the word cloud
|
|
|
|
|
def ciyun(txt_name):
    """Build a word cloud from the text file *txt_name*, show it, and save it.

    Reads *txt_name* as UTF-8, uses ``poop.png`` as the mask image and
    ``msyh.ttc`` as the font, displays the result with matplotlib, and then
    writes it to ``wordcloud.png``.

    Args:
        txt_name: path of the file produced by the jieba counting step.
    """
    # Read the whole input and release the file handle immediately.
    with open(txt_name, 'r', encoding='utf-8') as f:
        txt_content = f.read()

    # Load the background picture; the array is built before the image
    # file is closed.
    with Image.open('poop.png') as mask_image:
        color_mask = numpy.array(mask_image)

    # Custom text colours.
    colormaps = colors.ListedColormap(
        ["#FFF0F5", "#E1FFFF", "#87CEEB", "#F0E68C", "#E1FFFF"]
    )

    # Generate the word cloud with a custom style.
    w = WordCloud(
        mask=color_mask,             # shape taken from the background image
        colormap=colormaps,          # text colours
        # Alternative font: 'C:/Windows/Fonts/simkai.ttf'
        font_path='msyh.ttc',        # font file
        background_color='black',    # background colour
        width=800,                   # canvas width
        height=800,                  # canvas height
        contour_width=3,             # outline width
        contour_color='steelblue',   # outline colour
    ).generate(txt_content)

    # Display the word cloud.
    plt.imshow(w)
    plt.axis('off')
    plt.show()

    # Save the word cloud as an image file.
    w.to_file('wordcloud.png')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: start the interactive file picker only when this
# module is run directly, not when it is imported.
if __name__ == "__main__":
    get_txt()
|