You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
2.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import codecs
import jieba#分词
from collections import Counter#计数
import glob
import numpy
#对于词云背景图案设置
from wordcloud import WordCloud # 词云图相关
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.colors as colors # 处理图片相关内容:自定义颜色
#import imageio
# #根据用户输入的序号打开相应的文件
def get_txt():
    """Let the user pick a ``*.txt`` file from the working directory and process it.

    The matching file names are printed as a list, then the user is asked for
    a 1-based index into that list.  The selected file is read as UTF-8 and
    its text is passed to ``get_words``.  A non-numeric entry, an index
    outside the list, or a file that cannot be read prints the retry message
    and prompts again.

    Returns:
        None.  Returns right away (after a message) when the directory holds
        no ``*.txt`` file, because no index could ever be valid.

    Raises:
        Whatever ``get_words`` raises.  Those errors are deliberately not
        treated as input errors, and Ctrl+C / end-of-input on the prompt
        propagate instead of being swallowed by the retry loop.
    """
    result = glob.glob('*.txt')
    print(result)
    if not result:
        # With nothing to choose from, every index would be rejected and the
        # prompt below would loop forever.
        print('当前目录下没有找到txt文件!')
        return

    while True:
        # Only the conversion is guarded, so nothing but a malformed number
        # is reported as an input error.
        try:
            i = int(input('请输入文件序号:'))
        except ValueError:
            print('输入错误,请重新输入!')
            continue

        # Indices are 1-based; zero and negatives must not wrap around to the
        # end of the list, and values past the end are rejected explicitly.
        if not 1 <= i <= len(result):
            print("输入错误,请重新输入!")
            continue

        try:
            with open(result[i - 1], 'r', encoding='utf-8') as f:
                txt_content = f.read()
        except (OSError, UnicodeDecodeError) as exc:
            # Show the real reason before asking for another choice.
            print(exc)
            print('输入错误,请重新输入!')
            continue
        break

    # Processing happens outside the retry loop so its failures surface as
    # real errors rather than as "input error" prompts.
    get_words(txt_content)
#统计词汇
def get_words(txt):
    """Count the words in *txt*, store the counts, and render the word cloud.

    *txt* is segmented with ``jieba.cut``.  Tokens longer than one character
    (other than the ``'\\r\\n'`` token) are tallied, the tallies are written
    to ``cut_result.txt`` as one ``word count`` line each, most frequent
    first, and that file is then passed to ``ciyun``.
    """
    # jieba.cut returns a generator; it is consumed once while counting.
    tokens = jieba.cut(txt)
    counts = Counter(
        token for token in tokens
        if not (len(token) <= 1 or token == '\r\n')
    )

    # Save the counted words to cut_result.txt; the with-block closes the file.
    with open('cut_result.txt', 'w', encoding='utf-8') as fw:
        fw.writelines(
            word + ' ' + str(freq) + '\n'
            for word, freq in counts.most_common()
        )

    ciyun("cut_result.txt")
# 生成词云
def _parse_frequencies(text):
    """Parse ``word count`` lines into a ``{word: count}`` dict.

    Returns None as soon as a non-blank line does not end in a
    space-separated integer, which signals that *text* is not a count file.
    """
    frequencies = {}
    for line in text.split('\n'):
        if not line.strip():
            continue
        # Split on the last space so the count is always the final field.
        word, sep, count = line.rpartition(' ')
        if not sep or not word:
            return None
        try:
            frequencies[word] = frequencies.get(word, 0) + int(count)
        except ValueError:
            return None
    return frequencies


def ciyun(txt_name, mask_path='poop.png', font_path='msyh.ttc',
          output_path='wordcloud.png'):
    """Render a word cloud from the file *txt_name*, save it, and display it.

    Args:
        txt_name: Path of a UTF-8 text file.  When every non-blank line has
            the form ``word count`` (the layout written to
            ``cut_result.txt``), the counts are used as the word weights.
            Otherwise the raw text is handed to ``WordCloud.generate``.
        mask_path: Image file whose array is used as the cloud's mask.
        font_path: Font file passed to ``WordCloud``.
        output_path: Where the rendered image is written.

    Raises:
        OSError: If the text file or the mask image cannot be opened.
    """
    # The with-block closes the handle, which the bare open() never did.
    with open(txt_name, 'r', encoding='utf-8') as f:
        txt_content = f.read()

    # Load the background image that shapes the cloud.
    with Image.open(mask_path) as mask_image:
        color_mask = numpy.array(mask_image)

    # Custom palette for the words.
    colormaps = colors.ListedColormap(["#FFF0F5", "#E1FFFF", "#87CEEB", "#F0E68C",
                                       "#E1FFFF"])

    cloud = WordCloud(
        mask=color_mask,            # shape of the cloud
        colormap=colormaps,         # word colours
        font_path=font_path,
        background_color='black',
        width=800,
        height=800,
        contour_width=3,            # outline thickness
        contour_color='steelblue',  # outline colour
    )

    # Feeding the count file through generate() would re-tokenise it, and
    # each word appears only once per line, so the stored counts would be
    # lost.  Use the counts directly when the file has that layout.
    frequencies = _parse_frequencies(txt_content)
    if frequencies:
        w = cloud.generate_from_frequencies(frequencies)
    else:
        w = cloud.generate(txt_content)

    # Save before showing so the image is on disk even if display fails.
    w.to_file(output_path)

    plt.imshow(w)
    plt.axis('off')
    plt.show()
# Script entry point: start the interactive file selection when run directly.
if __name__ == "__main__":
    get_txt()