|
|
|
@ -0,0 +1,93 @@
|
|
|
|
|
import os
|
|
|
|
|
import jieba
|
|
|
|
|
import wordcloud
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def Visitdir(path: "str | os.PathLike[str]") -> dict:
    """List every file found under *path* and number them starting at 1.

    The directory tree is walked recursively.  Each file is printed as
    ``<number>---<basename>`` and recorded in the returned mapping.

    Directories and files are visited in sorted order so that the number
    assigned to a file is the same on every run and every filesystem
    (plain ``os.walk`` yields entries in an arbitrary order).

    Args:
        path: Root directory to scan.

    Returns:
        A dict mapping the file number (int, starting at 1) to the file's
        base name.  Only the base name is stored, so the directory part of
        files located in sub-directories is not kept.
    """
    names = []
    for _root, dirs, files in os.walk(path):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order.
        dirs.sort()
        names.extend(sorted(files))

    dict_dir = {}
    for file_num, name in enumerate(names, start=1):
        print('{0}---{1}'.format(file_num, name))
        dict_dir[file_num] = name
    return dict_dir
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def Operation_file(file_name: str) -> None:
    """Count the words of one text file and render them as a word cloud.

    The file is read from the fixed input directory, segmented with
    ``jieba.lcut`` and tallied.  Tokens of length one are kept only when
    they are alphabetic, which drops single punctuation marks, digits and
    whitespace characters.  The function then:

    1. prints every word with its count, most frequent first;
    2. writes the same word/count pairs (in first-seen order) to
       ``writein_<file_name>`` in the output directory;
    3. saves a word-cloud image named after the file without its extension.

    Args:
        file_name: Base name of the file inside the input directory.

    Returns:
        None.  Results are printed and written to disk.
    """
    from collections import Counter

    with open('D://Python Test//Test1//{}'.format(file_name), "r", encoding='utf-8') as fp:
        text = fp.read()

    counts = Counter(
        word
        for word in jieba.lcut(text)
        if len(word) != 1 or word.isalpha()
    )

    # most_common() sorts by count, descending, keeping first-seen order
    # for ties.
    for word, count in counts.most_common():
        print("{0:<15}{1}".format(word, count))

    with open('D://Python Test//Test2(write_in)//writein_{}'.format(file_name), "w", encoding='utf-8') as dp:
        dp.writelines(
            '{:<15}{}\n'.format(word, count) for word, count in counts.items()
        )

    w = wordcloud.WordCloud(font_path='msyhbd.ttc', width=800, height=500, background_color="white")
    # Feed the tallied frequencies directly.  Generating from the text of
    # the counts file would see every word exactly once, so word sizes
    # would not reflect how often each word occurs.
    w.generate_from_frequencies(counts)

    # splitext handles extensions of any length (or none), unlike a fixed
    # four-character slice.
    stem = os.path.splitext(file_name)[0]
    w.to_file('D://Python Test//Test3(woldcloud)//word_pic---{}.png'.format(stem))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _ask_file_number(file_count):
    """Prompt until the user enters an integer between 1 and *file_count*.

    Non-numeric input is treated like an out-of-range number: the error
    message is shown and the prompt is repeated instead of crashing.
    """
    while True:
        raw = input('请输入要打开的文件序号:')
        try:
            number = int(raw)
        except ValueError:
            number = None
        if number is not None and 1 <= number <= file_count:
            return number
        print('您输入的文件序号不存在')


if __name__ == '__main__':
    # Interactive driver: list the files, then repeatedly process the file
    # the user picks until they answer 'No'.
    path = 'd://Python Test//Test1'
    Dict_file = Visitdir(path)
    N = len(Dict_file)
    if N == 0:
        # With nothing to choose from, no number could ever be valid and
        # the prompt would repeat forever.
        raise SystemExit('该目录下没有可以处理的文件')

    while True:
        Target_file = _ask_file_number(N)
        Operation_file(Dict_file[Target_file])

        answer = input('请输入是否继续该程序:[Yes/No]')
        while answer not in ('Yes', 'No'):
            answer = input('非法字符,请重新输入:[Yes/No]')
        if answer == 'No':
            break
|
|
|
|
|
|
|
|
|
|
|