You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
3.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import jieba
import wordcloud
def Visitdir(path: str) -> dict:
    """Walk *path* recursively, print every file found and number the files.

    Each file is printed as ``<number>---<base name>``, with numbering
    starting at 1.  Sub-directories and the files inside each directory are
    visited in sorted order so that the numbering is the same on every run.

    Args:
        path: Root directory to search.

    Returns:
        A dict mapping each file number (int, starting at 1) to the file's
        base name.  Only the base name is stored, not the directory the file
        was found in.  The dict is empty when no files are found.
    """
    dict_dir = {}
    file_num = 1
    for _root, dirs, files in os.walk(path):
        # os.walk makes no promise about listing order.  Sorting ``dirs`` in
        # place steers the order in which sub-directories are descended into;
        # sorting ``files`` fixes the order within one directory.
        dirs.sort()
        for name in sorted(files):
            print('{0}---{1}'.format(file_num, name))
            dict_dir[file_num] = name
            file_num += 1
    return dict_dir
def Operation_file(file_name: str):
    """Count token frequencies in one text file and render a word cloud.

    The file ``file_name`` is read as UTF-8 from ``D://Python Test//Test1``
    and split into tokens with ``jieba.lcut``.  A single-character token is
    counted only when ``str.isalpha`` is true for it; longer tokens are
    always counted.  The function then:

    1. prints every token with its count, highest count first;
    2. writes the token/count pairs, in first-seen order, to
       ``D://Python Test//Test2(write_in)//writein_<file_name>``;
    3. passes that same listing text to ``wordcloud.WordCloud.generate`` and
       saves the image as
       ``D://Python Test//Test3(woldcloud)//word_pic---<stem>.png``, where
       ``<stem>`` is ``file_name`` without its extension.

    Args:
        file_name: Base name of the file to process inside the input
            directory.

    Returns:
        None.
    """
    with open('D://Python Test//Test1//{}'.format(file_name), "r", encoding='utf-8') as fp:
        fs = fp.read()

    counts = {}
    for word in jieba.lcut(fs):
        # Drop single-character tokens that are not alphabetic.
        if len(word) == 1 and not word.isalpha():
            continue
        counts[word] = counts.get(word, 0) + 1

    # The sort is stable, so tokens with equal counts keep first-seen order.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    for word, count in items:
        print("{0:<15}{1}".format(word, count))

    # Build the listing once and reuse it for both the output file and the
    # word cloud, instead of writing the file and reading it straight back.
    listing = ''.join('{:<15}{}\n'.format(word, count) for word, count in counts.items())
    with open('D://Python Test//Test2(write_in)//writein_{}'.format(file_name), "w", encoding='utf-8') as dp:
        dp.write(listing)

    # NOTE(review): the text given to generate() lists each token once, so the
    # cloud presumably does not reflect the counts computed above;
    # WordCloud.generate_from_frequencies(counts) may be what is intended --
    # confirm before changing the rendered output.
    w = wordcloud.WordCloud(font_path='msyhbd.ttc', width=800, height=500, background_color="white")
    w.generate(listing)

    # splitext removes whatever extension is present (or none at all); the
    # previous fixed ``[:-4]`` slice was only right for three-letter extensions.
    stem = os.path.splitext(file_name)[0]
    w.to_file('D://Python Test//Test3(woldcloud)//word_pic---{}.png'.format(stem))
def _ask_file_number(count):
    """Prompt until the user enters an integer between 1 and *count* inclusive."""
    while True:
        try:
            choice = int(input('请输入要打开的文件序号:'))
        except ValueError:
            # Non-numeric input is handled like an out-of-range number
            # instead of terminating the program with a traceback.
            choice = 0
        if 1 <= choice <= count:
            return choice
        print('您输入的文件序号不存在')


# Script entry point: list the files under the input directory, then keep
# processing user-selected files until the user answers 'No'.
if __name__ == '__main__':
    path = 'd://Python Test//Test1'
    Dict_file = Visitdir(path)
    N = len(Dict_file)
    if N == 0:
        # With no files there is no valid number to enter, so the selection
        # prompt could never be satisfied.
        print('该目录下没有可处理的文件')
    else:
        answer = 'Yes'
        while answer == 'Yes':
            Target_file = _ask_file_number(N)
            Operation_file(Dict_file[Target_file])
            answer = input('请输入是否继续该程序:[Yes/No]')
            # Only the exact strings 'Yes' and 'No' are accepted.
            while answer not in ('Yes', 'No'):
                answer = input('非法字符,请重新输入:[Yes/No]')