"""search1.py -- word-frequency counter and word-cloud generator.

The script lists the files found under a source directory, lets the user
pick one by number, segments its text with jieba, prints and saves the word
counts, and renders a word-cloud image from those counts.
"""
import os

import jieba
import wordcloud

# Path templates used by Operation_file; ``{}`` receives the file name (or
# the file name without its extension for the picture).
INPUT_TEMPLATE = 'D://Python Test//Test1//{}'
COUNTS_TEMPLATE = 'D://Python Test//Test2(write_in)//writein_{}'
CLOUD_TEMPLATE = 'D://Python Test//Test3(woldcloud)//word_pic---{}.png'


def Visitdir(path: str) -> dict:
    """List every file found under *path* and number them.

    Each file is printed as ``<number>---<base name>``.

    Args:
        path: Directory to walk (sub-directories are walked as well).

    Returns:
        A dict mapping the 1-based file number to the file's base name.

    Note:
        Only base names are recorded, so a file that lives in a
        sub-directory is listed without its relative path.
    """
    dict_dir = {}
    file_num = 1
    for _root, _dirs, files in os.walk(path):
        for name in files:
            print('{0}---{1}'.format(file_num, name))
            dict_dir[file_num] = name
            file_num += 1
    return dict_dir


def _count_words(text):
    """Segment *text* with jieba and return a ``{word: occurrences}`` dict."""
    counts = {}
    for word in jieba.lcut(text):
        # One-character tokens are kept only when alphabetic; this drops
        # punctuation, digits and whitespace produced by the segmentation.
        if len(word) == 1 and not word.isalpha():
            continue
        counts[word] = counts.get(word, 0) + 1
    return counts


def Operation_file(file_name: str) -> None:
    """Count the words of one source file and produce its outputs.

    The file is read from the source directory as UTF-8. The word counts are
    printed in descending order of frequency, written to a ``writein_<name>``
    text file, and rendered as a ``word_pic---<stem>.png`` word cloud.

    Args:
        file_name: Base name of the file inside the source directory.
    """
    with open(INPUT_TEMPLATE.format(file_name), "r", encoding='utf-8') as fp:
        text = fp.read()

    counts = _count_words(text)

    # sorted() is stable, so equally frequent words keep first-seen order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    for word, count in ranked:
        print("{0:<15}{1}".format(word, count))

    # The saved file keeps first-seen order: one "word<padding>count" per line.
    with open(COUNTS_TEMPLATE.format(file_name), "w", encoding='utf-8') as dp:
        dp.writelines(
            '{:<15}{}\n'.format(word, count) for word, count in counts.items()
        )

    if not counts:
        # WordCloud raises when it is given no words; skip the picture.
        print('文件中没有可统计的词语,未生成词云')
        return

    cloud = wordcloud.WordCloud(
        font_path='msyhbd.ttc',
        width=800,
        height=500,
        background_color="white",
    )
    # Feed the real counts so word size reflects frequency. Re-reading the
    # saved "word count" text would present every word exactly once.
    cloud.generate_from_frequencies(counts)

    # splitext handles extensions of any length (and names without one).
    stem = os.path.splitext(file_name)[0]
    cloud.to_file(CLOUD_TEMPLATE.format(stem))


def _ask_file_number(total):
    """Prompt until the user enters an integer between 1 and *total*."""
    while True:
        raw = input('请输入要打开的文件序号:')
        try:
            number = int(raw)
        except ValueError:
            # Non-numeric input is treated like an unknown file number.
            number = None
        if number is not None and 1 <= number <= total:
            return number
        print('您输入的文件序号不存在')


def _ask_continue():
    """Ask whether to process another file; return True for 'Yes'."""
    answer = input('请输入是否继续该程序:[Yes/No]')
    while answer not in ('Yes', 'No'):
        answer = input('非法字符,请重新输入:[Yes/No]')
    return answer == 'Yes'


def main():
    """Run the interactive select-and-process loop."""
    path = 'd://Python Test//Test1'
    files = Visitdir(path)
    if not files:
        # Without this guard the selection prompt could never be satisfied.
        print('该目录下没有可供选择的文件')
        return
    while True:
        Operation_file(files[_ask_file_number(len(files))])
        if not _ask_continue():
            break


if __name__ == '__main__':
    main()