You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

82 lines
2.0 KiB

import os
from collections import Counter

import jieba
from wordcloud import WordCloud
def files_list():
    """Return the current working directory of the running process.

    Returns:
        The path string reported by ``os.getcwd()``.
    """
    return os.getcwd()
def file_namesearch(a):
    """List the ``.txt`` files under a directory and let the user pick one.

    The directory tree rooted at ``a`` is walked, every file whose name ends
    in ``.txt`` (case-insensitive) is printed with a 1-based number, and the
    user is asked on standard input for the number of the file to use.  The
    prompt is repeated until a number inside the printed range is entered.

    Args:
        a: Path of the directory to search.

    Returns:
        The full path of the file the user selected.

    Raises:
        IndexError: If ``a`` does not exist or contains no ``.txt`` files, so
            there is nothing to choose from.  The user is not prompted in
            that case.
    """
    matches = []
    if not os.path.exists(a):
        print("路径错误,请重新输入")
    else:
        for root, dirs, files in os.walk(a):
            # Sorting in place makes os.walk visit sub-directories in a
            # stable order, so the printed numbering is reproducible.
            dirs.sort()
            for name in sorted(files):
                # Test the file name's extension only: a substring test on
                # the whole path would also match parent folders that merely
                # contain "txt", and directories cannot be read as text.
                if name.lower().endswith(".txt"):
                    matches.append(os.path.join(root, name))
                    print("%d----" % len(matches) + name)

    if not matches:
        raise IndexError("no .txt files found under %r" % (a,))

    while True:
        print("请输入文件序号")
        try:
            choice = int(input())
        except ValueError:
            choice = 0  # Not a number: handled as out of range below.
        # Reject 0 and negatives explicitly; as list indices they would
        # silently select a file counted from the end of the list.
        if 1 <= choice <= len(matches):
            return matches[choice - 1]
        print("序号无效,请重新输入")
def files_read(n):
    """Read a text file as UTF-8 and return its entire content.

    Args:
        n: Path of the file to read.

    Returns:
        The decoded file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the handle even when read() raises,
    # e.g. on a decoding error.
    with open(n, 'r', encoding='utf-8') as f:
        return f.read()
def files_cut_word(a, *, counts_path="3.txt", image_path="2.png",
                   font_path="C:/Windows/Fonts/simfang.ttf"):
    """Segment text, save word frequencies and render a word-cloud image.

    The text is split with ``jieba.lcut``.  Tokens whose length is exactly
    one are left out of the frequency table; the remaining counts are
    written to ``counts_path`` as ``"<word> <count>"`` lines, most frequent
    first.  The word cloud is generated from all tokens joined by spaces and
    saved to ``image_path``.

    Args:
        a: The text to analyse.
        counts_path: Destination of the frequency table (UTF-8 text).
        image_path: Destination of the rendered word-cloud image.
        font_path: Font file handed to ``WordCloud``.  The default points at
            a Windows system font; pass another font file on other systems.

    Returns:
        None.  The results are the two files written.
    """
    tokens = jieba.lcut(a)

    # Count tokens, skipping the single-character ones.
    counts = Counter(word for word in tokens if len(word) != 1)

    # most_common() orders by descending count and keeps first-seen order
    # among equal counts.
    with open(counts_path, 'w', encoding='utf-8') as f:
        f.writelines(
            "%s %d\n" % (word, freq) for word, freq in counts.most_common()
        )

    # Word-cloud rendering parameters.
    word_cloud = WordCloud(
        font_path=font_path,
        background_color="white",
        width=1920,
        height=1080,
    ).generate(" ".join(tokens))
    word_cloud.to_file(image_path)
if __name__ == '__main__':
    # Pipeline: working directory -> file chosen by the user -> file text
    # -> frequency table and word-cloud image.
    start_dir = files_list()
    chosen_file = file_namesearch(start_dir)
    text = files_read(chosen_file)
    files_cut_word(text)