You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 lines
2.0 KiB
82 lines
2.0 KiB
3 years ago
|
import os
|
||
|
import jieba
|
||
|
from wordcloud import WordCloud
|
||
|
|
||
|
def files_list():
    """Return the current working directory of the process.

    Despite its name, this function does not list any files; it only
    reports the directory the script was started from.
    """
    return os.getcwd()
|
||
|
|
||
|
|
||
|
def file_namesearch(a):
    """Let the user pick one ``.txt`` file found under directory *a*.

    Walks *a* recursively, prints every text file as ``<number>----<name>``
    (numbered from 1), then reads the chosen number from standard input.

    Args:
        a: Path of the directory to search.

    Returns:
        The full path of the selected file.

    Raises:
        FileNotFoundError: If *a* does not exist.
        IndexError: If no text file was found, or the entered number is
            outside ``1..len(found files)``.
        ValueError: If the entered text is not an integer.
    """
    if not os.path.exists(a):
        print("路径错误,请重新输入")
        raise FileNotFoundError(a)

    file_list = []
    for root, _dirs, files in os.walk(a):
        for name in files:
            # Match on the extension only: a substring test on the whole
            # path would also select directories and every file below a
            # directory whose name merely contains "txt".
            if name.lower().endswith(".txt"):
                file_list.append(os.path.join(root, name))
                print("%d----" % len(file_list) + name)

    if not file_list:
        # Fail before prompting; there is nothing the user could choose.
        raise IndexError("no .txt file found under %r" % (a,))

    print("请输入文件序号")
    n = int(input())
    # Reject 0 and negative numbers explicitly: file_list[n - 1] would
    # otherwise wrap around and silently return an unintended file.
    if not 1 <= n <= len(file_list):
        raise IndexError(
            "file number %d is out of range 1-%d" % (n, len(file_list))
        )
    return file_list[n - 1]
|
||
|
|
||
|
|
||
|
def files_read(n):
    """Read a UTF-8 encoded text file and return its entire content.

    Args:
        n: Path of the file to read.

    Returns:
        The decoded text of the file as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the handle even when read() raises
    # (e.g. on a decoding error), which a manual close() would skip.
    with open(n, "r", encoding="utf-8") as f:
        return f.read()
|
||
|
|
||
|
|
||
|
def files_cut_word(a, font_path="C:/Windows/Fonts/simfang.ttf",
                   freq_path="3.txt", image_path="2.png"):
    """Segment text, save word frequencies and render a word cloud image.

    The text is split into tokens with ``jieba.lcut``. Tokens that are not
    exactly one character long are counted and written to *freq_path*, one
    ``word count`` pair per line, most frequent first. The word cloud is
    generated from all tokens joined by spaces and saved to *image_path*.

    Args:
        a: The text to analyse.
        font_path: Font file handed to ``WordCloud``. The default is a
            Windows path, so pass another font on other systems.
        freq_path: Output path of the frequency table (UTF-8 text).
        image_path: Output path of the rendered word cloud image.

    Returns:
        None. The results are the two files written.
    """
    from collections import Counter

    words = jieba.lcut(a)

    # Single-character tokens are left out of the frequency table.
    counts = Counter(word for word in words if len(word) != 1)

    # most_common() orders by descending count and keeps first-seen order
    # for ties. The context manager closes the file even if a write fails.
    with open(freq_path, "w", encoding="utf-8") as f:
        f.writelines(
            "{} {}\n".format(word, freq) for word, freq in counts.most_common()
        )

    # Word cloud rendering parameters.
    word_cloud = WordCloud(
        font_path=font_path,
        background_color="white",
        width=1920,
        height=1080,
    ).generate(" ".join(words))
    word_cloud.to_file(image_path)
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Script entry point: pick a text file under the current working
    # directory, read it, then produce the frequency table and word cloud.
    search_root = files_list()
    chosen_file = file_namesearch(search_root)
    text = files_read(chosen_file)
    files_cut_word(text)
|