parent
1508089162
commit
e9f98c476b
@ -0,0 +1,81 @@
|
||||
import os
|
||||
import jieba
|
||||
from wordcloud import WordCloud
|
||||
|
||||
def files_list():
    """Return the current working directory of the running process."""
    return os.getcwd()
|
||||
|
||||
|
||||
def file_namesearch(a):
    """Let the user pick one ``.txt`` file found beneath directory ``a``.

    Every file below ``a`` whose name ends in ``.txt`` (case-insensitive) is
    printed as ``<number>----<file name>``.  A 1-based number is then read
    from standard input and the path of the corresponding file is returned.

    Args:
        a: Directory that is searched recursively.

    Returns:
        The full path of the file the user selected.

    Raises:
        FileNotFoundError: ``a`` does not exist, or no ``.txt`` file was
            found below it.
        ValueError: The text typed by the user is not an integer.
        IndexError: The number typed is outside the printed range.
    """
    path = a
    if not os.path.exists(path):
        print("路径错误,请重新输入")
        # Nothing can be listed, so stop instead of prompting for a number.
        raise FileNotFoundError(path)

    file_list = []
    for root, dirs, files in os.walk(path):
        # Sorting fixes the traversal order so the numbering is reproducible.
        dirs.sort()
        for name in sorted(files):
            # Check the extension of the file name itself: a substring test
            # on the joined path also matches unrelated entries that merely
            # live under a directory whose name contains "txt".
            if not name.lower().endswith(".txt"):
                continue
            file_list.append(os.path.join(root, name))
            # Print the running number followed by the file name.
            print("%d----%s" % (len(file_list), name))

    if not file_list:
        raise FileNotFoundError("no .txt files found under %r" % (path,))

    print("请输入文件序号")
    n = int(input())
    # Reject 0 and negatives explicitly; plain indexing would wrap around
    # and silently return an entry from the end of the list.
    if not 1 <= n <= len(file_list):
        raise IndexError(
            "file number %d is outside 1..%d" % (n, len(file_list))
        )
    return file_list[n - 1]
|
||||
|
||||
|
||||
def files_read(n):
    """Read a UTF-8 text file and return its whole content.

    Args:
        n: Path of the file to read.

    Returns:
        The decoded text of the file as a single string.
    """
    # The context manager closes the handle even when read() raises,
    # for example on a decoding error.
    with open(n, 'r', encoding='utf-8') as handle:
        return handle.read()
|
||||
|
||||
|
||||
def files_cut_word(a, count_path="3.txt", image_path="2.png",
                   font_path="C:/Windows/Fonts/simfang.ttf"):
    """Segment text with jieba, save word counts and render a word cloud.

    The text is split with ``jieba.lcut``.  Tokens whose length is exactly
    one are left out of the frequency table; the remaining tokens are
    written to ``count_path`` as ``<word> <count>`` lines, highest count
    first.  The word cloud is generated from all tokens joined by spaces
    and saved to ``image_path``.

    Args:
        a: The text to analyse.
        count_path: File that receives the frequency table (UTF-8).
        image_path: File that receives the rendered word-cloud image.
        font_path: Font file handed to ``WordCloud``; the default is a
            Windows font location, so pass another path on other systems.
    """
    from collections import Counter

    tokens = jieba.lcut(a)

    # Count every token except those of length one.
    counts = Counter(tok for tok in tokens if len(tok) != 1)

    # most_common() sorts by count, descending, and keeps first-seen order
    # for ties.  The context manager closes the file even if a write fails.
    with open(count_path, 'w', encoding='utf-8') as out:
        out.writelines(
            "{} {}\n".format(word, freq)
            for word, freq in counts.most_common()
        )

    # Word-cloud rendering parameters.
    word_cloud = WordCloud(
        font_path=font_path,
        background_color="white",
        width=1920,
        height=1080,
    ).generate(" ".join(tokens))
    word_cloud.to_file(image_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Pipeline: working directory -> file chosen by the user -> its text
    # -> word statistics and word-cloud image.
    start_dir = files_list()
    chosen_file = file_namesearch(start_dir)
    text = files_read(chosen_file)
    files_cut_word(text)
|
Loading…
Reference in new issue