ADD file via upload

master
p8yolxhq4 4 years ago
parent 3935d65175
commit 2c47f1a84c

@ -0,0 +1,101 @@
import csv
import os
import string
from collections import Counter
from operator import itemgetter

import jieba
import matplotlib.pyplot as mp
import wordcloud
def create_file(filepath):
    """List the entries of *filepath*, print them numbered, and save the index.

    The index is (re)written to ``filename.csv`` in the current working
    directory: a ``file_no,filename`` header followed by one row per
    directory entry, numbered from 1. Every entry returned by
    ``os.listdir`` is included; nothing is filtered by extension.

    :param filepath: directory that holds the text files
    :return: number of entries found (the highest ``file_no`` written,
        or 0 when the directory is empty)
    :raises OSError: if *filepath* cannot be listed or the CSV cannot be
        written
    """
    # os.listdir() gives no ordering guarantee; sorting keeps a file's
    # number stable from one run to the next.
    names = sorted(os.listdir(filepath))

    print('files list:')
    # One handle opened in 'w' mode both resets the index and receives every
    # row, so nothing is left open or unflushed when the function returns.
    with open('filename.csv', 'w', encoding='utf-8', newline='') as file_handler:
        file_writer = csv.writer(file_handler)
        file_writer.writerow(['file_no', 'filename'])
        for file_no, filename in enumerate(names, start=1):
            print('%d--%s' % (file_no, filename))
            file_writer.writerow([file_no, filename])
    return len(names)
def count_word(filename):
    """Count how often each word occurs in a UTF-8 text file.

    The text is lower-cased, every character in ``string.punctuation``
    (ASCII punctuation only) is replaced by a space, and the result is
    segmented with ``jieba``. Whitespace-only tokens are discarded so the
    separators introduced above are not counted as words. The 30 most
    frequent entries (fewer if the text has fewer distinct words) are
    printed.

    :param filename: path of the text file to analyse
    :return: list of ``(word, count)`` tuples ordered by count, highest
        first; words with equal counts keep first-seen order
    :raises OSError: if the file cannot be opened
    """
    with open(filename, "r", encoding='utf-8') as handle:
        file_txt = handle.read().lower()

    # One translate() pass instead of one replace() call per punctuation mark.
    blank_out = str.maketrans(string.punctuation, " " * len(string.punctuation))
    file_txt = file_txt.translate(blank_out)

    # jieba returns spaces and newlines as tokens of their own; drop them.
    words = [w for w in jieba.lcut(file_txt, cut_all=False) if w.strip()]

    wordfre = Counter(words).most_common()
    # Slicing copes with texts that contain fewer than 30 distinct words.
    for entry in wordfre[:30]:
        print(entry)
    return wordfre
def create_cloud(numword):
    """Append the word counts to ``temp.txt`` and display a word cloud of it.

    Each ``(word, count)`` pair is written as one line, the count first and
    the word directly after it. ``temp.txt`` is opened in append mode, so
    the cloud is generated from everything accumulated in that file, not
    only from *numword*.

    :param numword: iterable of ``(word, count)`` pairs
    :return: None
    """
    with open('temp.txt', 'a', encoding='utf-8') as f:
        f.writelines("%d%s\n" % (count, word) for word, count in numword)

    # Read back only after the writer above is closed, so every appended
    # line is on disk; the reader is closed as soon as the text is loaded.
    with open("temp.txt", "r", encoding='utf-8') as f:
        temp_txt = f.read()

    # NOTE(review): 'msyhbd.ttc' must be resolvable as a font file by
    # wordcloud on the host system - confirm for non-Windows machines.
    cloud = wordcloud.WordCloud(
        font_path='msyhbd.ttc',
        width=1000,
        height=700,
        background_color="white",
    )
    word_cloud = cloud.generate(temp_txt)

    mp.imshow(word_cloud)
    mp.axis('off')
    mp.show()
if __name__ == "__main__":
    # Interactive loop: list the files, let the user pick one by number,
    # show its word statistics and word cloud, repeat until 'X' is entered.
    filepath = 'file'  # directory that holds the txt files
    create_file(filepath)
    file_no = input("如果要退出请输入X\n请输入你的选择:")
    while file_no != 'X':
        if not file_no.isnumeric():
            print("输入错误!")
            file_no = input("请输入正确的:")
            continue

        # Load the whole index and close it before doing anything else:
        # create_file() below rewrites filename.csv, which must not happen
        # while the file is still being iterated.
        with open('filename.csv', 'r', encoding='utf-8', newline='') as fd:
            index = {row[0]: row[1] for row in csv.reader(fd) if len(row) >= 2}

        if file_no not in index:
            file_no = input("请输入正确的:")
            continue

        # os.path.join keeps the path valid on non-Windows systems as well.
        filename = os.path.join(filepath, index[file_no])
        numword = count_word(filename)
        create_cloud(numword)
        create_file(filepath)  # refresh and re-print the numbered list
        file_no = input("如果要退出请输入X\n请输入你的选择:")
Loading…
Cancel
Save