You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
# -*- coding: utf-8 -*-
import os
import DataProcess
def Read_fileName(nowPath):
    """List the ``.txt`` files in a directory and print them as a numbered menu.

    As a side effect the process working directory is changed to
    ``nowPath``, so the bare names returned here can afterwards be opened
    relative to the current directory.

    :param nowPath: directory to search; may be absolute or relative to the
        working directory at call time
    :return: list of the names in ``nowPath`` that end with ``.txt``, in the
        order ``os.listdir`` reports them
    """
    # Resolve the directory before changing into it: once the working
    # directory has moved, a relative nowPath would be looked up a second
    # time from inside itself and point at the wrong place.
    targetDir = os.path.abspath(nowPath)
    os.chdir(targetDir)

    fileList = [name for name in os.listdir(targetDir) if name.endswith('.txt')]

    # Menu numbers are 1-based; Output_endTxt subtracts 1 from the user's
    # answer to get back to a list index.
    for index, name in enumerate(fileList, start=1):
        print(' {:d} -- '.format(index) + name)
    return fileList
def Output_endTxt(fileList):
    """Ask the user to pick a file from ``fileList`` and run the word-count pipeline on it.

    The user is prompted for a 1-based position in ``fileList`` until the
    answer is a valid number that refers to a file which can be opened.
    The chosen file name is then handed to the ``DataProcess`` helpers
    (``Create_path``, ``Jieba``, ``Turn``, ``Account``, ``Sort``).

    :param fileList: list of candidate file names, as shown to the user
    :return: None. NOTE(review): the original notes say the pipeline writes
        Jieba.txt and then Sort.txt; that happens inside ``DataProcess`` and
        is not visible from this function.
    """
    if not fileList:
        # Nothing can ever be selected, so prompting would loop forever.
        print('文件列表为空,没有可处理的文件!')
        return

    # Loop instead of recursing so repeated bad input cannot grow the stack.
    while True:
        try:
            choice = int(input(' 请输入列表中文件的序号: '))
            if choice < 1:
                # Zero or negative numbers would silently index from the end
                # of the list; treat them as out of range instead.
                raise IndexError(choice)
            fileName = fileList[choice - 1]
            # Open once purely to confirm the file exists and is readable;
            # the handle is closed immediately because the processing step
            # is given the file name, not the handle.
            with open(fileName, 'r', encoding='utf-8'):
                pass
        except (IndexError, ValueError, FileNotFoundError):
            print(' 无法打开指定文件,请重新输入! ')
            continue
        break

    # Create the directory that will hold the results.
    newPath = DataProcess.Create_path()
    # Word segmentation (per the original notes: input file --> Jieba.txt).
    DataProcess.Jieba(fileName, newPath)
    # Counting and sorting (per the original notes: Jieba.txt --> Sort.txt).
    DataProcess.Sort(DataProcess.Account(DataProcess.Turn()))