You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

103 lines
2.2 KiB

import numpy as np
from operator import itemgetter
import os
import re
import jieba
import pandas as pd
from wordcloud import WordCloud
def readname():
    """Return the names of all entries in the current working directory.

    The working-directory path is printed to stdout before the listing
    is taken.

    Returns:
        list[str]: Entry names as reported by ``os.listdir``.
    """
    cwd = os.getcwd()
    print(cwd)
    return os.listdir(cwd)
def ciyun(name):
    """Build a word cloud from a UTF-8 text file and display it.

    Args:
        name: Path of the text file whose contents feed the word cloud.
    """
    # 1. Read the whole text.
    with open('%s' % name, encoding="utf-8") as source:
        content = source.read()

    # 2. Configure font, background colour, canvas size and word limit,
    #    then feed the text in.
    cloud = WordCloud(
        font_path="C:/Windows/Fonts/simfang.ttf",
        background_color="black",
        width=600,
        height=300,
        max_words=50,
    ).generate(content)

    # 3. Render the cloud to an image and 4. show it.
    picture = cloud.to_image()
    picture.show()
def processs(data):
    """Tokenise every string in *data* with jieba.

    Args:
        data: Iterable of strings (the caller passes lines read from a file).

    Returns:
        list[list[str]]: One token list per input string, produced by
        ``jieba.cut`` after newline characters are stripped from both ends
        of the string.
    """
    return [list(jieba.cut(line.strip("\n"))) for line in data]
def tongji(a):
    """Count token frequencies in a UTF-8 text file and save them as CSV.

    The file is read line by line, each line is tokenised via ``processs``,
    and the occurrences of every distinct token are counted.  The counts,
    sorted from most to least frequent, are printed and written to
    ``result.csv`` in the current working directory.

    Args:
        a: Path of the text file to analyse.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open('%s' % a, encoding='utf-8') as source:
        data = source.readlines()

    cut_words = processs(data)

    # Flatten the per-line token lists into one sequence.
    total_words = []
    for each in cut_words:
        total_words.extend(each)

    # np.unique yields the distinct tokens and, in parallel, their counts.
    tokens, counts = np.unique(total_words, return_counts=True)
    result = pd.Series(data=counts, index=tokens).sort_values(ascending=False)
    print(result)

    # `header` expects a bool (or list of str); the string 'true' only worked
    # because any non-empty string is truthy.
    result.to_csv("result.csv", header=True, encoding='utf-8')
if __name__ == "__main__":
    # Interactive driver: list the .txt files in the working directory, let
    # the user pick one by index, run the frequency count and the word cloud
    # on it, then repeat until the user enters "q".
    print("输入q to quit")
    while True:
        name = readname()
        print(name)

        # Require the full ".txt" suffix: the display below strips exactly
        # four characters, and a bare 'txt' check would also match names
        # such as "footxt".
        arr = [entry for entry in name if entry.endswith('.txt')]
        for idx, entry in enumerate(arr):
            print("{}---{}".format(idx, entry[:-4]))
        k = len(arr)
        print(k)
        print(arr)

        # Without any candidate file no index can ever be valid; stop here
        # rather than prompting forever.
        if k == 0:
            print("当前目录下没有txt文件")
            break

        print("读取文件操作")
        while True:
            raw = input("请输入0-{}中的一个整数".format(k - 1))
            try:
                x = int(raw)
            except ValueError:
                # Not an integer at all (also covers characters such as "²"
                # that str.isdigit() accepts but int() rejects).
                x = -1
            # Valid indices are 0 .. k-1 inclusive, matching the prompt.
            if 0 <= x < k:
                break
            print("输入错误,请重新输入")

        a = arr[x]
        print(a)
        tongji(a)
        ciyun(a)

        # Named `answer` rather than `re` so the imported `re` module is not
        # shadowed.
        answer = input("输入q to quit")
        if answer == 'q':
            break