You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
103 lines
2.2 KiB
103 lines
2.2 KiB
3 years ago
|
import numpy as np
|
||
|
from operator import itemgetter
|
||
|
import os
|
||
|
import re
|
||
|
import jieba
|
||
|
import pandas as pd
|
||
|
from wordcloud import WordCloud
|
||
|
|
||
|
|
||
|
def readname():
    """List the entries of the current working directory.

    The working directory path is printed before the listing is taken.

    Returns:
        list[str]: the entry names exactly as ``os.listdir`` reports them.
    """
    cwd = os.getcwd()
    print(cwd)
    return os.listdir(cwd)
|
||
|
def ciyun(name, font_path="C:/Windows/Fonts/simfang.ttf"):
    """Render a word cloud from a UTF-8 text file and display it.

    Args:
        name: Path of the text file to read.
        font_path: Font file handed to ``WordCloud``. The default is the
            Windows path this function always used, so existing callers
            behave the same; pass another font on systems without it.
    """
    # 1. Read the whole text; the file is closed before rendering starts.
    with open(name, encoding="utf-8") as file:
        text = file.read()

    # 2. Configure background colour, canvas size and the word limit.
    wordcloud = WordCloud(
        font_path=font_path,
        background_color="black",
        width=600,
        height=300,
        max_words=50,
    ).generate(text)

    # 3. Turn the cloud into an image.
    image = wordcloud.to_image()

    # 4. Show the image.
    image.show()
|
||
|
|
||
|
def processs(data):
    """Split every line of *data* into words with ``jieba.cut``.

    Args:
        data: Iterable of strings; leading and trailing newline characters
            are stripped from each one before it is segmented.

    Returns:
        list: one list of tokens per input string, in input order.
    """
    return [list(jieba.cut(line.strip("\n"))) for line in data]
|
||
|
|
||
|
def tongji(a):
    """Count how often each word occurs in a UTF-8 text file.

    The counts are printed in descending order and also written to
    ``result.csv`` (a relative path, so it lands in the working directory).

    Args:
        a: Path of the text file to analyse.
    """
    # A context manager guarantees the handle is closed once the lines are read.
    with open(a, encoding="utf-8") as source:
        data = source.readlines()

    cut_words = processs(data)

    # Flatten the per-line token lists into a single list of words.
    total_words = [word for words in cut_words for word in words]

    values, counts = np.unique(total_words, return_counts=True)
    result = pd.Series(data=counts, index=values).sort_values(ascending=False)
    print(result)

    # header is a boolean flag; True keeps the header row in the output.
    result.to_csv("result.csv", header=True, encoding="utf-8")
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
def parse_choice(raw, count):
    """Turn user input into a valid index for a list of ``count`` items.

    Args:
        raw: The text the user typed.
        count: Number of selectable items.

    Returns:
        The index as an ``int`` when ``raw`` is a decimal number with
        ``0 <= index < count``; ``None`` for anything else.
    """
    raw = raw.strip()
    # isdecimal() only accepts characters int() can parse; isdigit() also
    # accepts e.g. superscript digits, on which int() raises ValueError.
    if not raw.isdecimal():
        return None
    index = int(raw)
    # The upper bound is exclusive of count, so the last item is selectable.
    return index if index < count else None


def main():
    """Interactive loop: pick a .txt file, count its words, show a word cloud."""
    print("输入q to quit")
    while True:
        name = readname()
        print(name)

        # Match on ".txt" (with the dot) because the display below strips
        # exactly four characters from the end of each name.
        arr = [entry for entry in name if entry.endswith(".txt")]
        if not arr:
            # Nothing to choose from; asking for an index could never succeed.
            print("当前目录下没有txt文件")
            break

        for k, entry in enumerate(arr):
            print("{}---{}".format(k, entry[:-4]))
        k = len(arr)
        print(k)
        print(arr)

        print("读取文件操作")
        while True:
            x = parse_choice(input("请输入0-{}中的一个整数".format(k - 1)), k)
            if x is not None:
                break
            print("输入错误,请重新输入")

        a = arr[x]
        print(a)
        tongji(a)
        ciyun(a)

        # Read the answer inline instead of binding it to "re", which would
        # shadow the imported regex module.
        if input("输入q to quit") == "q":
            break


if __name__ == "__main__":
    main()
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|