"""Count word frequencies in a danmaku dump and export them to Excel.

Reads ``总弹幕.txt``, segments it with jieba, drops the stop words listed in
``中文常见停用词.text`` and writes every remaining word with its count to
``词频统计(去常见词).xlsx``.

Reconstructed from the removed side of a git diff that deleted
``词频统计.py``.
"""
from collections import Counter

import jieba
import openpyxl  # noqa: F401 -- not referenced directly; pandas writes .xlsx through it
import pandas

# 1. Read the raw danmaku text; the context manager closes the handle.
with open("总弹幕.txt", encoding='utf-8') as danmaku_file:
    text = danmaku_file.read()

# 2. Segment the text into a list of words.
text_list = jieba.lcut(text)

# 3. Remove punctuation / common words.
# The stop-word file is parsed into a set with one entry per line, so the
# membership test below matches whole words. Testing against the raw file
# text would be a substring search and would also drop any word that merely
# occurs inside a longer stop word.
with open('中文常见停用词.text', encoding='utf-8') as stopword_file:
    stopwords = {line.strip() for line in stopword_file if line.strip()}

# Whitespace-only tokens (spaces, line breaks) are skipped explicitly: they
# carry no meaning and can never be an entry of the stripped stop-word set.
text_clean = [
    word
    for word in text_list
    if word.strip() and word not in stopwords
]

# 4. Build the word -> frequency mapping. Counter is a dict subclass, so the
# words stay in first-seen order.
word_counts = Counter(text_clean)

# 5. Use the words and their counts as the two spreadsheet columns.
words = list(word_counts.keys())
counts = list(word_counts.values())

# 6. Write the table to an Excel workbook.
result_excel = pandas.DataFrame({"词": words, "词频": counts})
result_excel.to_excel('词频统计(去常见词).xlsx')