Delete '词频统计.py'

2 months ago · 82ade59631
parent f29684caaa
commit 82ade59631
1 changed files with 0 additions and 29 deletions
--- a/词频统计.py
+++ b/词频统计.py
@@ -1,29 +0,0 @@
 import jieba
 import pandas 
 import openpyxl
 # Word-frequency count over a danmaku (bullet-comment) text dump, exported to
 # an Excel sheet with one row per distinct token.
 #
 # 1. Read the danmaku data; `with` closes the handle as soon as it is read.
 with open("总弹幕.txt", encoding='utf-8') as f:
     text = f.read()
 # 2. Segment the text into a list of tokens.
 text_list = jieba.lcut(text)
 # 3. Remove punctuation / common words listed in the stop-word file.
 with open('中文常见停用词.text', encoding='utf-8') as f2:
     stopwords = f2.read()
 # `stopwords` is the raw file text, so `word in stopwords` would be a
 # substring test and would also discard any token that merely occurs inside
 # a longer stop word. Compare whole tokens against a set instead.
 # Assumes stop words are whitespace-separated (e.g. one per line) - TODO confirm.
 stopword_set = set(stopwords.split())
 # Whitespace-only tokens (newlines, spaces) are dropped explicitly; the old
 # substring test only filtered them by accident.
 text_clean = [
     word for word in text_list
     if word.strip() and word not in stopword_set
 ]
 # 4. Build the token -> count dictionary (keeps first-seen order).
 d_sict = {}
 for word in text_clean:
     d_sict[word] = d_sict.get(word, 0) + 1
 # 5. Split the dictionary into the two Excel columns.
 key = list(d_sict.keys())
 value = list(d_sict.values())
 # 6. Write the table to the Excel file.
 result_excel = pandas.DataFrame()
 result_excel["词"] = key
 result_excel["词频"] = value
 result_excel.to_excel('词频统计（去常见词）.xlsx')