# pai/词频统计.py

from collections import Counter

import jieba
import openpyxl
import pandas
# Word-frequency script: segment the danmaku (bullet-comment) text with jieba,
# drop tokens found in the punctuation/stop-word file, count what remains, and
# export the counts to an Excel workbook.

# 1. Read the danmaku text.
# `with` guarantees the handle is closed even if the read raises.
with open("总弹幕.txt", encoding='utf-8') as f:
    text = f.read()

# 2. Segment the text into a list of tokens.
text_list = jieba.lcut(text)

# 3. Remove punctuation (common words).
with open('标点符号表.text', encoding='utf-8') as f2:
    stopwords = f2.read()
# NOTE(review): `stopwords` is the entire file as ONE string, so `in` below is
# a substring test: a token is dropped whenever it occurs anywhere in the file
# text (including inside a longer entry). This is kept as-is because the file
# layout is not visible here; if the file holds one entry per line, consider
# switching to a set of stripped lines — confirm against the data file.
text_clean = [word for word in text_list if word not in stopwords]

# 4. Build the frequency mapping (token -> occurrence count).
# Counter is a dict subclass and keeps first-seen insertion order, so the
# key/value order matches a manual `dict.get(key, 0) + 1` loop.
d_sict = Counter(text_clean)

# 5. Split the mapping into two parallel lists for the Excel columns.
key = list(d_sict.keys())
value = list(d_sict.values())

# 6. Write the result to an Excel workbook (the default index column is kept).
result_excel = pandas.DataFrame({"词": key, "词频": value})
result_excel.to_excel('词频统计（去标点符号）.xlsx')
ADD file via upload 3 months ago
			`import jieba`
			`import pandas`
			`import openpyxl`
			`#1.读取弹幕数据`
			`f =open("总弹幕.txt",encoding='utf-8')`
			`text =f.read()`

			`#2.分词`
			`text_list=jieba.lcut(text)`

			`#3.去除标点符号（常见词）`
			`f2=open('标点符号表.text',encoding='utf-8')`
			`stopwords=f2.read()`
			`text_clean=[word for word in text_list if word not in stopwords]`

			`#4.生成词频字典`
			`d_sict={}`
			`for key in text_clean:`
			`    d_sict[key]=d_sict.get(key,0)+1`

			`#5.获取字典的键和值作为excel的两个列表变量`
			`key=list(d_sict.keys())`
			`value=list(d_sict.values())`

			`#6.写入excel表`
			`result_excel=pandas.DataFrame()`
			`result_excel["词"]=key`
			`result_excel["词频"]=value`
			`result_excel.to_excel('词频统计（去标点符号）.xlsx')`