"""Count word frequencies in a danmaku (bullet-comment) dump and export to Excel.

Pipeline:
    1. Read the raw text from ``总弹幕.txt``.
    2. Segment it into tokens with ``jieba.lcut``.
    3. Drop whitespace tokens and every token listed in ``标点符号表.text``.
    4. Count the remaining tokens.
    5. Write a two-column sheet ("词", "词频") to ``词频统计(去标点符号).xlsx``.

The script runs on load; all paths are relative to the current working
directory.
"""
from collections import Counter

import jieba
import openpyxl  # noqa: F401 - not called directly; pandas uses it as the .xlsx writer engine
import pandas

# 1. Read the raw danmaku text. The context manager closes the handle even if
#    reading fails (the original left both files open).
with open("总弹幕.txt", encoding='utf-8') as danmaku_file:
    text = danmaku_file.read()

# 2. Segment the text into a list of tokens.
text_list = jieba.lcut(text)

# 3. Load the punctuation / common-word table.
#    NOTE(review): the ".text" extension is kept exactly as in the original;
#    confirm it matches the file on disk.
with open('标点符号表.text', encoding='utf-8') as stopword_file:
    # Strip a possible UTF-8 BOM so it cannot glue itself onto the first entry,
    # then split on any whitespace (handles one-per-line and space-separated).
    stopword_entries = stopword_file.read().lstrip("\ufeff").split()

# Exact-match lookup table. The original tested ``word not in <whole file text>``,
# which is a *substring* test: any token that happened to be a fragment of a
# longer entry was silently discarded, and every test scanned the whole file.
stopwords = set()
for entry in stopword_entries:
    stopwords.add(entry)
    # Assumes the table may also be written as one packed run of punctuation
    # (e.g. "，。！？"); for entries with no letters/digits/CJK characters,
    # register each character individually so that layout keeps working.
    if not any(ch.isalnum() for ch in entry):
        stopwords.update(entry)

# jieba emits whitespace (spaces, newlines) as tokens of their own; drop them
# explicitly since they are never meaningful words.
text_clean = [
    word for word in text_list
    if word.strip() and word not in stopwords
]

# 4. Build the frequency table. Counter is a dict subclass that preserves
#    first-seen order, so the row order matches the original hand-rolled loop.
d_sict = Counter(text_clean)

# 5. Split the table into two parallel lists, one per Excel column.
key = list(d_sict.keys())
value = list(d_sict.values())

# 6. Write the result to Excel (the default index column is kept, as before).
result_excel = pandas.DataFrame({"词": key, "词频": value})
result_excel.to_excel('词频统计(去标点符号).xlsx')