You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
"""
读取弹幕数据并统计频率, 然后将统计结果保存到Excel文件中
"""
from collections import Counter

import pandas as pd
def load_danmu(file_path):
    """Read a danmu (bullet-comment) text file and return its lines.

    Args:
        file_path: Path of the text file to read; it is decoded as UTF-8.

    Returns:
        list: Every line of the file, in file order, as returned by
        ``readlines()`` (line endings are kept).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The mode must be exactly 'r': a padded literal such as ' r ' is
    # rejected by open() with "ValueError: invalid mode".
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.readlines()
def count_danmu(danmu_list):
    """Count how many times each danmu occurs.

    Every entry is stripped of leading and trailing whitespace before it is
    counted, so entries that differ only in surrounding whitespace (for
    example the trailing newline) are merged. A blank entry is counted under
    the empty-string key.

    Args:
        danmu_list: Iterable of danmu strings.

    Returns:
        dict: Maps each stripped danmu to its number of occurrences; keys
        appear in first-seen order.
    """
    # Counter replaces the hand-rolled ``get(key, 0) + 1`` loop. Convert back
    # to a plain dict so callers keep the exact return type (a missing key
    # still raises KeyError instead of yielding 0).
    return dict(Counter(danmu.strip() for danmu in danmu_list))
def save_to_excel(all_danmus, excel_file):
    """Write the danmu frequency table to an Excel file, most frequent first.

    The sheet has two columns, ``danmu`` and ``count``, and no index column.

    Args:
        all_danmus: Mapping of danmu text to its occurrence count.
        excel_file: Destination handed to ``DataFrame.to_excel``.
    """
    # sorted() stays stable with reverse=True, so danmus with equal counts
    # keep the mapping's own iteration order.
    ranked = sorted(all_danmus.items(), key=lambda item: item[1], reverse=True)
    # Header names must be exact: padded names such as ' danmu ' would force
    # every reader of the sheet to spell out the stray spaces.
    df = pd.DataFrame(ranked, columns=['danmu', 'count'])
    df.to_excel(excel_file, index=False)
def main(danmu_file_path='E:/Crawler/output/danmu.txt',
         excel_file='E:/Crawler/output/All_Danmu.xlsx'):
    """Load the danmu file, count frequencies, and save them to Excel.

    Args:
        danmu_file_path: Text file of danmus to read. Defaults to the
            crawler's output file.
        excel_file: Excel workbook to write. Defaults to the crawler's
            output directory.
    """
    # The default paths must start at the drive letter: a leading space
    # turns 'E:/...' into a path that is no longer an absolute drive path.
    danmu_list = load_danmu(danmu_file_path)
    all_danmus = count_danmu(danmu_list)
    save_to_excel(all_danmus, excel_file)
    # User-facing message: "all danmu data counted and saved to the Excel sheet".
    print(" 所有弹幕数据统计完成, 并已保存到Excel表格 ")
# Run the pipeline only when this file is executed as a script. The compared
# value must be exactly '__main__'; a padded literal never matches, so the
# script would silently do nothing.
if __name__ == '__main__':
    main()