You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
import pandas as pd
def load_danmu(file_path):
    """Read danmu (bullet-comment) data from a text file.

    Args:
        file_path: Path to a UTF-8 encoded text file; presumably it holds
            one danmu per line (confirm against the crawler that writes it).

    Returns:
        A list with one string per line of the file. Each string keeps its
        trailing newline, exactly as ``readlines`` returns it.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The mode and encoding literals must carry no surrounding whitespace:
    # open() rejects ' r ' as an invalid mode.
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.readlines()
def filter_and_count_danmu(danmu_list):
    """Count how often each AI-related danmu occurs.

    A danmu counts as AI-related when it contains at least one of the
    keywords below as a substring. Matching is case-sensitive; both ``ai``
    and ``AI`` are listed explicitly.

    Args:
        danmu_list: Iterable of danmu strings, e.g. the raw lines of a file
            (trailing newlines are fine).

    Returns:
        A dict mapping each matching danmu, with surrounding whitespace
        stripped, to the number of times it occurred. Keys appear in order
        of first occurrence.
    """
    from collections import Counter

    # Keywords must not be padded with spaces, otherwise a danmu such as
    # "我爱ai" would never match.
    ai_keywords = (
        'ai', '智能', '技术', '应用', '人机',
        'AI', '人工智能', '机器学习', '深度学习', '神经网络',
    )
    counts = Counter(
        danmu.strip()  # drop the trailing newline so duplicates collapse
        for danmu in danmu_list
        if any(keyword in danmu for keyword in ai_keywords)
    )
    # Return a plain dict so callers get the same type as before.
    return dict(counts)
def save_to_excel(all_danmus, excel_file, top_n=8):
    """Write the most frequent danmu to an Excel file.

    Args:
        all_danmus: Mapping of danmu text to its occurrence count.
        excel_file: Destination passed straight to ``DataFrame.to_excel``.
        top_n: How many of the most frequent danmu to keep. Defaults to 8,
            the cutoff that was previously hard-coded.

    The sheet has two columns, ``danmu`` and ``count``, sorted by count in
    descending order; no index column is written. ``sorted`` is stable, so
    danmu with equal counts keep the mapping's iteration order.
    """
    # Sort by count, highest first, then keep the leading top_n entries.
    ranked = sorted(all_danmus.items(), key=lambda item: item[1], reverse=True)
    # Column names must not be padded with spaces, or the header cells
    # would read ' danmu ' / ' count '.
    df = pd.DataFrame(ranked[:top_n], columns=['danmu', 'count'])
    df.to_excel(excel_file, index=False)
def main():
    """Run the pipeline: load danmu, count AI-related ones, export to Excel.

    Reads the danmu text file, counts the AI-related entries with
    ``filter_and_count_danmu``, writes the result with ``save_to_excel``
    and prints a completion message.
    """
    # Raw strings keep the Windows backslashes literal. The paths must not
    # contain stray spaces, or the files cannot be found.
    danmu_file_path = r'E:\Crawler\danmu.txt'  # danmu input file
    excel_file = r'E:\Crawler\Top8_Danmu.xlsx'  # Excel output file

    danmu_list = load_danmu(danmu_file_path)
    all_danmus = filter_and_count_danmu(danmu_list)

    # Save to Excel.
    save_to_excel(all_danmus, excel_file)
    print(" 与AI相关的弹幕数据统计完成, 并已保存到Excel表格 ")
# Run the pipeline only when this file is executed as a script, not when it
# is imported. The comparison must be against exactly '__main__'; a padded
# literal never matches and main() would silently never run.
if __name__ == '__main__':
    main()