You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

42 lines
1.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
统计AI相关的弹幕数据并将前8项结果保存到Excel文件中
"""
import pandas as pd
def load_danmu(file_path):
    """Read the danmu (bullet-comment) file and return its lines.

    Args:
        file_path: Path of a UTF-8 encoded text file, presumably holding
            one danmu per line.

    Returns:
        A list with the file's lines; each line keeps its trailing
        newline, so callers strip as needed.
    """
    # 'utf-8-sig' decodes BOM-less UTF-8 exactly like 'utf-8' but drops a
    # leading byte-order mark, so a file saved by a Windows editor does not
    # get an invisible U+FEFF glued onto its first danmu (which would make
    # that danmu count as a separate entry later on).
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return f.readlines()
def filter_and_count_danmu(danmu_list):
    """Count how often each AI-related danmu occurs.

    A danmu is treated as AI-related when it contains at least one of the
    keywords listed below. Matching ignores letter case, so "Ai" and "aI"
    are caught in addition to "ai" and "AI".

    Args:
        danmu_list: Iterable of danmu strings (raw lines; surrounding
            whitespace and trailing newlines are allowed).

    Returns:
        A dict mapping each matching danmu (whitespace-stripped, original
        letter case kept) to its number of occurrences, in first-seen
        order.
    """
    from collections import Counter

    # The Latin keyword is stored lowercase and compared against the
    # lowercased danmu; lower() leaves the Chinese keywords untouched.
    ai_keywords = ('ai', '智能', '技术', '应用', '人机', '人工智能', '机器学习', '深度学习', '神经网络')

    def is_ai_related(danmu):
        lowered = danmu.lower()
        return any(keyword in lowered for keyword in ai_keywords)

    counts = Counter(
        danmu.strip() for danmu in danmu_list if is_ai_related(danmu)
    )
    # Hand back a plain dict so callers get the same type as before.
    return dict(counts)
def save_to_excel(all_danmus, excel_file, *, top_n=8):
    """Write the most frequent danmus to an Excel file.

    The output sheet has two columns, 'danmu' and 'count', ordered from
    the highest count to the lowest.

    Args:
        all_danmus: Mapping of danmu text to its occurrence count.
        excel_file: Destination handed straight to ``DataFrame.to_excel``.
        top_n: How many of the highest-count entries to keep. Defaults to
            8, the cutoff that used to be hard-coded.

    Raises:
        ValueError: If ``top_n`` is negative (a negative slice bound would
            silently drop entries from the tail instead of limiting them).
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative, got {}".format(top_n))
    # sorted() is stable even with reverse=True, so danmus with equal
    # counts keep the order they have in the mapping.
    ranked = sorted(all_danmus.items(), key=lambda item: item[1], reverse=True)
    df = pd.DataFrame(ranked[:top_n], columns=['danmu', 'count'])
    df.to_excel(excel_file, index=False)
def main():
    """Load the danmu file, count the AI-related danmus, and export them to Excel."""
    source_path = 'E:/Crawler/output/danmu.txt'
    report_path = 'E:/Crawler/output/Top8_Danmu.xlsx'
    # Read -> count -> export, then report completion on stdout.
    counts = filter_and_count_danmu(load_danmu(source_path))
    save_to_excel(counts, report_path)
    print("与AI相关的弹幕数据统计完成并已保存到Excel表格")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()