"""Filter AI-related danmaku (bullet comments) and report the most frequent ones.

Reads the crawled danmaku dump, keeps every line containing at least one of
the AI keywords, and writes:

* ``AI弹幕.xls``   - the raw matching lines, appended (legacy plain-text output)
* ``AI弹幕.txt``   - the whitespace-stripped matching lines, one per line
* ``前八弹幕.txt``  - the 8 most frequent matching comments with their counts
* ``前八弹幕.xlsx`` - the same top-8 table as a real Excel workbook (via pandas)
"""

from collections import Counter
from typing import Iterable, List, Sequence, Tuple

SOURCE_PATH = '爬取的总弹幕.txt'
AI_TXT_PATH = 'AI弹幕.txt'
LEGACY_XLS_PATH = 'AI弹幕.xls'
TOP_TXT_PATH = '前八弹幕.txt'
TOP_XLSX_PATH = '前八弹幕.xlsx'

# Substring keywords; the four spellings of "ai" make the match case-insensitive
# for that word, since the test below is a plain case-sensitive `in`.
KEYWORDS = ('AI', 'Ai', 'ai', '智能', '人工智能', 'aI')
TOP_N = 8


def filter_ai_comments(lines: Iterable[str],
                       keywords: Sequence[str] = KEYWORDS) -> List[str]:
    """Return the lines that contain at least one keyword, unmodified.

    Args:
        lines: Any iterable of strings (an open text file works directly).
        keywords: Substrings to look for; matching is case-sensitive.

    Returns:
        The matching lines in their original order, including any trailing
        newline they carried.
    """
    return [line for line in lines if any(word in line for word in keywords)]


def top_comments(comments: Iterable[str], limit: int = TOP_N) -> List[Tuple[str, int]]:
    """Return the ``limit`` most frequent comments as ``(comment, count)`` pairs.

    Ties keep first-seen order, as ``Counter.most_common`` does.
    """
    return Counter(comments).most_common(limit)


def main() -> None:
    """Run the whole pipeline: filter, print, and write every output file."""
    # Read the dump exactly once; the context manager closes the handle
    # (the earlier version leaked it and then read the file a second time).
    with open(SOURCE_PATH, encoding='utf-8') as source:
        raw_matches = filter_ai_comments(source)

    # Console dump kept from the earlier version of the script: each raw match
    # (still carrying its newline), then the whole list.
    for line in raw_matches:
        print(line)
    print(raw_matches)

    # Legacy output. NOTE(review): this is plain text with an .xls extension and
    # it is opened in append mode, so repeated runs accumulate duplicates.
    # Kept for compatibility; the file is only touched when there are matches,
    # and it is now opened once instead of once per line.
    if raw_matches:
        with open(LEGACY_XLS_PATH, mode='a', encoding='utf-8') as legacy:
            legacy.writelines(raw_matches)

    # Strip surrounding whitespace so identical comments count as equal.
    ai_comments = [line.strip() for line in raw_matches]
    top = top_comments(ai_comments)

    print("前八的弹幕:")
    for comment, count in top:
        print(f"{comment}: {count}")

    # Overwrite (not append): this file always reflects the current run only.
    with open(AI_TXT_PATH, mode='w', encoding='utf-8') as out:
        out.writelines(f"{comment}\n" for comment in ai_comments)

    # Top-N comments and their counts go to their own text file.
    with open(TOP_TXT_PATH, mode='w', encoding='utf-8') as out:
        out.write("Top 8 'AI' related comments:\n")
        out.writelines(f"{comment}: {count}\n" for comment, count in top)

    # Imported here so the text outputs above are still produced when pandas
    # (or its Excel writer backend) is not installed.
    import pandas as pd

    df = pd.DataFrame(top, columns=['Comment', 'Count'])
    df.to_excel(TOP_XLSX_PATH, index=False)


if __name__ == "__main__":
    main()