You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

54 lines
2.0 KiB

# Danmaku (bullet comments) that mention AI; each entry keeps its original line ending.
ai_list = []
# Substrings that mark a danmaku as AI-related.
keywords = ['AI', 'Ai', 'ai', '智能', '人工智能', 'aI']
# Read the input through a context manager so the handle is closed after the scan.
with open('爬取的总弹幕.txt', encoding='utf-8') as find:
    for aifind in find:
        # Keep the danmaku when it contains at least one keyword.
        if any(i in aifind for i in keywords):
            print(aifind)
            ai_list.append(aifind)
print(ai_list)
# Make sure every entry ends with a newline, so a final input line that lacks
# one cannot run into the next danmaku appended to the same file.
ai_lines = [aifind if aifind.endswith('\n') else aifind + '\n' for aifind in ai_list]
# Open each output once (still in append mode) instead of once per danmaku.
# The file is therefore created even when no danmaku matched.
with open('AI弹幕.txt', mode='a', encoding='utf-8') as f:
    f.writelines(ai_lines)
# NOTE(review): this stores plain UTF-8 text under an .xls name; it is not a
# real Excel workbook.
with open('AI弹幕.xls', mode='a', encoding='utf-8') as f:
    f.writelines(ai_lines)
from collections import Counter
# Substrings that mark a danmaku as AI-related.
keywords = ['AI', 'Ai', 'ai', '智能', '人工智能', 'aI']
# Scan the danmaku file and keep every line that contains at least one keyword.
with open('爬取的总弹幕.txt', encoding='utf-8') as find:
    ai_list = [
        line.strip()  # drop surrounding whitespace, including the trailing newline
        for line in find
        if any(keyword in line for keyword in keywords)
    ]
# Tally how many times each filtered danmaku occurs.
counter = Counter(ai_list)
# The eight most frequent danmaku as (text, occurrences) pairs.
top_8_comments = counter.most_common(8)
# Print the ranking, one "text: occurrences" line per entry.
print("前八的弹幕:")
for text, occurrences in top_8_comments:
    print(f"{text}: {occurrences}")
# Save every filtered AI danmaku to AI弹幕.txt, one per line (the file is overwritten).
with open('AI弹幕.txt', mode='w', encoding='utf-8') as f:
    f.writelines(aifind + '\n' for aifind in ai_list)
# Save the top-8 danmaku and their counts to 前八弹幕.txt (the file is overwritten).
with open('前八弹幕.txt', mode='w', encoding='utf-8') as f:
    f.write("Top 8 'AI' related comments:\n")
    f.writelines(f"{comment}: {count}\n" for comment, count in top_8_comments)
# 如果需要写入Excel文件中请使用pandas库
import pandas as pd
# Export the top-8 ranking to an Excel workbook with "Comment" and "Count"
# columns; the DataFrame index is not written.
df = pd.DataFrame(data=top_8_comments, columns=['Comment', 'Count'])
df.to_excel(excel_writer='前八弹幕.xlsx', index=False)