# First pass: collect every danmaku line that mentions an AI-related keyword.
ai_list = []  # matching danmaku lines, kept verbatim (trailing newline included)

# Substrings that mark a danmaku line as AI-related
keywords = ['AI', 'Ai', 'ai', '智能', '人工智能', 'aI']

# A context manager closes the input file as soon as the scan is finished
# (the bare open() previously left the handle open for the rest of the run).
with open('爬取的总弹幕.txt', encoding='utf-8') as find:
    for aifind in find:
        # Keep the line when it contains at least one keyword
        if any(keyword in aifind for keyword in keywords):
            print(aifind)
            ai_list.append(aifind)  # store in the AI danmaku list

print(ai_list)
# Append every matched danmaku line to AI弹幕.txt.
# The file is opened once rather than once per line. The guard preserves the
# earlier behaviour of not creating the file when nothing matched.
if ai_list:
    with open('AI弹幕.txt', mode='a', encoding='utf-8') as f:
        # Lines still carry their own newline, so they are written as-is
        f.writelines(ai_list)
# Append every matched danmaku line to AI弹幕.xls.
# NOTE(review): this writes plain UTF-8 text under an .xls name, not a real
# Excel workbook -- confirm that this is the intended output format.
# The file is opened once rather than once per line. The guard preserves the
# earlier behaviour of not creating the file when nothing matched.
if ai_list:
    with open('AI弹幕.xls', mode='a', encoding='utf-8') as f:
        # Lines still carry their own newline, so they are written as-is
        f.writelines(ai_list)
from collections import Counter

# List of AI-related danmaku; rebinding it here starts from an empty list
ai_list = []

# Substrings that mark a danmaku line as AI-related
keywords = ['AI', 'Ai', 'ai', '智能', '人工智能', 'aI']

# Read the danmaku file and keep only the lines that contain a keyword
with open('爬取的总弹幕.txt', encoding='utf-8') as find:
    for aifind in find:
        if any(keyword in aifind for keyword in keywords):
            # strip() drops the trailing newline (and any other surrounding
            # whitespace) so the stored text is the bare comment
            ai_list.append(aifind.strip())
# Count how many times each danmaku text occurs
counter = Counter(ai_list)

# The 8 most frequent danmaku as (text, count) pairs, most frequent first
top_8_comments = counter.most_common(8)

# Print the top 8 danmaku together with their counts
print("前八的弹幕:")
for comment, count in top_8_comments:
    print(f"{comment}: {count}")
# Write all filtered AI danmaku to AI弹幕.txt, one per line.
# mode='w' replaces any existing content of the file.
with open('AI弹幕.txt', mode='w', encoding='utf-8') as f:
    for aifind in ai_list:
        f.write(aifind + '\n')
# Write the top 8 danmaku and their counts to 前八弹幕.txt
# (a header line first, then one "text: count" line per entry).
with open('前八弹幕.txt', mode='w', encoding='utf-8') as f:
    f.write("Top 8 'AI' related comments:\n")
    for comment, count in top_8_comments:
        f.write(f"{comment}: {count}\n")
# pandas is used to write the statistics to an Excel file
import pandas as pd

# Build a two-column table from the (text, count) pairs and save it as
# 前八弹幕.xlsx; index=False leaves the DataFrame row index out of the sheet.
df = pd.DataFrame(top_8_comments, columns=['Comment', 'Count'])
df.to_excel('前八弹幕.xlsx', index=False)