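# NOTE: the lines below appear to be repository web-page text captured with this file, not program code.
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 86 lines
# 3.4 KiB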

import bilibili_spider
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import jieba
import pandas
from openpyxl import Workbook
def contains_keywords(text, keywords):
    """Report whether any jieba token of ``text`` equals one of ``keywords``.

    ``text`` is segmented with ``jieba.cut`` and every resulting token is
    compared for equality against every keyword. A keyword matches only
    when it equals a whole token; the comparison is case-sensitive.

    Args:
        text: The string to segment and search.
        keywords: Collection of keyword strings to look for.

    Returns:
        True if at least one token equals at least one keyword, else False.
    """
    tokens = list(jieba.cut(text))
    return any(token == candidate for token in tokens for candidate in keywords)
# Keywords used to decide whether a danmu is AI-related. They are passed to
# contains_keywords, which compares them to jieba tokens by exact,
# case-sensitive equality; both "ai" and "AI" appear in the list.
ai_keywords = [
    "机器学习", "深度学习", "自然语言处理", "计算机视觉", "图像识别",
    "语音识别", "强化学习", "生成对抗网络", "智能推荐系统", "数据挖掘",
    "模式识别", "智能机器人", "自动驾驶", "预测分析", "数据清洗",
    "异常检测", "知识图谱", "人工智能伦理", "智能合约", "虚拟助手",
    "语义分析", "图像生成", "文本生成", "情感分析", "决策支持系统",
    "人脸识别", "智能搜索", "自然语言生成", "人工神经网络", "模型优化",
    "智能监控", "医疗影像分析", "自动化", "智能制造", "虚拟现实",
    "增强现实", "智能家居", "边缘计算", "云计算", "数据隐私",
    "算法公平性", "知识推理", "智能交通", "聊天机器人", "自动化客服",
    "智能推荐引擎", "生物识别", "机器人过程自动化", "多模态学习", "量子计算",
    "自适应系统", "算法优化", "智能数据分析", "虚拟角色", "环境感知",
    "ai", "AI", "人工智能",
]
def list_to_dict(list):
    """Count how many times each element occurs in ``list``.

    Args:
        list: Iterable of hashable items. The name shadows the builtin
            ``list``; it is kept because it is part of the signature.

    Returns:
        A dict mapping each distinct item to its number of occurrences,
        with keys in order of first appearance.
    """
    tally = {}
    for element in list:
        # Missing keys start from zero, so the first sighting stores 1.
        tally[element] = tally.get(element, 0) + 1
    return tally
def main(query: str = '2024巴黎奥运会', number: int = 300) -> None:
    """Collect danmu for a search query and report on the AI-related ones.

    Steps:
      1. Fetch danmu through ``bilibili_spider.get_danmu``.
      2. Keep the danmu for which ``contains_keywords(danmu, ai_keywords)``
         is true.
      3. Print the 8 most frequent AI-related danmu.
      4. Write the frequency tables of all danmu and of the AI-related
         danmu to two sheets of ``output.xlsx``.
      5. Render the AI-related danmu as a word cloud in ``wordcloud.png``
         (skipped when there are none).

    Args:
        query: Search text handed to the spider.
        number: Forwarded to the spider as ``number``; presumably the
            amount of content to crawl -- confirm against bilibili_spider.
    """
    danmu_list = bilibili_spider.get_danmu(query=query, number=number, display_progress=True)

    # Keep only the danmu that mention at least one AI keyword.
    ai_danmu_list = [danmu for danmu in danmu_list if contains_keywords(danmu, ai_keywords)]

    # Most frequent first; sorted() is stable, so ties keep first-seen order.
    ai_danmu_dict = dict(
        sorted(list_to_dict(ai_danmu_list).items(), key=lambda item: item[1], reverse=True)
    )

    # Print the 8 most frequent AI-related danmu.
    for text, count in list(ai_danmu_dict.items())[:8]:
        print(f"{text} : 出现{count}次数")

    danmu_dict = dict(
        sorted(list_to_dict(danmu_list).items(), key=lambda item: item[1], reverse=True)
    )

    # The writer is opened in append mode, which needs an existing workbook,
    # so a fresh empty one is saved first (replacing any previous output).
    Workbook().save('output.xlsx')
    # One writer session covers both sheets; the file is loaded and saved once.
    with pandas.ExcelWriter('output.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        pandas.DataFrame(list(danmu_dict.items())).to_excel(writer, sheet_name='所有弹幕', index=False)
        pandas.DataFrame(list(ai_danmu_dict.items())).to_excel(writer, sheet_name='ai弹幕', index=False)

    # WordCloud.generate raises ValueError when the text contains no words,
    # so skip the image instead of crashing when nothing matched.
    if not ai_danmu_list:
        print("没有包含AI关键词的弹幕,跳过词云生成")
        return

    # Raw string: the backslashes are path separators, not escape sequences.
    font_path = r"C:\Windows\Fonts\SimHei.ttf"
    cloud = WordCloud(
        font_path=font_path, width=800, height=400, background_color='white'
    ).generate(' '.join(ai_danmu_list))

    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig('wordcloud.png', format='png')  # save as a PNG file
    plt.close()  # release the figure once it is on disk
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()