"""Fetch Bilibili danmu for a search query and summarise the AI-related ones.

Running the script:

1. downloads danmu through ``bilibili_spider.get_danmu``;
2. keeps the danmu that contain at least one token listed in ``ai_keywords``;
3. prints the eight most frequent AI-related danmu;
4. writes the frequency tables to ``output.xlsx``;
5. renders a word cloud of the AI-related danmu to ``wordcloud.png``.
"""
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import pandas
from openpyxl import Workbook  # noqa: F401 - no longer used here; kept as an existing file-level import
from wordcloud import WordCloud

import bilibili_spider


def contains_keywords(text, keywords):
    """Return True if any jieba token of ``text`` equals one of ``keywords``.

    Matching is done on whole tokens produced by ``jieba.cut``, not on
    substrings.

    Args:
        text: The string to tokenize and inspect.
        keywords: Iterable of hashable keyword strings. A ``set`` or
            ``frozenset`` is used as-is; anything else is converted once.

    Returns:
        bool: True when at least one token is a keyword, otherwise False.
    """
    # NOTE(review): a keyword that jieba splits into several tokens can never
    # equal a single token -- confirm whether such keywords should be
    # registered with jieba.add_word.
    if not isinstance(keywords, (set, frozenset)):
        keywords = set(keywords)
    return any(word in keywords for word in jieba.cut(text))


ai_keywords = [
    "机器学习", "深度学习", "自然语言处理", "计算机视觉", "图像识别",
    "语音识别", "强化学习", "生成对抗网络", "智能推荐系统", "数据挖掘",
    "模式识别", "智能机器人", "自动驾驶", "预测分析", "数据清洗",
    "异常检测", "知识图谱", "人工智能伦理", "智能合约", "虚拟助手",
    "语义分析", "图像生成", "文本生成", "情感分析", "决策支持系统",
    "人脸识别", "智能搜索", "自然语言生成", "人工神经网络", "模型优化",
    "智能监控", "医疗影像分析", "自动化", "智能制造", "虚拟现实",
    "增强现实", "智能家居", "边缘计算", "云计算", "数据隐私",
    "算法公平性", "知识推理", "智能交通", "聊天机器人", "自动化客服",
    "智能推荐引擎", "生物识别", "机器人过程自动化", "多模态学习", "量子计算",
    "自适应系统", "算法优化", "智能数据分析", "虚拟角色", "环境感知",
    "ai", "AI", "人工智能",
]


def list_to_dict(list):  # noqa: A002 - parameter name kept so keyword callers still work
    """Count how many times each element occurs.

    Args:
        list: Iterable of hashable items.

    Returns:
        dict: Maps each distinct item to its number of occurrences, with keys
        in order of first appearance.
    """
    return dict(Counter(list))


def _sorted_counts(items):
    """Return an item -> count dict ordered by descending count.

    Items with equal counts keep their first-seen order, because
    ``Counter.most_common()`` sorts stably.
    """
    return dict(Counter(items).most_common())


def _save_counts_to_excel(path, sheets):
    """Write every ``sheet name -> count dict`` pair in ``sheets`` to ``path``.

    The workbook is created in a single pass in write mode, so any existing
    file at ``path`` is overwritten and no empty placeholder sheet is left in
    the result.
    """
    with pandas.ExcelWriter(path, engine='openpyxl') as writer:
        for sheet_name, counts in sheets.items():
            frame = pandas.DataFrame(list(counts.items()))
            frame.to_excel(writer, sheet_name=sheet_name, index=False)


def _save_wordcloud(texts, path):
    """Render a word cloud from ``texts`` and save it to ``path`` as a PNG."""
    # Raw string: "\W", "\F" and "\S" are invalid escape sequences in a normal
    # string literal and trigger a SyntaxWarning on recent Python versions.
    font_path = r"C:\Windows\Fonts\SimHei.ttf"
    cloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate(' '.join(texts))

    figure = plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(path, format='png')
    # Release the figure once it is on disk.
    plt.close(figure)


def main():
    """Run the fetch, filter, report, export and word-cloud pipeline."""
    query = '2024巴黎奥运会'
    number = 300

    # Fetch the danmu list. For an offline run, substitute a literal list of
    # strings here instead of calling the spider.
    danmu_list = bilibili_spider.get_danmu(query=query, number=number, display_progress=True)

    # Keep only the danmu that contain an AI keyword; the keyword set is built
    # once rather than once per danmu.
    keyword_set = frozenset(ai_keywords)
    ai_danmu_list = [danmu for danmu in danmu_list if contains_keywords(danmu, keyword_set)]
    ai_danmu_dict = _sorted_counts(ai_danmu_list)

    # Print the eight most frequent AI-related danmu.
    for text, count in list(ai_danmu_dict.items())[:8]:
        print(f"{text} : 出现{count}次数")

    # Export the counts of all danmu and of the AI-related danmu.
    danmu_dict = _sorted_counts(danmu_list)
    _save_counts_to_excel('output.xlsx', {
        '所有弹幕': danmu_dict,
        'ai弹幕': ai_danmu_dict,
    })

    # WordCloud.generate raises ValueError when given no words, so skip the
    # image instead of crashing after the Excel file has been written.
    if not ai_danmu_list:
        print("No AI-related danmu found; skipping word cloud generation.")
        return

    _save_wordcloud(ai_danmu_list, 'wordcloud.png')


if __name__ == '__main__':
    main()