You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							86 lines
						
					
					
						
							3.4 KiB
						
					
					
				
			
		
		
	
	
							86 lines
						
					
					
						
							3.4 KiB
						
					
					
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import pandas
from openpyxl import Workbook
from wordcloud import WordCloud

import bilibili_spider
 | |
| 
 | |
def contains_keywords(text, keywords):
    """Return True if any jieba token of ``text`` equals one of ``keywords``.

    The text is segmented with ``jieba.cut`` and each resulting token is
    compared for exact (case-sensitive) equality against the keywords.

    Args:
        text: The string to inspect. Empty or ``None`` text never matches.
        keywords: An iterable of hashable keyword strings.

    Returns:
        ``True`` as soon as one token matches a keyword, otherwise ``False``.
    """
    # Nothing to segment: skip the tokenizer entirely.
    if not text:
        return False

    # Build the lookup once so each token costs O(1) instead of a scan over
    # every keyword.
    keyword_set = set(keywords)

    # NOTE(review): matching is per token, so a keyword that jieba splits into
    # several tokens can never match here - confirm that this is intended.
    return any(word in keyword_set for word in jieba.cut(text))
 | |
| 
 | |
# Keywords used to decide whether a danmu is AI-related. Both "ai" and "AI"
# are listed explicitly; the list order is kept as originally written.
ai_keywords = [
    "机器学习", "深度学习", "自然语言处理", "计算机视觉",
    "图像识别", "语音识别", "强化学习", "生成对抗网络",
    "智能推荐系统", "数据挖掘", "模式识别", "智能机器人",
    "自动驾驶", "预测分析", "数据清洗", "异常检测",
    "知识图谱", "人工智能伦理", "智能合约", "虚拟助手",
    "语义分析", "图像生成", "文本生成", "情感分析",
    "决策支持系统", "人脸识别", "智能搜索", "自然语言生成",
    "人工神经网络", "模型优化", "智能监控", "医疗影像分析",
    "自动化", "智能制造", "虚拟现实", "增强现实",
    "智能家居", "边缘计算", "云计算", "数据隐私",
    "算法公平性", "知识推理", "智能交通", "聊天机器人",
    "自动化客服", "智能推荐引擎", "生物识别", "机器人过程自动化",
    "多模态学习", "量子计算", "自适应系统", "算法优化",
    "智能数据分析", "虚拟角色", "环境感知",
    # Generic terms.
    "ai", "AI", "人工智能",
]
 | |
| 
 | |
def list_to_dict(list):
    """Count how many times each element occurs in ``list``.

    Args:
        list: An iterable of hashable items. The parameter name shadows the
            builtin ``list``; it is kept so existing keyword callers keep
            working.

    Returns:
        A plain ``dict`` mapping each distinct item to its occurrence count,
        with keys in first-seen order.
    """
    # Counter does the tallying; convert back so callers still get a plain
    # dict.
    return dict(Counter(list))
 | |
| 
 | |
| 
 | |
def main(query='2024巴黎奥运会', number=300):
    """Fetch danmu, report the AI-related ones, and export the statistics.

    Steps:
      1. Fetch danmu via ``bilibili_spider.get_danmu``.
      2. Keep the danmu for which ``contains_keywords`` finds an AI keyword.
      3. Print the eight most frequent AI-related danmu.
      4. Write the counts of all danmu and of AI danmu to ``output.xlsx``.
      5. Render a word cloud of the AI danmu to ``wordcloud.png``.

    Args:
        query: Search query forwarded to ``bilibili_spider.get_danmu``.
        number: Value forwarded as ``number`` to ``bilibili_spider.get_danmu``.
    """
    # Fetch the danmu list.
    danmu_list = bilibili_spider.get_danmu(query=query, number=number, display_progress=True)

    # Keep only the danmu that contain an AI keyword.
    ai_danmu_list = [danmu for danmu in danmu_list if contains_keywords(danmu, ai_keywords)]

    # Count occurrences and order them from most to least frequent.
    ai_danmu_dict = list_to_dict(ai_danmu_list)
    ai_danmu_dict = dict(sorted(ai_danmu_dict.items(), key=lambda item: item[1], reverse=True))

    # Print the eight most frequent AI-related danmu.
    for text, count in list(ai_danmu_dict.items())[:8]:
        print(f"{text} : 出现{count}次数")

    # Same counting and ordering for the full danmu list.
    danmu_dict = list_to_dict(danmu_list)
    danmu_dict = dict(sorted(danmu_dict.items(), key=lambda item: item[1], reverse=True))

    # Start from a fresh workbook, then add both sheets through one writer so
    # the file is only reopened once.
    Workbook().save('output.xlsx')
    with pandas.ExcelWriter('output.xlsx', engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        pandas.DataFrame(list(danmu_dict.items())).to_excel(writer, sheet_name='所有弹幕', index=False)
        pandas.DataFrame(list(ai_danmu_dict.items())).to_excel(writer, sheet_name='ai弹幕', index=False)

    # WordCloud.generate raises when given no words, so skip the image if
    # nothing matched.
    if not ai_danmu_list:
        print("没有包含AI关键词的弹幕，跳过词云图生成")
        return

    # Build the word cloud. Raw string: the backslashes are path separators,
    # not escape sequences.
    font_path = r"C:\Windows\Fonts\SimHei.ttf"
    wordcloud = WordCloud(
        font_path=font_path,
        width=800,
        height=400,
        background_color='white',
    ).generate(' '.join(ai_danmu_list))

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')

    # Save as a PNG file, then release the figure.
    plt.savefig('wordcloud.png', format='png')
    plt.close()


if __name__ == '__main__':
    main()
 | |
|      |