import requests
from bs4 import BeautifulSoup
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

# Browser-like request headers so bilibili does not reject the requests as a bot.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
}


def contains_ai_or_artificial_intelligence(text):
    """Return True if *text* mentions "AI" (case-insensitive) or "人工智能"."""
    lowered = text.lower()
    return "ai" in lowered or "人工智能" in lowered


def get_html(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    Raises requests.RequestException on network failure or timeout.
    """
    # timeout added: without it a stalled connection hangs the whole script.
    response = requests.get(url, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    return response.text


def parse_video_links(html):
    """Extract video page hrefs (paths starting with '/video/') from *html*.

    Returns a de-duplicated list in first-seen order — a search results page
    typically links each video several times (thumbnail, title, ...), and
    duplicates would make main() fetch the same danmaku repeatedly.
    """
    soup = BeautifulSoup(html, 'html.parser')
    hrefs = (a['href'] for a in soup.find_all('a', href=True))
    return list(dict.fromkeys(h for h in hrefs if h.startswith('/video/')))


def get_danmaku_data(video_id):
    """Fetch the danmaku (bullet comments) for one video, as a list of strings.

    Returns an empty list on any network, HTTP, or parse failure.

    NOTE(review): the list.so endpoint expects ``oid`` to be the video's *cid*;
    main() currently passes the id segment parsed from the /video/ URL —
    confirm these match, or resolve the cid via the video-info API first.
    """
    danmaku_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}"
    try:
        response = requests.get(danmaku_url, headers=headers, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    # BUG FIX: this endpoint returns XML of the form <d p="...">text</d>,
    # not JSON — the previous response.json()['data']['list'] access raised.
    response.encoding = 'utf-8'
    try:
        root = ET.fromstring(response.text)
    except ET.ParseError:
        return []
    return [d.text for d in root.iter('d') if d.text]


def main():
    """Collect AI-related danmaku from bilibili search results, save to Excel,
    and render a word cloud."""
    ai_danmaku_list = []  # accumulates danmaku that mention AI / 人工智能

    search_url = "https://search.bilibili.com/video?keyword=%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A"
    video_links = parse_video_links(get_html(search_url))

    for link in video_links:
        video_id = link.split('/')[2]  # '/video/<id>/...' -> '<id>'
        for danmaku in get_danmaku_data(video_id):
            if contains_ai_or_artificial_intelligence(danmaku):
                ai_danmaku_list.append(danmaku)

    # 输出AI相关的弹幕数量
    print(f"AI相关的弹幕数量: {len(ai_danmaku_list)}")

    # 写入Excel文件
    df = pd.DataFrame(ai_danmaku_list, columns=['弹幕'])
    df.to_excel('ai_danmaku.xlsx', index=False)

    # Guard: WordCloud.generate('') raises ValueError when nothing matched.
    if not ai_danmaku_list:
        print("没有AI相关弹幕，跳过词云图")
        return

    # 生成词云图 — segment with jieba first: Chinese has no spaces, so without
    # segmentation WordCloud would treat whole sentences as single "words".
    text = ' '.join(jieba.cut(' '.join(ai_danmaku_list)))
    # NOTE(review): the default WordCloud font cannot render CJK glyphs —
    # pass font_path to a Chinese-capable .ttf (e.g. simhei.ttf) to avoid boxes.
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()


if __name__ == "__main__":
    main()