ADD file via upload

main
ping56ry4 2 months ago
parent 5b42a764d4
commit 7018ae8655

@@ -0,0 +1,69 @@
import requests
from bs4 import BeautifulSoup
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Browser-like request headers so simple anti-bot checks do not reject the requests
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"
}
# Check whether a danmaku mentions "AI" or "人工智能" (artificial intelligence).
# Note: this is a plain substring match, so English words containing "ai" also count.
def contains_ai_or_artificial_intelligence(text):
    return "ai" in text.lower() or "人工智能" in text
# Fetch a web page and return its HTML as text
def get_html(url):
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'
    return response.text
# Parse the search-result page and collect links that point to video pages.
# Search results may use protocol-relative URLs such as //www.bilibili.com/video/BV...,
# so both relative and absolute forms are accepted.
def parse_video_links(html):
    soup = BeautifulSoup(html, 'html.parser')
    video_links = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        if href.startswith('/video/') or '/video/BV' in href:
            video_links.append(href)
    return video_links
# Download the danmaku (bullet comments) for one video.
# The list.so endpoint returns XML in which each <d> element holds one danmaku,
# and its oid parameter is the video's cid (not the BV id from the URL).
def get_danmaku_data(cid):
    danmaku_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
    response = requests.get(danmaku_url, headers=headers)
    if response.status_code == 200:
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        return [d.get_text() for d in soup.find_all('d')]
    return []
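# The helper below is not part of the original upload; it is a minimal sketch of
# how the BV id taken from a /video/BV... link could be resolved to the cid that
# list.so expects, assuming the public pagelist endpoint
# (https://api.bilibili.com/x/player/pagelist?bvid=...), which returns JSON with
# one entry per video part. The name get_cid is introduced here for illustration.
def get_cid(bvid):
    pagelist_url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}"
    response = requests.get(pagelist_url, headers=headers)
    if response.status_code == 200:
        data = response.json().get('data') or []
        if data:
            # Use the first part's cid; multi-part videos would need every entry
            return data[0]['cid']
    return None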
# Main program
def main():
    ai_danmaku_list = []  # Collect AI-related danmakus here
    video_links = parse_video_links(get_html("https://search.bilibili.com/video?keyword=%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A"))
    for link in video_links:
        # The last path segment of /video/BV... is the BV id; resolve it to a cid
        bvid = link.rstrip('/').split('/')[-1].split('?')[0]
        cid = get_cid(bvid)
        if cid is None:
            continue
        danmakus = get_danmaku_data(cid)
        for danmaku in danmakus:
            if contains_ai_or_artificial_intelligence(danmaku):
                ai_danmaku_list.append(danmaku)
    # Report how many AI-related danmakus were found
    print(f"Number of AI-related danmakus: {len(ai_danmaku_list)}")
    # Write the danmakus to an Excel file
    df = pd.DataFrame(ai_danmaku_list, columns=['弹幕'])
    df.to_excel('ai_danmaku.xlsx', index=False)
    # Build the word cloud; jieba segments the Chinese text into words first.
    # WordCloud needs a CJK-capable font to render Chinese glyphs; the font_path
    # below is a common Windows location and may need adjusting on other systems.
    text = ' '.join(jieba.lcut(' '.join(ai_danmaku_list)))
    wordcloud = WordCloud(font_path='C:/Windows/Fonts/msyh.ttc', width=800, height=400, background_color='white').generate(text)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()

if __name__ == "__main__":
    main()
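A minimal way to run the script, assuming the uploaded file is saved as ai_danmaku.py (the file name is not shown in this view) and noting that pandas needs the openpyxl engine to write the .xlsx output:

pip install requests beautifulsoup4 pandas jieba wordcloud matplotlib openpyxl
python ai_danmaku.py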