From 7018ae8655c8b7acf6e6feab089d819151a427d8 Mon Sep 17 00:00:00 2001 From: ping56ry4 <1583105685@qq.com> Date: Wed, 18 Sep 2024 10:27:10 +0800 Subject: [PATCH] ADD file via upload --- 102201216李俊辉.py | 69 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 102201216李俊辉.py diff --git a/102201216李俊辉.py b/102201216李俊辉.py new file mode 100644 index 0000000..13c58e0 --- /dev/null +++ b/102201216李俊辉.py @@ -0,0 +1,69 @@ +import requests +from bs4 import BeautifulSoup +import pandas as pd +import jieba +from wordcloud import WordCloud +import matplotlib.pyplot as plt + +# 模拟浏览器请求头 +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0" +} + +# 检查文本是否包含“AI”或“人工智能” +def contains_ai_or_artificial_intelligence(text): + return "ai" in text.lower() or "人工智能" in text.lower() + +# 获取网页内容 +def get_html(url): + response = requests.get(url, headers=headers) + response.encoding = 'utf-8' + return response.text + +# 解析网页获取视频链接 +def parse_video_links(html): + soup = BeautifulSoup(html, 'html.parser') + video_links = [] + for link in soup.find_all('a', href=True): + href = link['href'] + if href.startswith('/video/'): + video_links.append(href) + return video_links + +# 获取弹幕数据 +def get_danmaku_data(video_id): + danmaku_url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={video_id}" + response = requests.get(danmaku_url, headers=headers) + if response.status_code == 200: + danmaku_data = response.json() + danmakus = [danmaku['content'] for danmaku in danmaku_data['data']['list']] + return danmakus + return [] + +# 主程序 +def main(): + ai_danmaku_list = [] # 初始化列表以存储AI相关的弹幕 + video_links = parse_video_links(get_html("https://search.bilibili.com/video?keyword=%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A")) + for link in video_links: + video_id = link.split('/')[2] + danmakus = get_danmaku_data(video_id) + for danmaku in danmakus: + if contains_ai_or_artificial_intelligence(danmaku): + ai_danmaku_list.append(danmaku) + + # 输出AI相关的弹幕数量 + print(f"AI相关的弹幕数量: {len(ai_danmaku_list)}") + + # 写入Excel文件 + df = pd.DataFrame(ai_danmaku_list, columns=['弹幕']) + df.to_excel('ai_danmaku.xlsx', index=False) + + # 生成词云图 + text = ' '.join(ai_danmaku_list) + wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text) + plt.imshow(wordcloud, interpolation='bilinear') + plt.axis('off') + plt.show() + +if __name__ == "__main__": + main() \ No newline at end of file