From 9898d73e187d5d3ad62c56c76521edb9e4713242 Mon Sep 17 00:00:00 2001
From: pu6qcatis <916351100@qq.com>
Date: Sun, 15 Sep 2024 10:25:52 +0800
Subject: [PATCH] ADD file via upload

---
 main.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 main.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..ff9a13a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,105 @@
+import requests
+from bs4 import BeautifulSoup
+import time
+import pandas as pd
+from collections import Counter
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+# Crawl state shared by the script: cnt is the number of videos processed so far,
+# danmuku_all collects the danmaku (bullet comments) of every crawled video.
+cnt, danmuku_all = 0, []
+
+# Headers sent with every request. NOTE(review): the cookie looks like a placeholder -- confirm.
+headers = {
+    "cookie": "cookie",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
+}
+
+
+def get_cid(bvid):
+    """Return the cid of the first page of video ``bvid``, or None if the lookup fails."""
+    url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}"
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        return response.json()['data'][0]['cid']  # raises on an error payload without usable 'data'
+    except (requests.exceptions.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
+        print(f"请求失败: {e}")
+        return None
+
+
+def get_danmuku(cid):
+    """Return the danmaku texts for ``cid``; [] when ``cid`` is None or the request fails."""
+    if cid is None:
+        return []
+    try:
+        response = requests.get(f"https://comment.bilibili.com/{cid}.xml", headers=headers, timeout=10)
+        response.raise_for_status()  # report HTTP errors instead of parsing an error page as XML
+        response.encoding = 'utf-8'
+        return [d.text for d in BeautifulSoup(response.text, 'xml').find_all('d')]
+    except requests.exceptions.RequestException as e:
+        print(f"请求失败: {e}")
+        return []
+
+
+for Page in range(1, 22):  # search pages 1-21, meant to be enough to reach 300 videos
+    url = f'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword=巴黎奥运会&page={Page}'
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        Json = response.json()
+        results = Json['data']['result']  # missing/None on an error payload -> handled below
+        for result in results:
+            cid = get_cid(result['bvid'])
+            danmuku = get_danmuku(cid)
+            danmuku_all.extend(danmuku)
+            cnt += 1
+            if cnt >= 300:
+                break
+        if cnt >= 300:
+            break  # video quota reached: stop paging
+    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError) as e:
+        print(f"请求失败: {e}")
+    time.sleep(1)  # wait 1 second between pages to avoid being blocked
+
+
+def filter_danmuku(danmuku_list, keywords):
+    """Return the danmaku containing any keyword (case-insensitive), in input order."""
+    needles = tuple(dict.fromkeys(kw.lower() for kw in keywords))  # lowercase once, drop duplicates
+    lowered = ((entry, entry.lower()) for entry in danmuku_list)  # lowercase each danmaku only once
+    return [entry for entry, folded in lowered if any(needle in folded for needle in needles)]
+
+# Load saved danmaku, stripped so identical texts count together. NOTE(review): this
+# rebinds danmuku_all, discarding the crawl results above (never saved) -- confirm intent.
+with open('所有视频弹幕.txt', 'r', encoding='utf-8') as file:
+    danmuku_all = [line.strip() for line in file]
+# Keep only the danmaku that contain one of the keywords
+keywords = ['AI配音' , 'ai配音' , '人工智能' , 'ai画图' , 'AI画图' , 'AI识曲' , 'AI生成' , '神经网络' , '卷积神经网络' , '循环神经网络' , '智能家居' , '自动驾驶' , '智能推荐' , '智能算法' , '强化学习' , '计算机视觉' , 'ai还原' , 'ai合成']
+filtered_danmuku = filter_danmuku(danmuku_all, keywords)
+# Count identical danmaku and keep the 8 most frequent
+counter = Counter(filtered_danmuku)
+most_common = counter.most_common(8)
+# Write the result to Excel as two columns: danmaku text and its count
+data = {'弹幕内容': [content for content, count in most_common],
+        '数量': [count for content, count in most_common]}
+df = pd.DataFrame(data)
+df.to_excel('AI_人工智能_弹幕统计.xlsx', index=False)
+print("前8位弹幕统计已保存到 'AI_人工智能_弹幕统计.xlsx'.")
+font_path = r'C:\Windows\Fonts\simhei.ttf'  # SimHei font file (Windows path) passed to WordCloud
+try:  # build a word cloud from the saved statistics, write it to a PNG, then display it
+    df = pd.read_excel('AI_人工智能_弹幕统计.xlsx')
+    if '弹幕内容' not in df.columns:
+        raise ValueError("Excel 文件中没有找到 '弹幕内容' 列")
+    text = ' '.join(df['弹幕内容'].dropna())  # one space-separated string of the danmaku texts
+    wordcloud = WordCloud(font_path=font_path, width=800, height=400, background_color='white').generate(text)
+    wordcloud.to_file('词云图.png')  # save first so the PNG exists even if displaying fails
+    plt.figure(figsize=(10, 5))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis('off')
+    plt.show()
+except FileNotFoundError:
+    print("文件未找到，请检查文件路径")
+except ValueError as ve:
+    print(ve)
+except Exception as e:
+    print(f"发生错误: {e}")
\ No newline at end of file