ADD file via upload

1 year ago · ad1dbd0191
parent c8a122faed
commit ad1dbd0191
1 changed files with 94 additions and 0 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,94 @@
import time
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from wordcloud import WordCloud
cnt = 0  # number of videos crawled so far
danmuku_all = []  # danmaku (bullet comments) collected from all crawled videos

# Browser-like User-Agent sent with every HTTP request in this script.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
    ),
}
def get_cid(bvid):
    """Look up the cid of the first page of the video identified by *bvid*.

    Queries the player pagelist endpoint and reads ``data[0]['cid']`` from
    the JSON response.

    Args:
        bvid: The video's BV identifier, interpolated into the query string.

    Returns:
        The cid value from the response, or ``None`` when the request fails
        or the response does not contain a first page entry.
    """
    url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}"
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        payload = response.json()
        return payload['data'][0]['cid']
    except requests.exceptions.RequestException as e:
        print(f"请求失败: {e}")
        return None
    except (KeyError, IndexError, TypeError, ValueError):
        # The request succeeded but the body is not the expected shape
        # (no 'data', empty page list, or invalid JSON); treat as "no cid".
        return None
def get_danmuku(cid):
    """Download the danmaku XML for *cid* and return the comment texts.

    Args:
        cid: The cid returned by ``get_cid``; ``None`` means no lookup result.

    Returns:
        A list with the text of every ``<d>`` element in the XML document,
        or an empty list when *cid* is ``None`` or the request fails.
    """
    if cid is None:
        return []
    url = f"https://comment.bilibili.com/{cid}.xml"
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Fail fast on HTTP errors instead of parsing an error page as XML.
        response.raise_for_status()
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'xml')
        return [node.text for node in soup.find_all('d')]
    except requests.exceptions.RequestException as e:
        print(f"请求失败: {e}")
        return []
# Crawl the search result pages for the keyword and collect the danmaku of
# each listed video until MAX_VIDEOS videos have been processed.
# NOTE(review): the collected danmuku_all is not written to disk here, while
# later code reads '所有视频弹幕.txt' — confirm where that file is produced.
MAX_VIDEOS = 300

for Page in range(1, 22):
    url = (
        'https://api.bilibili.com/x/web-interface/search/type'
        f'?search_type=video&keyword=巴黎奥运会&page={Page}'
    )
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        Json = response.json()
        # A page without 'data' or 'result' simply contributes no videos.
        results = (Json.get('data') or {}).get('result') or []
        for result in results:
            cid = get_cid(result['bvid'])
            danmuku = get_danmuku(cid)
            danmuku_all.extend(danmuku)
            cnt += 1
            if cnt >= MAX_VIDEOS:
                break
        if cnt >= MAX_VIDEOS:
            break
    except requests.exceptions.RequestException as e:
        print(f"请求失败: {e}")
    # Wait one second between pages to avoid being blocked; this must run
    # inside the loop to have any throttling effect.
    time.sleep(1)
def filter_danmuku(danmuku_list, keywords):
    """Return the danmaku that contain at least one of the given keywords.

    Matching is a case-insensitive substring test; the returned items keep
    their original text and order.

    Args:
        danmuku_list: Iterable of danmaku strings to filter.
        keywords: Iterable of keyword strings to look for.

    Returns:
        A list of the entries from *danmuku_list* that contain any keyword.
    """
    keywords_lower = [keyword.lower() for keyword in keywords]
    filtered = []
    for danmuku in danmuku_list:
        # Lowercase each danmaku once rather than once per keyword.
        lowered = danmuku.lower()
        if any(keyword in lowered for keyword in keywords_lower):
            filtered.append(danmuku)
    return filtered
+
# Read the saved danmaku file, one danmaku per line. Lines are stripped so
# identical danmaku compare equal regardless of trailing newlines, and blank
# lines are dropped.
with open('所有视频弹幕.txt', 'r', encoding='utf-8') as file:
    danmuku_all = [line for line in (raw.strip() for raw in file) if line]

# Keep only the danmaku that mention one of the AI-related keywords.
keywords = ['AI识曲','AI生成','神经网络','卷积神经网络','循环神经网络','智能家居','自动驾驶','智能推荐','智能算法','强化学习','计算机视觉','ai还原','ai合成']
filtered_danmuku = filter_danmuku(danmuku_all, keywords)
# Count occurrences and keep the eight most frequent danmaku.
counter = Counter(filtered_danmuku)
most_common = counter.most_common(8)
# Write the result to Excel as two columns: danmaku text and its count.
data = {'弹幕内容': [content.strip() for content, count in most_common],
        '数量': [count for content, count in most_common]}
df = pd.DataFrame(data)
df.to_excel('AI_人工智能_弹幕统计.xlsx', index=False)
print("前8位弹幕统计已保存到 'AI_人工智能_弹幕统计.xlsx'.")
# Render a word cloud from the danmaku column of the exported Excel file,
# save it as a PNG and display it.
font_path = r'C:\Windows\Fonts\simhei.ttf'  # font file passed to WordCloud
try:
    df = pd.read_excel('AI_人工智能_弹幕统计.xlsx')
    if '弹幕内容' not in df.columns:
        raise ValueError("Excel 文件中没有找到 '弹幕内容' 列")
    # Cells read back from Excel may be numeric (e.g. a danmaku of digits);
    # cast to str so the join cannot fail on non-string values.
    text = ' '.join(df['弹幕内容'].dropna().astype(str))
    wordcloud = WordCloud(font_path=font_path, width=800, height=400, background_color='white').generate(text)
    # Save before showing: plt.show() may block until the window is closed.
    wordcloud.to_file('词云图.png')
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
except FileNotFoundError:
    print("文件未找到，请检查文件路径")
except ValueError as ve:
    print(ve)
except Exception as e:
    # Top-level boundary of the script: report any other failure.
    print(f"发生错误: {e}")