Delete 'main.py'

2 years ago · 30ddff142e
parent 4dca0d2bcc
commit 30ddff142e
1 changed file with 0 additions and 105 deletions
--- a/main.py
+++ b/main.py
@@ -1,105 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import time
-import pandas as pd
-from collections import Counter
-from wordcloud import WordCloud
-import matplotlib.pyplot as plt
-cnt = 0
-# 已爬取视频数
-danmuku_all = []
-# 弹幕库
-
-headers = {
-    "cookie": "cookie",
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
-}
-
-
-def get_cid(bvid):
-    url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}"
-    try:
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        Json = response.json()
-        return Json['data'][0]['cid']
-    except requests.exceptions.RequestException as e:
-        print(f"请求失败: {e}")
-        return None
-
-
-def get_danmuku(cid):
-    if cid is None:
-        return []
-    url = f"https://comment.bilibili.com/{cid}.xml"
-    try:
-        response = requests.get(url, headers=headers, timeout=10)
-        response.encoding = 'utf-8'
-        soup = BeautifulSoup(response.text, 'xml')
-        return [i.text for i in soup.find_all('d')]
-    except requests.exceptions.RequestException as e:
-        print(f"请求失败: {e}")
-        return []
-
-
-for Page in range(1, 22):  # 1到22页够300个视频
-    url = f'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword=巴黎奥运会&page={Page}'
-    try:
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        Json = response.json()
-        results = Json['data']['result']
-        for result in results:
-            cid = get_cid(result['bvid'])
-            danmuku = get_danmuku(cid)
-            danmuku_all.extend(danmuku)
-            cnt += 1
-            if cnt >= 300:
-                break
-        if cnt >= 300:
-            break
-    except requests.exceptions.RequestException as e:
-        print(f"请求失败: {e}")
-time.sleep(1)  # 延时1秒防止被屏蔽
-
-
-def filter_danmuku(danmuku_list, keywords):
-    # 筛选包含指定关键词的弹幕
-    keywords_lower = [keyword.lower() for keyword in keywords]  # 关键词小写
-    filtered = [d for d in danmuku_list if any(keyword in d.lower() for keyword in keywords_lower)]
-    return filtered
-
-# 读取弹幕文件
-with open('所有视频弹幕.txt', 'r', encoding='utf-8') as file:
-    danmuku_all = file.readlines()
-
-# 筛选包含关键词的弹幕
-keywords = ['AI配音' , 'ai配音' , '人工智能' , 'ai画图' , 'AI画图' , 'AI识曲' , 'AI生成' , '神经网络' , '卷积神经网络' , '循环神经网络' , '智能家居' , '自动驾驶' , '智能推荐' , '智能算法' , '强化学习' , '计算机视觉' , 'ai还原' , 'ai合成']
-filtered_danmuku = filter_danmuku(danmuku_all, keywords)
-# 统计弹幕数量
-counter = Counter(filtered_danmuku)
-most_common = counter.most_common(8)
-# 将结果按列写入Excel
-data = {'弹幕内容': [content.strip() for content, count in most_common],
-        '数量': [count for content, count in most_common]}
-df = pd.DataFrame(data)
-df.to_excel('AI_人工智能_弹幕统计.xlsx', index=False)
-print("前8位弹幕统计已保存到 'AI_人工智能_弹幕统计.xlsx'.")
-font_path = r'C:\Windows\Fonts\simhei.ttf'
-try:
-    df = pd.read_excel('AI_人工智能_弹幕统计.xlsx')
-    if '弹幕内容' not in df.columns:
-        raise ValueError("Excel 文件中没有找到 '弹幕内容' 列")
-    text = ' '.join(df['弹幕内容'].dropna())
-    wordcloud = WordCloud(font_path=font_path, width=800, height=400, background_color='white').generate(text)
-    plt.figure(figsize=(10, 5))
-    plt.imshow(wordcloud, interpolation='bilinear')
-    plt.axis('off')
-    plt.show()
-    wordcloud.to_file('词云图.png')
-except FileNotFoundError:
-    print("文件未找到，请检查文件路径")
-except ValueError as ve:
-    print(ve)
-except Exception as e:
-    print(f"发生错误: {e}")