|
|
|
|
# NOTE(review): stray diff hunk header "@ -1,87 +0,0 @@" from a patch paste — neutralized as a comment
|
|
|
|
|
import requests
|
|
|
|
|
import re
|
|
|
|
|
import json
|
|
|
|
|
import time
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
class BilibiliDanmakuSpider:
    """Crawl Bilibili danmaku (bullet comments) for keyword search results.

    Workflow: query the web search API for video ids (aid), resolve each
    video's cid via the view API, then download and parse that cid's XML
    danmaku feed. Collected danmaku texts accumulate in ``self.danmaku_list``.
    """

    def __init__(self):
        # Browser-like headers so the Bilibili API does not reject the requests.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Referer": "https://www.bilibili.com/"
        }
        # All danmaku texts collected across every crawled video.
        self.danmaku_list = []

    def get_video_ids(self, keyword, page_count=36):
        """Return up to 360 deduplicated video ids for one search keyword.

        Each search page yields roughly 10 videos, so the default 36 pages
        target about 360 ids.

        Args:
            keyword: Search term passed to the Bilibili search API.
            page_count: Number of result pages to fetch (default 36).

        Returns:
            list: deduplicated aids, capped at 360. Order is arbitrary
            because of the ``set()`` round-trip.
        """
        video_ids = []
        for page in range(1, page_count + 1):
            try:
                url = (
                    "https://api.bilibili.com/x/web-interface/search/type"
                    f"?keyword={keyword}&search_type=video&page={page}"
                )
                # timeout keeps a stalled connection from hanging the crawl
                response = requests.get(url, headers=self.headers, timeout=10)
                data = response.json()

                # code == 0 means the API call succeeded and there is result data.
                if data["code"] == 0 and data["data"]["result"]:
                    page_video_ids = [item["aid"] for item in data["data"]["result"]]
                    video_ids.extend(page_video_ids)
                    print(f"第{page}页获取到视频ID:{page_video_ids}共{len(page_video_ids)}个")

                # Random delay between pages to avoid anti-crawler throttling.
                time.sleep(random.uniform(1, 3))
            except Exception as e:
                # Best effort: log the failed page and continue with the next one.
                print(f"获取第{page}页视频ID失败: {e}")

        return list(set(video_ids))[:360]  # dedupe and cap at 360

    def get_danmakus(self, aid):
        """Fetch one video's danmaku and append them to ``self.danmaku_list``.

        Args:
            aid: The video's numeric id.

        Returns:
            bool: True on success, False on any failure (logged, not raised).
        """
        try:
            # Step 1: resolve the video's cid through the view API.
            url = f"https://api.bilibili.com/x/web-interface/view?aid={aid}"
            response = requests.get(url, headers=self.headers, timeout=10)
            cid = response.json()["data"]["cid"]

            # Step 2: download the XML danmaku feed for that cid.
            danmaku_url = f"https://comment.bilibili.com/{cid}.xml"
            response = requests.get(danmaku_url, headers=self.headers, timeout=10)
            response.encoding = "utf-8"

            # Each danmaku is the text content of a <d ...>...</d> element.
            danmakus = re.findall(r'<d.*?>(.*?)</d>', response.text)
            self.danmaku_list.extend(danmakus)

            print(f"成功获取视频{aid}的{len(danmakus)}条弹幕")
            # Short random pause between videos to stay under rate limits.
            time.sleep(random.uniform(0.5, 1.5))
            return True
        except Exception as e:
            print(f"获取视频{aid}弹幕失败: {e}")
            return False

    def run(self, keywords=None):
        """Run the crawler: search every keyword, then fetch all danmaku.

        Args:
            keywords: Search terms. Defaults to LLM-related Chinese keywords.
                (Previously a mutable default list argument — now built fresh
                per call to avoid shared-state surprises.)

        Returns:
            list: every danmaku text collected, i.e. ``self.danmaku_list``.
        """
        if keywords is None:
            keywords = ["大语言模型", "大模型", "LLM"]

        all_video_ids = []
        for keyword in keywords:
            print(f"搜索关键词: {keyword}")
            all_video_ids.extend(self.get_video_ids(keyword))

        # Dedupe across keywords and keep the total at 360 videos max.
        unique_video_ids = list(set(all_video_ids))[:360]
        print(f"共获取{len(unique_video_ids)}个视频ID,开始爬取弹幕...")

        for idx, aid in enumerate(unique_video_ids, 1):
            print(f"正在爬取第{idx}/{len(unique_video_ids)}个视频")
            self.get_danmakus(aid)

        print(f"爬取完成,共获取{len(self.danmaku_list)}条弹幕")
        return self.danmaku_list
|