import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List
from urllib import parse

import requests

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10


class BilibiliVideoSpider:
    """Scrapes Bilibili video search results and their danmaku (bullet-screen)
    comments via the public web API, authenticating with a session cookie."""

    def __init__(self, session_cookie: str, user_agent: str):
        # Both values are attached to every outgoing request's headers.
        self.session_cookie = session_cookie
        self.user_agent = user_agent

    def search_videos(self, keyword: str, page: int, page_size: int,
                      max_retries: int = 5) -> list:
        """Return the numeric ``aid`` of every video on one search-result page.

        Args:
            keyword: Search term (URL-quoted into the Referer header).
            page: 1-based result page number.
            page_size: Results per page.
            max_retries: Bounded retry count for transient failures.
                (The original looped forever; a dead endpoint hung the program.)

        Returns:
            List of aids, or [] when all retries are exhausted or the API
            reports a non-zero error code on every attempt.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Cache-Control": "no-cache",
            "Cookie": self.session_cookie,
            "Origin": "https://search.bilibili.com",
            "Pragma": "no-cache",
            "Referer": f"https://search.bilibili.com/all?keyword={parse.quote(keyword)}",
            "User-Agent": self.user_agent,
        }
        params = {
            "search_type": "video",
            "page": page,
            "page_size": page_size,
            "keyword": keyword,
        }
        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(
                    "https://api.bilibili.com/x/web-interface/search/type",
                    headers=headers,
                    params=params,
                    timeout=REQUEST_TIMEOUT,
                ).json()
                if response.get('code') == 0:
                    return [item['id'] for item in response['data'].get('result', [])]
                # Non-zero code: API rejected the request (rate limit, bad cookie...).
                print(f"API returned code {response.get('code')} "
                      f"(attempt {attempt}/{max_retries})")
            except Exception as error:
                print(f"Error fetching search results: {error}")
            time.sleep(1)  # brief back-off before the next attempt
        print(f"Giving up on page {page} after {max_retries} attempts")
        return []

    def retrieve_cid(self, aid: int) -> int:
        """Resolve a video's ``aid`` to the ``cid`` of its first part.

        Raises:
            ValueError: when the pagelist endpoint returns no parts.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "User-Agent": self.user_agent,
            "Cookie": self.session_cookie,
        }
        response = requests.get(
            f"https://api.bilibili.com/x/player/pagelist?aid={aid}&bvid=",
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            data = response.json()
            if data and 'data' in data and len(data['data']) > 0:
                # A multi-part video has several entries; we only need part 1.
                return data['data'][0]['cid']
        raise ValueError(f"No video found for aid {aid}.")

    def fetch_danmaku(self, cid: int) -> List[str]:
        """Download and extract the danmaku text for one video part.

        Args:
            cid: The part id returned by :meth:`retrieve_cid`.
                (Renamed from the misleading ``aid`` — callers pass a cid,
                which is what the ``oid`` query parameter expects.)

        Returns:
            Each comment's text, or [] on HTTP failure.
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Cookie": self.session_cookie,
            "User-Agent": self.user_agent,
        }
        response = requests.get(
            f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}',
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.encoding = 'utf-8'
        if response.status_code == 200:
            # The endpoint returns XML where each comment is a <d p="...">text</d>
            # element. The original pattern '(.+?)' matched every single
            # character of the response body instead of the comment text.
            return re.findall(r'<d p=".*?">(.*?)</d>', response.text)
        print(f"Failed to fetch danmaku for cid {cid}")
        return []


def fetch_bullet_screen(spider: BilibiliVideoSpider, aid: int) -> List[str]:
    """Fetch all danmaku for one video: resolve its cid, then download.

    Returns [] on any error so one bad video never aborts the whole run.
    """
    try:
        print(f"Fetching bullet screen for video with aid {aid}...")
        cid = spider.retrieve_cid(aid)
        return spider.fetch_danmaku(cid)
    except Exception as error:
        print(f"Error fetching data for aid {aid}: {error}")
        return []


def main():
    """Search Bilibili for a keyword, fetch danmaku for every hit in
    parallel, and write the combined comments to 弹幕.txt."""
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    session_cookie = "YOUR_COOKIE"  # Replace with your actual cookie
    spider = BilibiliVideoSpider(session_cookie, user_agent)
    keyword = "2024巴黎奥运会"
    results_per_page = 30
    total_pages = 10
    all_danmaku = []

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
        for page in range(1, total_pages + 1):
            print(f"Fetching search results for page {page}...")
            aids = spider.search_videos(keyword, page, results_per_page)
            # One task per video; the pool overlaps the network waits.
            for aid in aids:
                futures.append(executor.submit(fetch_bullet_screen, spider, aid))
        for future in as_completed(futures):
            all_danmaku.extend(future.result())

    print(f"Total bullet screens fetched: {len(all_danmaku)}")

    # Persist every comment, one per line.
    with open("弹幕.txt", mode='w', encoding="utf-8") as file:
        for danmaku in all_danmaku:
            file.write(danmaku + '\n')


if __name__ == "__main__":
    main()