"""Concurrent Bilibili danmaku (bullet-screen comment) scraper.

Searches Bilibili for a keyword, resolves each result video's cid,
downloads the danmaku XML, and writes every comment to a text file.
"""
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List
from urllib import parse

import requests

# Each danmaku in the XML served by api.bilibili.com/x/v1/dm/list.so is a
# <d p="...">comment text</d> element; group 1 captures the comment text.
_DANMAKU_RE = re.compile(r'<d p="[^"]*">(.*?)</d>')

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang a worker thread forever.
_REQUEST_TIMEOUT = 10

# Upper bound on search-API retries (the original retried forever, and
# busy-looped without sleeping when the API returned a non-zero code).
_MAX_SEARCH_RETRIES = 5


class BilibiliVideoSpider:
    """Fetches Bilibili search results and the danmaku of each video."""

    def __init__(self, session_cookie: str, user_agent: str):
        # Credentials/headers shared by every request this spider makes.
        self.session_cookie = session_cookie
        self.user_agent = user_agent

    def search_videos(self, keyword: str, page: int, page_size: int) -> list:
        """Return the aids (video ids) of one page of search results.

        Retries with a 1-second pause on errors or non-zero API codes,
        at most _MAX_SEARCH_RETRIES times; returns [] if all attempts fail.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Cache-Control": "no-cache",
            "Cookie": self.session_cookie,
            "Origin": "https://search.bilibili.com",
            "Pragma": "no-cache",
            "Referer": f"https://search.bilibili.com/all?keyword={parse.quote(keyword)}",
            "User-Agent": self.user_agent,
        }

        params = {
            "search_type": "video",
            "page": page,
            "page_size": page_size,
            "keyword": keyword,
        }

        for _ in range(_MAX_SEARCH_RETRIES):
            try:
                response = requests.get(
                    "https://api.bilibili.com/x/web-interface/search/type",
                    headers=headers,
                    params=params,
                    timeout=_REQUEST_TIMEOUT,
                ).json()
                if response.get('code') == 0:
                    return [item['id'] for item in response['data'].get('result', [])]
                print(f"Search API returned code {response.get('code')}; retrying...")
            except Exception as error:
                print(f"Error fetching search results: {error}")
            time.sleep(1)  # pause before every retry, not only after exceptions
        return []

    def retrieve_cid(self, aid: int) -> int:
        """Return the cid of the first page of video `aid`.

        Raises ValueError when the API yields no page list for the aid.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "User-Agent": self.user_agent,
            "Cookie": self.session_cookie,
        }

        response = requests.get(
            f"https://api.bilibili.com/x/player/pagelist?aid={aid}&bvid=",
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            data = response.json()
            if data and 'data' in data and len(data['data']) > 0:
                return data['data'][0]['cid']
        raise ValueError(f"No video found for aid {aid}.")

    @staticmethod
    def parse_danmaku(xml_text: str) -> List[str]:
        """Extract the text of every <d ...>...</d> element from danmaku XML."""
        return _DANMAKU_RE.findall(xml_text)

    def fetch_danmaku(self, aid: int) -> List[str]:
        """Download and parse the danmaku list for one video.

        NOTE: despite the parameter name, `aid` is sent as the player `oid` —
        the existing caller passes the cid obtained from retrieve_cid().
        The name is kept for backward compatibility.
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
                      "image/avif,image/webp,image/apng,*/*;q=0.8",
            "Cookie": self.session_cookie,
            "User-Agent": self.user_agent
        }

        response = requests.get(
            f'https://api.bilibili.com/x/v1/dm/list.so?oid={aid}',
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        response.encoding = 'utf-8'
        if response.status_code == 200:
            # Fix: the original pattern '(.+?)' matched every single character
            # of the page; match the <d ...>text</d> elements instead.
            return self.parse_danmaku(response.text)
        print(f"Failed to fetch danmaku for aid {aid}")
        return []


def fetch_bullet_screen(spider: BilibiliVideoSpider, aid: int) -> List[str]:
    """Resolve the cid of video `aid` and return its danmaku ([] on error)."""
    try:
        print(f"Fetching bullet screen for video with aid {aid}...")
        cid = spider.retrieve_cid(aid)
        return spider.fetch_danmaku(cid)
    except Exception as error:
        # Best-effort per video: one failure must not abort the whole run.
        print(f"Error fetching data for aid {aid}: {error}")
        return []


def main():
    """Search for a keyword, scrape danmaku concurrently, save them to disk."""
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    session_cookie = "YOUR_COOKIE"  # Replace with your actual cookie

    spider = BilibiliVideoSpider(session_cookie, user_agent)
    keyword = "2024巴黎奥运会"
    results_per_page = 30
    total_pages = 10
    all_danmaku = []

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
        for page in range(1, total_pages + 1):
            print(f"Fetching search results for page {page}...")
            aids = spider.search_videos(keyword, page, results_per_page)
            for aid in aids:
                futures.append(executor.submit(fetch_bullet_screen, spider, aid))

        for future in as_completed(futures):
            all_danmaku.extend(future.result())

    print(f"Total bullet screens fetched: {len(all_danmaku)}")

    # Persist every danmaku, one per line, to "弹幕.txt".
    with open("弹幕.txt", mode='w', encoding="utf-8") as file:
        for danmaku in all_danmaku:
            file.write(danmaku + '\n')


if __name__ == "__main__":
    main()