parent
f0c27cfd62
commit
f5a6df80e8
@ -1,110 +0,0 @@
|
|||||||
import time
|
|
||||||
from typing import List
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
from urllib import parse
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
||||||
|
|
||||||
class BilibiliVideoSpider:
    """Scrape Bilibili search results and danmaku (bullet-screen) comments.

    Every request is sent with the caller-supplied session cookie and
    user agent so it looks like a normal authenticated browser session.
    """

    # Abort a single HTTP request after this many seconds so a stalled
    # connection cannot hang the spider forever (original code had no timeout).
    REQUEST_TIMEOUT = 10

    def __init__(self, session_cookie: str, user_agent: str):
        """Store the credentials used on every subsequent request.

        :param session_cookie: raw ``Cookie`` header value of an active session.
        :param user_agent: ``User-Agent`` header value to send.
        """
        self.session_cookie = session_cookie
        self.user_agent = user_agent

    def search_videos(self, keyword: str, page: int, page_size: int) -> list:
        """Return the video ids (aids) for one page of search results.

        Retries indefinitely until the API answers with ``code == 0``.
        Unlike the original, a non-zero API code now also pauses one second
        before retrying instead of busy-spinning against the endpoint.

        :param keyword: search phrase.
        :param page: 1-based result page number.
        :param page_size: number of results per page.
        :return: list of aid integers (may be empty).
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Cache-Control": "no-cache",
            "Cookie": self.session_cookie,
            "Origin": "https://search.bilibili.com",
            "Pragma": "no-cache",
            "Referer": f"https://search.bilibili.com/all?keyword={parse.quote(keyword)}",
            "User-Agent": self.user_agent,
        }

        params = {
            "search_type": "video",
            "page": page,
            "page_size": page_size,
            "keyword": keyword,
        }

        while True:
            try:
                response = requests.get(
                    "https://api.bilibili.com/x/web-interface/search/type",
                    headers=headers,
                    params=params,
                    timeout=self.REQUEST_TIMEOUT,
                ).json()
                if response.get('code') == 0:
                    return [item['id'] for item in response['data'].get('result', [])]
                # Non-zero code (rate limiting, auth failure, ...): fall through
                # to the back-off below instead of retrying immediately.
                print(f"Search API returned code {response.get('code')}, retrying...")
            except Exception as error:
                print(f"Error fetching search results: {error}")
            time.sleep(1)  # back off before retrying in either failure case

    def retrieve_cid(self, aid: int) -> int:
        """Return the cid of the first page of the video identified by *aid*.

        :param aid: Bilibili video aid.
        :return: cid of the video's first page.
        :raises ValueError: if the request fails or the video has no pages.
        """
        headers = {
            "Accept": "application/json, text/plain, */*",
            "User-Agent": self.user_agent,
            "Cookie": self.session_cookie,
        }

        response = requests.get(
            f"https://api.bilibili.com/x/player/pagelist?aid={aid}&bvid=",
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            data = response.json()
            if data and 'data' in data and len(data['data']) > 0:
                return data['data'][0]['cid']
        raise ValueError(f"No video found for aid {aid}.")

    def fetch_danmaku(self, aid: int) -> List[str]:
        """Download and extract the danmaku text lines for one video.

        NOTE(review): despite the name, callers pass a **cid** here (see
        ``fetch_bullet_screen``, which feeds ``retrieve_cid``'s result in) —
        the danmaku endpoint's ``oid`` parameter is a cid. The parameter
        name is kept for backward compatibility.

        :param aid: the cid used as ``oid`` in the danmaku list endpoint.
        :return: list of danmaku comment strings; empty on HTTP failure.
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Cookie": self.session_cookie,
            "User-Agent": self.user_agent
        }

        response = requests.get(
            f'https://api.bilibili.com/x/v1/dm/list.so?oid={aid}',
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.encoding = 'utf-8'
        if response.status_code == 200:
            # Each comment is wrapped in <d p="...">text</d> in the XML payload.
            return re.findall('<d p=".*?">(.+?)</d>', response.text)
        else:
            print(f"Failed to fetch danmaku for aid {aid}")
            return []
|
|
||||||
|
|
||||||
def fetch_bullet_screen(spider: BilibiliVideoSpider, aid: int) -> List[str]:
    """Resolve *aid* to its cid and download that video's danmaku.

    Swallows every exception and returns an empty list instead, so one
    bad video cannot abort a batch of concurrent fetches.

    :param spider: configured spider instance to perform the requests.
    :param aid: Bilibili video aid to fetch comments for.
    :return: list of danmaku strings, or ``[]`` on any failure.
    """
    try:
        print(f"Fetching bullet screen for video with aid {aid}...")
        # Inline the cid lookup straight into the danmaku call.
        return spider.fetch_danmaku(spider.retrieve_cid(aid))
    except Exception as error:
        print(f"Error fetching data for aid {aid}: {error}")
        return []
|
|
||||||
|
|
||||||
def main():
    """Search Bilibili for a keyword and dump every fetched danmaku line to 弹幕.txt."""
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    cookie = "YOUR_COOKIE"  # Replace with your actual cookie

    crawler = BilibiliVideoSpider(cookie, ua)
    search_term = "2024巴黎奥运会"
    per_page = 30
    pages_to_fetch = 10
    collected = []

    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = []
        # Search pages are walked serially; danmaku downloads fan out on the pool.
        for page_no in range(1, pages_to_fetch + 1):
            print(f"Fetching search results for page {page_no}...")
            for vid in crawler.search_videos(search_term, page_no, per_page):
                pending.append(pool.submit(fetch_bullet_screen, crawler, vid))

        for done in as_completed(pending):
            collected.extend(done.result())

    print(f"Total bullet screens fetched: {len(collected)}")

    # Persist the danmaku to "弹幕.txt", one comment per line.
    with open("弹幕.txt", mode='w', encoding="utf-8") as out:
        for comment in collected:
            out.write(comment + '\n')


if __name__ == "__main__":
    main()
|
|
Loading…
Reference in new issue