"""Scrape Bilibili danmaku (bullet comments) for a fixed search keyword.

Pipeline: build search-result page URLs -> extract video BV ids from each
page -> resolve each BV id to a cid through the pagelist API -> download the
danmaku XML for every cid -> write all comment texts to a text file.
"""
import requests
import re
from concurrent.futures import ThreadPoolExecutor, as_completed

# Running total of cids collected by get_cid(). Kept as a module-level global
# so existing code that reads `count` keeps working.
count = 0

# Search URL for the first result page; later pages append page/offset params.
SEARCH_URL = (
    "https://search.bilibili.com/all"
    "?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A"
    "&from_source=webtop_search&spm_id_from=333.1007&search_source=5"
)
# Value the `o` (offset) query parameter advances by for each further page.
RESULTS_PER_PAGE = 36
# Stop collecting cids once this many have been gathered.
MAX_CIDS = 300
# Number of worker threads used for concurrent downloads.
MAX_WORKERS = 5
# Seconds to wait for a response; without it a stalled connection would block
# a worker thread forever.
REQUEST_TIMEOUT = 10

# Patterns are compiled once instead of on every call.
BVID_PATTERN = re.compile(r'"//www.bilibili.com/video/(.*?)/"')
CID_PATTERN = re.compile(r'{"cid":(.*?),')
# Each danmaku entry in the comment XML is expected to look like
# <d p="...">text</d>; the capture group is the comment text.
DANMU_PATTERN = re.compile(r'<d p=".*?">(.*?)</d>')


def get_page_url(n):
    """Return the URLs of the first ``n`` search-result pages.

    The first page uses the bare search URL; page ``i`` (0-based, ``i > 0``)
    appends ``&page={i + 1}&o={i * 36}``.
    """
    page_url_list = []
    for i in range(n):
        if i == 0:
            page_url = SEARCH_URL
        else:
            page_url = f"{SEARCH_URL}&page={i + 1}&o={i * RESULTS_PER_PAGE}"
        page_url_list.append(page_url)
    return page_url_list


header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"}


def fetch_page(url):
    """Download ``url`` and return the body decoded as UTF-8.

    Best effort: any request failure (including a timeout) is printed and an
    empty string is returned, so callers can treat "" as "no data".
    """
    try:
        response = requests.get(url=url, headers=header, timeout=REQUEST_TIMEOUT)
        response.encoding = 'utf-8'
        return response.text
    except Exception as e:
        print(f"请求失败: {e}")
        return ""


def _first_cid(bvid):
    """Return the first cid listed for ``bvid`` by the pagelist API, or None."""
    url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
    cids = CID_PATTERN.findall(fetch_page(url))
    return cids[0] if cids else None


def get_cid(page_url_list):
    """Collect cids for the videos linked from the given search pages.

    Pages are fetched concurrently; for every distinct BV id found on a page
    the first cid reported by the pagelist API is recorded. Collection stops
    once the global ``count`` reaches ``MAX_CIDS``.

    Returns:
        list[str]: the collected cids, as the strings matched in the API
        response.
    """
    global count
    cid_list = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(fetch_page, url) for url in page_url_list]
        for future in as_completed(futures):
            try:
                data = future.result()
                # dict.fromkeys de-duplicates while keeping page order,
                # unlike set(), whose iteration order is arbitrary.
                for bvid in dict.fromkeys(BVID_PATTERN.findall(data)):
                    cid = _first_cid(bvid)
                    if cid is not None:
                        cid_list.append(cid)
                        count += 1
                        print(f"已获取到 {count} 个cid")
                    if count >= MAX_CIDS:
                        break
            except Exception as e:
                print(f"处理失败: {e}")
            if count >= MAX_CIDS:
                # Drop page fetches that have not started yet so leaving the
                # executor does not wait on downloads nobody will read.
                for pending in futures:
                    pending.cancel()
                break
    return cid_list


def get_danmu(cid_list):
    """Download the danmaku XML of every cid and return all comment texts.

    Downloads run concurrently, so the result follows completion order rather
    than the order of ``cid_list``. A failed download contributes nothing.

    Returns:
        list[str]: comment texts extracted from the ``<d>`` elements.
    """
    def fetch_danmu(cid):
        # fetch_page already reports request errors and returns "" on
        # failure, which yields an empty match list here.
        data = fetch_page(f"https://comment.bilibili.com/{cid}.xml")
        return DANMU_PATTERN.findall(data)

    danmu_list = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(fetch_danmu, cid) for cid in cid_list]
        for future in as_completed(futures):
            try:
                danmu_list.extend(future.result())
                print(f"已获取到 {len(danmu_list)} 条弹幕")
            except Exception as e:
                print(f"处理失败: {e}")
    return danmu_list


# Script body: scrape the first 10 search pages and dump the danmaku to disk.
cid_list = get_cid(get_page_url(10))
print("开始获取弹幕数据...")
danmu_list = get_danmu(cid_list)
print("弹幕数据爬取完成。")
with open('弹幕.txt', 'w', encoding='utf-8') as f:
    # One comment per line.
    f.writelines(f"{danmu}\n" for danmu in danmu_list)
print("弹幕已保存到 '弹幕.txt'")