""" 从B站搜索结果中提取视频的BV号,并将其保存到文件中 """ import re import requests from common_headers import HEADERS # 假设你有一个公共的header文件 def get_source(page_num): """获取B站搜索结果页的源码""" get_url = ( f'https://api.bilibili.com/x/web-interface' f'/wbi/search/type?__refresh__=true&_extra=&' f'context=&page={page_num}' '&page_size=42&from_source=&from_spmid=333.337&' 'platform=pc&highlight=1&single_column=0&' 'keyword=2024巴黎奥运会' '&qv_id=zaOudcC1LJI0GehR81nuNQEKktKQ2aP1&ad_resource=5654' '&source_tag=3&gaia_vtoken=&category_id=&search_type=video' ) response = requests.get(url=get_url, headers=HEADERS, timeout=10) return response.text def extract_bv(source_html): """从搜索结果的HTML源码中提取BV号""" return re.findall('"bvid":"(.*?)","title":".*?', source_html) def save_bv_to_file(bv_list): """将BV号保存到文件中""" with open('E:/Crawler/output/bv_numbers.txt', 'a', encoding='utf-8') as f: for bv in bv_list: f.write(bv + '\n') def main(): """主函数:循环获取多页BV号并保存""" counter = 0 for page in range(1, 9): html_source = get_source(page) bvs = extract_bv(html_source) save_bv_to_file(bvs) counter += len(bvs) if counter >= 300: break print("BV号收集完成") if __name__ == '__main__': main()