"""Download thumbnail images from Baidu image search.

Fetches up to NUM_IMAGES thumbnails matching KEYWORD via Baidu's
``acjson`` endpoint and saves them as JPEG files under SAVE_DIR.
"""
import os
from pathlib import Path
from urllib.parse import quote

import requests

# Configuration
KEYWORD = "工程师"        # search keyword
SAVE_DIR = "/root/imgs4"  # output directory (must be writable)
NUM_IMAGES = 20           # target number of images
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    "Referer": "https://image.baidu.com/"
}


def download_images(keyword=KEYWORD, save_dir=SAVE_DIR, num_images=NUM_IMAGES):
    """Download up to *num_images* thumbnails for *keyword* into *save_dir*.

    Args:
        keyword: Search term (will be URL-encoded).
        save_dir: Destination directory; created if it does not exist.
        num_images: Stop after this many successful downloads.

    Files are named ``engineer_<N>.jpg``. Progress and failures are
    reported via ``print``; the function never raises for network errors.
    """
    # Create the output directory (including parents) if needed.
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Build the search URL with the keyword percent-encoded.
    url = (
        "https://image.baidu.com/search/acjson"
        f"?tn=resultjson_com&ipn=rj&word={quote(keyword)}"
    )

    downloaded = 0
    page = 0
    while downloaded < num_images:
        # Baidu pages results 30 at a time: pn = offset, rn = page size.
        params = {"pn": page * 30, "rn": 30}
        try:
            response = requests.get(url, headers=HEADERS, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except Exception as e:
            print(f"请求失败: {str(e)}")
            break

        # Keep only entries that actually carry a thumbnail URL.
        items = [item for item in data.get("data", []) if "thumbURL" in item]
        if not items:
            # BUG FIX: the original looped forever when a page returned no
            # usable results (downloaded never advanced). Treat an empty
            # page as end-of-results and stop.
            break

        for item in items:
            if downloaded >= num_images:
                break
            img_url = item["thumbURL"]
            try:
                img_resp = requests.get(img_url, headers=HEADERS, timeout=5)
                # BUG FIX: check the status before writing, so an HTTP
                # error page is not saved to disk as a ".jpg" file.
                img_resp.raise_for_status()
                with open(os.path.join(save_dir, f"engineer_{downloaded+1}.jpg"), "wb") as f:
                    f.write(img_resp.content)
                downloaded += 1
                print(f"已下载 {downloaded}/{num_images}")
            except Exception as e:
                # Best-effort: a single failed image should not abort the run.
                print(f"下载失败 {img_url}: {str(e)}")

        page += 1


if __name__ == "__main__":
    download_images()