# BaiduSearch/main.py

import requests
import os
from urllib.parse import quote
from pathlib import Path

# Configuration
KEYWORD = "工程师"  # search keyword
SAVE_DIR = "/root/imgs4"  # output directory (write permission is required)
NUM_IMAGES = 20  # number of images to download
# Request headers sent with every HTTP call: a desktop-browser User-Agent
# and a Referer pointing at the Baidu image site.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.0.0 Safari/537.36"
    ),
    "Referer": "https://image.baidu.com/",
}

def _save_image(session, img_url, dest_path):
    """Fetch ``img_url`` through ``session`` and write the body to ``dest_path``.

    Args:
        session: ``requests.Session`` used to perform the GET request.
        img_url: URL of the image to download.
        dest_path: ``pathlib.Path`` of the file to create or overwrite.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status.
        OSError: if the destination file cannot be written.
    """
    response = session.get(img_url, timeout=5)
    # Without this check an HTTP error page would be saved as a ".jpg" file.
    response.raise_for_status()
    dest_path.write_bytes(response.content)


def download_images():
    """Download up to ``NUM_IMAGES`` thumbnails for ``KEYWORD`` into ``SAVE_DIR``.

    Queries the Baidu image-search ``acjson`` endpoint page by page, reads
    the ``thumbURL`` field of each entry in the response's ``data`` list and
    saves each image as ``engineer_<n>.jpg`` (numbered from 1) in
    ``SAVE_DIR``, which is created if it does not exist.

    The function stops when ``NUM_IMAGES`` files have been written, when a
    search request fails or returns invalid JSON, or when a whole page
    produces no new image (so the loop always terminates). Individual
    image failures are reported and skipped.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    # Create the output directory.
    save_dir = Path(SAVE_DIR)
    save_dir.mkdir(parents=True, exist_ok=True)

    # Build the request URL (the keyword is percent-encoded).
    encoded_keyword = quote(KEYWORD)
    url = f"https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&word={encoded_keyword}"

    page_size = 30  # the search endpoint is queried 30 entries at a time
    downloaded = 0
    page = 0

    # One session reuses the underlying connection for all requests.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        while downloaded < NUM_IMAGES:
            params = {"pn": page * page_size, "rn": page_size}
            try:
                response = session.get(url, params=params, timeout=10)
                response.raise_for_status()
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                # ValueError covers a body that is not valid JSON.
                print(f"请求失败: {str(e)}")
                break

            # Tolerate a missing/null "data" field or an unexpected payload.
            items = data.get("data") if isinstance(data, dict) else None
            if not isinstance(items, list):
                items = []

            downloaded_before = downloaded
            for item in items:
                if downloaded >= NUM_IMAGES:
                    break
                # Skip entries that are not dicts or carry no thumbnail URL.
                img_url = item.get("thumbURL") if isinstance(item, dict) else None
                if not img_url:
                    continue
                dest_path = save_dir / f"engineer_{downloaded + 1}.jpg"
                try:
                    _save_image(session, img_url, dest_path)
                except (requests.RequestException, OSError) as e:
                    print(f"下载失败 {img_url}: {str(e)}")
                    continue
                downloaded += 1
                print(f"已下载 {downloaded}/{NUM_IMAGES}")

            # A page that added nothing means results ran out or every
            # download failed; stop instead of requesting pages forever.
            if downloaded == downloaded_before:
                print(f"未获取到新的图片，停止下载（已下载 {downloaded}/{NUM_IMAGES}）")
                break
            page += 1

# Script entry point: run the downloader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download_images()