# Image crawler entry point (added as main.py in commit d260b45).
"""Download Baidu image-search thumbnails for a keyword into a local folder."""
import os
from pathlib import Path
from urllib.parse import quote

import requests

# Configuration
KEYWORD = "工程师"  # search keyword ("engineer")
SAVE_DIR = "/root/imgs4"  # output directory (must be writable)
NUM_IMAGES = 20  # target number of images
PAGE_SIZE = 30  # results requested per page ("rn"); also the "pn" offset step
MAX_STALLED_PAGES = 3  # give up after this many consecutive pages with no saved image
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    "Referer": "https://image.baidu.com/",
}


def _save_image(session, img_url, dest):
    """Fetch *img_url* with *session* and write the response body to *dest*.

    Raises:
        requests.RequestException: on network errors or a non-2xx status.
        ValueError: if the server returned an empty body.
        OSError: if the file cannot be written.
    """
    response = session.get(img_url, timeout=5)
    # Without this check an HTML error page would be stored as a ".jpg".
    response.raise_for_status()
    if not response.content:
        raise ValueError("empty response body")
    with open(dest, "wb") as f:
        f.write(response.content)


def download_images(keyword=KEYWORD, save_dir=SAVE_DIR, num_images=NUM_IMAGES):
    """Download up to *num_images* thumbnails for *keyword* into *save_dir*.

    Result pages are requested one after another; every entry that carries a
    ``thumbURL`` is saved as ``engineer_<n>.jpg`` (numbered from 1 in download
    order). The crawl stops when the target count is reached, when a search
    request fails, when a page contains no usable entries, or after
    ``MAX_STALLED_PAGES`` consecutive pages from which nothing could be saved.

    Args:
        keyword: Search term; it is percent-encoded before being sent.
        save_dir: Directory to write into; created if it does not exist.
        num_images: Maximum number of images to save.

    Returns:
        The number of images actually saved.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # The keyword is encoded here; paging parameters are added per request.
    url = (
        "https://image.baidu.com/search/acjson"
        f"?tn=resultjson_com&ipn=rj&word={quote(keyword)}"
    )

    downloaded = 0
    page = 0
    stalled_pages = 0

    # One session reuses the connection and carries the headers everywhere.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        while downloaded < num_images:
            params = {"pn": page * PAGE_SIZE, "rn": PAGE_SIZE}
            try:
                response = session.get(url, params=params, timeout=10)
                response.raise_for_status()
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                # ValueError covers a body that is not valid JSON.
                print(f"请求失败: {str(e)}")
                break

            items = data.get("data") if isinstance(data, dict) else None
            if not isinstance(items, list):
                items = []
            # Skip entries that are not dicts or have no thumbnail URL.
            img_urls = [
                item["thumbURL"]
                for item in items
                if isinstance(item, dict) and item.get("thumbURL")
            ]
            if not img_urls:
                # Nothing usable on this page: treat it as the end of results
                # instead of paging forever.
                print(f"没有更多结果, 已下载 {downloaded}/{num_images}")
                break

            saved_this_page = 0
            for img_url in img_urls:
                if downloaded >= num_images:
                    break
                dest = os.path.join(save_dir, f"engineer_{downloaded+1}.jpg")
                try:
                    _save_image(session, img_url, dest)
                except (requests.RequestException, ValueError, OSError) as e:
                    print(f"下载失败 {img_url}: {str(e)}")
                    continue
                downloaded += 1
                saved_this_page += 1
                print(f"已下载 {downloaded}/{num_images}")

            # Bound the crawl when every download keeps failing.
            if saved_this_page:
                stalled_pages = 0
            else:
                stalled_pages += 1
                if stalled_pages >= MAX_STALLED_PAGES:
                    print(f"连续 {stalled_pages} 页下载失败, 停止")
                    break
            page += 1

    return downloaded


if __name__ == "__main__":
    download_images()