# Baidu image search batch downloader: fetches thumbnail images for a
# configured keyword and saves them to a local directory.
import os
from pathlib import Path
from urllib.parse import quote

import requests
# Configuration parameters
KEYWORD = "工程师"  # search keyword ("engineer")
SAVE_DIR = "/root/imgs4"  # save directory (write permission required)
NUM_IMAGES = 20  # target number of images to download
# Desktop-browser User-Agent plus a Baidu referer so the image endpoint
# serves results instead of rejecting the request as a bot.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    "Referer": "https://image.baidu.com/"
}
def download_images():
    """Download up to NUM_IMAGES thumbnails for KEYWORD from Baidu image search.

    Pages through Baidu's ``acjson`` endpoint (30 results per page) and writes
    each thumbnail to SAVE_DIR as ``engineer_<n>.jpg``.  Stops when the target
    count is reached, a page yields no usable results, or a search request
    fails.  Individual image failures are logged and skipped.

    Returns:
        None.  Progress and errors are reported via ``print``.
    """
    # Create the destination directory (including parents) if needed.
    Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)

    # Build the search URL with the keyword percent-encoded.
    encoded_keyword = quote(KEYWORD)
    url = f"https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&word={encoded_keyword}"

    downloaded = 0
    page = 0
    while downloaded < NUM_IMAGES:
        params = {
            "pn": page * 30,  # result offset; Baidu serves 30 items per page
            "rn": 30
        }
        try:
            response = requests.get(url, headers=HEADERS, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()

            items = data.get("data", [])
            # Baidu pads the list with empty/None entries; keep only real hits.
            # (A non-dict entry would make `"thumbURL" in item` raise TypeError.)
            usable = [item for item in items if isinstance(item, dict) and "thumbURL" in item]

            # FIX: previously an exhausted/empty result page left `downloaded`
            # unchanged while `page` kept incrementing — an infinite loop.
            if not usable:
                print("没有更多结果，停止抓取")
                break

            for item in usable:
                if downloaded >= NUM_IMAGES:
                    break
                img_url = item["thumbURL"]
                try:
                    img_response = requests.get(img_url, headers=HEADERS, timeout=5)
                    # FIX: without this check a 4xx/5xx error body was
                    # silently written out as a .jpg file.
                    img_response.raise_for_status()
                    with open(os.path.join(SAVE_DIR, f"engineer_{downloaded+1}.jpg"), "wb") as f:
                        f.write(img_response.content)
                    downloaded += 1
                    print(f"已下载 {downloaded}/{NUM_IMAGES}")
                except Exception as e:
                    # Best-effort: log and move on to the next image.
                    print(f"下载失败 {img_url}: {str(e)}")
            page += 1
        except Exception as e:
            # A failed search request ends the whole run.
            print(f"请求失败: {str(e)}")
            break
# Run the downloader only when executed as a script (not on import).
if __name__ == "__main__":
    download_images()