You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

106 lines
3.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
# fetch.sh — WebFetch 的本地替代。DeepSeek 后端下 WebFetch 域名安全校验失败,
# 且 hooks 无法拦截(校验发生在 PreToolUse 之前),故创建此脚本作为完整替代。
#
# 用法: bash scripts/fetch.sh <url> [--raw]
# --raw 输出原始 HTML/响应,不处理
#
# 输出:
# - JSON → pretty-print(python3 -m json.tool)
# - HTML → 基本标签剥离,输出纯文本
# - 其他/text → 原样输出
# - 非文本 → 打印 Content-Type + 大小,不输出二进制
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

#######################################
# Print the command-line synopsis and terminate the script.
# Outputs: two usage lines on stdout
# Exits:   always, with status 1
#######################################
usage() {
  printf '%s\n' \
    "用法: bash scripts/fetch.sh <url> [--raw]" \
    " --raw 输出原始响应,不做格式处理"
  exit 1
}
# ---- Argument handling ------------------------------------------------------
# $1 is the URL (required); an optional second argument "--raw" disables all
# post-processing of the response body.
[ $# -ge 1 ] || usage
URL="$1"
RAW=false
if [ "${2:-}" = "--raw" ]; then
  RAW=true
fi

# The response body is stored in a temp file that is removed on every exit path.
TMPFILE=$(mktemp)
trap 'rm -f "$TMPFILE"' EXIT

# ---- Download ---------------------------------------------------------------
# With -o the body goes to TMPFILE, so curl's stdout carries only the
# --write-out string. Status and Content-Type are emitted on ONE line
# ("<code> <content-type>"): command substitution strips trailing newlines, so
# a newline-separated format shifts the fields whenever Content-Type is empty.
# curl's stderr is left visible so that -S can report why a transfer failed,
# and "--" keeps a URL that starts with "-" from being parsed as an option.
CURL_RC=0
HTTP_INFO=$(curl -sSL -w '%{http_code} %{content_type}' -o "$TMPFILE" \
  -H 'User-Agent: Mozilla/5.0 (compatible; fetch.sh/1.0)' \
  --max-time 30 \
  -- "$URL") || CURL_RC=$?

HTTP_CODE=${HTTP_INFO%% *}
HTTP_CODE=${HTTP_CODE:-000}
SERVER_CT=${HTTP_INFO#* }
# Keep only the bare media type: drop parameters such as charset, remove
# padding, and lowercase it (media types are case-insensitive).
SERVER_CT=$(printf '%s' "${SERVER_CT%%;*}" | tr -d '[:space:]' | tr '[:upper:]' '[:lower:]')

# A transport-level failure (DNS, TLS, timeout, ...) must not be presented as
# if it were a successfully fetched document.
if [ "$CURL_RC" -ne 0 ]; then
  echo "[fetch.sh] 下载失败 (curl exit ${CURL_RC}, HTTP ${HTTP_CODE}): $URL" >&2
  exit 1
fi

# HTTP error statuses still have a body worth showing; only warn on stderr.
case "$HTTP_CODE" in
  [45][0-9][0-9])
    echo "[fetch.sh] HTTP ${HTTP_CODE}: $URL" >&2
    ;;
esac

# --raw: emit the body untouched and stop.
if [ "$RAW" = true ]; then
  cat "$TMPFILE"
  exit 0
fi
# 确定内容类型:优先服务器 Content-Type,其次 file 命令
#######################################
# Classify the downloaded response body.
# Checks, in order: JSON declared by the server, JSON sniffed by `file`,
# a JSON parse attempt (catches JSON served as text/plain), HTML declared by
# the server, then HTML / generic text sniffed by `file`.
# Globals:
#   SERVER_CT (read) - media type reported by the server; may be empty
#   TMPFILE   (read) - path of the downloaded body
# Outputs:
#   Exactly one of "json", "html", "text" or "binary" on stdout.
#######################################
detect_content_type() {
  local declared sniffed

  # Media types are case-insensitive and may arrive with parameters or
  # padding ("Text/HTML ; charset=utf-8"); normalise before matching.
  declared=${SERVER_CT:-}
  declared=$(printf '%s' "${declared%%;*}" | tr -d '[:space:]' | tr '[:upper:]' '[:lower:]')

  case "$declared" in
    application/json | text/json | application/*+json)
      echo "json"
      return
      ;;
  esac

  # Best effort: if `file` is missing or fails, sniffing yields nothing.
  sniffed=$(file -b --mime-type "$TMPFILE" 2>/dev/null) || sniffed=""
  case "$sniffed" in
    application/json | text/json | application/*+json)
      echo "json"
      return
      ;;
  esac

  # Try an actual parse. The path is passed as an argument rather than spliced
  # into the Python source, and the file is opened in binary mode so decoding
  # does not depend on the locale.
  if python3 -c 'import json, sys; json.load(open(sys.argv[1], "rb"))' \
    "$TMPFILE" 2>/dev/null; then
    echo "json"
    return
  fi

  case "$declared" in
    text/html | application/xhtml*)
      echo "html"
      return
      ;;
  esac

  case "$sniffed" in
    text/html | application/xhtml*)
      echo "html"
      return
      ;;
    text/*)
      echo "text"
      return
      ;;
  esac

  echo "binary"
}
#######################################
# Reduce an HTML document to rough plain text.
# Steps: drop <script>/<style> elements (including ones that span several
# lines or contain "<"), strip the remaining tags, decode a handful of common
# entities, delete blank lines, and keep at most the first 500 lines.
# Arguments:
#   $1 - path of the HTML file
# Outputs:
#   Plain text on stdout.
#######################################
html_to_text() {
  # The awk stage carries state across lines: once an opening <script or
  # <style is seen, everything up to the matching closing tag is discarded.
  awk '
    {
      rest = $0
      kept = ""
      while (rest != "") {
        lower = tolower(rest)
        if (closing != "") {
          at = index(lower, closing)
          if (at == 0) {
            rest = ""
          } else {
            rest = substr(rest, at + length(closing))
            gt = index(rest, ">")
            rest = (gt > 0) ? substr(rest, gt + 1) : ""
            closing = ""
          }
          continue
        }
        if (match(lower, "<(script|style)([ \t/>]|$)")) {
          kept = kept substr(rest, 1, RSTART - 1)
          closing = (substr(lower, RSTART + 2, 1) == "c") ? "</script" : "</style"
          rest = substr(rest, RSTART + 1)
        } else {
          kept = kept rest
          rest = ""
        }
      }
      print kept
    }
  ' "$1" \
    | sed -E \
      -e 's/<[^>]+>//g' \
      -e 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&nbsp;/ /g' \
      -e "s/&#39;/'/g" \
      -e 's/&amp;/\&/g' \
      -e '/^[[:space:]]*$/d' \
    | awk 'NR <= 500'
  # Notes on the pipeline above:
  # - &amp; is decoded last so that "&amp;lt;" becomes "&lt;", not "<".
  # - Blank lines are removed after decoding so lines holding only &nbsp; go too.
  # - The 500-line cap reads its whole input instead of closing the pipe early;
  #   an early close kills the upstream stage with SIGPIPE, which pipefail
  #   would turn into a failing exit status for long pages.
}

# ---- Render the body according to its detected type -------------------------
CT=$(detect_content_type)
case "$CT" in
  json)
    # Pretty-print; if python3 is unavailable or the parse fails, show the
    # body unchanged.
    python3 -m json.tool "$TMPFILE" 2>/dev/null || cat "$TMPFILE"
    ;;
  html)
    html_to_text "$TMPFILE"
    ;;
  text)
    cat "$TMPFILE"
    ;;
  *)
    # Anything else is described rather than dumped.
    FILE_SIZE=$(wc -c < "$TMPFILE")
    FILE_SIZE=${FILE_SIZE//[[:space:]]/} # some wc implementations pad the count
    echo "[fetch.sh] 非文本响应 (server:${SERVER_CT:-none}, ${FILE_SIZE} bytes)"
    echo "[fetch.sh] 使用 'bash scripts/fetch.sh \"$URL\" --raw' 查看原始内容"
    echo "[fetch.sh] 或使用 Chrome browser 工具 (navigate) 渲染页面"
    ;;
esac