You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
nudt-compiler-cpp/scripts/verify_mem2reg.sh

378 lines
9.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
#
# Compiles every .sy test case to IR with the project compiler, reports scalar
# allocas that remain in the emitted IR, and can optionally build and run each
# case to compare its output with the matching .out file.
#
# Environment overrides: CC, LLC, CLANG (see usage()).
set -e -u -o pipefail

# Repository layout, resolved relative to this script's parent directory.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
COMPILER="${ROOT_DIR}/build/bin/compiler"
DEFAULT_TEST_ROOT="${ROOT_DIR}/test"
TMP_DIR="${ROOT_DIR}/build/test_passes"
RUNTIME_SRC="${ROOT_DIR}/sylib/sylib.c"
RUNTIME_OBJ="${TMP_DIR}/sylib.o"

# External tools; each one can be overridden through the environment.
CC_BIN=${CC:-cc}
LLC_BIN=${LLC:-llc}
CLANG_BIN=${CLANG:-clang}

# Option defaults; the command-line parser flips these.
test_root=$DEFAULT_TEST_ROOT
run_exec=false
debug=false
stop_on_fail=false
strict_mem2reg=false
# Print the help text to stdout.
# Globals:   DEFAULT_TEST_ROOT (read) - shown as the default of --test-root
# Arguments: none
# Outputs:   the option and environment-variable summary; callers redirect it
#            to stderr themselves when reporting a usage error.
usage() {
  # Unquoted delimiter on purpose: $0 and $DEFAULT_TEST_ROOT must expand.
  cat <<EOF
用法: $0 [选项]
选项:
--run 生成 IR 后继续用 llc/clang 运行,并和同名 .out 对比
--debug 打印每个用例的命令与更多诊断信息
--test-root <dir> 指定测试根目录,默认: $DEFAULT_TEST_ROOT
--stop-on-fail 遇到第一个失败立即退出
--strict-mem2reg 将优化后残留标量 alloca 视为失败;默认只作为警告统计
-h, --help 显示帮助
环境变量:
LLC=<path> 指定 llc,默认 llc
CLANG=<path> 指定 clang,默认 clang
CC=<path> 指定 C 编译器,用于编译 sylib.c,默认 cc
EOF
}
# Command-line parsing. Every arm leaves the option name in $1; the single
# shift after the case statement consumes it.
while (( $# > 0 )); do
  case "$1" in
    --run) run_exec=true ;;
    --debug) debug=true ;;
    --stop-on-fail) stop_on_fail=true ;;
    --strict-mem2reg) strict_mem2reg=true ;;
    --test-root)
      if (( $# < 2 )); then
        echo "--test-root 需要目录参数" >&2
        exit 1
      fi
      test_root="$2"
      # Drop the option name here; the common shift below drops its value.
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      echo "未知参数: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done
# Preflight: the compiler binary and the test directory are mandatory.
[[ -x "$COMPILER" ]] || {
  echo "未找到编译器: $COMPILER" >&2
  echo "请先构建编译器,例如: cmake -S . -B build && cmake --build build -j" >&2
  exit 1
}
[[ -d "$test_root" ]] || {
  echo "测试目录不存在: $test_root" >&2
  exit 1
}
mkdir -p "$TMP_DIR"

# runtime_ready becomes 1 only when the runtime library object was built; the
# link step later uses it to decide whether to add that object.
runtime_ready=0
if [[ "$run_exec" == true ]]; then
  # llc and clang are required only when the cases are actually executed.
  command -v "$LLC_BIN" >/dev/null 2>&1 || {
    echo "未找到 llc: $LLC_BIN" >&2
    exit 1
  }
  command -v "$CLANG_BIN" >/dev/null 2>&1 || {
    echo "未找到 clang: $CLANG_BIN" >&2
    exit 1
  }
  # A missing or non-compiling runtime is only a warning: linking proceeds
  # with the test object alone.
  if [[ ! -f "$RUNTIME_SRC" ]]; then
    echo "[WARN] 未找到运行库源码,将只链接目标文件: $RUNTIME_SRC" >&2
  elif "$CC_BIN" -c "$RUNTIME_SRC" -o "$RUNTIME_OBJ" >/dev/null 2>&1; then
    runtime_ready=1
  else
    echo "[WARN] 运行库编译失败,将只链接目标文件: $RUNTIME_SRC" >&2
  fi
fi
# Print a file with a trailing carriage return removed from every line, so
# CRLF and LF files compare equal.
# Arguments: $1 - path of the file to print
# Outputs:   the normalized text on stdout
normalize_file() {
  # "\r" inside a sed regex is a GNU extension; other seds read it as a
  # literal "r". ANSI-C quoting hands sed a real CR byte instead, which every
  # implementation matches literally.
  sed $'s/\r$//' "$1"
}
# Map a test source file to its output directory under TMP_DIR, mirroring the
# file's directory position below test_root.
# Globals:   test_root (read), TMP_DIR (read)
# Arguments: $1 - path of the test source file
# Outputs:   the output directory path on stdout
make_case_out_dir() {
  local src_path=$1
  local src_dir
  local rel_dir
  src_dir=$(dirname "$src_path")
  rel_dir=$(realpath --relative-to="$test_root" "$src_dir")
  echo "${TMP_DIR}/${rel_dir}"
}
# Filter a raw compiler dump down to the lines that look like LLVM IR.
# Arguments: $1 - file holding the compiler's raw stdout
#            $2 - destination file for the filtered IR
# Kept lines: those starting with "define ", "declare ", "@", whitespace or
#             "}", plus bare labels such as "entry:".
# Returns:   0 even when no line matches (the output file is then empty).
extract_ir() {
  local dump_path=$1
  local ir_path=$2
  # In debug mode the compiler may also write diagnostics to stdout, so only
  # LLVM-like IR lines are kept here.
  grep -E \
    -e '^(define |declare |@|[[:space:]]|})' \
    -e '^[A-Za-z_.$%][A-Za-z0-9_.$%]*:$' \
    "$dump_path" > "$ir_path" || :
}
# Append a failure message to the list of the given bucket and, when
# --stop-on-fail is active, terminate the script immediately.
# Globals:   ir_failures, opt_failures, run_failures (appended)
#            stop_on_fail, TMP_DIR (read)
# Arguments: $1 - bucket: ir | opt | run (any other value records nothing)
#            $2 - message to store
record_failure() {
  local kind=$1
  local text=$2
  if [[ "$kind" == ir ]]; then
    ir_failures+=("$text")
  elif [[ "$kind" == opt ]]; then
    opt_failures+=("$text")
  elif [[ "$kind" == run ]]; then
    run_failures+=("$text")
  fi

  [[ "$stop_on_fail" == true ]] || return 0
  echo ""
  echo "遇到失败,按 --stop-on-fail 停止。失败文件保留在: $TMP_DIR"
  exit 1
}
# Append a warning message to the list of the given bucket.
# Globals:   opt_warnings (appended)
# Arguments: $1 - bucket; only "opt" is recognised, anything else is ignored
#            $2 - message to store
record_warning() {
  local kind=$1
  local text=$2
  if [[ "$kind" == opt ]]; then
    opt_warnings+=("$text")
  fi
}
# List the scalar allocas (i32, float, i1) still present in an IR file.
# Arguments: $1 - path of the IR file
# Outputs:   matching lines prefixed with "<line number>:"; nothing when the
#            file has none
# Returns:   0 whether or not anything matched
check_scalar_mem2reg() {
  local ir_path=$1
  grep -n -E \
    -e '=[[:space:]]*alloca[[:space:]]+(i32|float|i1)\b' \
    "$ir_path" || :
}
# Compare a program's captured output and exit status with the expected file.
# Globals:   debug (read)
# Arguments: $1 - test source path (used in the failure record)
#            $2 - expected output file
#            $3 - captured stdout file; a ".stdout" suffix is replaced by
#                 ".actual.out" to name the file written here
#            $4 - exit status of the program
# Outputs:   an OK/FAIL report on stdout; a unified diff too in debug mode
# Returns:   0 on match, 1 on mismatch (also recorded through record_failure)
compare_result() {
  local input=$1
  local expected_file=$2
  local stdout_file=$3
  local status=$4
  local actual_file="${stdout_file%.stdout}.actual.out"
  local trailing_newlines
  local want
  local got

  # The actual file is the captured stdout, a newline if the capture did not
  # already end with one, then the exit status on a line of its own.
  cat "$stdout_file" > "$actual_file"
  if [[ -s "$stdout_file" ]]; then
    trailing_newlines=$(tail -c 1 "$stdout_file" | wc -l)
    if [[ "$trailing_newlines" -eq 0 ]]; then
      printf '\n' >> "$actual_file"
    fi
  fi
  printf '%s\n' "$status" >> "$actual_file"

  want=$(normalize_file "$expected_file")
  got=$(normalize_file "$actual_file")

  if [[ "$want" != "$got" ]]; then
    echo " [RUN] FAIL: 输出或退出码不匹配"
    echo " expected: $expected_file"
    echo " actual: $actual_file"
    if [[ "$debug" == true ]]; then
      # diff exits non-zero when the inputs differ; that is expected here.
      diff -u <(printf '%s\n' "$want") <(printf '%s\n' "$got") || true
    fi
    record_failure run "$input: output mismatch"
    return 1
  fi

  echo " [RUN] OK"
  return 0
}
# Collect every .sy file below the test root, in sorted order.
mapfile -t test_files < <(find "$test_root" -type f -name '*.sy' | sort)
(( ${#test_files[@]} > 0 )) || {
  echo "未在目录中找到 .sy 测试: $test_root" >&2
  exit 1
}

# Per-stage counters: cases attempted and cases passed.
ir_total=0 ir_pass=0
opt_total=0 opt_pass=0
run_total=0 run_pass=0

# Messages collected per stage and printed in the final summary.
ir_failures=()
opt_failures=()
opt_warnings=()
run_failures=()

# Banner describing this run.
echo "测试根目录: $test_root"
echo "输出目录: $TMP_DIR"
echo "测试数量: ${#test_files[@]}"
case "$run_exec" in
  true) echo "运行验证: 开启" ;;
  *) echo "运行验证: 关闭(加 --run 可开启语义对拍)" ;;
esac
echo
# Per-case pipeline. Each test goes through up to three stages:
#   IR  - run the compiler with --emit-ir and keep the IR-looking lines
#   OPT - look for scalar allocas left in the IR (a failure only with
#         --strict-mem2reg, otherwise a warning)
#   RUN - with --run and an expected .out file: llc -> clang link -> execute
#         -> compare stdout and exit status with the expected file
# A failing stage records the failure and moves on to the next case.
for input in "${test_files[@]}"; do
  ir_total=$((ir_total + 1))
  opt_total=$((opt_total + 1))

  # Artifacts live in a directory mirroring the case's place under test_root.
  out_dir=$(make_case_out_dir "$input")
  mkdir -p "$out_dir"
  base=$(basename "$input")
  stem=${base%.sy}
  raw_ir="$out_dir/$stem.raw.ll"
  ll_file="$out_dir/$stem.ll"
  log_file="$out_dir/$stem.compiler.log"
  stdout_file="$out_dir/$stem.stdout"
  stderr_file="$out_dir/$stem.stderr"
  obj_file="$out_dir/$stem.o"
  exe_file="$out_dir/$stem"

  # Optional stdin and expected output sit next to the source file.
  input_dir=$(dirname "$input")
  stdin_file="$input_dir/$stem.in"
  expected_file="$input_dir/$stem.out"

  # Quote the inner expansion so ROOT_DIR is stripped as a literal prefix and
  # never interpreted as a glob pattern.
  echo "[TEST] ${input#"$ROOT_DIR"/}"
  if [[ "$debug" == true ]]; then
    echo " [CMD] $COMPILER --emit-ir $input"
  fi

  # ---- IR stage ----
  # Capture the status by hand so a compiler failure does not trip `set -e`.
  compiler_status=0
  "$COMPILER" --emit-ir "$input" > "$raw_ir" 2> "$log_file" || compiler_status=$?
  extract_ir "$raw_ir" "$ll_file"
  if [[ $compiler_status -ne 0 ]]; then
    echo " [IR] FAIL: 编译器返回 $compiler_status"
    record_failure ir "$input: compiler failed ($compiler_status)"
    continue
  fi
  if ! grep -qE '^define ' "$ll_file"; then
    echo " [IR] FAIL: 未提取到有效函数定义"
    record_failure ir "$input: invalid IR"
    continue
  fi
  ir_pass=$((ir_pass + 1))
  echo " [IR] OK"

  # ---- OPT stage ----
  scalar_allocas=$(check_scalar_mem2reg "$ll_file")
  if [[ -n "$scalar_allocas" ]]; then
    if [[ "$strict_mem2reg" == true ]]; then
      echo " [OPT] FAIL: 优化后仍有可提升标量 alloca"
    else
      echo " [OPT] WARN: 优化后仍有标量 alloca 残留"
    fi
    # Print the details before recording: record_failure may exit the script
    # when --stop-on-fail is set.
    if [[ "$debug" == true ]]; then
      echo "$scalar_allocas" | sed 's/^/ /'
    fi
    if [[ "$strict_mem2reg" == true ]]; then
      record_failure opt "$input: scalar alloca remains"
    else
      opt_pass=$((opt_pass + 1))
      record_warning opt "$input: scalar alloca remains"
    fi
  else
    opt_pass=$((opt_pass + 1))
    echo " [OPT] OK: 未发现标量 alloca 残留"
  fi

  # ---- RUN stage ----
  if [[ "$run_exec" != true ]]; then
    continue
  fi
  if [[ ! -f "$expected_file" ]]; then
    echo " [RUN] SKIP: 未找到期望输出 $expected_file"
    continue
  fi
  run_total=$((run_total + 1))

  # Tool diagnostics are parked in the stdout file; a successful run
  # overwrites them with the program's own output further down.
  if ! "$LLC_BIN" -filetype=obj "$ll_file" -o "$obj_file" > "$stdout_file" 2>&1; then
    echo " [RUN] FAIL: llc 生成对象文件失败"
    record_failure run "$input: llc failed"
    continue
  fi

  # Link against the runtime object only when it was built successfully.
  link_inputs=("$obj_file")
  if [[ $runtime_ready -eq 1 ]]; then
    link_inputs+=("$RUNTIME_OBJ")
  fi
  if ! "$CLANG_BIN" "${link_inputs[@]}" -o "$exe_file" >> "$stdout_file" 2>&1; then
    echo " [RUN] FAIL: clang 链接失败"
    record_failure run "$input: clang link failed"
    continue
  fi

  # stderr goes to its own file so diagnostics written by the program or its
  # runtime never end up in the text compared with the expected output.
  run_status=0
  if [[ -f "$stdin_file" ]]; then
    "$exe_file" < "$stdin_file" > "$stdout_file" 2> "$stderr_file" || run_status=$?
  else
    "$exe_file" > "$stdout_file" 2> "$stderr_file" || run_status=$?
  fi

  if compare_result "$input" "$expected_file" "$stdout_file" "$run_status"; then
    run_pass=$((run_pass + 1))
  fi
done
# Final report: per-stage totals, then each non-empty message list.
echo
echo "测试完成。"
echo "IR 生成: $ir_pass / $ir_total"
echo "Pass 优化检查: $opt_pass / $opt_total"
if [[ "$run_exec" == true ]]; then
  echo "运行结果: $run_pass / $run_total"
fi

# Each list is expanded only after its length check, so an empty array is
# never expanded.
if (( ${#ir_failures[@]} > 0 )); then
  echo
  echo "IR 失败列表:"
  printf ' %s\n' "${ir_failures[@]}"
fi

if (( ${#opt_failures[@]} > 0 )); then
  echo
  echo "优化检查失败列表:"
  printf ' %s\n' "${opt_failures[@]}"
fi

if (( ${#opt_warnings[@]} > 0 )); then
  echo
  echo "优化警告列表(默认不算失败;加 --strict-mem2reg 可升级为失败):"
  printf ' %s\n' "${opt_warnings[@]}"
fi

if (( ${#run_failures[@]} > 0 )); then
  echo
  echo "运行失败列表:"
  printf ' %s\n' "${run_failures[@]}"
fi

# Warnings alone do not affect the exit status; any recorded failure does.
if (( ${#ir_failures[@]} + ${#opt_failures[@]} + ${#run_failures[@]} > 0 )); then
  echo
  echo "失败产物已保留在: $TMP_DIR"
  exit 1
fi

echo
echo "全部检查通过。"