nudt-compiler-cpp/scripts/verify_mem2reg.sh

#!/usr/bin/env bash
#
# Test harness for the compiler's IR output: for every .sy file under the
# test root it runs the compiler with --emit-ir, filters the IR out of the
# raw output, reports scalar allocas that are still present, and (with --run)
# builds the IR with llc/clang and compares the program's output and exit
# status with the neighbouring .out file.
#
# Environment overrides: CC, LLC, CLANG (see usage()).
set -euo pipefail

# Resolve the repository root (the parent of this script's directory).
# CDPATH is cleared for this one `cd` because, when it is set, `cd` may print
# the directory it switched to, which would end up inside ROOT_DIR together
# with the output of `pwd`. `--` protects against paths starting with '-'.
ROOT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"

# Fixed locations derived from the repository root.
COMPILER="$ROOT_DIR/build/bin/compiler"
DEFAULT_TEST_ROOT="$ROOT_DIR/test"
TMP_DIR="$ROOT_DIR/build/test_passes"

# External tools; each can be overridden through the environment.
CC_BIN="${CC:-cc}"
LLC_BIN="${LLC:-llc}"
CLANG_BIN="${CLANG:-clang}"

# Runtime library source and the object file built from it for --run.
RUNTIME_SRC="$ROOT_DIR/sylib/sylib.c"
RUNTIME_OBJ="$TMP_DIR/sylib.o"

# Option defaults; the command-line parser below may override them.
debug=false
run_exec=false
test_root="$DEFAULT_TEST_ROOT"
stop_on_fail=false
strict_mem2reg=false

# Print the help text (options and environment variables) to stdout.
# Callers that report a usage error redirect the output to stderr themselves.
# Globals: DEFAULT_TEST_ROOT (read).
usage() {
  local -a help_lines=(
    "用法: $0 [选项]"
    ""
    "选项:"
    "  --run                 生成 IR 后继续用 llc/clang 运行，并和同名 .out 对比"
    "  --debug               打印每个用例的命令与更多诊断信息"
    "  --test-root <dir>     指定测试根目录，默认: $DEFAULT_TEST_ROOT"
    "  --stop-on-fail        遇到第一个失败立即退出"
    "  --strict-mem2reg      将优化后残留标量 alloca 视为失败；默认只作为警告统计"
    "  -h, --help            显示帮助"
    ""
    "环境变量:"
    "  LLC=<path>            指定 llc，默认 llc"
    "  CLANG=<path>          指定 clang，默认 clang"
    "  CC=<path>             指定 C 编译器，用于编译 sylib.c，默认 cc"
  )
  printf '%s\n' "${help_lines[@]}"
}

# Command-line parsing: take one argument off the front per iteration and
# dispatch on it. Plain flags just flip their variable; --test-root also
# consumes the following argument as its value.
while (( $# > 0 )); do
  arg=$1
  shift
  case "$arg" in
    --run) run_exec=true ;;
    --debug) debug=true ;;
    --stop-on-fail) stop_on_fail=true ;;
    --strict-mem2reg) strict_mem2reg=true ;;
    --test-root)
      # The directory must follow the flag as its own argument.
      if (( $# == 0 )); then
        echo "--test-root 需要目录参数" >&2
        exit 1
      fi
      test_root=$1
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      # Anything unrecognised is an error; show the help on stderr.
      echo "未知参数: $arg" >&2
      usage >&2
      exit 1
      ;;
  esac
done

# --- Preconditions -----------------------------------------------------------
# The compiler binary must already be built and executable.
if [[ ! -x "$COMPILER" ]]; then
  echo "未找到编译器: $COMPILER" >&2
  echo "请先构建编译器，例如: cmake -S . -B build && cmake --build build -j" >&2
  exit 1
fi

# The test root (default or --test-root) must be an existing directory.
if [[ ! -d "$test_root" ]]; then
  echo "测试目录不存在: $test_root" >&2
  exit 1
fi

# All generated artifacts are written below TMP_DIR.
mkdir -p "$TMP_DIR"

# runtime_ready becomes 1 only when the runtime library object was built;
# the link step later checks it to decide whether to link RUNTIME_OBJ.
runtime_ready=0
if [[ "$run_exec" == true ]]; then
  # --run needs both llc and clang; fail early if either is missing.
  if ! command -v "$LLC_BIN" >/dev/null 2>&1; then
    echo "未找到 llc: $LLC_BIN" >&2
    exit 1
  fi
  if ! command -v "$CLANG_BIN" >/dev/null 2>&1; then
    echo "未找到 clang: $CLANG_BIN" >&2
    exit 1
  fi

  if [[ -f "$RUNTIME_SRC" ]]; then
    # Building the runtime is best-effort: a failure only downgrades to
    # linking without it. The compiler's diagnostics are kept in a log file
    # (instead of being discarded) so the warning below can be investigated.
    runtime_log="$TMP_DIR/sylib.build.log"
    if "$CC_BIN" -c "$RUNTIME_SRC" -o "$RUNTIME_OBJ" > "$runtime_log" 2>&1; then
      runtime_ready=1
    else
      echo "[WARN] 运行库编译失败，将只链接目标文件: $RUNTIME_SRC" >&2
      echo "[WARN] 编译输出见: $runtime_log" >&2
    fi
  else
    echo "[WARN] 未找到运行库源码，将只链接目标文件: $RUNTIME_SRC" >&2
  fi
fi

# Print a file with one trailing carriage return removed from each line, so
# CRLF and LF files compare equal.
# Arguments: $1 - path of the file to print.
# Outputs:   the normalized content on stdout.
normalize_file() {
  # $'...' puts a real CR byte into the expression: the "\r" escape inside a
  # sed regex is a GNU extension and other sed implementations treat it as
  # the letter "r". "--" keeps a path starting with '-' from being parsed as
  # an option.
  sed -e $'s/\r$//' -- "$1"
}

# Map a test file to the directory that holds its generated artifacts: the
# test's directory, expressed relative to the test root, re-rooted at TMP_DIR.
# Globals:   test_root (read), TMP_DIR (read).
# Arguments: $1 - path of the .sy test file.
# Outputs:   the artifact directory path on stdout (not created here).
make_case_out_dir() {
  local case_dir
  case_dir=$(dirname "$1")

  local rel_dir
  rel_dir=$(realpath --relative-to="$test_root" "$case_dir")

  printf '%s\n' "$TMP_DIR/$rel_dir"
}

# Copy the IR-looking lines of the compiler's raw stdout into a clean .ll file.
# Arguments: $1 - raw compiler output, $2 - destination .ll file.
# Always succeeds from the caller's point of view, even when nothing matches
# (the destination is then empty).
extract_ir() {
  local src=$1
  local dst=$2

  # In debug mode the compiler may write diagnostics to stdout as well, so
  # only lines that look like LLVM-style IR are kept: define/declare lines,
  # globals (@...), indented lines, closing braces, and bare "label:" lines.
  local -r ir_line_re='^(define |declare |@|[[:space:]]|})|^[A-Za-z_.$%][A-Za-z0-9_.$%]*:$'

  if ! grep -E "$ir_line_re" "$src" > "$dst"; then
    # No matching line is not an error here; callers validate the result.
    return 0
  fi
}

# Remember a failure message in the list for its stage and, when
# --stop-on-fail is active, terminate the whole script right away.
# Globals:   ir_failures / opt_failures / run_failures (appended),
#            stop_on_fail (read), TMP_DIR (read).
# Arguments: $1 - stage: ir, opt or run (any other value records nothing)
#            $2 - message to store.
record_failure() {
  local kind=$1
  local text=$2

  if [[ "$kind" == ir ]]; then
    ir_failures+=("$text")
  elif [[ "$kind" == opt ]]; then
    opt_failures+=("$text")
  elif [[ "$kind" == run ]]; then
    run_failures+=("$text")
  fi

  # Normal mode: keep going so that every failure gets collected.
  [[ "$stop_on_fail" == true ]] || return 0

  printf '\n%s\n' "遇到失败，按 --stop-on-fail 停止。失败文件保留在: $TMP_DIR"
  exit 1
}

# Remember a non-fatal warning message; only the "opt" stage keeps warnings,
# any other stage name is silently ignored.
# Globals:   opt_warnings (appended).
# Arguments: $1 - stage name, $2 - message to store.
record_warning() {
  local kind=$1
  local text=$2

  if [[ "$kind" == opt ]]; then
    opt_warnings+=("$text")
  fi
}

# List the lines of an IR file that allocate an i32, float or i1 with alloca.
# Arguments: $1 - path of the .ll file.
# Outputs:   matching lines prefixed with "<line number>:" on stdout; nothing
#            when there is no match.
# Returns:   always 0, so an empty result never aborts the caller.
check_scalar_mem2reg() {
  local ir_path=$1
  local -r scalar_alloca_re='=[[:space:]]*alloca[[:space:]]+(i32|float|i1)\b'

  if ! grep -nE "$scalar_alloca_re" "$ir_path"; then
    return 0
  fi
}

# Compare a program run with its expected-output file.
# The "actual" text is the captured output, a newline if the output is
# non-empty and does not already end with one, then the exit status on its
# own line. Both sides are passed through normalize_file before comparing.
# Globals:   debug (read); calls normalize_file and record_failure.
# Arguments: $1 - test file (used in the failure message)
#            $2 - expected-output file
#            $3 - captured output file (expected to end in .stdout)
#            $4 - exit status of the program
# Outputs:   a "[RUN]" status line, plus details on mismatch.
# Returns:   0 on match, 1 on mismatch (after recording a run failure).
compare_result() {
  local case_path=$1
  local want_path=$2
  local out_path=$3
  local exit_code=$4

  # <name>.stdout -> <name>.actual.out, kept on disk for inspection.
  local got_path="${out_path%.stdout}.actual.out"

  cat "$out_path" > "$got_path"
  # wc -l prints 0 when the last byte of the output is not a newline.
  if [[ -s "$out_path" && "$(tail -c 1 "$out_path" | wc -l)" -eq 0 ]]; then
    printf '\n' >> "$got_path"
  fi
  printf '%s\n' "$exit_code" >> "$got_path"

  local want
  local got
  want=$(normalize_file "$want_path")
  got=$(normalize_file "$got_path")

  if [[ "$want" != "$got" ]]; then
    echo "  [RUN] FAIL: 输出或退出码不匹配"
    echo "        expected: $want_path"
    echo "        actual:   $got_path"
    if [[ "$debug" == true ]]; then
      # diff exits non-zero when the inputs differ; that is expected here.
      diff -u <(printf '%s\n' "$want") <(printf '%s\n' "$got") || true
    fi
    record_failure run "$case_path: output mismatch"
    return 1
  fi

  echo "  [RUN] OK"
  return 0
}

# Gather every .sy file below the test root, sorted, one array entry per line
# of find's output.
mapfile -t test_files < <(find "$test_root" -type f -name '*.sy' | sort)

# Nothing to do without test cases.
if (( ${#test_files[@]} == 0 )); then
  echo "未在目录中找到 .sy 测试: $test_root" >&2
  exit 1
fi

# Per-stage counters: cases attempted / cases passed.
ir_total=0 ir_pass=0
opt_total=0 opt_pass=0
run_total=0 run_pass=0

# Messages collected while running, printed in the final summary.
ir_failures=()
opt_failures=()
opt_warnings=()
run_failures=()

# Banner describing this run.
printf '%s\n' \
  "测试根目录: $test_root" \
  "输出目录:   $TMP_DIR" \
  "测试数量:   ${#test_files[@]}"
case "$run_exec" in
  true) echo "运行验证:   开启" ;;
  *) echo "运行验证:   关闭（加 --run 可开启语义对拍）" ;;
esac
echo ""

# Main loop: run every discovered test case through up to three stages.
#   IR  - the compiler must exit 0 and the filtered output must contain at
#         least one "define" line.
#   OPT - scalar allocas left in the IR are a warning, or a failure with
#         --strict-mem2reg.
#   RUN - (only with --run and an existing .out file) llc + clang build the
#         program, it is executed, and compare_result checks its output.
# A failing stage records the failure and skips the rest of that case.
for input in "${test_files[@]}"; do
  ir_total=$((ir_total + 1))
  opt_total=$((opt_total + 1))

  out_dir=$(make_case_out_dir "$input")
  mkdir -p "$out_dir"

  # Artifact paths for this case, all named after the test's stem.
  base=$(basename "$input")
  stem=${base%.sy}
  raw_ir="$out_dir/$stem.raw.ll"
  ll_file="$out_dir/$stem.ll"
  log_file="$out_dir/$stem.compiler.log"
  stdout_file="$out_dir/$stem.stdout"
  obj_file="$out_dir/$stem.o"
  exe_file="$out_dir/$stem"
  # Optional stdin and expected output live next to the test source.
  input_dir=$(dirname "$input")
  stdin_file="$input_dir/$stem.in"
  expected_file="$input_dir/$stem.out"

  # ROOT_DIR is quoted inside the expansion so it is stripped as a literal
  # prefix; unquoted it would be interpreted as a glob pattern.
  echo "[TEST] ${input#"$ROOT_DIR"/}"
  if [[ "$debug" == true ]]; then
    echo "  [CMD] $COMPILER --emit-ir $input"
  fi

  # --- IR stage ---
  # Capture the status without tripping `set -e`; stdout is the raw IR,
  # stderr goes to the per-case compiler log.
  compiler_status=0
  "$COMPILER" --emit-ir "$input" > "$raw_ir" 2> "$log_file" || compiler_status=$?
  extract_ir "$raw_ir" "$ll_file"

  if [[ $compiler_status -ne 0 ]]; then
    echo "  [IR] FAIL: 编译器返回 $compiler_status"
    record_failure ir "$input: compiler failed ($compiler_status)"
    continue
  fi

  if ! grep -qE '^define ' "$ll_file"; then
    echo "  [IR] FAIL: 未提取到有效函数定义"
    record_failure ir "$input: invalid IR"
    continue
  fi

  ir_pass=$((ir_pass + 1))
  echo "  [IR] OK"

  # --- OPT stage ---
  scalar_allocas=$(check_scalar_mem2reg "$ll_file")
  if [[ -n "$scalar_allocas" ]]; then
    if [[ "$strict_mem2reg" == true ]]; then
      echo "  [OPT] FAIL: 优化后仍有可提升标量 alloca"
    else
      echo "  [OPT] WARN: 优化后仍有标量 alloca 残留"
    fi
    if [[ "$debug" == true ]]; then
      printf '%s\n' "$scalar_allocas" | sed 's/^/        /'
    fi
    # Recording comes last: with --stop-on-fail it exits, and the details
    # above should already be on screen by then.
    if [[ "$strict_mem2reg" == true ]]; then
      record_failure opt "$input: scalar alloca remains"
    else
      opt_pass=$((opt_pass + 1))
      record_warning opt "$input: scalar alloca remains"
    fi
  else
    opt_pass=$((opt_pass + 1))
    echo "  [OPT] OK: 未发现标量 alloca 残留"
  fi

  # --- RUN stage ---
  if [[ "$run_exec" != true ]]; then
    continue
  fi

  if [[ ! -f "$expected_file" ]]; then
    echo "  [RUN] SKIP: 未找到期望输出 $expected_file"
    continue
  fi
  run_total=$((run_total + 1))

  # Toolchain output is collected in the .stdout file; a successful build
  # overwrites it with the program's own output further down.
  if ! "$LLC_BIN" -filetype=obj "$ll_file" -o "$obj_file" > "$stdout_file" 2>&1; then
    echo "  [RUN] FAIL: llc 生成对象文件失败"
    record_failure run "$input: llc failed"
    continue
  fi

  # Link the case object, plus the runtime object when it was built.
  link_inputs=("$obj_file")
  if [[ $runtime_ready -eq 1 ]]; then
    link_inputs+=("$RUNTIME_OBJ")
  fi
  if ! "$CLANG_BIN" "${link_inputs[@]}" -o "$exe_file" >> "$stdout_file" 2>&1; then
    echo "  [RUN] FAIL: clang 链接失败"
    record_failure run "$input: clang link failed"
    continue
  fi

  # Cases without a .in file read from /dev/null rather than inheriting the
  # harness's stdin, so a program that reads input sees EOF immediately
  # instead of blocking on a terminal or consuming piped data.
  run_input=/dev/null
  if [[ -f "$stdin_file" ]]; then
    run_input=$stdin_file
  fi

  # NOTE(review): stderr is merged into the text that gets compared with the
  # .out file; confirm the runtime library never writes to stderr.
  run_status=0
  "$exe_file" < "$run_input" > "$stdout_file" 2>&1 || run_status=$?

  if compare_result "$input" "$expected_file" "$stdout_file" "$run_status"; then
    run_pass=$((run_pass + 1))
  fi
done

# Final report: pass counts per stage, then each non-empty message list, and
# finally the exit status (1 if any failure was recorded, 0 otherwise).
printf '\n%s\n' "测试完成。"
printf '%s\n' \
  "IR 生成:       $ir_pass / $ir_total" \
  "Pass 优化检查: $opt_pass / $opt_total"
if [[ "$run_exec" == true ]]; then
  printf '%s\n' "运行结果:      $run_pass / $run_total"
fi

# Each list is expanded only after checking that it is non-empty.
if (( ${#ir_failures[@]} > 0 )); then
  printf '\n%s\n' "IR 失败列表:"
  printf '  %s\n' "${ir_failures[@]}"
fi

if (( ${#opt_failures[@]} > 0 )); then
  printf '\n%s\n' "优化检查失败列表:"
  printf '  %s\n' "${opt_failures[@]}"
fi

# Warnings are informational and do not affect the exit status.
if (( ${#opt_warnings[@]} > 0 )); then
  printf '\n%s\n' "优化警告列表（默认不算失败；加 --strict-mem2reg 可升级为失败）:"
  printf '  %s\n' "${opt_warnings[@]}"
fi

if (( ${#run_failures[@]} > 0 )); then
  printf '\n%s\n' "运行失败列表:"
  printf '  %s\n' "${run_failures[@]}"
fi

if (( ${#ir_failures[@]} + ${#opt_failures[@]} + ${#run_failures[@]} > 0 )); then
  printf '\n%s\n' "失败产物已保留在: $TMP_DIR"
  exit 1
fi

printf '\n%s\n' "全部检查通过。"