#!/usr/bin/env bash
# run_baseline.sh — batch-compile the GCC -O2 baseline and save the assembly,
# program output and run time of every test case.
#
# All data is stored under output/baseline/ (per-case files live in a
# subdirectory that mirrors the case's path below test/):
#   gcc_timing.tsv  — case_key<TAB>gcc_elapsed_s (data source shared by all scripts)
#   <stem>.gcc.s    — GCC -O2 AArch64 assembly (compared by analyze_case.sh)
#   <stem>.gcc.out  — actual program output, stdout + exit code
#                     (compared by analyze_case.sh)
#
# Usage:
#   run_baseline.sh [--update] [test_dir|file ...]
#
#   --update   recompute every entry (by default, keys that already have a
#              row in gcc_timing.tsv are skipped)
#
# When no test directory/file is given, the sub-directories of
# test/test_case and test/class_test_case are scanned automatically.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
BASELINE_DIR="$REPO_ROOT/output/baseline"
TIMING_TSV="$BASELINE_DIR/gcc_timing.tsv"

# Colour escapes; always printed through printf's %b so they are interpreted.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

UPDATE=false
TEST_DIRS=()
TEST_FILES=()

# Any argument that is an existing regular file is a single test case;
# everything else is treated as a test directory.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --update)
            UPDATE=true
            ;;
        *)
            if [[ -f "$1" ]]; then
                TEST_FILES+=("$1")
            else
                TEST_DIRS+=("$1")
            fi
            ;;
    esac
    shift
done

# ---------- Tool check ----------
# Only tools this script really invokes are required: the cross compiler,
# the emulator, python3 (source pre-processing) and timeout (run limit).
for tool in aarch64-linux-gnu-gcc qemu-aarch64 python3 timeout; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        printf '%bERROR: required tool not found: %s%b\n' "$RED" "$tool" "$NC" >&2
        exit 1
    fi
done

mkdir -p "$BASELINE_DIR"

# Path of the pre-processed temporary C file of the case currently being
# handled (empty when there is none); removed on any exit path.
TMP_SY=""
cleanup() {
    if [[ -n "$TMP_SY" ]]; then
        rm -f -- "$TMP_SY"
    fi
}
trap cleanup EXIT

# Succeeds when the TSV already has a row whose first field is exactly $1.
# The TSV file is queried directly (no associative array, for compatibility).
# An exact field comparison is used so that a key which is a suffix of
# another key (test_case/... vs class_test_case/...) never matches it.
stem_is_cached() {
    local key="$1"
    [[ -f "$TIMING_TSV" ]] || return 1
    # The "" concatenations force a string (not numeric) comparison.
    awk -F'\t' -v s="$key" \
        '($1 "") == (s "") { found = 1; exit } END { exit !found }' \
        "$TIMING_TSV" 2>/dev/null
}

# Prints the elapsed time stored for key $1 (first matching row); prints
# nothing when there is no such row or the TSV cannot be read.
stem_cached_time() {
    local key="$1"
    awk -F'\t' -v s="$key" '($1 "") == (s "") { print $2; exit }' \
        "$TIMING_TSV" 2>/dev/null || true
}

# ---------- Test case discovery ----------
# Emits, NUL-delimited and sorted, every direct sub-directory of the default
# test roots that exist.
discover_default_test_dirs() {
    local roots=(
        "$REPO_ROOT/test/test_case"
        "$REPO_ROOT/test/class_test_case"
    )
    local root
    for root in "${roots[@]}"; do
        [[ -d "$root" ]] || continue
        find "$root" -mindepth 1 -maxdepth 1 -type d -print0
    done | sort -z
}

if [[ ${#TEST_DIRS[@]} -eq 0 && ${#TEST_FILES[@]} -eq 0 ]]; then
    while IFS= read -r -d '' d; do
        TEST_DIRS+=("$d")
    done < <(discover_default_test_dirs)
fi

# ---------- Timing helpers ----------
# Current time in nanoseconds since the epoch (relies on date's %N).
now_ns() { date +%s%N; }

# Formats a nanosecond count as seconds with 5 decimal places, e.g. "1.25000s".
format_duration_ns() {
    local ns="$1"
    printf '%d.%05ds' "$((ns / 1000000000))" "$(((ns % 1000000000) / 10000))"
}

# ---------- Processing of a single case ----------
PASS=0
SKIP=0
FAIL=0

# Compiles, runs and times one .sy file.
# Globals:   reads UPDATE, REPO_ROOT, BASELINE_DIR, TIMING_TSV, SYLIB_OBJ;
#            updates PASS / SKIP / FAIL and TMP_SY
# Arguments: $1 - path of the .sy source file
# Outputs:   one status line (SKIP / FAIL / TIMEOUT / DONE) on stdout
# Returns:   0 even when the case fails; failures are counted in FAIL
process_case() {
    local sy_file="$1"
    local base stem input_dir stdin_file
    base="$(basename "$sy_file")"
    stem="${base%.sy}"
    input_dir="$(dirname "$sy_file")"
    stdin_file="$input_dir/$stem.in"

    local rel
    rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")"

    # Path key: strip the test/ prefix and the .sy suffix, keep the full
    # directory structure.
    # e.g. test/class_test_case/h_functional/11_BST.sy
    #      → class_test_case/h_functional/11_BST
    local case_key
    case_key="${rel#test/}"
    case_key="${case_key%.sy}"

    local case_start_ns
    case_start_ns=$(now_ns)

    # Data already present and no forced update → skip.
    if [[ "$UPDATE" == false ]] && stem_is_cached "$case_key"; then
        printf '%b SKIP %s (cached: %ss)%b\n' \
            "$CYAN" "$rel" "$(stem_cached_time "$case_key")" "$NC"
        SKIP=$((SKIP + 1))
        return 0
    fi

    # The output directory mirrors the source path structure.
    local case_out_dir
    case_out_dir="$BASELINE_DIR/$(dirname "$case_key")"
    mkdir -p "$case_out_dir"

    local gcc_elf gcc_asm gcc_out gcc_err
    gcc_elf="$case_out_dir/$stem.gcc.elf"
    gcc_asm="$case_out_dir/$stem.gcc.s"
    gcc_out="$case_out_dir/$stem.gcc.out"
    gcc_err="$case_out_dir/$stem.gcc.err"

    # Pre-processing: turn "const int NAME = EXPR;" into
    # "#define NAME ((int)(EXPR))". Multiple declarators are handled too:
    #   const int A=1, B=2;  →  #define A ((int)(1))\n#define B ((int)(2))
    # Reason: a SysY const int is a compile-time constant, but in C mode it
    # cannot be used as a global array dimension, whereas a #define can.
    TMP_SY="$(mktemp /tmp/sysy_XXXXXX.c)"
    python3 - "$sy_file" "$TMP_SY" << 'PYEOF'
import re, sys
pat = re.compile(
    r'^(\s*)const\s+int\s+((?:[A-Za-z_]\w*\s*=\s*[^,;]+)(?:,\s*[A-Za-z_]\w*\s*=\s*[^,;]+)*)\s*;',
    re.MULTILINE
)
def replace(m):
    indent = m.group(1)
    decls = re.split(r',\s*(?=[A-Za-z_])', m.group(2))
    lines = []
    for d in decls:
        name, _, val = d.partition('=')
        lines.append(f'{indent}#define {name.strip()} ((int)({val.strip()}))')
    return '\n'.join(lines)
with open(sys.argv[1]) as f:
    src = f.read()
with open(sys.argv[2], 'w') as f:
    f.write(pat.sub(replace, src))
PYEOF

    # Step 1: compile and link (C mode, used for the timed run).
    #   -x c              : lets delete/new/class etc. be used as identifiers
    #   -include sylib.h  : force-injects the SysY runtime declarations
    #                       (.sy files have no #include)
    #   No name mangling, so it links directly against sylib.o, which is
    #   also compiled as C.
    if ! aarch64-linux-gnu-gcc -O2 \
            -x c -include "$REPO_ROOT/sylib/sylib.h" \
            -I "$REPO_ROOT/sylib" \
            "$TMP_SY" -x none "$SYLIB_OBJ" \
            -lm -o "$gcc_elf" > "$gcc_err" 2>&1; then
        rm -f -- "$TMP_SY"
        TMP_SY=""
        printf '%b FAIL %s (GCC compile error — see %s)%b\n' \
            "$RED" "$rel" "$gcc_err" "$NC"
        FAIL=$((FAIL + 1))
        return 0
    fi

    # Step 2: generate assembly (separate -S run, for the .sy file only).
    # Best effort: a failure here is deliberately ignored.
    aarch64-linux-gnu-gcc -O2 \
        -x c -include "$REPO_ROOT/sylib/sylib.h" \
        -I "$REPO_ROOT/sylib" \
        "$TMP_SY" -S -o "$gcc_asm" 2>/dev/null || true

    rm -f -- "$TMP_SY"
    TMP_SY=""

    # Step 3: run and time (manual ns timing, 5 decimal places).
    local stdout_file="$case_out_dir/$stem.gcc.stdout"
    local status=0
    local timeout_sec=60
    if [[ "$sy_file" == *"/performance/"* || "$sy_file" == *"/h_performance/"* ]]; then
        timeout_sec=300
    fi

    # Without a .in file the guest reads from /dev/null. It must not inherit
    # this script's stdin: inside the directory loop that is the NUL-delimited
    # file list, which a guest reading stdin would consume.
    local stdin_src=/dev/null
    if [[ -f "$stdin_file" ]]; then
        stdin_src="$stdin_file"
    fi

    local run_start_ns run_end_ns run_elapsed_ns
    run_start_ns=$(now_ns)
    # "|| status=$?" captures the exit code without tripping set -e.
    timeout "$timeout_sec" \
        qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
        < "$stdin_src" > "$stdout_file" 2>/dev/null || status=$?
    run_end_ns=$(now_ns)
    run_elapsed_ns=$((run_end_ns - run_start_ns))

    # Remove the executable (saves space; all data has been extracted).
    rm -f -- "$gcc_elf"

    # timeout exits with 124 when the time limit was hit.
    if [[ $status -eq 124 ]]; then
        printf '%b TIMEOUT %s (>%ds)%b\n' "$YELLOW" "$rel" "$timeout_sec" "$NC"
        rm -f -- "$stdout_file"
        FAIL=$((FAIL + 1))
        return 0
    fi

    # Step 4: save the output file (stdout + exit code, same format as
    # verify_asm.sh). A newline is added when non-empty stdout lacks a
    # trailing one, so the exit code always sits on its own line.
    {
        cat "$stdout_file"
        if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
            printf '\n'
        fi
        printf '%s\n' "$status"
    } > "$gcc_out"
    rm -f -- "$stdout_file"

    # Step 5: compute the elapsed time (seconds, 5 decimals) and store it.
    local elapsed
    elapsed=$(awk -v ns="$run_elapsed_ns" 'BEGIN { printf "%.5f", ns / 1000000000 }')

    # Update the TSV: drop an old row with exactly this key, then append.
    if stem_is_cached "$case_key"; then
        local tsv_tmp="$TIMING_TSV.tmp"
        awk -F'\t' -v s="$case_key" '($1 "") != (s "")' "$TIMING_TSV" > "$tsv_tmp"
        mv -- "$tsv_tmp" "$TIMING_TSV"
    fi
    printf '%s\t%s\n' "$case_key" "$elapsed" >> "$TIMING_TSV"

    local case_end_ns duration_ns
    case_end_ns=$(now_ns)
    duration_ns=$((case_end_ns - case_start_ns))
    printf '%b DONE %s gcc=%ss [%s]%b\n' \
        "$GREEN" "$rel" "$elapsed" "$(format_duration_ns "$duration_ns")" "$NC"
    PASS=$((PASS + 1))
}

# ---------- Initialisation ----------
if [[ "$UPDATE" == true ]]; then
    printf '%b[--update] Clearing all existing baseline data.%b\n' "$YELLOW" "$NC"
    : > "$TIMING_TSV"
    # Per-case artifacts live in sub-directories mirroring the source tree,
    # so the search must recurse (no -maxdepth limit). Best effort.
    find "$BASELINE_DIR" \
        \( -name '*.gcc.s' -o -name '*.gcc.out' -o -name '*.gcc.time' -o -name '*.gcc.err' \) \
        -delete 2>/dev/null || true
else
    [[ -f "$TIMING_TSV" ]] || : > "$TIMING_TSV"
fi

printf '%bBaseline directory : %s%b\n' "$CYAN" "$BASELINE_DIR" "$NC"
printf '%bTiming TSV : %s%b\n' "$CYAN" "$TIMING_TSV" "$NC"

if [[ "$UPDATE" == false && -f "$TIMING_TSV" ]]; then
    _cached_count=$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)
    if [[ $_cached_count -gt 0 ]]; then
        printf 'Found %d cached entries (use --update to recompute all).\n' "$_cached_count"
    fi
fi

# ---------- Pre-compile sylib.o (C mode, once) ----------
SYLIB_OBJ="$BASELINE_DIR/sylib.o"
if ! aarch64-linux-gnu-gcc -O2 -c -x c "$REPO_ROOT/sylib/sylib.c" \
        -I "$REPO_ROOT/sylib" -o "$SYLIB_OBJ" 2>/dev/null; then
    printf '%bERROR: failed to compile sylib.c%b\n' "$RED" "$NC" >&2
    exit 1
fi
printf 'sylib.o compiled : %s\n' "$SYLIB_OBJ"
printf '\n'

TOTAL_START_NS=$(now_ns)

# ---------- Run ----------
# The length guards keep "set -u" from aborting on an empty array, which
# bash releases before 4.4 treat as an unbound variable.
if (( ${#TEST_FILES[@]} > 0 )); then
    for sy_file in "${TEST_FILES[@]}"; do
        process_case "$sy_file"
    done
fi

if (( ${#TEST_DIRS[@]} > 0 )); then
    for test_dir in "${TEST_DIRS[@]}"; do
        if [[ ! -d "$test_dir" ]]; then
            printf '%b SKIP missing dir: %s%b\n' "$YELLOW" "$test_dir" "$NC"
            continue
        fi
        while IFS= read -r -d '' sy_file; do
            process_case "$sy_file"
        done < <(find "$test_dir" -maxdepth 1 -type f -name '*.sy' -print0 | sort -z)
    done
fi

# ---------- Summary ----------
TOTAL_END_NS=$(now_ns)
TOTAL_ELAPSED_NS=$((TOTAL_END_NS - TOTAL_START_NS))
TOTAL_CASES=$((PASS + SKIP + FAIL))

printf '\n'
printf 'Summary: %d DONE / %d SKIP (cached) / %d FAIL / total %d\n' \
    "$PASS" "$SKIP" "$FAIL" "$TOTAL_CASES"
printf 'Total elapsed : %s\n' "$(format_duration_ns "$TOTAL_ELAPSED_NS")"
printf 'Timing TSV : %s (%d entries)\n' \
    "$TIMING_TSV" "$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)"

# The exit status reflects whether any case failed or timed out.
[[ $FAIL -eq 0 ]]