You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nudt-compiler-cpp/scripts/run_baseline.sh

327 lines
9.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
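# run_baseline.sh — batch-build the GCC -O2 baseline and save the assembly,
# the program output and the run time of every test case.
#
# All data is stored under output/baseline/ (per-case files are placed in
# sub-directories that mirror the test path below test/):
#   gcc_timing.tsv  — key<TAB>gcc_elapsed_s, where key is the case path
#                     relative to test/ without the .sy suffix
#                     (data source shared by all scripts)
#   <stem>.gcc.s    — GCC -O2 AArch64 assembly (compared by analyze_case.sh)
#   <stem>.gcc.out  — actual output of the GCC-built program, stdout + exit code
#                     (compared by analyze_case.sh)
#
# Usage:
#   run_baseline.sh [--update] [test_dir|file ...]
#
#   --update   recompute every entry (by default, cases already present in
#              gcc_timing.tsv are skipped)
#
# When no test directory/file is given, test/test_case and
# test/class_test_case are scanned automatically.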
# Strict mode: abort on command failure, on use of an unset variable and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute locations, derived from where this script lives.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "${SCRIPT_DIR}" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BASELINE_DIR="${REPO_ROOT}/output/baseline"
TIMING_TSV="${BASELINE_DIR}/gcc_timing.tsv"

# Colour escape sequences; they are expanded by printf's %b at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Command-line state: --update flag and the targets given by the user.
UPDATE=false
TEST_DIRS=()
TEST_FILES=()
# ---------- Command-line parsing ----------

#######################################
# Print the command-line synopsis.
# Outputs: usage text on stdout
#######################################
usage() {
  printf 'Usage: %s [--update] [--] [test_dir|file ...]\n' "${0##*/}"
  printf '  --update     recompute every entry instead of skipping cached ones\n'
  printf '  -h, --help   show this help and exit\n'
}

#######################################
# Parse the command line into UPDATE, TEST_FILES and TEST_DIRS.
# Globals:
#   UPDATE (written), TEST_FILES / TEST_DIRS (appended), RED, NC (read)
# Arguments:
#   the script's positional parameters
# Returns:
#   0 on success; exits 0 after --help and 2 on an unknown option.
#######################################
parse_args() {
  local options_done=false
  while [[ $# -gt 0 ]]; do
    if [[ "$options_done" == false ]]; then
      case "$1" in
        --update)
          UPDATE=true
          shift
          continue
          ;;
        -h | --help)
          usage
          exit 0
          ;;
        --)
          # Everything after "--" is a target, even if it starts with "-".
          options_done=true
          shift
          continue
          ;;
        -?*)
          # A mistyped option used to be queued as a directory and only
          # reported later as a missing dir; fail fast instead.
          printf '%bERROR: unknown option: %s%b\n' "$RED" "$1" "$NC" >&2
          usage >&2
          exit 2
          ;;
      esac
    fi
    # An existing regular file is a single case; anything else is treated
    # as a directory (its existence is checked when it is scanned).
    if [[ -f "$1" ]]; then
      TEST_FILES+=("$1")
    else
      TEST_DIRS+=("$1")
    fi
    shift
  done
}

parse_args "$@"
# ---------- Tool check ----------
# Verify up front that every external program invoked later is on PATH, so
# a missing tool is reported once here instead of failing in the middle of
# a batch. /usr/bin/time is deliberately not required: the run time is
# measured with `date +%s%N` timestamps, not with time(1).
for tool in aarch64-linux-gnu-gcc qemu-aarch64 python3 timeout realpath; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf '%bERROR: required tool not found: %s%b\n' "$RED" "$tool" "$NC" >&2
    exit 1
  fi
done

mkdir -p "$BASELINE_DIR"
#######################################
# Tell whether the timing TSV already holds a row for a case key.
# The TSV file is queried directly (no associative array, for portability).
# The key must equal the first tab-separated field exactly: a substring
# match would report "test_case/x" as cached when only
# "class_test_case/x" is present.
# Globals:
#   TIMING_TSV (read)
# Arguments:
#   $1 - case key (first column of the TSV)
# Returns:
#   0 if a row with that key exists, non-zero otherwise (including when the
#   TSV file does not exist).
#######################################
stem_is_cached() {
  local key="$1"
  [[ -f "$TIMING_TSV" ]] || return 1
  # Appending "" forces a string comparison even for numeric-looking keys.
  awk -F'\t' -v s="$key" '
    ($1 "") == (s "") { found = 1; exit }
    END { exit !found }
  ' "$TIMING_TSV" 2>/dev/null
}
#######################################
# Print the recorded time (second TSV column) of the first row whose first
# column equals the given key; prints nothing when there is no such row.
# Globals:
#   TIMING_TSV (read)
# Arguments:
#   $1 - case key
# Returns:
#   always 0, even when the TSV cannot be read.
#######################################
stem_cached_time() {
  local wanted="$1"
  if ! awk -v s="$wanted" 'BEGIN { FS = "\t" } $1 == s { print $2; exit }' \
    "$TIMING_TSV" 2>/dev/null; then
    return 0
  fi
}
# ---------- Test case discovery ----------

#######################################
# List the immediate sub-directories of the default test roots
# (test/test_case and test/class_test_case below the repository root).
# Roots that do not exist are skipped.
# Globals:
#   REPO_ROOT (read)
# Outputs:
#   NUL-terminated directory paths on stdout, sorted with `sort -z`.
#######################################
discover_default_test_dirs() {
  local suite suite_root
  {
    for suite in test_case class_test_case; do
      suite_root="$REPO_ROOT/test/$suite"
      if [[ -d "$suite_root" ]]; then
        find "$suite_root" -mindepth 1 -maxdepth 1 -type d -print0
      fi
    done
  } | sort -z
}
# No target was given on the command line: fall back to every directory
# reported by discover_default_test_dirs (read NUL-delimited, so paths with
# unusual characters survive intact).
if (( ${#TEST_DIRS[@]} + ${#TEST_FILES[@]} == 0 )); then
  while IFS= read -r -d '' d; do
    TEST_DIRS+=("$d")
  done < <(discover_default_test_dirs)
fi
# ---------- Timing helpers ----------

# Print the current time as seconds since the epoch immediately followed by
# the nanosecond part (date's %s and %N formats), i.e. a nanosecond count.
now_ns() {
  date '+%s%N'
}
#######################################
# Format a nanosecond duration as "<seconds>.<5 decimals>s".
# The fraction is truncated (not rounded) to units of 10 microseconds.
# Arguments:
#   $1 - duration in nanoseconds
# Outputs:
#   the formatted duration on stdout, without a trailing newline
#######################################
format_duration_ns() {
  local total_ns="$1"
  local whole_s frac_10us
  whole_s=$(( total_ns / 1000000000 ))
  frac_10us=$(( (total_ns % 1000000000) / 10000 ))
  printf '%d.%05ds' "$whole_s" "$frac_10us"
}
# ---------- Per-case processing ----------
# Outcome tallies updated by process_case: PASS for completed cases, SKIP
# for cached ones, FAIL for compile errors and timeouts.
PASS=0 SKIP=0 FAIL=0
#######################################
# Build one SysY test case with GCC -O2 for AArch64, run it under QEMU and
# record its assembly, output and run time as the baseline.
#
# Globals:
#   REPO_ROOT, BASELINE_DIR, TIMING_TSV, UPDATE, SYLIB_OBJ (read)
#   RED, GREEN, YELLOW, CYAN, NC (read)
#   PASS, SKIP, FAIL (incremented)
# Arguments:
#   $1 - path to the .sy source; if <stem>.in exists next to it, that file
#        is fed to the program on stdin
# Outputs:
#   One SKIP / FAIL / TIMEOUT / DONE status line on stdout. Writes
#   <stem>.gcc.s, <stem>.gcc.out and <stem>.gcc.err below
#   $BASELINE_DIR/<directory part of the case key>/ and one row in
#   $TIMING_TSV.
# Returns:
#   0 for every handled outcome; failures are reported through FAIL.
#######################################
process_case() {
  local sy_file="$1"
  local base stem input_dir stdin_file
  base="$(basename "$sy_file")"
  stem="${base%.sy}"
  input_dir="$(dirname "$sy_file")"
  stdin_file="$input_dir/$stem.in"

  local rel
  rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")"

  # Path key: strip the test/ prefix and the .sy suffix, keep the full
  # directory structure, e.g.
  #   test/class_test_case/h_functional/11_BST.sy
  #     -> class_test_case/h_functional/11_BST
  local case_key
  case_key="${rel#test/}"
  case_key="${case_key%.sy}"

  local case_start_ns
  case_start_ns=$(now_ns)

  # Data already present and no forced update -> skip.
  if [[ "$UPDATE" == false ]] && stem_is_cached "$case_key"; then
    printf '%b SKIP %s (cached: %ss)%b\n' \
      "$CYAN" "$rel" "$(stem_cached_time "$case_key")" "$NC"
    SKIP=$((SKIP + 1))
    return 0
  fi

  # The output directory mirrors the source path structure.
  local case_out_dir
  case_out_dir="$BASELINE_DIR/$(dirname "$case_key")"
  mkdir -p "$case_out_dir"

  local gcc_elf gcc_asm gcc_out gcc_err
  gcc_elf="$case_out_dir/$stem.gcc.elf"
  gcc_asm="$case_out_dir/$stem.gcc.s"
  gcc_out="$case_out_dir/$stem.gcc.out"
  gcc_err="$case_out_dir/$stem.gcc.err"

  # Pre-processing: turn "const int NAME = EXPR;" into
  # "#define NAME ((int)(EXPR))", including multi-declarator statements:
  #   const int A=1, B=2;  ->  #define A ((int)(1))\n#define B ((int)(2))
  # Reason: a SysY const int is a compile-time constant, but in C mode it
  # cannot be used as a global array dimension, whereas a #define can.
  local tmp_sy
  tmp_sy="$(mktemp /tmp/sysy_XXXXXX.c)"
  # A preprocessing failure is reported as a per-case FAIL (and the temp
  # file removed) instead of aborting the whole batch through `set -e`.
  if ! python3 - "$sy_file" "$tmp_sy" << 'PYEOF'
import re, sys
pat = re.compile(
    r'^(\s*)const\s+int\s+((?:[A-Za-z_]\w*\s*=\s*[^,;]+)(?:,\s*[A-Za-z_]\w*\s*=\s*[^,;]+)*)\s*;',
    re.MULTILINE
)
def replace(m):
    indent = m.group(1)
    decls = re.split(r',\s*(?=[A-Za-z_])', m.group(2))
    lines = []
    for d in decls:
        name, _, val = d.partition('=')
        lines.append(f'{indent}#define {name.strip()} ((int)({val.strip()}))')
    return '\n'.join(lines)
with open(sys.argv[1]) as f:
    src = f.read()
with open(sys.argv[2], 'w') as f:
    f.write(pat.sub(replace, src))
PYEOF
  then
    rm -f "$tmp_sy"
    printf '%b FAIL %s (preprocessing error)%b\n' "$RED" "$rel" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 1: compile and link (C mode, used for the timed run).
  #   -x c              treat the source as C, so delete/new/class etc. are
  #                     ordinary identifiers
  #   -include sylib.h  force-inject the SysY runtime declarations
  #                     (.sy files contain no #include)
  #   -x none           reset the language so sylib.o is handled by suffix;
  #                     no name mangling, it links directly with the
  #                     C-compiled sylib.o
  if ! aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -x none "$SYLIB_OBJ" \
    -lm -o "$gcc_elf" > "$gcc_err" 2>&1; then
    rm -f "$tmp_sy"
    printf '%b FAIL %s (GCC compile error — see %s)%b\n' \
      "$RED" "$rel" "$gcc_err" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 2: emit assembly (separate -S pass over the .sy source only).
  # Best effort: a failure here does not fail the case.
  aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -S -o "$gcc_asm" 2>/dev/null || true
  rm -f "$tmp_sy"

  # Step 3: run and time (manual ns timestamps, 5 decimal places).
  local stdout_file="$case_out_dir/$stem.gcc.stdout"
  local status=0
  local timeout_sec=60
  # Performance suites get a longer time limit.
  [[ "$sy_file" == *"/performance/"* || "$sy_file" == *"/h_performance/"* ]] && timeout_sec=300
  local run_start_ns run_end_ns run_elapsed_ns
  run_start_ns=$(now_ns)
  # The program's exit status is data, not an error: suspend errexit.
  set +e
  if [[ -f "$stdin_file" ]]; then
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
      < "$stdin_file" > "$stdout_file" 2>/dev/null
  else
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
      > "$stdout_file" 2>/dev/null
  fi
  status=$?
  run_end_ns=$(now_ns)
  run_elapsed_ns=$((run_end_ns - run_start_ns))
  set -e

  # Remove the executable (saves space; everything needed was extracted).
  rm -f "$gcc_elf"

  # timeout(1) reports an expired time limit with status 124.
  if [[ $status -eq 124 ]]; then
    printf '%b TIMEOUT %s (>%ds)%b\n' "$YELLOW" "$rel" "$timeout_sec" "$NC"
    rm -f "$stdout_file"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 4: save the output file: stdout, a newline if stdout is non-empty
  # and does not end with one, then the exit code on its own line
  # (same format as verify_asm.sh).
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$gcc_out"
  rm -f "$stdout_file"

  # Step 5: compute the elapsed time (seconds, 5 decimals) and store it.
  local elapsed
  elapsed=$(awk -v ns="$run_elapsed_ns" 'BEGIN { printf "%.5f", ns / 1000000000 }')

  # Replace any previous row of this exact key, then append the new one.
  # The first field is compared as a whole string: a substring filter
  # would also delete rows of other cases whose key merely contains this
  # one (e.g. class_test_case/x when updating test_case/x).
  if [[ -f "$TIMING_TSV" ]]; then
    local tsv_tmp="$TIMING_TSV.tmp"
    awk -F'\t' -v s="$case_key" '($1 "") != (s "")' "$TIMING_TSV" > "$tsv_tmp"
    mv "$tsv_tmp" "$TIMING_TSV"
  fi
  printf '%s\t%s\n' "$case_key" "$elapsed" >> "$TIMING_TSV"

  local case_end_ns duration_ns
  case_end_ns=$(now_ns)
  duration_ns=$((case_end_ns - case_start_ns))
  printf '%b DONE %s gcc=%ss [%s]%b\n' \
    "$GREEN" "$rel" "$elapsed" "$(format_duration_ns "$duration_ns")" "$NC"
  PASS=$((PASS + 1))
}
# ---------- Initialisation ----------

#######################################
# Prepare the baseline store and print where it lives.
# With --update the timing TSV is truncated and previously generated
# per-case artifacts are deleted; otherwise the TSV is created if missing
# and the number of cached rows is reported.
# Globals:
#   UPDATE, BASELINE_DIR, TIMING_TSV, YELLOW, CYAN, NC (read)
# Outputs:
#   status lines on stdout
#######################################
init_baseline_store() {
  if [[ "$UPDATE" == true ]]; then
    printf '%b[--update] Clearing all existing baseline data.%b\n' "$YELLOW" "$NC"
    : > "$TIMING_TSV"
    # Artifacts are written into sub-directories that mirror the test
    # paths, so the search must recurse; limiting it to the top level
    # would leave every stale file in place.
    find "${BASELINE_DIR:?}" -type f \
      \( -name '*.gcc.s' -o -name '*.gcc.out' -o -name '*.gcc.time' -o -name '*.gcc.err' \) \
      -delete 2>/dev/null || true
  else
    [[ -f "$TIMING_TSV" ]] || : > "$TIMING_TSV"
  fi

  printf '%bBaseline directory : %s%b\n' "$CYAN" "$BASELINE_DIR" "$NC"
  printf '%bTiming TSV : %s%b\n' "$CYAN" "$TIMING_TSV" "$NC"

  if [[ "$UPDATE" == false && -f "$TIMING_TSV" ]]; then
    local cached_count
    cached_count=$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)
    if [[ $cached_count -gt 0 ]]; then
      printf 'Found %d cached entries (use --update to recompute all).\n' "$cached_count"
    fi
  fi
}

init_baseline_store
# ---------- Pre-compile sylib.o (C mode, once) ----------
# The object is linked into every test case by process_case.
SYLIB_OBJ="$BASELINE_DIR/sylib.o"
# Capture the compiler's diagnostics so that a failure can be diagnosed;
# they are shown only when the compilation fails.
if ! sylib_diag="$(aarch64-linux-gnu-gcc -O2 -c -x c "$REPO_ROOT/sylib/sylib.c" \
  -I "$REPO_ROOT/sylib" -o "$SYLIB_OBJ" 2>&1)"; then
  printf '%bERROR: failed to compile sylib.c%b\n' "$RED" "$NC" >&2
  if [[ -n "$sylib_diag" ]]; then
    printf '%s\n' "$sylib_diag" >&2
  fi
  exit 1
fi
printf 'sylib.o compiled : %s\n' "$SYLIB_OBJ"
printf '\n'

# Start of the overall wall-clock measurement reported in the summary.
TOTAL_START_NS=$(now_ns)
# ---------- Run ----------
# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array.
# A plain "${arr[@]}" on an empty array is an "unbound variable" error
# under `set -u` on bash older than 4.4, and TEST_FILES is empty whenever
# only directories (or no arguments at all) were given.

# Individually named files first.
for sy_file in ${TEST_FILES[@]+"${TEST_FILES[@]}"}; do
  process_case "$sy_file"
done

# Then every *.sy file directly inside each directory (not recursive),
# in sorted order; paths are passed NUL-delimited.
for test_dir in ${TEST_DIRS[@]+"${TEST_DIRS[@]}"}; do
  if [[ ! -d "$test_dir" ]]; then
    printf '%b SKIP missing dir: %s%b\n' "$YELLOW" "$test_dir" "$NC"
    continue
  fi
  while IFS= read -r -d '' sy_file; do
    process_case "$sy_file"
  done < <(find "$test_dir" -maxdepth 1 -type f -name '*.sy' -print0 | sort -z)
done
# ---------- Summary ----------
TOTAL_END_NS=$(now_ns)
TOTAL_ELAPSED_NS=$(( TOTAL_END_NS - TOTAL_START_NS ))
TOTAL_CASES=$(( PASS + SKIP + FAIL ))

echo
printf 'Summary: %d DONE / %d SKIP (cached) / %d FAIL / total %d\n' \
  "$PASS" "$SKIP" "$FAIL" "$TOTAL_CASES"
printf 'Total elapsed : %s\n' \
  "$(format_duration_ns "$TOTAL_ELAPSED_NS")"
printf 'Timing TSV : %s (%d entries)\n' \
  "$TIMING_TSV" \
  "$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)"

# The script's exit status: success only when no case failed.
(( FAIL == 0 ))