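#!/usr/bin/env bash
# run_baseline.sh — batch-compile the GCC -O2 baseline and save the assembly,
# the program output and the run time of every test case.
#
# All data is stored under output/baseline/, mirroring the directory layout
# below test/:
#   gcc_timing.tsv        — key<TAB>gcc_elapsed_s (data source shared by all
#                           scripts); the key is the case path relative to
#                           test/ without the ".sy" suffix
#   <dir>/<stem>.gcc.s    — GCC -O2 AArch64 assembly (compared by analyze_case.sh)
#   <dir>/<stem>.gcc.out  — actual output of the GCC-built program: stdout
#                           followed by the exit code (compared by analyze_case.sh)
#
# Usage:
#   run_baseline.sh [--update] [test_dir|file ...]
#
#   --update   recompute all entries (by default, cases already listed in
#              gcc_timing.tsv are skipped)
#
# If no test directory or file is given, test/test_case and
# test/class_test_case are scanned automatically.
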
# Strict mode: abort on command failure, on use of an unset variable and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Directory holding this script, and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Where baseline artifacts and the shared timing table are written.
BASELINE_DIR="$REPO_ROOT/output/baseline"
TIMING_TSV="$BASELINE_DIR/gcc_timing.tsv"

# ANSI colour escapes; they are expanded by printf's %b conversion.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Command-line state, filled in by the argument parser.
UPDATE=false   # true when --update was given
TEST_DIRS=()   # directories to scan for *.sy files
TEST_FILES=()  # individual .sy files to process

# Print a short usage summary to stdout.
usage() {
  cat <<'EOF'
Usage: run_baseline.sh [--update] [test_dir|file ...]

  --update    recompute all entries (by default, cases already listed in
              gcc_timing.tsv are skipped)
  -h, --help  show this help and exit

Without any test directory or file, test/test_case and test/class_test_case
are scanned automatically.
EOF
}

# Parse the command line.
# Globals:   UPDATE, TEST_FILES, TEST_DIRS (written)
# Arguments: the script's positional parameters
# An argument naming an existing regular file is queued in TEST_FILES; any
# other non-option argument is queued in TEST_DIRS.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --update)
        UPDATE=true
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        if [[ -f "$1" ]]; then
          TEST_FILES+=("$1")
        else
          TEST_DIRS+=("$1")
        fi
        ;;
    esac
    shift
  done
}

parse_args "$@"

# ---------- Tool checks ----------

# Fail early when an external program used later in this script is missing:
# the cross compiler and emulator, python3 (source preprocessing), timeout
# (run limit), realpath (case keys) and awk (timing table handling).
for tool in aarch64-linux-gnu-gcc qemu-aarch64 python3 timeout realpath awk; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf '%bERROR: required tool not found: %s%b\n' "$RED" "$tool" "$NC" >&2
    exit 1
  fi
done

mkdir -p "$BASELINE_DIR"

# Report whether the timing table already holds an entry for a case key.
# The TSV file is queried directly, which avoids depending on associative
# arrays.
# Globals:   TIMING_TSV (read)
# Arguments: $1 - case key, i.e. the value stored in the first TSV column
# Returns:   0 if some row's first column equals the key exactly, 1 otherwise
#            (including when the table does not exist)
stem_is_cached() {
  local key="$1"
  [[ -f "$TIMING_TSV" ]] || return 1
  # Compare the whole first column: a substring search would let
  # "test_case/x" match the row of "class_test_case/x".
  awk -F'\t' -v s="$key" \
    '$1 == s { found = 1; exit } END { exit !found }' \
    "$TIMING_TSV" 2>/dev/null
}

# Print the elapsed time recorded for a case key.
# Globals:   TIMING_TSV (read)
# Arguments: $1 - case key, i.e. the value stored in the first TSV column
# Outputs:   the second column of the first row whose first column equals the
#            key; nothing when there is no such row or no table
# Returns:   always 0
stem_cached_time() {
  local key="$1"
  [[ -f "$TIMING_TSV" ]] || return 0
  awk -F'\t' -v s="$key" '$1 == s { print $2; exit }' "$TIMING_TSV" 2>/dev/null || true
}

# ---------- Test case discovery ----------

# List the immediate sub-directories of the default test roots.
# Globals:   REPO_ROOT (read)
# Outputs:   NUL-terminated directory paths, sorted, on stdout
# A root that does not exist is skipped silently.
discover_default_test_dirs() {
  local roots=(
    "$REPO_ROOT/test/test_case"
    "$REPO_ROOT/test/class_test_case"
  )
  local root
  for root in "${roots[@]}"; do
    [[ -d "$root" ]] || continue
    find "$root" -mindepth 1 -maxdepth 1 -type d -print0
  done | sort -z
}

# When neither a directory nor a file was given on the command line, fall back
# to the sub-directories of the default test roots.
if [[ ${#TEST_DIRS[@]} -eq 0 && ${#TEST_FILES[@]} -eq 0 ]]; then
  while IFS= read -r -d '' d; do
    TEST_DIRS+=("$d")
  done < <(discover_default_test_dirs)
fi

# ---------- Timing helpers ----------

# Print the current time as whole nanoseconds since the Unix epoch.
now_ns() {
  date '+%s%N'
}

# Format a nanosecond duration as seconds with five decimals and an "s" suffix.
# Arguments: $1 - duration in nanoseconds (integer)
# Outputs:   e.g. "1.23456s"; the fraction is truncated, not rounded
format_duration_ns() {
  local ns="$1"
  printf '%d.%05ds' "$((ns / 1000000000))" "$(((ns % 1000000000) / 10000))"
}

# ---------- Processing a single case ----------

# Result counters, updated by process_case and reported in the summary.
PASS=0  # cases compiled, run and recorded
SKIP=0  # cases skipped because a cached entry exists
FAIL=0  # cases that failed to build or timed out

# Build one SysY test case with GCC -O2, run it under qemu and record the
# baseline artifacts.
# Globals:   REPO_ROOT, BASELINE_DIR, TIMING_TSV, SYLIB_OBJ, UPDATE and the
#            colour variables (read); PASS, SKIP, FAIL (written)
# Arguments: $1 - path to the .sy source file; "<stem>.in" next to it, when
#                 present, is fed to the program as stdin
# Outputs:   one status line on stdout (SKIP / FAIL / TIMEOUT / DONE);
#            <stem>.gcc.s, <stem>.gcc.out and <stem>.gcc.err below
#            BASELINE_DIR, plus a row in TIMING_TSV
# Returns:   0 for every handled outcome; failures are counted in FAIL
process_case() {
  local sy_file="$1"
  local base stem input_dir stdin_file
  base="$(basename "$sy_file")"
  stem="${base%.sy}"
  input_dir="$(dirname "$sy_file")"
  stdin_file="$input_dir/$stem.in"

  local rel
  rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")"

  # Path key: strip the "test/" prefix and the ".sy" suffix, keeping the full
  # directory structure.
  # e.g. test/class_test_case/h_functional/11_BST.sy
  #      -> class_test_case/h_functional/11_BST
  local case_key
  case_key="${rel#test/}"
  case_key="${case_key%.sy}"

  local case_start_ns
  case_start_ns=$(now_ns)

  # Data already present and no forced update -> skip.
  if [[ "$UPDATE" == false ]] && stem_is_cached "$case_key"; then
    printf '%b SKIP %s (cached: %ss)%b\n' \
      "$CYAN" "$rel" "$(stem_cached_time "$case_key")" "$NC"
    SKIP=$((SKIP + 1))
    return 0
  fi

  # The output directory mirrors the source path structure.
  local case_out_dir
  case_out_dir="$BASELINE_DIR/$(dirname "$case_key")"
  mkdir -p "$case_out_dir"

  local gcc_elf gcc_asm gcc_out gcc_err
  gcc_elf="$case_out_dir/$stem.gcc.elf"
  gcc_asm="$case_out_dir/$stem.gcc.s"
  gcc_out="$case_out_dir/$stem.gcc.out"
  gcc_err="$case_out_dir/$stem.gcc.err"

  # Preprocessing: turn "const int NAME = EXPR;" into
  # "#define NAME ((int)(EXPR))". Multiple declarators are handled as well:
  #   const int A=1, B=2;  ->  #define A ((int)(1))\n#define B ((int)(2))
  # Reason: a SysY const int is a compile-time constant, but in C mode it
  # cannot be used as a global array dimension, whereas a #define can.
  local tmp_sy
  tmp_sy="$(mktemp /tmp/sysy_XXXXXX.c)"
  if ! python3 - "$sy_file" "$tmp_sy" << 'PYEOF'
import re, sys

pat = re.compile(
    r'^(\s*)const\s+int\s+((?:[A-Za-z_]\w*\s*=\s*[^,;]+)(?:,\s*[A-Za-z_]\w*\s*=\s*[^,;]+)*)\s*;',
    re.MULTILINE
)

def replace(m):
    indent = m.group(1)
    decls = re.split(r',\s*(?=[A-Za-z_])', m.group(2))
    lines = []
    for d in decls:
        name, _, val = d.partition('=')
        lines.append(f'{indent}#define {name.strip()} ((int)({val.strip()}))')
    return '\n'.join(lines)

with open(sys.argv[1]) as f:
    src = f.read()
with open(sys.argv[2], 'w') as f:
    f.write(pat.sub(replace, src))
PYEOF
  then
    # Do not leak the temporary file or abort the whole batch.
    rm -f "$tmp_sy"
    printf '%b FAIL %s (const-int preprocessing failed)%b\n' "$RED" "$rel" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 1: compile and link (C mode, used for the timed run).
  #   -x c              allows delete/new/class etc. as identifiers
  #   -include sylib.h  force-injects the SysY runtime declarations
  #                     (.sy files carry no #include)
  #   No name mangling: links directly against sylib.o, also compiled as C.
  if ! aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -x none "$SYLIB_OBJ" \
    -lm -o "$gcc_elf" > "$gcc_err" 2>&1; then
    rm -f "$tmp_sy"
    printf '%b FAIL %s (GCC compile error — see %s)%b\n' \
      "$RED" "$rel" "$gcc_err" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 2: emit assembly (separate -S pass over the .sy source only).
  # Best effort: a failure here does not fail the case.
  aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -S -o "$gcc_asm" 2>/dev/null || true

  rm -f "$tmp_sy"

  # Step 3: run and time (manual nanosecond timestamps, 5 decimals kept).
  local stdout_file="$case_out_dir/$stem.gcc.stdout"
  local status=0
  local timeout_sec=60
  if [[ "$sy_file" == *"/performance/"* || "$sy_file" == *"/h_performance/"* ]]; then
    timeout_sec=300
  fi

  # Without an input file the program reads from /dev/null instead of
  # inheriting the caller's stdin, which may be the file list of an enclosing
  # "while read" loop.
  local stdin_src=/dev/null
  if [[ -f "$stdin_file" ]]; then
    stdin_src="$stdin_file"
  fi

  local run_start_ns run_end_ns run_elapsed_ns
  run_start_ns=$(now_ns)
  # "|| status=$?" captures the exit code without tripping errexit.
  timeout "$timeout_sec" \
    qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
    < "$stdin_src" > "$stdout_file" 2>/dev/null || status=$?
  run_end_ns=$(now_ns)
  run_elapsed_ns=$((run_end_ns - run_start_ns))

  # Remove the executable (saves space; everything needed has been captured).
  rm -f "$gcc_elf"

  # timeout(1) exits with 124 when the time limit was hit.
  if [[ $status -eq 124 ]]; then
    printf '%b TIMEOUT %s (>%ds)%b\n' "$YELLOW" "$rel" "$timeout_sec" "$NC"
    rm -f "$stdout_file"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 4: save the output file (stdout + exit code, same format as
  # verify_asm.sh). A newline is inserted when stdout does not end with one,
  # so the exit code always sits on its own line.
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$gcc_out"
  rm -f "$stdout_file"

  # Step 5: compute the elapsed time (seconds, 5 decimals) and store it.
  local elapsed
  elapsed=$(awk -v ns="$run_elapsed_ns" 'BEGIN { printf "%.5f", ns / 1000000000 }')

  # Update the TSV: drop any previous row of this exact key, then append.
  # The first column is compared as a whole so that rows of other cases whose
  # key merely contains this key are left alone.
  if [[ -f "$TIMING_TSV" ]]; then
    local tsv_tmp="$TIMING_TSV.tmp"
    awk -F'\t' -v s="$case_key" '$1 != s' "$TIMING_TSV" > "$tsv_tmp"
    mv "$tsv_tmp" "$TIMING_TSV"
  fi
  printf '%s\t%s\n' "$case_key" "$elapsed" >> "$TIMING_TSV"

  local case_end_ns duration_ns
  case_end_ns=$(now_ns)
  duration_ns=$((case_end_ns - case_start_ns))

  printf '%b DONE %s gcc=%ss [%s]%b\n' \
    "$GREEN" "$rel" "$elapsed" "$(format_duration_ns "$duration_ns")" "$NC"
  PASS=$((PASS + 1))
}

# ---------- Initialisation ----------

if [[ "$UPDATE" == true ]]; then
  printf '%b[--update] Clearing all existing baseline data.%b\n' "$YELLOW" "$NC"
  : > "$TIMING_TSV"
  # Artifacts are written into sub-directories that mirror the test tree, so
  # the search has to recurse below BASELINE_DIR. Best effort: errors while
  # deleting are ignored.
  find "$BASELINE_DIR" -type f \
    \( -name '*.gcc.s' -o -name '*.gcc.out' -o -name '*.gcc.time' -o -name '*.gcc.err' \) \
    -delete 2>/dev/null || true
else
  # Make sure the timing table exists so later reads and appends succeed.
  [[ -f "$TIMING_TSV" ]] || : > "$TIMING_TSV"
fi

printf '%bBaseline directory : %s%b\n' "$CYAN" "$BASELINE_DIR" "$NC"
printf '%bTiming TSV : %s%b\n' "$CYAN" "$TIMING_TSV" "$NC"
if [[ "$UPDATE" == false && -f "$TIMING_TSV" ]]; then
  _cached_count=$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)
  if [[ $_cached_count -gt 0 ]]; then
    printf 'Found %d cached entries (use --update to recompute all).\n' "$_cached_count"
  fi
fi

# ---------- Precompile sylib.o (C mode, once) ----------

SYLIB_OBJ="$BASELINE_DIR/sylib.o"
# Capture the compiler output so that it can be shown if the build fails.
if ! _sylib_log="$(aarch64-linux-gnu-gcc -O2 -c -x c "$REPO_ROOT/sylib/sylib.c" \
  -I "$REPO_ROOT/sylib" -o "$SYLIB_OBJ" 2>&1)"; then
  printf '%bERROR: failed to compile sylib.c%b\n' "$RED" "$NC" >&2
  if [[ -n "$_sylib_log" ]]; then
    printf '%s\n' "$_sylib_log" >&2
  fi
  exit 1
fi
printf 'sylib.o compiled : %s\n' "$SYLIB_OBJ"

printf '\n'

# Start of the whole batch, used for the total elapsed time in the summary.
TOTAL_START_NS=$(now_ns)

# ---------- Run ----------

# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array; a
# plain "${arr[@]}" is an "unbound variable" error under `set -u` on bash
# releases older than 4.4.

# Files named explicitly on the command line.
for sy_file in ${TEST_FILES[@]+"${TEST_FILES[@]}"}; do
  process_case "$sy_file"
done

# Every *.sy file directly inside each requested directory, in sorted order.
for test_dir in ${TEST_DIRS[@]+"${TEST_DIRS[@]}"}; do
  if [[ ! -d "$test_dir" ]]; then
    printf '%b SKIP missing dir: %s%b\n' "$YELLOW" "$test_dir" "$NC"
    continue
  fi
  # The file list is read from descriptor 3 so that commands started by
  # process_case cannot consume it through stdin.
  while IFS= read -r -d '' -u 3 sy_file; do
    process_case "$sy_file"
  done 3< <(find "$test_dir" -maxdepth 1 -type f -name '*.sy' -print0 | sort -z)
done

# ---------- Summary ----------

TOTAL_END_NS=$(now_ns)
TOTAL_ELAPSED_NS=$((TOTAL_END_NS - TOTAL_START_NS))

TOTAL_CASES=$((PASS + SKIP + FAIL))
printf '\n'
printf 'Summary: %d DONE / %d SKIP (cached) / %d FAIL / total %d\n' \
  "$PASS" "$SKIP" "$FAIL" "$TOTAL_CASES"
printf 'Total elapsed : %s\n' "$(format_duration_ns "$TOTAL_ELAPSED_NS")"
printf 'Timing TSV : %s (%d entries)\n' \
  "$TIMING_TSV" "$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)"

# The script's exit status: success only when no case failed.
[[ $FAIL -eq 0 ]]