测评脚本升级

the-little-apprentice 1 month ago
parent bcfbf52488
commit 08ce9d96ab

@ -0,0 +1,315 @@
#!/usr/bin/env bash
# analyze_case.sh — 单个 .sy 测试用例的全流程编译 + IR/汇编保存脚本
# 用于深度分析单个样例与 GCC 基线之间的差距。
#
# 用法:
# analyze_case.sh <input.sy> [output_dir]
#
# 输出目录(默认 output/analyze/<stem>_<timestamp>)中包含:
# <stem>.ll — 我方编译器输出的 LLVM IR
# <stem>.s — 我方编译器输出的 AArch64 汇编
# <stem>.elf — 我方编译链接后的可执行文件
# <stem>.gcc.s — GCC -O2 输出的 AArch64 汇编
# <stem>.gcc.elf — GCC -O2 链接后的可执行文件
# <stem>.our.time — 我方程序运行耗时(秒)
# <stem>.gcc.time — GCC 程序运行耗时(秒)
# <stem>.our.out — 我方程序实际输出
# <stem>.gcc.out — GCC 程序实际输出
# <stem>.diff — 输出 diff(若有差异)
# report.txt — 汇总报告(IR 行数、汇编行数、耗时、加速比)
set -euo pipefail

# Resolve this script's directory and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ANSI escape sequences; stored unexpanded and rendered by printf '%b'.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# ---------- Argument parsing ----------
# Exactly one or two positional arguments are accepted.
if (( $# < 1 || $# > 2 )); then
  printf 'usage: %s <input.sy> [output_dir]\n' "$0" >&2
  exit 1
fi

INPUT="$1"
[[ -f "$INPUT" ]] || {
  printf 'input file not found: %s\n' "$INPUT" >&2
  exit 1
}

# STEM is the file name without its .sy suffix; INPUT_DIR is the absolute
# directory that contains the input file.
BASE="$(basename "$INPUT")"
STEM="${BASE%.sy}"
INPUT_DIR="$(dirname "$(realpath "$INPUT")")"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"

# Use the second argument as output directory whenever it was supplied;
# otherwise fall back to a timestamped directory under output/analyze.
OUT_DIR="${2-$REPO_ROOT/output/analyze/${STEM}_${TIMESTAMP}}"
mkdir -p "$OUT_DIR"

# Start every run with an empty report file.
REPORT="$OUT_DIR/report.txt"
: > "$REPORT"
# Print one line to stdout and append the same line to the report file.
# Globals:   REPORT (read) - path of the report file
# Arguments: words to log; they are joined into a single line via "$*"
rpt() {
  local line="$*"
  printf '%s\n' "$line" | tee -a "$REPORT"
}
# Print a colourised line to stdout and append the plain text to the report.
# Globals:   NC (read) - colour reset sequence; REPORT (read) - report path
# Arguments: $1 - colour escape sequence; remaining words form the message
rpt_color() {
  local tint="$1"
  shift
  local text="$*"
  # %b expands the backslash escapes stored in the colour variables.
  printf '%b%s%b\n' "$tint" "$text" "$NC"
  printf '%s\n' "$text" >> "$REPORT"
}
rpt "============================================================"
rpt " analyze_case report"
rpt " case : $STEM"
rpt " source : $INPUT"
rpt " output : $OUT_DIR"
rpt " date : $(date)"
rpt "============================================================"
rpt ""

# ---------- Locate the compiler ----------
# Probe the known build directories in priority order; the first executable
# found wins.
COMPILER=""
for candidate in \
  "$REPO_ROOT/build_lab3/bin/compiler" \
  "$REPO_ROOT/build_lab2/bin/compiler" \
  "$REPO_ROOT/build/bin/compiler"; do
  if [[ -x "$candidate" ]]; then
    COMPILER="$candidate"
    break
  fi
done
if [[ -z "$COMPILER" ]]; then
  rpt_color "$RED" "ERROR: compiler not found. Build first:"
  rpt " cmake -S $REPO_ROOT -B $REPO_ROOT/build_lab3 && cmake --build $REPO_ROOT/build_lab3 -j"
  exit 1
fi
rpt "compiler : $COMPILER"

# ---------- Tool check ----------
# Every external program used later in this script is verified up front.
# `timeout` and /usr/bin/time are needed by the timed run; without this check
# a missing tool would only surface as a misleading "exit 127" result.
for tool in aarch64-linux-gnu-gcc qemu-aarch64 timeout; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    rpt_color "$RED" "ERROR: required tool not found: $tool"
    exit 1
  fi
done
if [[ ! -x /usr/bin/time ]]; then
  rpt_color "$RED" "ERROR: required tool not found: /usr/bin/time"
  exit 1
fi

# Optional stdin and expected-output files that sit next to the input file.
STDIN_FILE="$INPUT_DIR/$STEM.in"
EXPECTED_FILE="$INPUT_DIR/$STEM.out"
# ---------- 1. Emit LLVM IR ----------
rpt ""
rpt "--- [1/5] Generating LLVM IR ---"
IR_FILE="$OUT_DIR/$STEM.ll"
# Compiler stdout becomes the IR file; stderr is kept for diagnostics.
if ! "$COMPILER" --emit-ir "$INPUT" > "$IR_FILE" 2>"$OUT_DIR/$STEM.ir.err"; then
  rpt_color "$RED" "ERROR: IR generation failed"
  cat "$OUT_DIR/$STEM.ir.err" >&2
  exit 1
fi
IR_LINES=$(wc -l < "$IR_FILE")
rpt_color "$GREEN" "IR generated: $IR_FILE ($IR_LINES lines)"

# ---------- 2. Emit our assembly and link it ----------
rpt ""
rpt "--- [2/5] Generating our ASM & linking ---"
OUR_ASM="$OUT_DIR/$STEM.s"
OUR_ELF="$OUT_DIR/$STEM.elf"
if ! "$COMPILER" --emit-asm "$INPUT" > "$OUR_ASM" 2>"$OUT_DIR/$STEM.asm.err"; then
  rpt_color "$RED" "ERROR: ASM generation failed"
  cat "$OUT_DIR/$STEM.asm.err" >&2
  exit 1
fi
OUR_ASM_LINES=$(wc -l < "$OUR_ASM")
rpt_color "$GREEN" "ASM generated: $OUR_ASM ($OUR_ASM_LINES lines)"

# Link the generated assembly together with the runtime library source.
if ! aarch64-linux-gnu-gcc "$OUR_ASM" "$REPO_ROOT/sylib/sylib.c" -O2 \
  -I "$REPO_ROOT/sylib" -lm -o "$OUR_ELF" 2>"$OUT_DIR/$STEM.link.err"; then
  rpt_color "$RED" "ERROR: link failed"
  cat "$OUT_DIR/$STEM.link.err" >&2
  exit 1
fi
rpt_color "$GREEN" "Linked: $OUR_ELF"
# ---------- 3. GCC -O2 baseline (read from pre-computed data) ----------
rpt ""
rpt "--- [3/5] GCC -O2 baseline (reading from pre-computed data) ---"
BASELINE_DATA_DIR="$REPO_ROOT/output/baseline"
BASELINE_TSV_PATH="$BASELINE_DATA_DIR/gcc_timing.tsv"
GCC_ASM="$OUT_DIR/$STEM.gcc.s"
GCC_OUT="$OUT_DIR/$STEM.gcc.out"
GCC_OK=false
GCC_ASM_LINES=0
GCC_ELAPSED_RAW="" # seconds, without the trailing "s"

# run_baseline.sh keys its TSV rows (and mirrors its artifact directories) by
# the input path relative to the repository with the leading "test/" and the
# trailing ".sy" removed. Try that key first and fall back to the bare stem so
# that flat baseline layouts keep working.
CASE_KEY="$(realpath --relative-to="$REPO_ROOT" "$INPUT" 2>/dev/null || true)"
CASE_KEY="${CASE_KEY#test/}"
CASE_KEY="${CASE_KEY%.sy}"
BASELINE_KEYS=()
if [[ -n "$CASE_KEY" ]]; then
  BASELINE_KEYS+=("$CASE_KEY")
fi
if [[ "$CASE_KEY" != "$STEM" ]]; then
  BASELINE_KEYS+=("$STEM")
fi

if [[ -f "$BASELINE_TSV_PATH" ]]; then
  # The last matching row wins, matching how rows are appended to the TSV.
  for _key in "${BASELINE_KEYS[@]}"; do
    GCC_ELAPSED_RAW=$(awk -F'\t' -v s="$_key" '$1==s{v=$2} END{if(v!="") print v}' \
      "$BASELINE_TSV_PATH" 2>/dev/null || true)
    if [[ -n "$GCC_ELAPSED_RAW" ]]; then
      break
    fi
  done
  if [[ -n "$GCC_ELAPSED_RAW" ]]; then
    GCC_OK=true
    rpt_color "$GREEN" "baseline timing: ${GCC_ELAPSED_RAW}s"
  else
    rpt_color "$YELLOW" "WARNING: no baseline entry for '$STEM'"
    rpt " Run: scripts/run_baseline.sh"
  fi

  # Copy the baseline assembly into the analysis directory.
  _found_asm=false
  for _key in "${BASELINE_KEYS[@]}"; do
    if [[ -f "$BASELINE_DATA_DIR/$_key.gcc.s" ]]; then
      cp "$BASELINE_DATA_DIR/$_key.gcc.s" "$GCC_ASM"
      GCC_ASM_LINES=$(wc -l < "$GCC_ASM")
      rpt "GCC ASM: $GCC_ASM ($GCC_ASM_LINES lines)"
      _found_asm=true
      break
    fi
  done
  if [[ "$_found_asm" == false ]]; then
    rpt_color "$YELLOW" "GCC ASM not found in baseline dir"
  fi

  # Copy the baseline program output so step 5 can diff against it.
  for _key in "${BASELINE_KEYS[@]}"; do
    if [[ -f "$BASELINE_DATA_DIR/$_key.gcc.out" ]]; then
      cp "$BASELINE_DATA_DIR/$_key.gcc.out" "$GCC_OUT"
      rpt "GCC output: $GCC_OUT"
      break
    fi
  done
else
  rpt_color "$YELLOW" "WARNING: baseline data not found: $BASELINE_TSV_PATH"
  rpt " Run: scripts/run_baseline.sh"
  rpt " to pre-compute GCC -O2 baseline for all test cases."
fi
# ---------- 4. 运行并计时(仅我方编译器)----------
rpt ""
rpt "--- [4/5] Running & timing (our compiler) ---"
# Run an AArch64 executable under qemu with a timeout and measure its
# wall-clock time with /usr/bin/time.
#
# Globals:   STDIN_FILE (read) - fed to the program on stdin when it exists
#            YELLOW, GREEN (read) - colours for the status line
# Arguments: $1 - label used in the status line
#            $2 - path of the executable
#            $3 - output file; receives program stdout followed by a final
#                 line holding the exit status
#            $4 - file that /usr/bin/time writes the elapsed seconds to
#            $5 - timeout in seconds (default 60)
# Outputs:   stdout - ONLY the elapsed value ("<seconds>s", "?s" or
#                     "timeout"), so callers can capture it with $(...)
#            stderr - the human-readable status line (also appended to the
#                     report by rpt_color)
run_and_time() {
  local label="$1"
  local exe="$2"
  local out_file="$3"
  local time_file="$4"
  local timeout_sec="${5:-60}"
  local stdout_file="$out_file.raw"
  local status=0
  local -a cmd=(
    timeout "$timeout_sec"
    /usr/bin/time -f "%e" -o "$time_file"
    qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe"
  )

  # Drop any stale timing file so an old measurement is never reported.
  rm -f -- "$time_file"

  # The program's exit status is data here, not an error: suspend errexit.
  set +e
  if [[ -f "$STDIN_FILE" ]]; then
    "${cmd[@]}" < "$STDIN_FILE" > "$stdout_file" 2>/dev/null
  else
    "${cmd[@]}" > "$stdout_file" 2>/dev/null
  fi
  status=$?
  set -e

  # Merge stdout and the exit code into the output file: program output, a
  # newline if the output did not end with one, then the status on its own line.
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$out_file"
  rm -f "$stdout_file"

  local elapsed="timeout"
  if [[ $status -ne 124 ]]; then
    # GNU time prepends "Command exited with non-zero status N" when the
    # program fails, so the measurement is always the LAST line of the file.
    local raw=""
    if [[ -f "$time_file" ]]; then
      raw="$(tail -n 1 -- "$time_file" 2>/dev/null || true)"
    fi
    elapsed="${raw:-?}s"
  fi

  # Status lines go to stderr: stdout is reserved for the elapsed value that
  # the caller captures via command substitution.
  if [[ $status -eq 124 ]]; then
    rpt_color "$YELLOW" "$label: TIMEOUT (>${timeout_sec}s)" >&2
  elif [[ $status -ne 0 ]]; then
    rpt_color "$YELLOW" "$label: exit $status elapsed=${elapsed}" >&2
  else
    rpt_color "$GREEN" "$label: OK elapsed=${elapsed}" >&2
  fi
  printf '%s\n' "$elapsed"
}
OUR_OUT="$OUT_DIR/$STEM.our.out"
OUR_TIME_FILE="$OUT_DIR/$STEM.our.time"

# Performance suites get a longer time limit than the default 60 seconds.
TIMEOUT_SEC=60
case "$INPUT" in
  *"/performance/"* | *"/h_performance/"*) TIMEOUT_SEC=300 ;;
esac

# Whatever run_and_time writes to stdout is captured as the elapsed value.
OUR_ELAPSED=$(run_and_time "our compiler" "$OUR_ELF" "$OUR_OUT" "$OUR_TIME_FILE" "$TIMEOUT_SEC")

# The GCC timing comes straight from the baseline data; GCC is not re-run.
GCC_ELAPSED="N/A"
if [[ "$GCC_OK" == true && -n "$GCC_ELAPSED_RAW" ]]; then
  GCC_ELAPSED="${GCC_ELAPSED_RAW}s"
  rpt_color "$GREEN" "gcc -O2: ${GCC_ELAPSED} (from pre-computed baseline)"
fi
# ---------- 5. 输出对比 ----------
rpt ""
rpt "--- [5/5] Output comparison ---"
# Print the given file with one trailing carriage return removed per line.
# Arguments: $1 - path of the file to normalise
# Outputs:   the normalised text on stdout
normalize_out() {
  awk '
    { sub(/\r$/, "") }
    1
  ' "$1"
}
# Compare our output (and the baseline GCC output, when one was copied)
# against the expected file. Both sides pass through normalize_out first.
if [[ -f "$EXPECTED_FILE" ]]; then
DIFF_FILE="$OUT_DIR/$STEM.diff"
# diff exits 0 only when the two normalised streams are identical.
if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$OUR_OUT") > "$DIFF_FILE" 2>&1; then
rpt_color "$GREEN" "our output: MATCH expected"
# On a match the diff file is empty, so it is removed again.
rm -f "$DIFF_FILE"
else
rpt_color "$RED" "our output: MISMATCH — diff saved to $DIFF_FILE"
fi
# The GCC comparison only runs when baseline timing was found and a
# baseline output file exists in the analysis directory.
if [[ "$GCC_OK" == true && -f "$GCC_OUT" ]]; then
GCC_DIFF_FILE="$OUT_DIR/$STEM.gcc.diff"
if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$GCC_OUT") > "$GCC_DIFF_FILE" 2>&1; then
rpt_color "$GREEN" "gcc output: MATCH expected"
rm -f "$GCC_DIFF_FILE"
else
rpt_color "$YELLOW" "gcc output: MISMATCH — diff saved to $GCC_DIFF_FILE"
fi
fi
else
# Without an expected-output file there is nothing to compare against.
rpt_color "$YELLOW" "no expected output file found, skipping diff"
fi
# ---------- Summary report ----------
rpt ""
rpt "============================================================"
rpt_color "$BOLD" " Summary"
rpt "============================================================"
rpt "$(printf '%-20s %s' 'IR lines:' "$IR_LINES")"
rpt "$(printf '%-20s %s' 'Our ASM lines:' "$OUR_ASM_LINES")"
if [[ "$GCC_OK" == true && $GCC_ASM_LINES -gt 0 ]]; then
  rpt "$(printf '%-20s %s' 'GCC ASM lines:' "$GCC_ASM_LINES")"
  # Values are handed to awk with -v instead of being pasted into the
  # program text, so unexpected content can never become awk syntax.
  ASM_RATIO=$(awk -v ours="$OUR_ASM_LINES" -v gcc="$GCC_ASM_LINES" \
    'BEGIN { if (gcc > 0) printf "%.2f", ours / gcc; else print "N/A" }')
  rpt "$(printf '%-20s %s' 'ASM ratio (ours/gcc):' "$ASM_RATIO")"
fi
rpt "$(printf '%-20s %s' 'Our time:' "$OUR_ELAPSED")"
rpt "$(printf '%-20s %s' 'GCC time:' "$GCC_ELAPSED")"

# Compute the speedup only when both timings are plain decimal numbers.
# Placeholders such as "N/A", "timeout" or "?" are skipped instead of being
# evaluated, which previously aborted the script with an awk syntax error.
OUR_S="${OUR_ELAPSED%s}"
GCC_S="${GCC_ELAPSED%s}"
NUMERIC_RE='^[0-9]+([.][0-9]+)?$'
if [[ "$OUR_S" =~ $NUMERIC_RE && "$GCC_S" =~ $NUMERIC_RE ]]; then
  SPEEDUP=$(awk -v ours="$OUR_S" -v gcc="$GCC_S" \
    'BEGIN { if (ours > 0) printf "%.3f", gcc / ours; else print "inf" }')
  rpt "$(printf '%-20s %sx' 'Speedup (gcc/ours):' "$SPEEDUP")"
fi
rpt ""
rpt "Output directory: $OUT_DIR"
rpt "============================================================"
printf '\n%bReport saved to: %s%b\n' "$CYAN" "$REPORT" "$NC"

@ -0,0 +1,170 @@
#!/usr/bin/env bash
# clean_outputs.sh — 清理编译输出与日志垃圾文件
#
# 用法:
# clean_outputs.sh [选项]
#
# 选项:
# --logs 清理 output/logs/ 下的运行日志(保留 last_run.txt / last_failed.txt)
# --analyze 清理 output/analyze/ 下的单用例分析结果
# --build 清理 build_lab*/ 构建目录
# --test-result 清理 test/test_result/ 下的测试产物
# --all 清理以上全部
# --dry-run 只打印将要删除的内容,不实际删除
# --yes 跳过确认提示,直接删除(配合 --logs / --all 等使用)
#
# 不带任何选项时交互式选择。
set -euo pipefail

# Resolve this script's directory and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ANSI escape sequences, rendered by printf '%b'.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# What to clean and how; all switches default to off.
DO_LOGS=false
DO_ANALYZE=false
DO_BUILD=false
DO_TEST_RESULT=false
DRY_RUN=false
AUTO_YES=false

# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
usage: clean_outputs.sh [options]

  --logs         clean run logs under output/logs/
  --analyze      clean single-case analysis results under output/analyze/
  --build        clean build_lab*/ directories in the repository root
  --test-result  clean test/test_result/
  --all          all of the above
  --dry-run      only list what would be deleted
  --yes, -y      skip the confirmation prompt
  --help, -h     show this help

Run without arguments for interactive selection.
EOF
}

if [[ $# -eq 0 ]]; then
  # Interactive mode: ask which categories to clean.
  printf '%bclean_outputs.sh — interactive mode%b\n' "$CYAN" "$NC"
  printf 'Select what to clean (space-separated numbers, e.g. "1 3"):\n'
  printf ' 1) output/logs/ — run logs\n'
  printf ' 2) output/analyze/ — single-case analysis results\n'
  printf ' 3) build_lab*/ — CMake build directories\n'
  printf ' 4) test/test_result/ — test artifacts\n'
  printf ' 0) cancel\n'
  picks=()
  # EOF on stdin must not abort the script under errexit; it simply leaves
  # the selection empty.
  read -r -p 'choice: ' -a picks || true
  for c in ${picks[@]+"${picks[@]}"}; do
    case "$c" in
      1) DO_LOGS=true ;;
      2) DO_ANALYZE=true ;;
      3) DO_BUILD=true ;;
      4) DO_TEST_RESULT=true ;;
      0) printf 'cancelled.\n'; exit 0 ;;
      *) printf '%bunknown option: %s (ignored)%b\n' "$YELLOW" "$c" "$NC" ;;
    esac
  done
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    --logs) DO_LOGS=true ;;
    --analyze) DO_ANALYZE=true ;;
    --build) DO_BUILD=true ;;
    --test-result) DO_TEST_RESULT=true ;;
    --all) DO_LOGS=true; DO_ANALYZE=true; DO_BUILD=true; DO_TEST_RESULT=true ;;
    --dry-run) DRY_RUN=true ;;
    --yes|-y) AUTO_YES=true ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      # Refuse to continue on an unrecognised option: silently ignoring a
      # mistyped --dry-run would turn a preview into a real deletion.
      printf '%bunknown option: %s%b\n' "$RED" "$1" "$NC" >&2
      usage >&2
      exit 2
      ;;
  esac
  shift
done

if [[ "$DO_LOGS" == false && "$DO_ANALYZE" == false && \
  "$DO_BUILD" == false && "$DO_TEST_RESULT" == false ]]; then
  printf 'nothing selected. use --help or run without arguments for interactive mode.\n' >&2
  exit 0
fi

# ---------- Collect the paths to delete ----------
declare -a TARGETS=()

if [[ "$DO_LOGS" == true ]]; then
  LOG_ROOT="$REPO_ROOT/output/logs"
  if [[ -d "$LOG_ROOT" ]]; then
    # Only directories exactly two levels below output/logs are selected, so
    # plain files directly in output/logs (last_run.txt, last_failed.txt) are
    # never touched.
    while IFS= read -r -d '' d; do
      TARGETS+=("$d")
    done < <(find "$LOG_ROOT" -mindepth 2 -maxdepth 2 -type d -print0 2>/dev/null)
  fi
fi

if [[ "$DO_ANALYZE" == true ]]; then
  ANALYZE_ROOT="$REPO_ROOT/output/analyze"
  if [[ -d "$ANALYZE_ROOT" ]]; then
    # Every direct child of output/analyze is removed; the root itself stays.
    while IFS= read -r -d '' d; do
      TARGETS+=("$d")
    done < <(find "$ANALYZE_ROOT" -mindepth 1 -maxdepth 1 -print0 2>/dev/null)
  fi
fi

if [[ "$DO_BUILD" == true ]]; then
  while IFS= read -r -d '' d; do
    TARGETS+=("$d")
  done < <(find "$REPO_ROOT" -maxdepth 1 -type d -name 'build_lab*' -print0 2>/dev/null)
fi

if [[ "$DO_TEST_RESULT" == true ]]; then
  TR_ROOT="$REPO_ROOT/test/test_result"
  if [[ -d "$TR_ROOT" ]]; then
    TARGETS+=("$TR_ROOT")
  fi
fi

if [[ ${#TARGETS[@]} -eq 0 ]]; then
  printf '%bNothing to clean — target directories are already empty or do not exist.%b\n' "$GREEN" "$NC"
  exit 0
fi

# ---------- Print the list ----------
printf '\n%bThe following will be %s:%b\n' "$YELLOW" \
  "$([[ "$DRY_RUN" == true ]] && echo "listed (dry-run)" || echo "DELETED")" "$NC"
for t in "${TARGETS[@]}"; do
  # A failing du must neither abort the script nor corrupt the size column.
  SIZE="$(du -sh "$t" 2>/dev/null | cut -f1)" || true
  printf ' [%s] %s\n' "${SIZE:-?}" "$t"
done
printf '\n'

if [[ "$DRY_RUN" == true ]]; then
  printf '%bDry-run mode: nothing deleted.%b\n' "$CYAN" "$NC"
  exit 0
fi

# ---------- Confirmation ----------
if [[ "$AUTO_YES" == false ]]; then
  confirm=""
  # EOF counts as "no" rather than killing the script silently.
  read -r -p "Proceed with deletion? [y/N] " confirm || true
  case "$confirm" in
    [yY][eE][sS]|[yY]) ;;
    *)
      printf 'cancelled.\n'
      exit 0
      ;;
  esac
fi

# ---------- Deletion ----------
DELETED=0
ERRORS=0
for t in "${TARGETS[@]}"; do
  # ${t:?} aborts instead of running rm -rf on an empty path.
  if rm -rf -- "${t:?}" 2>/dev/null; then
    printf '%b deleted: %s%b\n' "$GREEN" "$t" "$NC"
    DELETED=$((DELETED + 1))
  else
    printf '%b ERROR deleting: %s%b\n' "$RED" "$t" "$NC"
    ERRORS=$((ERRORS + 1))
  fi
done
printf '\n'
if [[ $ERRORS -eq 0 ]]; then
  printf '%bDone. %d item(s) deleted.%b\n' "$GREEN" "$DELETED" "$NC"
else
  printf '%bDone. %d deleted, %d errors.%b\n' "$YELLOW" "$DELETED" "$ERRORS" "$NC"
  exit 1
fi

@ -19,6 +19,7 @@ FALLBACK_TO_FULL=false
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
TEST_DIRS=()
@ -104,8 +105,8 @@ now_ns() {
format_duration_ns() {
local ns="$1"
local sec=$((ns / 1000000000))
local ms=$(((ns % 1000000000) / 1000000))
printf '%d.%03ds' "$sec" "$ms"
local us10=$(((ns % 1000000000) / 10000))
printf '%d.%05ds' "$sec" "$us10"
}
is_transient_io_failure() {
@ -116,9 +117,34 @@ is_transient_io_failure() {
"$log_file"
}
# ---------- baseline 读取 & timing ----------
# 共享基线数据(由 run_baseline.sh 生成)
BASELINE_TSV="$REPO_ROOT/output/baseline/gcc_timing.tsv"
# 本次运行的我方计时 TSVstem<TAB>our_ns<TAB>gcc_s
TIMING_TSV="$RUN_DIR/timing.tsv"
# 从共享 TSV 查找某 stem 的 GCC 基线耗时(秒),找不到返回 N/A
# Look up the GCC baseline time (seconds) for one case key.
# Globals:   BASELINE_TSV (read) - tab-separated file: key<TAB>seconds
# Arguments: $1 - key to look up (compared against the first column)
# Outputs:   the second column of the last matching row, or "N/A" when the
#            file is missing or no row matches
lookup_gcc_s() {
  local wanted="$1"
  local result
  if [[ ! -f "$BASELINE_TSV" ]]; then
    echo "N/A"
    return
  fi
  result=$(awk -F'\t' -v s="$wanted" '
    $1 == s { v = $2 }
    END { if (v != "") print v; else print "N/A" }
  ' "$BASELINE_TSV")
  echo "$result"
}
# Append one timing row to this run's TSV.
# Globals:   TIMING_TSV (appended to)
# Arguments: $1 - case key
#            $2 - our run time in nanoseconds
#            $3 - GCC baseline seconds (defaults to "N/A" when empty/unset)
record_timing() {
  local key="$1" elapsed_ns="$2"
  local baseline="${3:-N/A}"
  printf '%s\t%s\t%s\n' "$key" "$elapsed_ns" "$baseline" >> "$TIMING_TSV"
}
test_one() {
local sy_file="$1"
local rel="$2"
local timing_out="${3:-}"
local safe_name="${rel//\//_}"
local case_key="${safe_name%.sy}"
local tmp_dir="$RUN_DIR/.tmp/$case_key"
@ -132,7 +158,10 @@ test_one() {
cleanup_tmp_dir "$tmp_dir"
mkdir -p "$tmp_dir"
if "$VERIFY_SCRIPT" "$sy_file" "$tmp_dir" --run > "$case_log" 2>&1; then
local verify_args=("$sy_file" "$tmp_dir" --run)
[[ -n "$timing_out" ]] && verify_args+=(--timing-out "$timing_out")
if "$VERIFY_SCRIPT" "${verify_args[@]}" > "$case_log" 2>&1; then
cleanup_tmp_dir "$tmp_dir"
return 0
fi
@ -156,19 +185,35 @@ run_case() {
local sy_file="$1"
local rel
local case_start_ns
local case_end_ns
local case_elapsed_ns
rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")"
case_start_ns=$(now_ns)
if test_one "$sy_file" "$rel"; then
case_end_ns=$(now_ns)
case_elapsed_ns=$((case_end_ns - case_start_ns))
log_color "$GREEN" "PASS $rel [$(format_duration_ns "$case_elapsed_ns")]"
local base stem case_key
base="$(basename "$sy_file")"
stem="${base%.sy}"
# 与 run_baseline.sh 保持一致:去掉 test/ 前缀和 .sy 后缀
case_key="${rel#test/}"
case_key="${case_key%.sy}"
local timing_file
timing_file="$(mktemp)"
if test_one "$sy_file" "$rel" "$timing_file"; then
local compile_ns=0 run_ns=0
if [[ -f "$timing_file" ]]; then
compile_ns=$(grep '^compile_ns=' "$timing_file" | cut -d= -f2 || echo 0)
run_ns=$(grep '^run_ns=' "$timing_file" | cut -d= -f2 || echo 0)
fi
rm -f "$timing_file"
log_color "$GREEN" "PASS $rel [compile=$(format_duration_ns "$compile_ns") run=$(format_duration_ns "$run_ns")]"
PASS=$((PASS + 1))
local gcc_s
gcc_s=$(lookup_gcc_s "$case_key")
record_timing "$case_key" "$run_ns" "$gcc_s"
else
case_end_ns=$(now_ns)
case_elapsed_ns=$((case_end_ns - case_start_ns))
rm -f "$timing_file"
local case_elapsed_ns=$(( $(now_ns) - case_start_ns ))
log_color "$RED" "FAIL $rel [$(format_duration_ns "$case_elapsed_ns")]"
FAIL=$((FAIL + 1))
FAIL_LIST+=("$rel")
@ -176,6 +221,7 @@ run_case() {
}
TOTAL_START_NS=$(now_ns)
: > "$TIMING_TSV"
if [[ "$FAILED_ONLY" == true ]]; then
if [[ -f "$LAST_FAILED_FILE" ]]; then
@ -209,6 +255,11 @@ fi
if [[ "$FALLBACK_TO_FULL" == true ]]; then
log_color "$YELLOW" "No cached failed cases found, fallback to full suite."
fi
if [[ -f "$BASELINE_TSV" ]]; then
log_plain "Baseline TSV: $BASELINE_TSV (speedup ratios will be computed)"
else
log_color "$CYAN" "Tip: run scripts/run_baseline.sh first to enable GCC -O2 speedup analysis."
fi
if [[ ! -f "$VERIFY_SCRIPT" ]]; then
log_color "$RED" "missing verify script: $VERIFY_SCRIPT"
@ -280,6 +331,93 @@ log_plain "summary: ${PASS} PASS / ${FAIL} FAIL / total $((PASS + FAIL))"
log_plain "build elapsed: $(format_duration_ns "$BUILD_ELAPSED_NS")"
log_plain "validation elapsed: $(format_duration_ns "$VALIDATION_ELAPSED_NS")"
log_plain "total elapsed: $(format_duration_ns "$TOTAL_ELAPSED_NS")"
# ---------- Timing & speedup analysis ----------
# Runs only when this run's timing TSV exists and is non-empty. Rows are
# read below as case_key<TAB>our_ns<TAB>gcc_s.
if [[ -s "$TIMING_TSV" ]]; then
log_plain ""
log_plain "==> Timing & Speedup Analysis"
# Check whether any row of this run carries GCC baseline data:
# grep -v succeeds if at least one line does NOT end in <TAB>N/A.
HAS_BASELINE=false
if grep -qv $'\tN/A$' "$TIMING_TSV" 2>/dev/null; then
HAS_BASELINE=true
fi
if [[ "$HAS_BASELINE" == true ]]; then
# Expand the TSV into a temporary file with computed values:
# case_key, our_s, gcc_s, speedup
_tmp_timing="$RUN_DIR/timing_computed.tsv"
while IFS=$'\t' read -r case_key our_ns gcc_s; do
# Convert nanoseconds to seconds with five decimals.
our_s=$(awk "BEGIN{printf \"%.5f\", $our_ns / 1000000000}")
if [[ "$gcc_s" == "N/A" ]]; then
speedup="N/A"
else
# speedup = gcc time / our time; "inf" when our time is not positive.
speedup=$(awk "BEGIN{if($our_s>0) printf \"%.5f\", $gcc_s/$our_s; else print \"inf\"}")
fi
printf '%s\t%s\t%s\t%s\n' "$case_key" "$our_s" "$gcc_s" "$speedup"
done < "$TIMING_TSV" > "$_tmp_timing"
# Sort 1: speedup ascending (N/A rows last)
log_plain ""
log_plain "--- [Sort 1] Speedup ratio ascending (worst speedup first) ---"
log_plain "$(printf '%-40s %10s %10s %10s' 'case' 'our(s)' 'gcc(s)' 'speedup')"
log_plain "$(printf '%0.s-' {1..76})"
{
# Rows with a speedup first (numeric sort on column 4), then the N/A rows
# sorted by key; "|| true" keeps an empty grep result from failing the group.
grep -v $'\tN/A$' "$_tmp_timing" | sort -t$'\t' -k4 -n || true
grep $'\tN/A$' "$_tmp_timing" | sort -t$'\t' -k1 || true
} | while IFS=$'\t' read -r case_key our_s gcc_s speedup; do
# Show only the last path component of the key.
disp="${case_key##*/}"
if [[ "$speedup" == "N/A" ]]; then
log_plain "$(printf '%-40s %10s %10s %10s' "$disp" "${our_s}s" "N/A" "N/A")"
else
log_plain "$(printf '%-40s %10s %10s %9sx' "$disp" "${our_s}s" "${gcc_s}s" "$speedup")"
fi
done
# Sort 2: our elapsed time descending
log_plain ""
log_plain "--- [Sort 2] Our elapsed time descending (slowest first) ---"
log_plain "$(printf '%-40s %10s %10s %10s' 'case' 'our(s)' 'gcc(s)' 'speedup')"
log_plain "$(printf '%0.s-' {1..76})"
sort -t$'\t' -k2 -rn "$_tmp_timing" | \
while IFS=$'\t' read -r case_key our_s gcc_s speedup; do
disp="${case_key##*/}"
if [[ "$speedup" == "N/A" ]]; then
log_plain "$(printf '%-40s %10s %10s %10s' "$disp" "${our_s}s" "N/A" "N/A")"
else
log_plain "$(printf '%-40s %10s %10s %9sx' "$disp" "${our_s}s" "${gcc_s}s" "$speedup")"
fi
done
# The computed table is only an intermediate; the raw TSV is what is kept.
rm -f "$_tmp_timing"
else
# No baseline: only print our elapsed time, descending
log_plain ""
log_plain "--- [Sort] Our elapsed time descending (slowest first) ---"
log_plain "$(printf '%-40s %10s' 'case' 'our(s)')"
log_plain "$(printf '%0.s-' {1..54})"
while IFS=$'\t' read -r case_key our_ns _; do
our_s=$(awk "BEGIN{printf \"%.5f\", $our_ns / 1000000000}")
printf '%s\t%s\n' "$case_key" "$our_s"
done < "$TIMING_TSV" | \
sort -t$'\t' -k2 -rn | \
while IFS=$'\t' read -r case_key our_s; do
disp="${case_key##*/}"
log_plain "$(printf '%-40s %10ss' "$disp" "$our_s")"
done
log_plain ""
log_color "$CYAN" "Tip: run scripts/run_baseline.sh to compute GCC -O2 baseline for speedup analysis."
fi
log_plain ""
log_plain "timing data saved to: $TIMING_TSV"
fi
# ---------- 失败用例列表 ----------
if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then
log_plain "failed cases:"
for f in "${FAIL_LIST[@]}"; do

@ -0,0 +1,326 @@
#!/usr/bin/env bash
# run_baseline.sh — 批量编译 GCC -O2 基线并保存汇编、输出与运行时间
#
# 数据统一保存在 output/baseline/
# gcc_timing.tsv — stem<TAB>gcc_elapsed_s (所有脚本的共享数据源)
# <stem>.gcc.s — GCC -O2 AArch64 汇编(供 analyze_case.sh 对比)
# <stem>.gcc.out — GCC 程序实际输出(stdout+exit_code,供 analyze_case.sh 对比)
#
# 用法:
# run_baseline.sh [--update] [test_dir|file ...]
#
# --update 重新计算所有条目(默认跳过 gcc_timing.tsv 中已有的 stem)
#
# 若不指定测试目录/文件,自动扫描 test/test_case 和 test/class_test_case
set -euo pipefail

# Resolve this script's directory and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# All baseline artifacts and the shared timing TSV live here.
BASELINE_DIR="$REPO_ROOT/output/baseline"
TIMING_TSV="$BASELINE_DIR/gcc_timing.tsv"

# ANSI escape sequences, rendered by printf '%b'.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

UPDATE=false
TEST_DIRS=()
TEST_FILES=()

# ---------- Argument parsing ----------
# Existing files are queued as single cases, everything else as a test
# directory. An argument that looks like an option but is neither --update nor
# an existing path is rejected; otherwise a typo such as "--udpate" would be
# taken for a directory and suppress the default test discovery.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --update) UPDATE=true ;;
    *)
      if [[ -f "$1" ]]; then
        TEST_FILES+=("$1")
      elif [[ "$1" == -* && ! -d "$1" ]]; then
        printf '%bERROR: unknown option: %s%b\n' "$RED" "$1" "$NC" >&2
        printf 'usage: %s [--update] [test_dir|file ...]\n' "$0" >&2
        exit 2
      else
        TEST_DIRS+=("$1")
      fi
      ;;
  esac
  shift
done

# ---------- Tool check ----------
# Covers every external program the per-case processing invokes, including
# python3 (source preprocessing) and timeout (run limit).
for tool in aarch64-linux-gnu-gcc qemu-aarch64 python3 timeout; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf '%bERROR: required tool not found: %s%b\n' "$RED" "$tool" "$NC" >&2
    exit 1
  fi
done
if [[ ! -x /usr/bin/time ]]; then
  printf '%bERROR: /usr/bin/time not found%b\n' "$RED" "$NC" >&2
  exit 1
fi
mkdir -p "$BASELINE_DIR"
# Report whether the timing TSV already holds a row for the given key.
# The TSV is queried directly (no associative array) for compatibility.
#
# The first tab-separated column is compared for exact equality: a substring
# search would let "test_case/x" match a "class_test_case/x" row.
#
# Globals:   TIMING_TSV (read)
# Arguments: $1 - case key
# Returns:   0 when a row with that key exists, non-zero otherwise (including
#            when the TSV file is missing)
stem_is_cached() {
  local key="$1"
  [[ -f "$TIMING_TSV" ]] || return 1
  awk -F'\t' -v s="$key" '
    $1 == s { found = 1; exit }
    END { exit !found }
  ' "$TIMING_TSV" 2>/dev/null
}
# Print the cached time stored for a key.
# Globals:   TIMING_TSV (read)
# Arguments: $1 - case key (matched exactly against the first column)
# Outputs:   the second column of the first matching row; nothing when there
#            is no match or the file cannot be read
# Returns:   always 0
stem_cached_time() {
  local wanted="$1"
  awk -F'\t' -v s="$wanted" '
    $1 == s {
      print $2
      exit
    }
  ' "$TIMING_TSV" 2>/dev/null || true
}
# ---------- Test case discovery ----------
# List the immediate sub-directories of the default test roots.
# Globals:   REPO_ROOT (read)
# Outputs:   NUL-delimited, sorted directory paths on stdout; roots that do
#            not exist are skipped
discover_default_test_dirs() {
  local suite_root
  for suite_root in \
    "$REPO_ROOT/test/test_case" \
    "$REPO_ROOT/test/class_test_case"; do
    if [[ -d "$suite_root" ]]; then
      find "$suite_root" -mindepth 1 -maxdepth 1 -type d -print0
    fi
  done | sort -z
}
# When neither directories nor files were given on the command line, fall
# back to the directories reported by discover_default_test_dirs (read as a
# NUL-delimited list).
if [[ ${#TEST_DIRS[@]} -eq 0 && ${#TEST_FILES[@]} -eq 0 ]]; then
while IFS= read -r -d '' d; do
TEST_DIRS+=("$d")
done < <(discover_default_test_dirs)
fi
# ---------- Timing helpers ----------
# Print the current time as seconds since the epoch immediately followed by
# the nanosecond field (date's %s%N format).
now_ns() {
  date +%s%N
}
# Format a nanosecond count as "<seconds>.<5 digits>s".
# Arguments: $1 - duration in nanoseconds
# Outputs:   the formatted duration on stdout, without a trailing newline
format_duration_ns() {
  local ns="$1"
  local whole=$((ns / 1000000000))
  # The fractional part is expressed in units of 10 microseconds (5 digits).
  local frac=$(((ns % 1000000000) / 10000))
  printf '%d.%05ds' "$whole" "$frac"
}
# ---------- Per-case processing ----------
# Result counters updated by process_case: PASS counts completed cases,
# SKIP counts cases served from the cache, FAIL counts compile errors and
# timeouts.
PASS=0
SKIP=0
FAIL=0
# Build one .sy test case with GCC -O2, run it under qemu and record the
# baseline assembly, output and wall-clock time.
#
# Globals:   REPO_ROOT, BASELINE_DIR, TIMING_TSV, SYLIB_OBJ, UPDATE (read)
#            PASS, SKIP, FAIL (incremented); colour variables (read)
# Arguments: $1 - path of the .sy source file
# Outputs:   one status line on stdout (SKIP / FAIL / TIMEOUT / DONE)
# Returns:   0 in all handled cases; failures are reported via FAIL
process_case() {
  local sy_file="$1"
  local base stem input_dir stdin_file
  base="$(basename "$sy_file")"
  stem="${base%.sy}"
  input_dir="$(dirname "$sy_file")"
  stdin_file="$input_dir/$stem.in"

  local rel
  rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")"

  # Path key: strip the "test/" prefix and ".sy" suffix, keep the directory
  # structure. Example:
  #   test/class_test_case/h_functional/11_BST.sy
  #   -> class_test_case/h_functional/11_BST
  local case_key
  case_key="${rel#test/}"
  case_key="${case_key%.sy}"

  local case_start_ns
  case_start_ns=$(now_ns)

  # Data already present and no forced update -> skip.
  if [[ "$UPDATE" == false ]] && stem_is_cached "$case_key"; then
    printf '%b SKIP %s (cached: %ss)%b\n' \
      "$CYAN" "$rel" "$(stem_cached_time "$case_key")" "$NC"
    SKIP=$((SKIP + 1))
    return 0
  fi

  # The output directory mirrors the source path structure.
  local case_out_dir
  case_out_dir="$BASELINE_DIR/$(dirname "$case_key")"
  mkdir -p "$case_out_dir"

  local gcc_elf gcc_asm gcc_out gcc_err
  gcc_elf="$case_out_dir/$stem.gcc.elf"
  gcc_asm="$case_out_dir/$stem.gcc.s"
  gcc_out="$case_out_dir/$stem.gcc.out"
  gcc_err="$case_out_dir/$stem.gcc.err"

  # Preprocessing: turn "const int NAME = EXPR;" into
  # "#define NAME ((int)(EXPR))", including multi-declarator forms
  # (const int A=1, B=2; -> one #define per name).
  # Reason: a SysY const int is a compile-time constant, but in C mode it
  # cannot size a global array, whereas a #define can.
  local tmp_sy
  tmp_sy="$(mktemp /tmp/sysy_XXXXXX.c)"
  if ! python3 - "$sy_file" "$tmp_sy" << 'PYEOF'
import re, sys
pat = re.compile(
    r'^(\s*)const\s+int\s+((?:[A-Za-z_]\w*\s*=\s*[^,;]+)(?:,\s*[A-Za-z_]\w*\s*=\s*[^,;]+)*)\s*;',
    re.MULTILINE
)
def replace(m):
    indent = m.group(1)
    decls = re.split(r',\s*(?=[A-Za-z_])', m.group(2))
    lines = []
    for d in decls:
        name, _, val = d.partition('=')
        lines.append(f'{indent}#define {name.strip()} ((int)({val.strip()}))')
    return '\n'.join(lines)
with open(sys.argv[1]) as f:
    src = f.read()
with open(sys.argv[2], 'w') as f:
    f.write(pat.sub(replace, src))
PYEOF
  then
    # Do not leak the temporary file, and keep processing the other cases.
    rm -f "$tmp_sy"
    printf '%b FAIL %s (preprocessing failed)%b\n' "$RED" "$rel" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 1: compile and link in C mode (this binary is the one that is timed).
  #   -x c              lets delete/new/class etc. be used as identifiers
  #   -include sylib.h  injects the SysY runtime declarations (.sy files
  #                     contain no #include)
  #   -x none           restores suffix-based detection for the object file
  if ! aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -x none "$SYLIB_OBJ" \
    -lm -o "$gcc_elf" > "$gcc_err" 2>&1; then
    rm -f "$tmp_sy"
    printf '%b FAIL %s (GCC compile error — see %s)%b\n' \
      "$RED" "$rel" "$gcc_err" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 2: emit assembly for the .sy file alone (separate -S run).
  # Best effort: a failure here does not fail the case.
  aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -S -o "$gcc_asm" 2>/dev/null || true
  rm -f "$tmp_sy"

  # Step 3: run and time (manual nanosecond timing around the whole command).
  local stdout_file="$case_out_dir/$stem.gcc.stdout"
  local status=0
  local timeout_sec=60
  if [[ "$sy_file" == *"/performance/"* || "$sy_file" == *"/h_performance/"* ]]; then
    timeout_sec=300
  fi
  local run_start_ns run_end_ns run_elapsed_ns
  run_start_ns=$(now_ns)
  # The program's exit status is data, not an error: suspend errexit.
  set +e
  if [[ -f "$stdin_file" ]]; then
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
      < "$stdin_file" > "$stdout_file" 2>/dev/null
  else
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
      > "$stdout_file" 2>/dev/null
  fi
  status=$?
  run_end_ns=$(now_ns)
  run_elapsed_ns=$((run_end_ns - run_start_ns))
  set -e

  # The executable is no longer needed once the run has finished.
  rm -f "$gcc_elf"

  if [[ $status -eq 124 ]]; then
    printf '%b TIMEOUT %s (>%ds)%b\n' "$YELLOW" "$rel" "$timeout_sec" "$NC"
    rm -f "$stdout_file"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 4: save the output file: program stdout, a newline if the output
  # did not end with one, then the exit status on its own line.
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$gcc_out"
  rm -f "$stdout_file"

  # Step 5: convert to seconds (five decimals) and write the TSV row.
  local elapsed
  elapsed=$(awk -v ns="$run_elapsed_ns" 'BEGIN { printf "%.5f", ns / 1000000000 }')

  # Replace any previous row for this key. The first column is compared for
  # exact equality so rows of other cases whose key merely contains this one
  # are never dropped.
  if [[ -f "$TIMING_TSV" ]]; then
    local tsv_tmp="$TIMING_TSV.tmp"
    awk -F'\t' -v s="$case_key" '$1 != s' "$TIMING_TSV" > "$tsv_tmp"
    mv -- "$tsv_tmp" "$TIMING_TSV"
  fi
  printf '%s\t%s\n' "$case_key" "$elapsed" >> "$TIMING_TSV"

  local case_end_ns duration_ns
  case_end_ns=$(now_ns)
  duration_ns=$((case_end_ns - case_start_ns))
  printf '%b DONE %s gcc=%ss [%s]%b\n' \
    "$GREEN" "$rel" "$elapsed" "$(format_duration_ns "$duration_ns")" "$NC"
  PASS=$((PASS + 1))
}
# ---------- Initialisation ----------
if [[ "$UPDATE" == true ]]; then
  printf '%b[--update] Clearing all existing baseline data.%b\n' "$YELLOW" "$NC"
  : > "$TIMING_TSV"
  # Per-case artifacts are stored in sub-directories that mirror the test
  # tree, so the cleanup has to descend into them (no -maxdepth limit).
  find "$BASELINE_DIR" -type f \
    \( -name '*.gcc.s' -o -name '*.gcc.out' -o -name '*.gcc.time' -o -name '*.gcc.err' \) \
    -delete 2>/dev/null || true
else
  [[ -f "$TIMING_TSV" ]] || : > "$TIMING_TSV"
fi

printf '%bBaseline directory : %s%b\n' "$CYAN" "$BASELINE_DIR" "$NC"
printf '%bTiming TSV : %s%b\n' "$CYAN" "$TIMING_TSV" "$NC"
if [[ "$UPDATE" == false && -f "$TIMING_TSV" ]]; then
  _cached_count=$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)
  if [[ $_cached_count -gt 0 ]]; then
    printf 'Found %d cached entries (use --update to recompute all).\n' "$_cached_count"
  fi
fi

# ---------- Pre-compile sylib.o (C mode, once) ----------
# Compiler diagnostics are left on stderr so a build failure can be diagnosed.
SYLIB_OBJ="$BASELINE_DIR/sylib.o"
if ! aarch64-linux-gnu-gcc -O2 -c -x c "$REPO_ROOT/sylib/sylib.c" \
  -I "$REPO_ROOT/sylib" -o "$SYLIB_OBJ"; then
  printf '%bERROR: failed to compile sylib.c%b\n' "$RED" "$NC" >&2
  exit 1
fi
printf 'sylib.o compiled : %s\n' "$SYLIB_OBJ"
printf '\n'
TOTAL_START_NS=$(now_ns)

# ---------- Run ----------
# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array.
# Under `set -u`, bash releases before 4.4 treat a plain "${arr[@]}" of an
# empty array as an unbound variable and abort.
for sy_file in ${TEST_FILES[@]+"${TEST_FILES[@]}"}; do
  process_case "$sy_file"
done
for test_dir in ${TEST_DIRS[@]+"${TEST_DIRS[@]}"}; do
  if [[ ! -d "$test_dir" ]]; then
    printf '%b SKIP missing dir: %s%b\n' "$YELLOW" "$test_dir" "$NC"
    continue
  fi
  # Only .sy files directly inside the directory are processed, in sorted
  # order.
  while IFS= read -r -d '' sy_file; do
    process_case "$sy_file"
  done < <(find "$test_dir" -maxdepth 1 -type f -name '*.sy' -print0 | sort -z)
done

# ---------- Summary ----------
TOTAL_END_NS=$(now_ns)
TOTAL_ELAPSED_NS=$((TOTAL_END_NS - TOTAL_START_NS))
TOTAL_CASES=$((PASS + SKIP + FAIL))
printf '\n'
printf 'Summary: %d DONE / %d SKIP (cached) / %d FAIL / total %d\n' \
  "$PASS" "$SKIP" "$FAIL" "$TOTAL_CASES"
printf 'Total elapsed : %s\n' "$(format_duration_ns "$TOTAL_ELAPSED_NS")"
printf 'Timing TSV : %s (%d entries)\n' \
  "$TIMING_TSV" "$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)"

# The script's exit status reflects whether any case failed.
[[ $FAIL -eq 0 ]]

@ -4,8 +4,8 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
if [[ $# -lt 1 || $# -gt 3 ]]; then
echo "usage: $0 input.sy [output_dir] [--run]" >&2
if [[ $# -lt 1 || $# -gt 5 ]]; then
echo "usage: $0 input.sy [output_dir] [--run] [--timing-out file]" >&2
exit 1
fi
@ -13,6 +13,11 @@ input=$1
out_dir="$REPO_ROOT/test/test_result/asm"
run_exec=false
input_dir=$(dirname "$input")
timing_out=""
_compile_ns=0
_run_ns=0
now_ns() { date +%s%N; }
shift
while [[ $# -gt 0 ]]; do
@ -20,6 +25,10 @@ while [[ $# -gt 0 ]]; do
--run)
run_exec=true
;;
--timing-out)
timing_out="$2"
shift
;;
*)
out_dir="$1"
;;
@ -57,11 +66,13 @@ exe="$out_dir/$stem"
stdin_file="$input_dir/$stem.in"
expected_file="$input_dir/$stem.out"
_compile_start_ns=$(now_ns)
"$compiler" --emit-asm "$input" > "$asm_file"
echo "asm generated: $asm_file"
aarch64-linux-gnu-gcc "$asm_file" "$REPO_ROOT/sylib/sylib.c" -O2 -o "$exe"
echo "executable generated: $exe"
_compile_ns=$(($(now_ns) - _compile_start_ns))
if [[ "$run_exec" == true ]]; then
if ! command -v qemu-aarch64 >/dev/null 2>&1; then
@ -77,6 +88,7 @@ if [[ "$run_exec" == true ]]; then
fi
set +e
_run_start_ns=$(now_ns)
if command -v timeout >/dev/null 2>&1; then
if [[ -f "$stdin_file" ]]; then
timeout "$timeout_sec" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file"
@ -91,6 +103,7 @@ if [[ "$run_exec" == true ]]; then
fi
fi
status=$?
_run_ns=$(($(now_ns) - _run_start_ns))
set -e
if [[ $status -eq 124 ]]; then
@ -122,3 +135,7 @@ if [[ "$run_exec" == true ]]; then
echo "expected output not found, skipped diff: $expected_file"
fi
fi
if [[ -n "$timing_out" ]]; then
printf 'compile_ns=%s\nrun_ns=%s\n' "$_compile_ns" "$_run_ns" > "$timing_out"
fi

Loading…
Cancel
Save