diff --git a/scripts/analyze_case.sh b/scripts/analyze_case.sh new file mode 100755 index 0000000..0af0ef5 --- /dev/null +++ b/scripts/analyze_case.sh @@ -0,0 +1,315 @@ +#!/usr/bin/env bash +# analyze_case.sh — 单个 .sy 测试用例的全流程编译 + IR/汇编保存脚本 +# 用于深度分析单个样例与 GCC 基线之间的差距。 +# +# 用法: +# analyze_case.sh [output_dir] +# +# 输出目录(默认 output/analyze/_)中包含: +# .ll — 我方编译器输出的 LLVM IR +# .s — 我方编译器输出的 AArch64 汇编 +# .elf — 我方编译链接后的可执行文件 +# .gcc.s — GCC -O2 输出的 AArch64 汇编 +# .gcc.elf — GCC -O2 链接后的可执行文件 +# .our.time — 我方程序运行耗时(秒) +# .gcc.time — GCC 程序运行耗时(秒) +# .our.out — 我方程序实际输出 +# .gcc.out — GCC 程序实际输出 +# .diff — 输出 diff(若有差异) +# report.txt — 汇总报告(IR 行数、汇编行数、耗时、加速比) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +BOLD='\033[1m' +NC='\033[0m' + +# ---------- 参数解析 ---------- + +if [[ $# -lt 1 || $# -gt 2 ]]; then + printf 'usage: %s [output_dir]\n' "$0" >&2 + exit 1 +fi + +INPUT="$1" +if [[ ! 
-f "$INPUT" ]]; then + printf 'input file not found: %s\n' "$INPUT" >&2 + exit 1 +fi + +BASE="$(basename "$INPUT")" +STEM="${BASE%.sy}" +INPUT_DIR="$(dirname "$(realpath "$INPUT")")" +TIMESTAMP="$(date +%Y%m%d_%H%M%S)" + +if [[ $# -ge 2 ]]; then + OUT_DIR="$2" +else + OUT_DIR="$REPO_ROOT/output/analyze/${STEM}_${TIMESTAMP}" +fi + +mkdir -p "$OUT_DIR" + +REPORT="$OUT_DIR/report.txt" +: > "$REPORT" + +rpt() { + printf '%s\n' "$*" | tee -a "$REPORT" +} + +rpt_color() { + local color="$1"; shift + printf '%b%s%b\n' "$color" "$*" "$NC" + printf '%s\n' "$*" >> "$REPORT" +} + +rpt "============================================================" +rpt " analyze_case report" +rpt " case : $STEM" +rpt " source : $INPUT" +rpt " output : $OUT_DIR" +rpt " date : $(date)" +rpt "============================================================" +rpt "" + +# ---------- 查找编译器 ---------- + +COMPILER="" +for candidate in \ + "$REPO_ROOT/build_lab3/bin/compiler" \ + "$REPO_ROOT/build_lab2/bin/compiler" \ + "$REPO_ROOT/build/bin/compiler"; do + if [[ -x "$candidate" ]]; then + COMPILER="$candidate" + break + fi +done + +if [[ -z "$COMPILER" ]]; then + rpt_color "$RED" "ERROR: compiler not found. Build first:" + rpt " cmake -S $REPO_ROOT -B $REPO_ROOT/build_lab3 && cmake --build $REPO_ROOT/build_lab3 -j" + exit 1 +fi +rpt "compiler : $COMPILER" + +# ---------- 工具检查 ---------- + +for tool in aarch64-linux-gnu-gcc qemu-aarch64; do + if ! command -v "$tool" >/dev/null 2>&1; then + rpt_color "$RED" "ERROR: required tool not found: $tool" + exit 1 + fi +done + +STDIN_FILE="$INPUT_DIR/$STEM.in" +EXPECTED_FILE="$INPUT_DIR/$STEM.out" + +# ---------- 1. 
生成 IR ---------- + +rpt "" +rpt "--- [1/5] Generating LLVM IR ---" +IR_FILE="$OUT_DIR/$STEM.ll" +if "$COMPILER" --emit-ir "$INPUT" > "$IR_FILE" 2>"$OUT_DIR/$STEM.ir.err"; then + IR_LINES=$(wc -l < "$IR_FILE") + rpt_color "$GREEN" "IR generated: $IR_FILE ($IR_LINES lines)" +else + rpt_color "$RED" "ERROR: IR generation failed" + cat "$OUT_DIR/$STEM.ir.err" >&2 + exit 1 +fi + +# ---------- 2. 生成我方汇编并链接 ---------- + +rpt "" +rpt "--- [2/5] Generating our ASM & linking ---" +OUR_ASM="$OUT_DIR/$STEM.s" +OUR_ELF="$OUT_DIR/$STEM.elf" +if "$COMPILER" --emit-asm "$INPUT" > "$OUR_ASM" 2>"$OUT_DIR/$STEM.asm.err"; then + OUR_ASM_LINES=$(wc -l < "$OUR_ASM") + rpt_color "$GREEN" "ASM generated: $OUR_ASM ($OUR_ASM_LINES lines)" +else + rpt_color "$RED" "ERROR: ASM generation failed" + cat "$OUT_DIR/$STEM.asm.err" >&2 + exit 1 +fi + +if aarch64-linux-gnu-gcc "$OUR_ASM" "$REPO_ROOT/sylib/sylib.c" -O2 \ + -I "$REPO_ROOT/sylib" -lm -o "$OUR_ELF" 2>"$OUT_DIR/$STEM.link.err"; then + rpt_color "$GREEN" "Linked: $OUR_ELF" +else + rpt_color "$RED" "ERROR: link failed" + cat "$OUT_DIR/$STEM.link.err" >&2 + exit 1 +fi + +# ---------- 3. 
# ---------- 3. GCC -O2 baseline (read from pre-computed data) ----------

#######################################
# Load the pre-computed GCC -O2 baseline for the case being analyzed.
#
# run_baseline.sh keys gcc_timing.tsv by the case path relative to the repo
# with the "test/" prefix and ".sy" suffix removed
# (e.g. class_test_case/h_functional/11_BST) and stores <stem>.gcc.s /
# <stem>.gcc.out under a directory tree mirroring that key. The lookup here
# uses the same key; the bare stem and the flat baseline directory are kept
# as a fallback for data written in the older flat layout.
#
# Globals read:    REPO_ROOT, INPUT, STEM, OUT_DIR, GREEN, YELLOW
# Globals written: BASELINE_DATA_DIR, BASELINE_TSV_PATH, GCC_ASM, GCC_OUT,
#                  GCC_OK, GCC_ASM_LINES, GCC_ELAPSED_RAW
# Outputs:         progress lines via rpt / rpt_color
# Returns:         0 always (a missing baseline is only a warning)
#######################################
load_gcc_baseline() {
  BASELINE_DATA_DIR="$REPO_ROOT/output/baseline"
  BASELINE_TSV_PATH="$BASELINE_DATA_DIR/gcc_timing.tsv"
  GCC_ASM="$OUT_DIR/$STEM.gcc.s"
  GCC_OUT="$OUT_DIR/$STEM.gcc.out"
  GCC_OK=false
  GCC_ASM_LINES=0
  GCC_ELAPSED_RAW=""   # seconds, without the trailing "s"

  rpt ""
  rpt "--- [3/5] GCC -O2 baseline (reading from pre-computed data) ---"

  if [[ ! -f "$BASELINE_TSV_PATH" ]]; then
    rpt_color "$YELLOW" "WARNING: baseline data not found: $BASELINE_TSV_PATH"
    rpt " Run: scripts/run_baseline.sh"
    rpt " to pre-compute GCC -O2 baseline for all test cases."
    return 0
  fi

  # Same key derivation as run_baseline.sh / lab3_build_test.sh.
  local rel case_key mirror_dir
  rel="$(realpath --relative-to="$REPO_ROOT" "$INPUT" 2>/dev/null || true)"
  case_key="${rel#test/}"
  case_key="${case_key%.sy}"
  mirror_dir="$BASELINE_DATA_DIR/$(dirname "$case_key")"

  # Prefer the row whose first field equals the full case key; fall back to
  # a row keyed by the bare stem. The last matching row wins, as before.
  GCC_ELAPSED_RAW=$(awk -F'\t' -v k="$case_key" -v s="$STEM" '
    k != "" && $1 == k { exact = $2 }
    $1 == s            { legacy = $2 }
    END {
      if (exact != "") print exact
      else if (legacy != "") print legacy
    }' "$BASELINE_TSV_PATH" 2>/dev/null || true)

  if [[ -n "$GCC_ELAPSED_RAW" ]]; then
    GCC_OK=true
    rpt_color "$GREEN" "baseline timing: ${GCC_ELAPSED_RAW}s"
  else
    rpt_color "$YELLOW" "WARNING: no baseline entry for '${case_key:-$STEM}'"
    rpt " Run: scripts/run_baseline.sh"
  fi

  # Locate the saved artifacts: mirrored directory first, flat layout second.
  local dir asm_src="" out_src=""
  for dir in "$mirror_dir" "$BASELINE_DATA_DIR"; do
    if [[ -z "$asm_src" && -f "$dir/$STEM.gcc.s" ]]; then
      asm_src="$dir/$STEM.gcc.s"
    fi
    if [[ -z "$out_src" && -f "$dir/$STEM.gcc.out" ]]; then
      out_src="$dir/$STEM.gcc.out"
    fi
  done

  # Copy the GCC assembly next to our own output for side-by-side comparison.
  if [[ -n "$asm_src" ]]; then
    cp -- "$asm_src" "$GCC_ASM"
    GCC_ASM_LINES=$(wc -l < "$GCC_ASM")
    rpt "GCC ASM: $GCC_ASM ($GCC_ASM_LINES lines)"
  else
    rpt_color "$YELLOW" "GCC ASM not found in baseline dir"
  fi

  # Copy the GCC program output (used by the step-5 diff).
  if [[ -n "$out_src" ]]; then
    cp -- "$out_src" "$GCC_OUT"
    rpt "GCC output: $GCC_OUT"
  fi
  return 0
}

load_gcc_baseline
# ---------- 4. Run & time (our compiler only) ----------

#######################################
# Run an AArch64 executable under qemu-aarch64, wrapped in /usr/bin/time and
# timeout, and record its output and wall-clock time.
#
# Globals:
#   STDIN_FILE   (read) fed to the program on stdin when the file exists
#   GREEN/YELLOW (read) colors handed to rpt_color
# Arguments:
#   $1 - label used in status messages
#   $2 - path of the executable
#   $3 - output file; receives the program's stdout followed by its exit
#        status on a line of its own (same layout verify_asm.sh produces)
#   $4 - file /usr/bin/time writes the elapsed seconds into
#   $5 - timeout in seconds (default 60)
# Outputs:
#   stdout: ONLY the elapsed value ("<seconds>s", "?s" or "timeout"), so that
#           callers can capture it with $(run_and_time ...)
#   stderr: the colored status line (rpt_color still appends it to the report)
# Returns:
#   0 always; the program's own exit status is recorded in $3
#######################################
run_and_time() {
  local label="$1"
  local exe="$2"
  local out_file="$3"
  local time_file="$4"
  local timeout_sec="${5:-60}"
  local stdout_file="$out_file.raw"
  local status=0

  # "|| status=$?" captures the exit code without tripping errexit and
  # without toggling the caller's shell options.
  if [[ -f "${STDIN_FILE:-}" ]]; then
    timeout "$timeout_sec" \
      /usr/bin/time -f "%e" -o "$time_file" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" \
      < "$STDIN_FILE" > "$stdout_file" 2>/dev/null || status=$?
  else
    timeout "$timeout_sec" \
      /usr/bin/time -f "%e" -o "$time_file" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" \
      > "$stdout_file" 2>/dev/null || status=$?
  fi

  # Merge stdout + exit code into the .out file; add a newline first when the
  # program's output does not already end with one.
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$out_file"
  rm -f "$stdout_file"

  local elapsed="timeout"
  if (( status != 124 )); then
    # GNU time prepends "Command exited with non-zero status N" when the
    # program fails; the %e value is always the last line of the file.
    elapsed="$(tail -n 1 "$time_file" 2>/dev/null || true)"
    elapsed="${elapsed:-?}s"
  fi

  # Status goes to stderr: stdout is reserved for the value the caller captures.
  if (( status == 124 )); then
    rpt_color "$YELLOW" "$label: TIMEOUT (>${timeout_sec}s)" >&2
  elif (( status != 0 )); then
    rpt_color "$YELLOW" "$label: exit $status elapsed=${elapsed}" >&2
  else
    rpt_color "$GREEN" "$label: OK elapsed=${elapsed}" >&2
  fi
  printf '%s\n' "$elapsed"
}
输出对比 ---------- + +rpt "" +rpt "--- [5/5] Output comparison ---" + +normalize_out() { + awk '{ sub(/\r$/, ""); print }' "$1" +} + +if [[ -f "$EXPECTED_FILE" ]]; then + DIFF_FILE="$OUT_DIR/$STEM.diff" + if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$OUR_OUT") > "$DIFF_FILE" 2>&1; then + rpt_color "$GREEN" "our output: MATCH expected" + rm -f "$DIFF_FILE" + else + rpt_color "$RED" "our output: MISMATCH — diff saved to $DIFF_FILE" + fi + if [[ "$GCC_OK" == true && -f "$GCC_OUT" ]]; then + GCC_DIFF_FILE="$OUT_DIR/$STEM.gcc.diff" + if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$GCC_OUT") > "$GCC_DIFF_FILE" 2>&1; then + rpt_color "$GREEN" "gcc output: MATCH expected" + rm -f "$GCC_DIFF_FILE" + else + rpt_color "$YELLOW" "gcc output: MISMATCH — diff saved to $GCC_DIFF_FILE" + fi + fi +else + rpt_color "$YELLOW" "no expected output file found, skipping diff" +fi + +# ---------- 汇总报告 ---------- + +rpt "" +rpt "============================================================" +rpt_color "$BOLD" " Summary" +rpt "============================================================" +rpt "$(printf '%-20s %s' 'IR lines:' "$IR_LINES")" +rpt "$(printf '%-20s %s' 'Our ASM lines:' "$OUR_ASM_LINES")" +if [[ "$GCC_OK" == true && $GCC_ASM_LINES -gt 0 ]]; then + rpt "$(printf '%-20s %s' 'GCC ASM lines:' "$GCC_ASM_LINES")" + rpt "$(printf '%-20s %s' 'ASM ratio (ours/gcc):' \ + "$(awk "BEGIN{if($GCC_ASM_LINES>0) printf \"%.2f\", $OUR_ASM_LINES/$GCC_ASM_LINES; else print \"N/A\"}")")" +fi +rpt "$(printf '%-20s %s' 'Our time:' "$OUR_ELAPSED")" +rpt "$(printf '%-20s %s' 'GCC time:' "$GCC_ELAPSED")" +if [[ "$GCC_ELAPSED" != "N/A" && "$GCC_ELAPSED" != "timeout" && "$OUR_ELAPSED" != "timeout" ]]; then + OUR_S="${OUR_ELAPSED%s}" + GCC_S="${GCC_ELAPSED%s}" + SPEEDUP=$(awk "BEGIN{if($OUR_S>0) printf \"%.3f\", $GCC_S/$OUR_S; else print \"inf\"}") + rpt "$(printf '%-20s %sx' 'Speedup (gcc/ours):' "$SPEEDUP")" +fi +rpt "" +rpt "Output directory: $OUT_DIR" +rpt 
#!/usr/bin/env bash
# clean_outputs.sh — remove build outputs and accumulated log/analysis files
#
# Usage:
#   clean_outputs.sh [options]
#
# Options:
#   --logs         remove the directories found two levels below output/logs/
#                  (files directly under output/logs/ are not touched)
#   --analyze      remove everything directly under output/analyze/
#   --build        remove the build_lab*/ directories in the repo root
#   --test-result  remove test/test_result/
#   --all          all of the above
#   --dry-run      only list what would be deleted
#   --yes, -y      skip the confirmation prompt
#   --help, -h     show this help
#
# Without any option the script asks interactively what to clean.
# An unknown option is a hard error: silently ignoring e.g. a misspelled
# --dry-run in front of --yes would delete data the user meant to keep.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

DO_LOGS=false
DO_ANALYZE=false
DO_BUILD=false
DO_TEST_RESULT=false
DRY_RUN=false
AUTO_YES=false

# Print the option summary to stdout.
usage() {
  printf 'usage: %s [--logs] [--analyze] [--build] [--test-result] [--all] [--dry-run] [--yes|-y] [--help|-h]\n' \
    "${0##*/}"
  printf 'run without arguments for interactive mode.\n'
}

if [[ $# -eq 0 ]]; then
  # Interactive mode
  printf '%bclean_outputs.sh — interactive mode%b\n' "$CYAN" "$NC"
  printf 'Select what to clean (space-separated numbers, e.g. "1 3"):\n'
  printf ' 1) output/logs/ — run logs\n'
  printf ' 2) output/analyze/ — single-case analysis results\n'
  printf ' 3) build_lab*/ — CMake build directories\n'
  printf ' 4) test/test_result/ — test artifacts\n'
  printf ' 0) cancel\n'
  read -r -p 'choice: ' choices
  # Intentionally unquoted: the answer is a space-separated list.
  for c in $choices; do
    case "$c" in
      1) DO_LOGS=true ;;
      2) DO_ANALYZE=true ;;
      3) DO_BUILD=true ;;
      4) DO_TEST_RESULT=true ;;
      0) printf 'cancelled.\n'; exit 0 ;;
      *) printf '%bunknown option: %s (ignored)%b\n' "$YELLOW" "$c" "$NC" ;;
    esac
  done
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    --logs) DO_LOGS=true ;;
    --analyze) DO_ANALYZE=true ;;
    --build) DO_BUILD=true ;;
    --test-result) DO_TEST_RESULT=true ;;
    --all) DO_LOGS=true; DO_ANALYZE=true; DO_BUILD=true; DO_TEST_RESULT=true ;;
    --dry-run) DRY_RUN=true ;;
    --yes|-y) AUTO_YES=true ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      # Refuse to continue: a typo must never turn a dry run into a deletion.
      printf '%bunknown option: %s%b\n' "$RED" "$1" "$NC" >&2
      usage >&2
      exit 2
      ;;
  esac
  shift
done

if [[ "$DO_LOGS" == false && "$DO_ANALYZE" == false && \
      "$DO_BUILD" == false && "$DO_TEST_RESULT" == false ]]; then
  printf 'nothing selected. use --help or run without arguments for interactive mode.\n' >&2
  exit 0
fi

# ---------- Collect the paths to delete ----------

declare -a TARGETS=()

if [[ "$DO_LOGS" == true ]]; then
  LOG_ROOT="$REPO_ROOT/output/logs"
  if [[ -d "$LOG_ROOT" ]]; then
    # Only directories exactly two levels below output/logs/ are selected.
    while IFS= read -r -d '' d; do
      TARGETS+=("$d")
    done < <(find "$LOG_ROOT" -mindepth 2 -maxdepth 2 -type d -print0 2>/dev/null)
  fi
fi

if [[ "$DO_ANALYZE" == true ]]; then
  ANALYZE_ROOT="$REPO_ROOT/output/analyze"
  if [[ -d "$ANALYZE_ROOT" ]]; then
    while IFS= read -r -d '' d; do
      TARGETS+=("$d")
    done < <(find "$ANALYZE_ROOT" -mindepth 1 -maxdepth 1 -print0 2>/dev/null)
  fi
fi

if [[ "$DO_BUILD" == true ]]; then
  while IFS= read -r -d '' d; do
    TARGETS+=("$d")
  done < <(find "$REPO_ROOT" -maxdepth 1 -type d -name 'build_lab*' -print0 2>/dev/null)
fi

if [[ "$DO_TEST_RESULT" == true ]]; then
  TR_ROOT="$REPO_ROOT/test/test_result"
  if [[ -d "$TR_ROOT" ]]; then
    TARGETS+=("$TR_ROOT")
  fi
fi

if [[ ${#TARGETS[@]} -eq 0 ]]; then
  printf '%bNothing to clean — target directories are already empty or do not exist.%b\n' "$GREEN" "$NC"
  exit 0
fi

# ---------- Print the list ----------

if [[ "$DRY_RUN" == true ]]; then
  ACTION="listed (dry-run)"
else
  ACTION="DELETED"
fi
printf '\n%bThe following will be %s:%b\n' "$YELLOW" "$ACTION" "$NC"
for t in "${TARGETS[@]}"; do
  # Size is informational only; fall back to "?" when du cannot read the path.
  SIZE="$(du -sh -- "$t" 2>/dev/null | cut -f1)" || SIZE="?"
  [[ -n "$SIZE" ]] || SIZE="?"
  printf ' [%s] %s\n' "$SIZE" "$t"
done
printf '\n'

if [[ "$DRY_RUN" == true ]]; then
  printf '%bDry-run mode: nothing deleted.%b\n' "$CYAN" "$NC"
  exit 0
fi

# ---------- Confirm ----------

if [[ "$AUTO_YES" == false ]]; then
  read -r -p "Proceed with deletion? [y/N] " confirm
  case "$confirm" in
    [yY][eE][sS]|[yY]) ;;
    *)
      printf 'cancelled.\n'
      exit 0
      ;;
  esac
fi

# ---------- Delete ----------

DELETED=0
ERRORS=0
for t in "${TARGETS[@]}"; do
  # "--" ends option parsing; ":?" aborts instead of expanding to an empty path.
  if rm -rf -- "${t:?}" 2>/dev/null; then
    printf '%b deleted: %s%b\n' "$GREEN" "$t" "$NC"
    DELETED=$((DELETED + 1))
  else
    printf '%b ERROR deleting: %s%b\n' "$RED" "$t" "$NC"
    ERRORS=$((ERRORS + 1))
  fi
done

printf '\n'
if [[ $ERRORS -eq 0 ]]; then
  printf '%bDone. %d item(s) deleted.%b\n' "$GREEN" "$DELETED" "$NC"
else
  printf '%bDone. %d deleted, %d errors.%b\n' "$YELLOW" "$DELETED" "$ERRORS" "$NC"
  exit 1
fi
safe_name="${rel//\//_}" local case_key="${safe_name%.sy}" local tmp_dir="$RUN_DIR/.tmp/$case_key" @@ -132,7 +158,10 @@ test_one() { cleanup_tmp_dir "$tmp_dir" mkdir -p "$tmp_dir" - if "$VERIFY_SCRIPT" "$sy_file" "$tmp_dir" --run > "$case_log" 2>&1; then + local verify_args=("$sy_file" "$tmp_dir" --run) + [[ -n "$timing_out" ]] && verify_args+=(--timing-out "$timing_out") + + if "$VERIFY_SCRIPT" "${verify_args[@]}" > "$case_log" 2>&1; then cleanup_tmp_dir "$tmp_dir" return 0 fi @@ -156,19 +185,35 @@ run_case() { local sy_file="$1" local rel local case_start_ns - local case_end_ns - local case_elapsed_ns rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")" case_start_ns=$(now_ns) - if test_one "$sy_file" "$rel"; then - case_end_ns=$(now_ns) - case_elapsed_ns=$((case_end_ns - case_start_ns)) - log_color "$GREEN" "PASS $rel [$(format_duration_ns "$case_elapsed_ns")]" + local base stem case_key + base="$(basename "$sy_file")" + stem="${base%.sy}" + # 与 run_baseline.sh 保持一致:去掉 test/ 前缀和 .sy 后缀 + case_key="${rel#test/}" + case_key="${case_key%.sy}" + + local timing_file + timing_file="$(mktemp)" + + if test_one "$sy_file" "$rel" "$timing_file"; then + local compile_ns=0 run_ns=0 + if [[ -f "$timing_file" ]]; then + compile_ns=$(grep '^compile_ns=' "$timing_file" | cut -d= -f2 || echo 0) + run_ns=$(grep '^run_ns=' "$timing_file" | cut -d= -f2 || echo 0) + fi + rm -f "$timing_file" + log_color "$GREEN" "PASS $rel [compile=$(format_duration_ns "$compile_ns") run=$(format_duration_ns "$run_ns")]" PASS=$((PASS + 1)) + + local gcc_s + gcc_s=$(lookup_gcc_s "$case_key") + record_timing "$case_key" "$run_ns" "$gcc_s" else - case_end_ns=$(now_ns) - case_elapsed_ns=$((case_end_ns - case_start_ns)) + rm -f "$timing_file" + local case_elapsed_ns=$(( $(now_ns) - case_start_ns )) log_color "$RED" "FAIL $rel [$(format_duration_ns "$case_elapsed_ns")]" FAIL=$((FAIL + 1)) FAIL_LIST+=("$rel") @@ -176,6 +221,7 @@ run_case() { } TOTAL_START_NS=$(now_ns) +: > "$TIMING_TSV" if [[ 
"$FAILED_ONLY" == true ]]; then if [[ -f "$LAST_FAILED_FILE" ]]; then @@ -209,6 +255,11 @@ fi if [[ "$FALLBACK_TO_FULL" == true ]]; then log_color "$YELLOW" "No cached failed cases found, fallback to full suite." fi +if [[ -f "$BASELINE_TSV" ]]; then + log_plain "Baseline TSV: $BASELINE_TSV (speedup ratios will be computed)" +else + log_color "$CYAN" "Tip: run scripts/run_baseline.sh first to enable GCC -O2 speedup analysis." +fi if [[ ! -f "$VERIFY_SCRIPT" ]]; then log_color "$RED" "missing verify script: $VERIFY_SCRIPT" @@ -280,6 +331,93 @@ log_plain "summary: ${PASS} PASS / ${FAIL} FAIL / total $((PASS + FAIL))" log_plain "build elapsed: $(format_duration_ns "$BUILD_ELAPSED_NS")" log_plain "validation elapsed: $(format_duration_ns "$VALIDATION_ELAPSED_NS")" log_plain "total elapsed: $(format_duration_ns "$TOTAL_ELAPSED_NS")" + +# ---------- 计时与加速比分析 ---------- + +if [[ -s "$TIMING_TSV" ]]; then + log_plain "" + log_plain "==> Timing & Speedup Analysis" + + # 检查本次结果中是否有任何 GCC 基线数据 + HAS_BASELINE=false + if grep -qv $'\tN/A$' "$TIMING_TSV" 2>/dev/null; then + HAS_BASELINE=true + fi + + if [[ "$HAS_BASELINE" == true ]]; then + + # 将 TSV 展开为含计算值的临时文件(case_key, our_s, gcc_s, speedup) + _tmp_timing="$RUN_DIR/timing_computed.tsv" + while IFS=$'\t' read -r case_key our_ns gcc_s; do + our_s=$(awk "BEGIN{printf \"%.5f\", $our_ns / 1000000000}") + if [[ "$gcc_s" == "N/A" ]]; then + speedup="N/A" + else + speedup=$(awk "BEGIN{if($our_s>0) printf \"%.5f\", $gcc_s/$our_s; else print \"inf\"}") + fi + printf '%s\t%s\t%s\t%s\n' "$case_key" "$our_s" "$gcc_s" "$speedup" + done < "$TIMING_TSV" > "$_tmp_timing" + + # 排序1:加速比升序(N/A 排最后) + log_plain "" + log_plain "--- [Sort 1] Speedup ratio ascending (worst speedup first) ---" + log_plain "$(printf '%-40s %10s %10s %10s' 'case' 'our(s)' 'gcc(s)' 'speedup')" + log_plain "$(printf '%0.s-' {1..76})" + { + grep -v $'\tN/A$' "$_tmp_timing" | sort -t$'\t' -k4 -n || true + grep $'\tN/A$' "$_tmp_timing" | sort -t$'\t' -k1 || true + } | 
while IFS=$'\t' read -r case_key our_s gcc_s speedup; do + disp="${case_key##*/}" + if [[ "$speedup" == "N/A" ]]; then + log_plain "$(printf '%-40s %10s %10s %10s' "$disp" "${our_s}s" "N/A" "N/A")" + else + log_plain "$(printf '%-40s %10s %10s %9sx' "$disp" "${our_s}s" "${gcc_s}s" "$speedup")" + fi + done + + # 排序2:我方总用时降序 + log_plain "" + log_plain "--- [Sort 2] Our elapsed time descending (slowest first) ---" + log_plain "$(printf '%-40s %10s %10s %10s' 'case' 'our(s)' 'gcc(s)' 'speedup')" + log_plain "$(printf '%0.s-' {1..76})" + sort -t$'\t' -k2 -rn "$_tmp_timing" | \ + while IFS=$'\t' read -r case_key our_s gcc_s speedup; do + disp="${case_key##*/}" + if [[ "$speedup" == "N/A" ]]; then + log_plain "$(printf '%-40s %10s %10s %10s' "$disp" "${our_s}s" "N/A" "N/A")" + else + log_plain "$(printf '%-40s %10s %10s %9sx' "$disp" "${our_s}s" "${gcc_s}s" "$speedup")" + fi + done + + rm -f "$_tmp_timing" + + else + # 无基线:只输出总用时降序 + log_plain "" + log_plain "--- [Sort] Our elapsed time descending (slowest first) ---" + log_plain "$(printf '%-40s %10s' 'case' 'our(s)')" + log_plain "$(printf '%0.s-' {1..54})" + while IFS=$'\t' read -r case_key our_ns _; do + our_s=$(awk "BEGIN{printf \"%.5f\", $our_ns / 1000000000}") + printf '%s\t%s\n' "$case_key" "$our_s" + done < "$TIMING_TSV" | \ + sort -t$'\t' -k2 -rn | \ + while IFS=$'\t' read -r case_key our_s; do + disp="${case_key##*/}" + log_plain "$(printf '%-40s %10ss' "$disp" "$our_s")" + done + + log_plain "" + log_color "$CYAN" "Tip: run scripts/run_baseline.sh to compute GCC -O2 baseline for speedup analysis." 
+ fi + + log_plain "" + log_plain "timing data saved to: $TIMING_TSV" +fi + +# ---------- 失败用例列表 ---------- + if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then log_plain "failed cases:" for f in "${FAIL_LIST[@]}"; do diff --git a/scripts/run_baseline.sh b/scripts/run_baseline.sh new file mode 100755 index 0000000..c09ca2b --- /dev/null +++ b/scripts/run_baseline.sh @@ -0,0 +1,326 @@ +#!/usr/bin/env bash +# run_baseline.sh — 批量编译 GCC -O2 基线并保存汇编、输出与运行时间 +# +# 数据统一保存在 output/baseline/: +# gcc_timing.tsv — stemgcc_elapsed_s (所有脚本的共享数据源) +# .gcc.s — GCC -O2 AArch64 汇编(供 analyze_case.sh 对比) +# .gcc.out — GCC 程序实际输出 stdout+exit_code(供 analyze_case.sh 对比) +# +# 用法: +# run_baseline.sh [--update] [test_dir|file ...] +# +# --update 重新计算所有条目(默认跳过 gcc_timing.tsv 中已有的 stem) +# +# 若不指定测试目录/文件,自动扫描 test/test_case 和 test/class_test_case + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +BASELINE_DIR="$REPO_ROOT/output/baseline" +TIMING_TSV="$BASELINE_DIR/gcc_timing.tsv" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +UPDATE=false +TEST_DIRS=() +TEST_FILES=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --update) UPDATE=true ;; + *) + if [[ -f "$1" ]]; then + TEST_FILES+=("$1") + else + TEST_DIRS+=("$1") + fi + ;; + esac + shift +done + +# ---------- 工具检查 ---------- + +for tool in aarch64-linux-gnu-gcc qemu-aarch64; do + if ! command -v "$tool" >/dev/null 2>&1; then + printf '%bERROR: required tool not found: %s%b\n' "$RED" "$tool" "$NC" >&2 + exit 1 + fi +done + +if [[ ! 
# Report whether a baseline row already exists for the given case key.
# The key must equal the first TSV field exactly: a substring/fixed-string
# match would treat "test_case/functional/00_main" as cached as soon as
# "class_test_case/functional/00_main" is present.
# Globals:   TIMING_TSV (read)
# Arguments: $1 - case key
# Returns:   0 when a row with that key exists, non-zero otherwise
stem_is_cached() {
  local key="$1"
  [[ -f "$TIMING_TSV" ]] || return 1
  awk -F'\t' -v s="$key" '$1 == s { found = 1; exit } END { exit !found }' \
    "$TIMING_TSV" 2>/dev/null
}

# Print the cached elapsed seconds of the first row whose key equals $1
# (prints nothing when there is no such row).
stem_cached_time() {
  local key="$1"
  awk -F'\t' -v s="$key" '$1==s{print $2; exit}' "$TIMING_TSV" 2>/dev/null || true
}

# Insert or replace the row "<key>\t<value>" in the timing TSV.
# Only rows whose first field equals the key exactly are replaced.
# Globals:   TIMING_TSV (read, file rewritten)
# Arguments: $1 - case key, $2 - elapsed seconds
_baseline_tsv_put() {
  local key="$1"
  local value="$2"
  local tmp="$TIMING_TSV.tmp"
  if [[ -f "$TIMING_TSV" ]]; then
    awk -F'\t' -v k="$key" '$1 != k' "$TIMING_TSV" > "$tmp"
    mv -- "$tmp" "$TIMING_TSV"
  fi
  printf '%s\t%s\n' "$key" "$value" >> "$TIMING_TSV"
}

# ---------- Test-case discovery ----------

# Emit, NUL-delimited and sorted, the immediate sub-directories of the default
# test roots (test/test_case and test/class_test_case) that exist.
discover_default_test_dirs() {
  local roots=(
    "$REPO_ROOT/test/test_case"
    "$REPO_ROOT/test/class_test_case"
  )
  local root
  for root in "${roots[@]}"; do
    [[ -d "$root" ]] || continue
    find "$root" -mindepth 1 -maxdepth 1 -type d -print0
  done | sort -z
}

if [[ ${#TEST_DIRS[@]} -eq 0 && ${#TEST_FILES[@]} -eq 0 ]]; then
  while IFS= read -r -d '' d; do
    TEST_DIRS+=("$d")
  done < <(discover_default_test_dirs)
fi

# ---------- Timing helpers ----------

# Current time in nanoseconds since the epoch (relies on date's %N).
now_ns() { date +%s%N; }

# Format a nanosecond duration as "<sec>.<5 digits>s" (10 µs resolution).
format_duration_ns() {
  local ns="$1"
  printf '%d.%05ds' "$((ns / 1000000000))" "$(((ns % 1000000000) / 10000))"
}

# ---------- Process a single case ----------

PASS=0
SKIP=0
FAIL=0

#######################################
# Build one .sy case with GCC -O2, run it under qemu, and store the baseline.
#
# Globals:
#   REPO_ROOT, BASELINE_DIR, TIMING_TSV, SYLIB_OBJ, UPDATE (read)
#   CYAN, RED, YELLOW, GREEN, NC (read)
#   PASS, SKIP, FAIL (incremented)
# Arguments:
#   $1 - path of the .sy source
# Outputs:
#   One status line on stdout; writes <stem>.gcc.s, <stem>.gcc.out and
#   <stem>.gcc.err under $BASELINE_DIR/<dir of case key>/ and a row in
#   $TIMING_TSV.
# Returns:
#   0 always — failures are counted in FAIL so the batch keeps going.
#######################################
process_case() {
  local sy_file="$1"
  local base stem input_dir stdin_file
  base="$(basename "$sy_file")"
  stem="${base%.sy}"
  input_dir="$(dirname "$sy_file")"
  stdin_file="$input_dir/$stem.in"

  local rel
  rel="$(realpath --relative-to="$REPO_ROOT" "$sy_file")"

  # Case key: drop the "test/" prefix and the ".sy" suffix, keep the directories.
  # e.g. test/class_test_case/h_functional/11_BST.sy → class_test_case/h_functional/11_BST
  local case_key
  case_key="${rel#test/}"
  case_key="${case_key%.sy}"

  local case_start_ns
  case_start_ns=$(now_ns)

  # Already have data and no forced update → skip.
  if [[ "$UPDATE" == false ]] && stem_is_cached "$case_key"; then
    printf '%b SKIP %s (cached: %ss)%b\n' \
      "$CYAN" "$rel" "$(stem_cached_time "$case_key")" "$NC"
    SKIP=$((SKIP + 1))
    return 0
  fi

  # The output directory mirrors the source path structure.
  local case_out_dir
  case_out_dir="$BASELINE_DIR/$(dirname "$case_key")"
  mkdir -p "$case_out_dir"

  local gcc_elf gcc_asm gcc_out gcc_err
  gcc_elf="$case_out_dir/$stem.gcc.elf"
  gcc_asm="$case_out_dir/$stem.gcc.s"
  gcc_out="$case_out_dir/$stem.gcc.out"
  gcc_err="$case_out_dir/$stem.gcc.err"

  # Preprocess: turn "const int NAME = EXPR;" into "#define NAME ((int)(EXPR))",
  # including multi-declarator forms (const int A=1, B=2;).
  # Reason: a SysY const int is a compile-time constant, but in C it cannot be
  # used as a global array dimension, whereas a #define can.
  local tmp_sy
  tmp_sy="$(mktemp /tmp/sysy_XXXXXX.c)"
  # Handled explicitly so a failure counts as FAIL and the temp file is
  # removed, instead of errexit aborting the whole batch and leaking it.
  if ! python3 - "$sy_file" "$tmp_sy" << 'PYEOF'
import re, sys
pat = re.compile(
    r'^(\s*)const\s+int\s+((?:[A-Za-z_]\w*\s*=\s*[^,;]+)(?:,\s*[A-Za-z_]\w*\s*=\s*[^,;]+)*)\s*;',
    re.MULTILINE
)
def replace(m):
    indent = m.group(1)
    decls = re.split(r',\s*(?=[A-Za-z_])', m.group(2))
    lines = []
    for d in decls:
        name, _, val = d.partition('=')
        lines.append(f'{indent}#define {name.strip()} ((int)({val.strip()}))')
    return '\n'.join(lines)
with open(sys.argv[1]) as f:
    src = f.read()
with open(sys.argv[2], 'w') as f:
    f.write(pat.sub(replace, src))
PYEOF
  then
    rm -f -- "$tmp_sy"
    printf '%b FAIL %s (preprocessing error)%b\n' "$RED" "$rel" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 1: compile and link in C mode (used for the timed run).
  #   -x c             : lets delete/new/class etc. be plain identifiers
  #   -include sylib.h : injects the SysY runtime declarations (.sy has no #include)
  #   -x none          : back to auto-detection so the pre-built sylib.o links as an object
  if ! aarch64-linux-gnu-gcc -O2 \
      -x c -include "$REPO_ROOT/sylib/sylib.h" \
      -I "$REPO_ROOT/sylib" \
      "$tmp_sy" -x none "$SYLIB_OBJ" \
      -lm -o "$gcc_elf" > "$gcc_err" 2>&1; then
    rm -f -- "$tmp_sy"
    printf '%b FAIL %s (GCC compile error — see %s)%b\n' \
      "$RED" "$rel" "$gcc_err" "$NC"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 2: emit assembly (separate -S pass over the case source only).
  # Best effort: a failure here does not invalidate the timing.
  aarch64-linux-gnu-gcc -O2 \
    -x c -include "$REPO_ROOT/sylib/sylib.h" \
    -I "$REPO_ROOT/sylib" \
    "$tmp_sy" -S -o "$gcc_asm" 2>/dev/null || true

  rm -f -- "$tmp_sy"

  # Step 3: run and time (manual ns timestamps, reported with 5 decimals).
  local stdout_file="$case_out_dir/$stem.gcc.stdout"
  local status=0
  local timeout_sec=60
  if [[ "$sy_file" == *"/performance/"* || "$sy_file" == *"/h_performance/"* ]]; then
    timeout_sec=300
  fi

  local run_start_ns run_end_ns run_elapsed_ns
  run_start_ns=$(now_ns)
  # "|| status=$?" records the exit code without tripping errexit.
  if [[ -f "$stdin_file" ]]; then
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
      < "$stdin_file" > "$stdout_file" 2>/dev/null || status=$?
  else
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$gcc_elf" \
      > "$stdout_file" 2>/dev/null || status=$?
  fi
  run_end_ns=$(now_ns)
  run_elapsed_ns=$((run_end_ns - run_start_ns))

  # The executable is no longer needed once the run has finished.
  rm -f -- "$gcc_elf"

  if [[ $status -eq 124 ]]; then
    printf '%b TIMEOUT %s (>%ds)%b\n' "$YELLOW" "$rel" "$timeout_sec" "$NC"
    rm -f -- "$stdout_file"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Step 4: save stdout + exit code (same layout verify_asm.sh uses).
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$gcc_out"
  rm -f -- "$stdout_file"

  # Step 5: elapsed seconds with 5 decimals, stored in the TSV
  # (replacing any previous row for exactly this case key).
  local elapsed
  elapsed=$(awk -v ns="$run_elapsed_ns" 'BEGIN { printf "%.5f", ns / 1000000000 }')
  _baseline_tsv_put "$case_key" "$elapsed"

  local case_end_ns duration_ns
  case_end_ns=$(now_ns)
  duration_ns=$((case_end_ns - case_start_ns))

  printf '%b DONE %s gcc=%ss [%s]%b\n' \
    "$GREEN" "$rel" "$elapsed" "$(format_duration_ns "$duration_ns")" "$NC"
  PASS=$((PASS + 1))
}
模式,仅一次)---------- + +SYLIB_OBJ="$BASELINE_DIR/sylib.o" +if ! aarch64-linux-gnu-gcc -O2 -c -x c "$REPO_ROOT/sylib/sylib.c" \ + -I "$REPO_ROOT/sylib" -o "$SYLIB_OBJ" 2>/dev/null; then + printf '%bERROR: failed to compile sylib.c%b\n' "$RED" "$NC" >&2 + exit 1 +fi +printf 'sylib.o compiled : %s\n' "$SYLIB_OBJ" + +printf '\n' + +TOTAL_START_NS=$(now_ns) + +# ---------- 运行 ---------- + +for sy_file in "${TEST_FILES[@]}"; do + process_case "$sy_file" +done + +for test_dir in "${TEST_DIRS[@]}"; do + if [[ ! -d "$test_dir" ]]; then + printf '%b SKIP missing dir: %s%b\n' "$YELLOW" "$test_dir" "$NC" + continue + fi + while IFS= read -r -d '' sy_file; do + process_case "$sy_file" + done < <(find "$test_dir" -maxdepth 1 -type f -name '*.sy' -print0 | sort -z) +done + +# ---------- 汇总 ---------- + +TOTAL_END_NS=$(now_ns) +TOTAL_ELAPSED_NS=$((TOTAL_END_NS - TOTAL_START_NS)) + +TOTAL_CASES=$((PASS + SKIP + FAIL)) +printf '\n' +printf 'Summary: %d DONE / %d SKIP (cached) / %d FAIL / total %d\n' \ + "$PASS" "$SKIP" "$FAIL" "$TOTAL_CASES" +printf 'Total elapsed : %s\n' "$(format_duration_ns "$TOTAL_ELAPSED_NS")" +printf 'Timing TSV : %s (%d entries)\n' \ + "$TIMING_TSV" "$(wc -l < "$TIMING_TSV" 2>/dev/null || echo 0)" + +[[ $FAIL -eq 0 ]] diff --git a/scripts/verify_asm.sh b/scripts/verify_asm.sh index c1b589e..7a9e6ce 100755 --- a/scripts/verify_asm.sh +++ b/scripts/verify_asm.sh @@ -4,8 +4,8 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" -if [[ $# -lt 1 || $# -gt 3 ]]; then - echo "usage: $0 input.sy [output_dir] [--run]" >&2 +if [[ $# -lt 1 || $# -gt 5 ]]; then + echo "usage: $0 input.sy [output_dir] [--run] [--timing-out file]" >&2 exit 1 fi @@ -13,6 +13,11 @@ input=$1 out_dir="$REPO_ROOT/test/test_result/asm" run_exec=false input_dir=$(dirname "$input") +timing_out="" +_compile_ns=0 +_run_ns=0 + +now_ns() { date +%s%N; } shift while [[ $# -gt 0 ]]; do @@ -20,6 +25,10 @@ while [[ $# -gt 0 ]]; do --run) run_exec=true ;; + --timing-out) + timing_out="$2" + shift + ;; *) out_dir="$1" ;; @@ -57,11 +66,13 @@ exe="$out_dir/$stem" stdin_file="$input_dir/$stem.in" expected_file="$input_dir/$stem.out" +_compile_start_ns=$(now_ns) "$compiler" --emit-asm "$input" > "$asm_file" echo "asm generated: $asm_file" aarch64-linux-gnu-gcc "$asm_file" "$REPO_ROOT/sylib/sylib.c" -O2 -o "$exe" echo "executable generated: $exe" +_compile_ns=$(($(now_ns) - _compile_start_ns)) if [[ "$run_exec" == true ]]; then if ! command -v qemu-aarch64 >/dev/null 2>&1; then @@ -77,6 +88,7 @@ if [[ "$run_exec" == true ]]; then fi set +e + _run_start_ns=$(now_ns) if command -v timeout >/dev/null 2>&1; then if [[ -f "$stdin_file" ]]; then timeout "$timeout_sec" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$stdout_file" @@ -91,6 +103,7 @@ if [[ "$run_exec" == true ]]; then fi fi status=$? + _run_ns=$(($(now_ns) - _run_start_ns)) set -e if [[ $status -eq 124 ]]; then @@ -122,3 +135,7 @@ if [[ "$run_exec" == true ]]; then echo "expected output not found, skipped diff: $expected_file" fi fi + +if [[ -n "$timing_out" ]]; then + printf 'compile_ns=%s\nrun_ns=%s\n' "$_compile_ns" "$_run_ns" > "$timing_out" +fi