#!/usr/bin/env bash
# analyze_case.sh — full compile pipeline for a single .sy test case, keeping
# the generated IR and assembly so the case can be compared in depth against
# the GCC baseline.
#
# Usage:
#   analyze_case.sh <input.sy> [output_dir]
#
# The output directory (default: output/analyze/<stem>_<timestamp>) contains:
#   <stem>.ll        — LLVM IR emitted by our compiler
#   <stem>.s         — AArch64 assembly emitted by our compiler
#   <stem>.elf       — executable linked from our assembly + sylib
#   <stem>.our.out   — our program's stdout followed by its exit status
#   <stem>.gcc.s     — GCC -O2 assembly, copied from the baseline dir if present
#   <stem>.gcc.out   — GCC program output, copied from the baseline dir if present
#   <stem>.diff      — expected vs. our output (kept only on mismatch)
#   <stem>.gcc.diff  — expected vs. GCC output (kept only on mismatch)
#   <stem>.*.err     — stderr of the IR / ASM / link steps
#   report.txt       — summary (IR lines, ASM lines, timings, speedup)
#
# GCC is not rebuilt or rerun here: its timing is read from
# output/baseline/gcc_timing.tsv (the script's own hints point at
# scripts/run_baseline.sh to generate it).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# ---------- Argument parsing ----------
if [[ $# -lt 1 || $# -gt 2 ]]; then
  printf 'usage: %s <input.sy> [output_dir]\n' "$0" >&2
  exit 1
fi

INPUT="$1"
if [[ ! -f "$INPUT" ]]; then
  printf 'input file not found: %s\n' "$INPUT" >&2
  exit 1
fi

BASE="$(basename "$INPUT")"
STEM="${BASE%.sy}"
INPUT_ABS="$(realpath "$INPUT")"
INPUT_DIR="$(dirname "$INPUT_ABS")"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"

# Path key in the same form as run_baseline.sh: strip the test/ prefix and
# the .sy suffix from the repo-relative path.
REL="$(realpath --relative-to="$REPO_ROOT" "$INPUT" 2>/dev/null || echo "$INPUT")"
CASE_KEY="${REL#test/}"
CASE_KEY="${CASE_KEY%.sy}"

if [[ $# -ge 2 ]]; then
  OUT_DIR="$2"
else
  OUT_DIR="$REPO_ROOT/output/analyze/${STEM}_${TIMESTAMP}"
fi
mkdir -p "$OUT_DIR"

REPORT="$OUT_DIR/report.txt"
: > "$REPORT"

# Print a line to stdout and append it to the report.
rpt() {
  printf '%s\n' "$*" | tee -a "$REPORT"
}

# Print a colored line to stdout; append the same text, uncolored, to the report.
#   $1   — color escape sequence
#   $2.. — message
rpt_color() {
  local color="$1"
  shift
  printf '%b%s%b\n' "$color" "$*" "$NC"
  printf '%s\n' "$*" >> "$REPORT"
}

rpt "============================================================"
rpt " analyze_case report"
rpt " case : $STEM"
rpt " source : $INPUT"
rpt " output : $OUT_DIR"
rpt " date : $(date)"
rpt "============================================================"
rpt ""

# ---------- Locate the compiler ----------
COMPILER=""
for candidate in \
  "$REPO_ROOT/build_lab3/bin/compiler" \
  "$REPO_ROOT/build_lab2/bin/compiler" \
  "$REPO_ROOT/build/bin/compiler"; do
  if [[ -x "$candidate" ]]; then
    COMPILER="$candidate"
    break
  fi
done

if [[ -z "$COMPILER" ]]; then
  rpt_color "$RED" "ERROR: compiler not found. Build first:"
  rpt " cmake -S $REPO_ROOT -B $REPO_ROOT/build_lab3 && cmake --build $REPO_ROOT/build_lab3 -j"
  exit 1
fi
rpt "compiler : $COMPILER"

# ---------- Tool check ----------
# `timeout` is included because run_and_time depends on it.
for tool in aarch64-linux-gnu-gcc qemu-aarch64 timeout; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    rpt_color "$RED" "ERROR: required tool not found: $tool"
    exit 1
  fi
done

STDIN_FILE="$INPUT_DIR/$STEM.in"
EXPECTED_FILE="$INPUT_DIR/$STEM.out"

# ---------- 1. Generate IR ----------
rpt ""
rpt "--- [1/5] Generating LLVM IR ---"
IR_FILE="$OUT_DIR/$STEM.ll"
if "$COMPILER" --emit-ir "$INPUT" > "$IR_FILE" 2>"$OUT_DIR/$STEM.ir.err"; then
  IR_LINES=$(wc -l < "$IR_FILE")
  rpt_color "$GREEN" "IR generated: $IR_FILE ($IR_LINES lines)"
else
  rpt_color "$RED" "ERROR: IR generation failed"
  cat "$OUT_DIR/$STEM.ir.err" >&2
  exit 1
fi

# ---------- 2. Generate our assembly and link ----------
rpt ""
rpt "--- [2/5] Generating our ASM & linking ---"
OUR_ASM="$OUT_DIR/$STEM.s"
OUR_ELF="$OUT_DIR/$STEM.elf"
if "$COMPILER" --emit-asm "$INPUT" > "$OUR_ASM" 2>"$OUT_DIR/$STEM.asm.err"; then
  OUR_ASM_LINES=$(wc -l < "$OUR_ASM")
  rpt_color "$GREEN" "ASM generated: $OUR_ASM ($OUR_ASM_LINES lines)"
else
  rpt_color "$RED" "ERROR: ASM generation failed"
  cat "$OUT_DIR/$STEM.asm.err" >&2
  exit 1
fi

if aarch64-linux-gnu-gcc "$OUR_ASM" "$REPO_ROOT/sylib/sylib.c" -O2 \
  -I "$REPO_ROOT/sylib" -lm -o "$OUR_ELF" 2>"$OUT_DIR/$STEM.link.err"; then
  rpt_color "$GREEN" "Linked: $OUR_ELF"
else
  rpt_color "$RED" "ERROR: link failed"
  cat "$OUT_DIR/$STEM.link.err" >&2
  exit 1
fi

# ---------- 3. GCC -O2 baseline (read from pre-computed data) ----------
rpt ""
rpt "--- [3/5] GCC -O2 baseline (reading from pre-computed data) ---"

BASELINE_DATA_DIR="$REPO_ROOT/output/baseline"
BASELINE_TSV_PATH="$BASELINE_DATA_DIR/gcc_timing.tsv"

GCC_ASM="$OUT_DIR/$STEM.gcc.s"
GCC_OUT="$OUT_DIR/$STEM.gcc.out"
GCC_OK=false
GCC_ASM_LINES=0
GCC_ELAPSED_RAW="" # seconds, without the trailing "s"

if [[ -f "$BASELINE_TSV_PATH" ]]; then
  # Column 1 is the case key, column 2 the timing; the last matching row wins.
  GCC_ELAPSED_RAW=$(awk -F'\t' -v s="$CASE_KEY" '$1==s{v=$2} END{if(v!="") print v}' \
    "$BASELINE_TSV_PATH" 2>/dev/null || true)
  if [[ -n "$GCC_ELAPSED_RAW" ]]; then
    GCC_OK=true
    rpt_color "$GREEN" "baseline timing: ${GCC_ELAPSED_RAW}s"
  else
    rpt_color "$YELLOW" "WARNING: no baseline entry for '$CASE_KEY'"
    rpt " Run: scripts/run_baseline.sh"
  fi

  # Copy the assembly file (the baseline dir mirrors the case-key layout).
  local_gcc_asm="$BASELINE_DATA_DIR/${CASE_KEY}.gcc.s"
  if [[ -f "$local_gcc_asm" ]]; then
    cp "$local_gcc_asm" "$GCC_ASM"
    GCC_ASM_LINES=$(wc -l < "$GCC_ASM")
    rpt "GCC ASM: $GCC_ASM ($GCC_ASM_LINES lines)"
  else
    rpt_color "$YELLOW" "GCC ASM not found in baseline dir: $local_gcc_asm"
  fi

  # Copy the output file (used by the diff in step 5).
  local_gcc_out="$BASELINE_DATA_DIR/${CASE_KEY}.gcc.out"
  if [[ -f "$local_gcc_out" ]]; then
    cp "$local_gcc_out" "$GCC_OUT"
    rpt "GCC output: $GCC_OUT"
  fi
else
  rpt_color "$YELLOW" "WARNING: baseline data not found: $BASELINE_TSV_PATH"
  rpt " Run: scripts/run_baseline.sh"
  rpt " to pre-compute GCC -O2 baseline for all test cases."
fi

# ---------- 4. Run and time (our compiler only) ----------
rpt ""
rpt "--- [4/5] Running & timing (our compiler) ---"

# Run an AArch64 executable under qemu with a timeout and measure wall time.
#   $1 — label used in status messages
#   $2 — executable to run
#   $3 — output file: program stdout followed by a line holding the exit status
#   $4 — timeout in seconds (default 60)
# Reads $STDIN_FILE as stdin when that file exists.
# Prints the elapsed seconds (or the word "timeout") on stdout; status messages
# go to stderr so that callers can capture stdout with $(...).
run_and_time() {
  local label="$1"
  local exe="$2"
  local out_file="$3"
  local timeout_sec="${4:-60}"
  local stdout_file="$out_file.raw"
  local status=0

  local _t0 _t1 _ns
  _t0=$(date +%s%N)
  # A non-zero exit from the program under test must not abort the script.
  set +e
  if [[ -f "$STDIN_FILE" ]]; then
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" \
      < "$STDIN_FILE" > "$stdout_file" 2>/dev/null
  else
    timeout "$timeout_sec" \
      qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" \
      > "$stdout_file" 2>/dev/null
  fi
  status=$?
  _t1=$(date +%s%N)
  _ns=$((_t1 - _t0))
  set -e

  # Merge stdout + exit code into the .out file (same format as verify_asm.sh).
  {
    cat "$stdout_file"
    # Add a newline only when stdout is non-empty and does not already end in one.
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$out_file"
  rm -f "$stdout_file"

  local elapsed
  if [[ $status -eq 124 ]]; then
    # 124 is the status `timeout` returns when the time limit was hit.
    elapsed="timeout"
    rpt_color "$YELLOW" "$label: TIMEOUT (>${timeout_sec}s)" >&2
  else
    elapsed=$(awk -v ns="$_ns" 'BEGIN { printf "%.5f", ns / 1000000000 }')
    if [[ $status -ne 0 ]]; then
      rpt_color "$YELLOW" "$label: exit $status elapsed=${elapsed}s" >&2
    else
      rpt_color "$GREEN" "$label: OK elapsed=${elapsed}s" >&2
    fi
  fi
  echo "$elapsed"
}

OUR_OUT="$OUT_DIR/$STEM.our.out"

# Performance suites get a longer timeout. Check the path as given and the
# resolved absolute path, so a relative path such as "performance/x.sy" also
# matches.
TIMEOUT_SEC=60
for input_path in "$INPUT" "$INPUT_ABS"; do
  if [[ "$input_path" == *"/performance/"* || "$input_path" == *"/h_performance/"* ]]; then
    TIMEOUT_SEC=300
  fi
done

OUR_ELAPSED=$(run_and_time "our compiler" "$OUR_ELF" "$OUR_OUT" "$TIMEOUT_SEC")

# GCC timing comes straight from the baseline data; GCC is not rerun here.
GCC_ELAPSED="N/A"
if [[ "$GCC_OK" == true && -n "$GCC_ELAPSED_RAW" ]]; then
  GCC_ELAPSED="${GCC_ELAPSED_RAW}s"
  rpt_color "$GREEN" "gcc -O2: ${GCC_ELAPSED} (from pre-computed baseline)"
fi

# ---------- 5. Output comparison ----------
rpt ""
rpt "--- [5/5] Output comparison ---"

# Print file $1 with a trailing CR removed from every line.
normalize_out() {
  awk '{ sub(/\r$/, ""); print }' "$1"
}

if [[ -f "$EXPECTED_FILE" ]]; then
  DIFF_FILE="$OUT_DIR/$STEM.diff"
  if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$OUR_OUT") > "$DIFF_FILE" 2>&1; then
    rpt_color "$GREEN" "our output: MATCH expected"
    rm -f "$DIFF_FILE"
  else
    rpt_color "$RED" "our output: MISMATCH — diff saved to $DIFF_FILE"
  fi

  if [[ "$GCC_OK" == true && -f "$GCC_OUT" ]]; then
    GCC_DIFF_FILE="$OUT_DIR/$STEM.gcc.diff"
    if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$GCC_OUT") > "$GCC_DIFF_FILE" 2>&1; then
      rpt_color "$GREEN" "gcc output: MATCH expected"
      rm -f "$GCC_DIFF_FILE"
    else
      rpt_color "$YELLOW" "gcc output: MISMATCH — diff saved to $GCC_DIFF_FILE"
    fi
  fi
else
  rpt_color "$YELLOW" "no expected output file found, skipping diff"
fi

# ---------- Summary ----------
rpt ""
rpt "============================================================"
rpt_color "$BOLD" " Summary"
rpt "============================================================"
rpt "$(printf '%-20s %s' 'IR lines:' "$IR_LINES")"
rpt "$(printf '%-20s %s' 'Our ASM lines:' "$OUR_ASM_LINES")"
if [[ "$GCC_OK" == true && $GCC_ASM_LINES -gt 0 ]]; then
  rpt "$(printf '%-20s %s' 'GCC ASM lines:' "$GCC_ASM_LINES")"
  rpt "$(printf '%-20s %s' 'ASM ratio (ours/gcc):' \
    "$(awk -v ours="$OUR_ASM_LINES" -v gcc="$GCC_ASM_LINES" \
      'BEGIN { if (gcc > 0) printf "%.2f", ours / gcc; else print "N/A" }')")"
fi
rpt "$(printf '%-20s %s' 'Our time:' "$OUR_ELAPSED")"
rpt "$(printf '%-20s %s' 'GCC time:' "$GCC_ELAPSED")"

# Report a speedup only when both timings are plain numbers. This skips
# "N/A", "timeout" and any other non-numeric baseline entry rather than
# feeding it to awk.
NUM_RE='^([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$'
OUR_S="${OUR_ELAPSED%s}"
GCC_S="${GCC_ELAPSED%s}"
if [[ "$OUR_S" =~ $NUM_RE && "$GCC_S" =~ $NUM_RE ]]; then
  SPEEDUP=$(awk -v ours="$OUR_S" -v gcc="$GCC_S" \
    'BEGIN { if (ours > 0) printf "%.5f", gcc / ours; else print "inf" }')
  rpt "$(printf '%-20s %sx' 'Speedup (gcc/ours):' "$SPEEDUP")"
fi

rpt ""
rpt "Output directory: $OUT_DIR"
rpt "============================================================"

printf '\n%bReport saved to: %s%b\n' "$CYAN" "$REPORT" "$NC"