#!/usr/bin/env bash
# analyze_case.sh — full-pipeline compile + IR/assembly capture for a single
# .sy test case. Used for in-depth analysis of the gap between one case and
# the GCC baseline.
#
# Usage:
#   analyze_case.sh <input.sy> [output_dir]
#
# The output directory (default: output/analyze/<stem>_<timestamp>) contains:
#   <stem>.ll        — LLVM IR emitted by our compiler
#   <stem>.s         — AArch64 assembly emitted by our compiler
#   <stem>.elf       — executable linked from our assembly
#   <stem>.gcc.s     — GCC -O2 AArch64 assembly (copied from output/baseline
#                      when available)
#   <stem>.our.out   — our program's stdout followed by its exit status
#   <stem>.gcc.out   — GCC program output (copied from output/baseline when
#                      available)
#   <stem>.diff      — diff of expected vs. our output (kept only on mismatch)
#   <stem>.gcc.diff  — diff of expected vs. GCC output (kept only on mismatch)
#   <stem>.ir.err, <stem>.asm.err, <stem>.link.err
#                    — stderr of the IR, assembly and link steps
#   report.txt       — summary report (IR lines, assembly lines, timings,
#                      speedup)
#
# GCC timing is read from output/baseline/gcc_timing.tsv; this script does not
# compile or run the GCC build itself.

set -euo pipefail

# Directory holding this script, and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ANSI colour escapes, stored unexpanded; they are interpreted by printf '%b'.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'
readonly RED GREEN YELLOW CYAN BOLD NC

# ---------- Argument parsing ----------

if (( $# < 1 || $# > 2 )); then
  printf 'usage: %s <input.sy> [output_dir]\n' "$0" >&2
  exit 1
fi

INPUT="$1"
if [[ ! -f "$INPUT" ]]; then
  printf 'input file not found: %s\n' "$INPUT" >&2
  exit 1
fi

BASE="$(basename -- "$INPUT")"
STEM="${BASE%.sy}"
# Absolute directory of the input; the .in/.out companions are looked up here.
INPUT_DIR="$(dirname -- "$(realpath -- "$INPUT")")"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"

# Path key, intended to match run_baseline.sh: repo-relative path with the
# leading test/ and the trailing .sy removed. Falls back to the raw argument
# if realpath cannot compute a relative path.
REL="$(realpath --relative-to="$REPO_ROOT" -- "$INPUT" 2>/dev/null \
  || printf '%s\n' "$INPUT")"
CASE_KEY="${REL#test/}"
CASE_KEY="${CASE_KEY%.sy}"

# An absent or empty second argument selects the timestamped default.
OUT_DIR="${2:-$REPO_ROOT/output/analyze/${STEM}_${TIMESTAMP}}"

mkdir -p -- "$OUT_DIR"

# Start each run with an empty report file.
REPORT="$OUT_DIR/report.txt"
: > "$REPORT"

# Print a plain report line.
# Globals:   REPORT (read) — file the line is appended to
# Arguments: any number of words; they are joined with spaces into one line
# Outputs:   the line on stdout, and the same line appended to $REPORT
rpt() {
  printf '%s\n' "$*" | tee -a "$REPORT"
}

# Print a coloured report line to the terminal and an uncoloured copy to the
# report file.
# Globals:   NC (read) — colour-reset escape; REPORT (read) — report file
# Arguments: $1 - colour escape (interpreted by printf '%b')
#            $2... - message words, joined with spaces
# Outputs:   coloured line on stdout; plain line appended to $REPORT
rpt_color() {
  local color="$1"
  shift
  printf '%b%s%b\n' "$color" "$*" "$NC"
  # Keep escape codes out of the report so it stays readable as plain text.
  printf '%s\n' "$*" >> "$REPORT"
}

# Report banner: identifies the case, its source file and the output location.
rpt "============================================================"
rpt " analyze_case report"
rpt " case : $STEM"
rpt " source : $INPUT"
rpt " output : $OUT_DIR"
rpt " date : $(date)"
rpt "============================================================"
rpt ""

# ---------- Locate the compiler ----------

# Use the first executable candidate; the build directories are probed in
# this order: build_lab3, build_lab2, build.
COMPILER=""
for candidate in \
  "$REPO_ROOT/build_lab3/bin/compiler" \
  "$REPO_ROOT/build_lab2/bin/compiler" \
  "$REPO_ROOT/build/bin/compiler"; do
  if [[ -x "$candidate" ]]; then
    COMPILER="$candidate"
    break
  fi
done

if [[ -z "$COMPILER" ]]; then
  rpt_color "$RED" "ERROR: compiler not found. Build first:"
  rpt " cmake -S $REPO_ROOT -B $REPO_ROOT/build_lab3 && cmake --build $REPO_ROOT/build_lab3 -j"
  exit 1
fi
rpt "compiler : $COMPILER"

# ---------- Tool check ----------

# Fail before any compilation if a required external tool is missing:
# the cross compiler (link step), qemu and timeout (run step).
for tool in aarch64-linux-gnu-gcc qemu-aarch64 timeout; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    rpt_color "$RED" "ERROR: required tool not found: $tool"
    exit 1
  fi
done

# Optional companions of the test case, located next to the input file:
# <stem>.in is fed to stdin, <stem>.out is the expected output.
STDIN_FILE="$INPUT_DIR/$STEM.in"
EXPECTED_FILE="$INPUT_DIR/$STEM.out"

# ---------- 1. Generate IR ----------

rpt ""
rpt "--- [1/5] Generating LLVM IR ---"
IR_FILE="$OUT_DIR/$STEM.ll"
# Compiler stdout is the IR; its stderr is kept in <stem>.ir.err.
if "$COMPILER" --emit-ir "$INPUT" > "$IR_FILE" 2> "$OUT_DIR/$STEM.ir.err"; then
  IR_LINES=$(wc -l < "$IR_FILE")
  rpt_color "$GREEN" "IR generated: $IR_FILE ($IR_LINES lines)"
else
  rpt_color "$RED" "ERROR: IR generation failed"
  cat "$OUT_DIR/$STEM.ir.err" >&2
  exit 1
fi

# ---------- 2. Generate our assembly and link ----------

rpt ""
rpt "--- [2/5] Generating our ASM & linking ---"
OUR_ASM="$OUT_DIR/$STEM.s"
OUR_ELF="$OUT_DIR/$STEM.elf"
# Compiler stdout is the assembly; its stderr is kept in <stem>.asm.err.
if "$COMPILER" --emit-asm "$INPUT" > "$OUR_ASM" 2> "$OUT_DIR/$STEM.asm.err"; then
  OUR_ASM_LINES=$(wc -l < "$OUR_ASM")
  rpt_color "$GREEN" "ASM generated: $OUR_ASM ($OUR_ASM_LINES lines)"
else
  rpt_color "$RED" "ERROR: ASM generation failed"
  cat "$OUT_DIR/$STEM.asm.err" >&2
  exit 1
fi

# Assemble and link together with sylib.c (which is compiled here at -O2).
if aarch64-linux-gnu-gcc "$OUR_ASM" "$REPO_ROOT/sylib/sylib.c" -O2 \
  -I "$REPO_ROOT/sylib" -lm -o "$OUR_ELF" 2> "$OUT_DIR/$STEM.link.err"; then
  rpt_color "$GREEN" "Linked: $OUR_ELF"
else
  rpt_color "$RED" "ERROR: link failed"
  cat "$OUT_DIR/$STEM.link.err" >&2
  exit 1
fi

# ---------- 3. GCC -O2 baseline (read from pre-computed data) ----------

rpt ""
rpt "--- [3/5] GCC -O2 baseline (reading from pre-computed data) ---"

BASELINE_DATA_DIR="$REPO_ROOT/output/baseline"
BASELINE_TSV_PATH="$BASELINE_DATA_DIR/gcc_timing.tsv"
GCC_ASM="$OUT_DIR/$STEM.gcc.s"
GCC_OUT="$OUT_DIR/$STEM.gcc.out"
GCC_OK=false        # becomes true only when a timing entry is found
GCC_ASM_LINES=0
GCC_ELAPSED_RAW=""  # seconds, without the trailing "s"

if [[ -f "$BASELINE_TSV_PATH" ]]; then
  # Column 1 is the case key, column 2 the time; if the key occurs more than
  # once, the last occurrence wins.
  GCC_ELAPSED_RAW=$(awk -F'\t' -v s="$CASE_KEY" '$1==s{v=$2} END{if(v!="") print v}' \
    "$BASELINE_TSV_PATH" 2>/dev/null || true)
  if [[ -n "$GCC_ELAPSED_RAW" ]]; then
    GCC_OK=true
    rpt_color "$GREEN" "baseline timing: ${GCC_ELAPSED_RAW}s"
  else
    rpt_color "$YELLOW" "WARNING: no baseline entry for '$CASE_KEY'"
    rpt " Run: scripts/run_baseline.sh"
  fi

  # Copy the assembly file (the baseline dir mirrors the case-key path).
  local_gcc_asm="$BASELINE_DATA_DIR/${CASE_KEY}.gcc.s"
  if [[ -f "$local_gcc_asm" ]]; then
    cp "$local_gcc_asm" "$GCC_ASM"
    GCC_ASM_LINES=$(wc -l < "$GCC_ASM")
    rpt "GCC ASM: $GCC_ASM ($GCC_ASM_LINES lines)"
  else
    rpt_color "$YELLOW" "GCC ASM not found in baseline dir: $local_gcc_asm"
  fi

  # Copy the output file (used by the diff in step 5).
  local_gcc_out="$BASELINE_DATA_DIR/${CASE_KEY}.gcc.out"
  if [[ -f "$local_gcc_out" ]]; then
    cp "$local_gcc_out" "$GCC_OUT"
    rpt "GCC output: $GCC_OUT"
  fi
else
  rpt_color "$YELLOW" "WARNING: baseline data not found: $BASELINE_TSV_PATH"
  rpt " Run: scripts/run_baseline.sh"
  rpt " to pre-compute GCC -O2 baseline for all test cases."
fi

# ---------- 4. Run and time (our compiler only) ----------

rpt ""
rpt "--- [4/5] Running & timing (our compiler) ---"

# Run an AArch64 executable under qemu-aarch64 with a time limit, store its
# output, and report the wall-clock time of the run.
# Globals:
#   STDIN_FILE (read) — if it is an existing file it is fed to the program's
#                       stdin; otherwise stdin is inherited from the caller.
# Arguments:
#   $1 - label used in the status message
#   $2 - path of the executable to run
#   $3 - output file; receives the program's stdout followed by its exit
#        status on a separate last line
#   $4 - time limit in seconds passed to `timeout` (default 60)
# Outputs:
#   stdout — elapsed seconds with five decimals, or the word "timeout" when
#            the exit status is 124 (the status `timeout` uses for an expired
#            limit)
#   stderr — a status line printed through rpt_color
# Note: errexit is switched off around the run and unconditionally switched
# back on afterwards, so the function expects to be used from a `set -e`
# script.
run_and_time() {
  local label="$1"
  local exe="$2"
  local out_file="$3"
  local timeout_sec="${4:-60}"
  local stdout_file="$out_file.raw"
  local status=0
  # Built once so both stdin variants below run the identical command.
  local -a run_cmd=(
    timeout "$timeout_sec"
    qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe"
  )

  local _t0 _t1 _ns
  _t0=$(date +%s%N)
  set +e
  # The program's stderr is discarded.
  if [[ -f "$STDIN_FILE" ]]; then
    "${run_cmd[@]}" < "$STDIN_FILE" > "$stdout_file" 2>/dev/null
  else
    "${run_cmd[@]}" > "$stdout_file" 2>/dev/null
  fi
  status=$?
  _t1=$(date +%s%N)
  _ns=$((_t1 - _t0))
  set -e

  # Merge stdout + exit code into the .out file (same layout as verify_asm.sh,
  # per the original comment): make sure stdout ends with a newline before the
  # status line is appended.
  {
    cat "$stdout_file"
    if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
      printf '\n'
    fi
    printf '%s\n' "$status"
  } > "$out_file"
  rm -f "$stdout_file"

  local elapsed
  if [[ $status -eq 124 ]]; then
    elapsed="timeout"
    rpt_color "$YELLOW" "$label: TIMEOUT (>${timeout_sec}s)" >&2
  else
    # Nanoseconds -> seconds; the value is passed as data, not spliced into
    # the awk program text.
    elapsed=$(awk -v ns="$_ns" 'BEGIN{printf "%.5f", ns / 1000000000}')
    if [[ $status -ne 0 ]]; then
      rpt_color "$YELLOW" "$label: exit $status elapsed=${elapsed}s" >&2
    else
      rpt_color "$GREEN" "$label: OK elapsed=${elapsed}s" >&2
    fi
  fi
  printf '%s\n' "$elapsed"
}

OUR_OUT="$OUT_DIR/$STEM.our.out"

# Cases under performance/ or h_performance/ get a longer time limit. Both the
# path as given and the resolved absolute input directory are checked, so a
# relative invocation from inside such a directory is recognised too.
TIMEOUT_SEC=60
for path_probe in "$INPUT" "$INPUT_DIR/"; do
  if [[ "$path_probe" == *"/performance/"* || "$path_probe" == *"/h_performance/"* ]]; then
    TIMEOUT_SEC=300
  fi
done

OUR_ELAPSED=$(run_and_time "our compiler" "$OUR_ELF" "$OUR_OUT" "$TIMEOUT_SEC")

# GCC time comes straight from the baseline data; GCC is not re-run.
GCC_ELAPSED="N/A"
if [[ "$GCC_OK" == true && -n "$GCC_ELAPSED_RAW" ]]; then
  GCC_ELAPSED="${GCC_ELAPSED_RAW}s"
  rpt_color "$GREEN" "gcc -O2: ${GCC_ELAPSED} (from pre-computed baseline)"
fi

# ---------- 5. Output comparison ----------

rpt ""
rpt "--- [5/5] Output comparison ---"

# Print a file with a trailing carriage return removed from every line, so
# CRLF and LF files compare equal.
# Arguments: $1 - file to read
# Outputs:   normalised text on stdout (every line newline-terminated)
normalize_out() {
  awk '{ sub(/\r$/, ""); print }' "$1"
}

# Compare our output (and the baseline GCC output, when available) against the
# expected output. A diff file is kept only when the outputs differ.
if [[ -f "$EXPECTED_FILE" ]]; then
  DIFF_FILE="$OUT_DIR/$STEM.diff"
  if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$OUR_OUT") > "$DIFF_FILE" 2>&1; then
    rpt_color "$GREEN" "our output: MATCH expected"
    rm -f "$DIFF_FILE"
  else
    rpt_color "$RED" "our output: MISMATCH — diff saved to $DIFF_FILE"
  fi

  if [[ "$GCC_OK" == true && -f "$GCC_OUT" ]]; then
    GCC_DIFF_FILE="$OUT_DIR/$STEM.gcc.diff"
    if diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$GCC_OUT") > "$GCC_DIFF_FILE" 2>&1; then
      rpt_color "$GREEN" "gcc output: MATCH expected"
      rm -f "$GCC_DIFF_FILE"
    else
      rpt_color "$YELLOW" "gcc output: MISMATCH — diff saved to $GCC_DIFF_FILE"
    fi
  fi
else
  rpt_color "$YELLOW" "no expected output file found, skipping diff"
fi

# ---------- Summary report ----------

rpt ""
rpt "============================================================"
rpt_color "$BOLD" " Summary"
rpt "============================================================"
rpt "$(printf '%-20s %s' 'IR lines:' "$IR_LINES")"
rpt "$(printf '%-20s %s' 'Our ASM lines:' "$OUR_ASM_LINES")"
if [[ "$GCC_OK" == true && $GCC_ASM_LINES -gt 0 ]]; then
  rpt "$(printf '%-20s %s' 'GCC ASM lines:' "$GCC_ASM_LINES")"
  # Line counts are handed to awk as data rather than spliced into the program.
  rpt "$(printf '%-20s %s' 'ASM ratio (ours/gcc):' \
    "$(awk -v ours="$OUR_ASM_LINES" -v gcc="$GCC_ASM_LINES" \
      'BEGIN{if (gcc > 0) printf "%.2f", ours / gcc; else print "N/A"}')")"
fi
rpt "$(printf '%-20s %s' 'Our time:' "$OUR_ELAPSED")"
rpt "$(printf '%-20s %s' 'GCC time:' "$GCC_ELAPSED")"

# Speedup is reported only when both times are plain numbers. This skips
# "N/A" and "timeout", and also any non-numeric value that may come from the
# baseline file, which would otherwise be evaluated as awk source.
NUMERIC_RE='^([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$'
OUR_S="${OUR_ELAPSED%s}"
GCC_S="${GCC_ELAPSED%s}"
if [[ "$OUR_S" =~ $NUMERIC_RE && "$GCC_S" =~ $NUMERIC_RE ]]; then
  SPEEDUP=$(awk -v gcc="$GCC_S" -v ours="$OUR_S" \
    'BEGIN{if (ours > 0) printf "%.5f", gcc / ours; else print "inf"}')
  rpt "$(printf '%-20s %sx' 'Speedup (gcc/ours):' "$SPEEDUP")"
fi
rpt ""
rpt "Output directory: $OUT_DIR"
rpt "============================================================"

printf '\n%bReport saved to: %s%b\n' "$CYAN" "$REPORT" "$NC"