You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nudt-compiler-cpp/scripts/analyze_case.sh

325 lines
9.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
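# analyze_case.sh — end-to-end compile of a single .sy test case, saving its IR and assembly.
# Used for in-depth analysis of the gap between one sample and the GCC baseline.
#
# Usage:
#   analyze_case.sh <input.sy> [output_dir]
#
# The output directory (default: output/analyze/<stem>_<timestamp>) contains:
#   <stem>.ll       — LLVM IR emitted by our compiler
#   <stem>.s        — AArch64 assembly emitted by our compiler
#   <stem>.elf      — executable assembled and linked from our output
#   <stem>.gcc.s    — AArch64 assembly produced by GCC -O2
#   <stem>.gcc.elf  — executable linked from the GCC -O2 build
#   <stem>.our.time — run time of our program (seconds)
#   <stem>.gcc.time — run time of the GCC program (seconds)
#   <stem>.our.out  — actual output of our program
#   <stem>.gcc.out  — actual output of the GCC program
#   <stem>.diff     — output diff (only when the outputs differ)
#   report.txt      — summary report (IR line count, assembly line count, timings, speedup)
#
# NOTE(review): nothing in this script appears to write <stem>.gcc.elf,
# <stem>.our.time or <stem>.gcc.time; timings only end up in report.txt, and the
# .gcc.s / .gcc.out files are copied from output/baseline. Confirm and update this list.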
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# Directory holding this script, and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ANSI colour sequences, kept as literal backslash escapes; they are only
# interpreted where they are printed through printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# ---------- Argument parsing ----------
# One mandatory argument (the .sy source) and one optional output directory.
if (( $# < 1 || $# > 2 )); then
  printf 'usage: %s <input.sy> [output_dir]\n' "$0" >&2
  exit 1
fi

INPUT="$1"
if ! [[ -f "$INPUT" ]]; then
  printf 'input file not found: %s\n' "$INPUT" >&2
  exit 1
fi

# File name pieces and the resolved directory of the input.
BASE="$(basename "$INPUT")"
STEM="${BASE%.sy}"
INPUT_DIR="$(dirname "$(realpath "$INPUT")")"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"

# Baseline lookup key: the path relative to the repo root with the leading
# test/ and the trailing .sy removed (per the original comment this is meant to
# match the key used by run_baseline.sh). Falls back to the raw argument when
# realpath cannot compute a relative path.
REL="$(realpath --relative-to="$REPO_ROOT" "$INPUT" 2>/dev/null || echo "$INPUT")"
CASE_KEY="${REL#test/}"
CASE_KEY="${CASE_KEY%.sy}"

# Use the caller-supplied output directory when a second argument was given,
# otherwise a timestamped directory under output/analyze.
OUT_DIR="${2-$REPO_ROOT/output/analyze/${STEM}_${TIMESTAMP}}"
mkdir -p "$OUT_DIR"

# Start every run with an empty report file.
REPORT="$OUT_DIR/report.txt"
: > "$REPORT"
# Print one line to stdout and append the same line to the report file.
# Arguments: all arguments are joined into a single line ("$*").
# Globals:   REPORT (read) — path of the report file that is appended to.
rpt() {
  local line="$*"
  # The here-string supplies the trailing newline; tee duplicates the line to
  # stdout and to the report.
  tee -a "$REPORT" <<< "$line"
}
# Print a coloured line to stdout and append the uncoloured text to the report.
# Arguments: $1   - colour escape sequence (rendered through printf %b)
#            $2.. - message words, joined into a single line
# Globals:   NC (read) — colour reset sequence; REPORT (read) — report file path.
rpt_color() {
  local tint="$1"
  shift
  local message="$*"
  # Terminal copy: colour on, text, colour reset.
  printf '%b%s%b\n' "$tint" "$message" "$NC"
  # Report copy: plain text only, so the file stays free of escape codes.
  printf '%s\n' "$message" >> "$REPORT"
}
# Opening banner of the report: case name, source path, output directory and
# the current date, followed by a blank separator line.
for banner_line in \
  "============================================================" \
  " analyze_case report" \
  " case : $STEM" \
  " source : $INPUT" \
  " output : $OUT_DIR" \
  " date : $(date)" \
  "============================================================" \
  ""; do
  rpt "$banner_line"
done
# ---------- Locate the compiler ----------
# Probe the known build directories in priority order and keep the first
# executable compiler binary that is found.
COMPILER=""
for candidate in "$REPO_ROOT"/{build_lab3,build_lab2,build}/bin/compiler; do
  [[ -x "$candidate" ]] || continue
  COMPILER="$candidate"
  break
done

# Without a compiler there is nothing to analyze: print a build hint and stop.
[[ -n "$COMPILER" ]] || {
  rpt_color "$RED" "ERROR: compiler not found. Build first:"
  rpt " cmake -S $REPO_ROOT -B $REPO_ROOT/build_lab3 && cmake --build $REPO_ROOT/build_lab3 -j"
  exit 1
}
rpt "compiler : $COMPILER"
# ---------- Tool check ----------
# Every external program the later steps rely on must be on PATH:
#   aarch64-linux-gnu-gcc — links our assembly against sylib
#   qemu-aarch64          — executes the resulting AArch64 binary
#   timeout               — bounds the run time inside run_and_time; if it were
#                           missing, each run would merely report "exit 127"
#                           with empty output instead of failing clearly here.
for tool in aarch64-linux-gnu-gcc qemu-aarch64 timeout; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    rpt_color "$RED" "ERROR: required tool not found: $tool"
    exit 1
  fi
done

# Optional companion files that sit next to the input: stdin data and the
# expected output used by the comparison step.
STDIN_FILE="$INPUT_DIR/$STEM.in"
EXPECTED_FILE="$INPUT_DIR/$STEM.out"
# ---------- 1. Generate IR ----------
rpt ""
rpt "--- [1/5] Generating LLVM IR ---"
IR_FILE="$OUT_DIR/$STEM.ll"

# The compiler's stdout is the IR; its stderr is kept in <stem>.ir.err so it can
# be replayed when generation fails.
if ! "$COMPILER" --emit-ir "$INPUT" > "$IR_FILE" 2> "$OUT_DIR/$STEM.ir.err"; then
  rpt_color "$RED" "ERROR: IR generation failed"
  cat "$OUT_DIR/$STEM.ir.err" >&2
  exit 1
fi

# Line count is reused by the summary at the end of the script.
IR_LINES=$(wc -l < "$IR_FILE")
rpt_color "$GREEN" "IR generated: $IR_FILE ($IR_LINES lines)"
# ---------- 2. Generate our assembly and link it ----------
rpt ""
rpt "--- [2/5] Generating our ASM & linking ---"
OUR_ASM="$OUT_DIR/$STEM.s"
OUR_ELF="$OUT_DIR/$STEM.elf"

# Assembly goes to <stem>.s; compiler diagnostics are kept in <stem>.asm.err.
if ! "$COMPILER" --emit-asm "$INPUT" > "$OUR_ASM" 2> "$OUT_DIR/$STEM.asm.err"; then
  rpt_color "$RED" "ERROR: ASM generation failed"
  cat "$OUT_DIR/$STEM.asm.err" >&2
  exit 1
fi
OUR_ASM_LINES=$(wc -l < "$OUR_ASM")
rpt_color "$GREEN" "ASM generated: $OUR_ASM ($OUR_ASM_LINES lines)"

# Link our assembly together with the runtime library source using the
# cross GCC; linker diagnostics are kept in <stem>.link.err.
if ! aarch64-linux-gnu-gcc "$OUR_ASM" "$REPO_ROOT/sylib/sylib.c" -O2 \
  -I "$REPO_ROOT/sylib" -lm -o "$OUR_ELF" 2> "$OUT_DIR/$STEM.link.err"; then
  rpt_color "$RED" "ERROR: link failed"
  cat "$OUT_DIR/$STEM.link.err" >&2
  exit 1
fi
rpt_color "$GREEN" "Linked: $OUR_ELF"
# ---------- 3. GCC -O2 baseline (read from pre-computed data) ----------
rpt ""
rpt "--- [3/5] GCC -O2 baseline (reading from pre-computed data) ---"
BASELINE_DATA_DIR="$REPO_ROOT/output/baseline"
BASELINE_TSV_PATH="$BASELINE_DATA_DIR/gcc_timing.tsv"
GCC_ASM="$OUT_DIR/$STEM.gcc.s"
GCC_OUT="$OUT_DIR/$STEM.gcc.out"
GCC_OK=false
GCC_ASM_LINES=0
GCC_ELAPSED_RAW="" # seconds, without the trailing "s"

if [[ ! -f "$BASELINE_TSV_PATH" ]]; then
  # No baseline at all: warn and carry on with our own results only.
  rpt_color "$YELLOW" "WARNING: baseline data not found: $BASELINE_TSV_PATH"
  rpt " Run: scripts/run_baseline.sh"
  rpt " to pre-compute GCC -O2 baseline for all test cases."
else
  # Column 1 of the TSV is matched against the case key and column 2 is taken
  # as the timing; when several rows match, the last one wins.
  GCC_ELAPSED_RAW=$(awk -F'\t' -v s="$CASE_KEY" '$1==s{v=$2} END{if(v!="") print v}' \
    "$BASELINE_TSV_PATH" 2>/dev/null || true)
  if [[ -z "$GCC_ELAPSED_RAW" ]]; then
    rpt_color "$YELLOW" "WARNING: no baseline entry for '$CASE_KEY'"
    rpt " Run: scripts/run_baseline.sh"
  else
    GCC_OK=true
    rpt_color "$GREEN" "baseline timing: ${GCC_ELAPSED_RAW}s"
  fi

  # Copy the baseline assembly (the baseline directory mirrors the case path).
  local_gcc_asm="$BASELINE_DATA_DIR/${CASE_KEY}.gcc.s"
  if [[ ! -f "$local_gcc_asm" ]]; then
    rpt_color "$YELLOW" "GCC ASM not found in baseline dir: $local_gcc_asm"
  else
    cp "$local_gcc_asm" "$GCC_ASM"
    GCC_ASM_LINES=$(wc -l < "$GCC_ASM")
    rpt "GCC ASM: $GCC_ASM ($GCC_ASM_LINES lines)"
  fi

  # Copy the baseline program output so that step 5 can diff against it.
  local_gcc_out="$BASELINE_DATA_DIR/${CASE_KEY}.gcc.out"
  if [[ -f "$local_gcc_out" ]]; then
    cp "$local_gcc_out" "$GCC_OUT"
    rpt "GCC output: $GCC_OUT"
  fi
fi
# ---------- 4. Run and time (our compiler only) ----------
rpt ""
rpt "--- [4/5] Running & timing (our compiler) ---"
# Run an AArch64 executable under qemu-aarch64 with a time limit, record its
# output plus exit status, and report the wall-clock time.
# Globals:   STDIN_FILE (read) — fed to the program on stdin when it exists
#            YELLOW, GREEN (read) — colours handed to rpt_color
# Arguments: $1 - label used in status messages
#            $2 - path of the executable
#            $3 - output file: program stdout followed by a line holding the
#                 exit status
#            $4 - time limit in seconds (default 60)
# Outputs:   stdout — elapsed seconds with five decimals, or "timeout";
#            status messages go through rpt_color and are redirected to stderr
run_and_time() {
  local label="$1" exe="$2" out_file="$3"
  local timeout_sec="${4:-60}"
  local raw_capture="${out_file}.raw"
  local rc=0
  local start_ns end_ns elapsed
  local -a launch=(timeout "$timeout_sec" qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe")

  start_ns=$(date +%s%N)
  # The program under test may legitimately exit non-zero; errexit is switched
  # off around the run so that the status can be captured.
  set +e
  if [[ -f "$STDIN_FILE" ]]; then
    "${launch[@]}" < "$STDIN_FILE" > "$raw_capture" 2>/dev/null
  else
    "${launch[@]}" > "$raw_capture" 2>/dev/null
  fi
  rc=$?
  end_ns=$(date +%s%N)
  set -e

  # Merge stdout and the exit code into the .out file (per the original comment
  # this mirrors the verify_asm.sh format): a newline is inserted first when the
  # captured output is non-empty and does not already end with one.
  {
    cat "$raw_capture"
    if [[ -s "$raw_capture" && "$(tail -c 1 "$raw_capture" | wc -l)" -eq 0 ]]; then
      printf '\n'
    fi
    printf '%s\n' "$rc"
  } > "$out_file"
  rm -f "$raw_capture"

  if (( rc == 124 )); then
    # 124 is the status reported when the time limit was hit.
    elapsed="timeout"
    rpt_color "$YELLOW" "$label: TIMEOUT (>${timeout_sec}s)" >&2
  else
    # Nanoseconds to seconds, five decimal places.
    elapsed=$(awk -v ns="$((end_ns - start_ns))" 'BEGIN { printf "%.5f", ns / 1000000000 }')
    case "$rc" in
      0) rpt_color "$GREEN" "$label: OK elapsed=${elapsed}s" >&2 ;;
      *) rpt_color "$YELLOW" "$label: exit $rc elapsed=${elapsed}s" >&2 ;;
    esac
  fi
  printf '%s\n' "$elapsed"
}
# Output file for our program: its stdout followed by its exit status.
OUR_OUT="$OUT_DIR/$STEM.our.out"

# Performance suites get a 300 s limit, everything else 60 s. The suite is
# recognised by a /performance/ or /h_performance/ path component. Both the
# path as typed and the repo-relative path are examined: looking only at the
# typed path misses an invocation such as `performance/foo.sy` from inside
# test/, because that string has no slash in front of the directory name.
TIMEOUT_SEC=60
for suite_probe in "$INPUT" "/$REL"; do
  if [[ "$suite_probe" == *"/performance/"* || "$suite_probe" == *"/h_performance/"* ]]; then
    TIMEOUT_SEC=300
  fi
done

# run_and_time prints the elapsed time (or "timeout") on stdout; its status
# messages go to stderr and to the report.
OUR_ELAPSED=$(run_and_time "our compiler" "$OUR_ELF" "$OUR_OUT" "$TIMEOUT_SEC")

# The GCC timing is taken straight from the baseline data; GCC is not re-run.
GCC_ELAPSED="N/A"
if [[ "$GCC_OK" == true && -n "$GCC_ELAPSED_RAW" ]]; then
  GCC_ELAPSED="${GCC_ELAPSED_RAW}s"
  rpt_color "$GREEN" "gcc -O2: ${GCC_ELAPSED} (from pre-computed baseline)"
fi
# ---------- 5. Output comparison ----------
rpt ""
rpt "--- [5/5] Output comparison ---"
# Print a file with a trailing carriage return removed from every line, so
# that CRLF and LF files compare equal.
# Arguments: $1 - file to read
# Outputs:   normalized text on stdout
normalize_out() {
  local src="$1"
  # Strip one CR at end of record, then print the record (pattern "1").
  awk '{ sub(/\r$/, "") } 1' "$src"
}
# Compare the recorded outputs against the expected output file, when there is
# one. Both sides are passed through normalize_out first. A diff file is kept
# only when the outputs differ.
if [[ ! -f "$EXPECTED_FILE" ]]; then
  rpt_color "$YELLOW" "no expected output file found, skipping diff"
else
  # Our program versus the expected output.
  DIFF_FILE="$OUT_DIR/$STEM.diff"
  if ! diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$OUR_OUT") > "$DIFF_FILE" 2>&1; then
    rpt_color "$RED" "our output: MISMATCH — diff saved to $DIFF_FILE"
  else
    rpt_color "$GREEN" "our output: MATCH expected"
    rm -f "$DIFF_FILE"
  fi

  # Baseline GCC output versus the expected output; only checked when a
  # baseline timing was found and the baseline output was copied in step 3.
  if [[ "$GCC_OK" == true && -f "$GCC_OUT" ]]; then
    GCC_DIFF_FILE="$OUT_DIR/$STEM.gcc.diff"
    if ! diff <(normalize_out "$EXPECTED_FILE") <(normalize_out "$GCC_OUT") > "$GCC_DIFF_FILE" 2>&1; then
      rpt_color "$YELLOW" "gcc output: MISMATCH — diff saved to $GCC_DIFF_FILE"
    else
      rpt_color "$GREEN" "gcc output: MATCH expected"
      rm -f "$GCC_DIFF_FILE"
    fi
  fi
fi
# ---------- Summary report ----------
# Final table: IR and assembly sizes, timings, and the speedup relative to GCC.
rpt ""
rpt "============================================================"
rpt_color "$BOLD" " Summary"
rpt "============================================================"
rpt "$(printf '%-20s %s' 'IR lines:' "$IR_LINES")"
rpt "$(printf '%-20s %s' 'Our ASM lines:' "$OUR_ASM_LINES")"

# The assembly size ratio is shown only when a baseline timing was found and
# the baseline assembly was available. The counts are handed to awk with -v
# rather than being spliced into the awk program text.
if [[ "$GCC_OK" == true && $GCC_ASM_LINES -gt 0 ]]; then
  rpt "$(printf '%-20s %s' 'GCC ASM lines:' "$GCC_ASM_LINES")"
  rpt "$(printf '%-20s %s' 'ASM ratio (ours/gcc):' \
    "$(awk -v ours="$OUR_ASM_LINES" -v gcc="$GCC_ASM_LINES" \
      'BEGIN { if (gcc > 0) printf "%.2f", ours / gcc; else print "N/A" }')")"
fi

rpt "$(printf '%-20s %s' 'Our time:' "$OUR_ELAPSED")"
rpt "$(printf '%-20s %s' 'GCC time:' "$GCC_ELAPSED")"

# Speedup is computed only when both timings are plain numbers. GCC_ELAPSED is
# either "N/A" or the baseline value with an "s" appended, so comparing it with
# the literal "timeout" can never filter anything; a non-numeric baseline value
# would reach awk as an uninitialised variable and print a bogus 0.00000x.
# Stripping the suffix and validating both operands covers "N/A", "timeout"
# and any other non-numeric marker.
OUR_S="${OUR_ELAPSED%s}"
GCC_S="${GCC_ELAPSED%s}"
NUMERIC_RE='^([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?$'
if [[ "$OUR_S" =~ $NUMERIC_RE && "$GCC_S" =~ $NUMERIC_RE ]]; then
  SPEEDUP=$(awk -v ours="$OUR_S" -v gcc="$GCC_S" \
    'BEGIN { if (ours > 0) printf "%.5f", gcc / ours; else print "inf" }')
  rpt "$(printf '%-20s %sx' 'Speedup (gcc/ours):' "$SPEEDUP")"
fi

rpt ""
rpt "Output directory: $OUT_DIR"
rpt "============================================================"
printf '\n%bReport saved to: %s%b\n' "$CYAN" "$REPORT" "$NC"