You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nudt-compiler-cpp/scripts/compare_ra.sh

464 lines
16 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
# compare_ra.sh — 对比寄存器分配(新版)与旧版编译器的汇编质量和运行性能
#
# 用法:
# ./scripts/compare_ra.sh --old feature/mir # 对比 feature/mir 分支
# ./scripts/compare_ra.sh --old 70234dd # 对比指定 commit
# ./scripts/compare_ra.sh --old feature/mir --tests performance # 仅性能测试
# ./scripts/compare_ra.sh --old feature/mir --mode asm # 仅对比汇编
# ./scripts/compare_ra.sh --old path/to/old/compiler --no-build # 使用已构建的旧编译器
#
# 输出: 终端表格 + compare_result/ 目录下的详细文件
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ========== Option defaults (overwritten by the command-line parser) ==========
# Baseline to compare against: a git ref, or a compiler path with --no-build.
OLD_REF=''
# What to compare: asm | run | all
MODE='all'
# Which test suite to use: functional | performance | all
TEST_SET='all'
# true when --old already points at a built compiler executable.
NO_BUILD=false
# true to leave the temporary git worktree in place after the run.
KEEP_WORKTREE=false
# Location of the new compiler, relative to the current working directory.
NEW_COMPILER_PATH='./build/bin/compiler'
# NOTE(review): not referenced anywhere else in the visible script.
OLD_COMPILER_PATH=''
# Filled in later when a worktree for the old revision is created.
WORKTREE_DIR=''
# Print the command-line help text to stdout, then terminate the script with
# exit status 1. Takes no arguments; "$0" is interpolated as the program name.
usage() {
  # One argument per output line; empty arguments produce the blank separators.
  printf '%s\n' \
    "用法: $0 --old <branch|commit|path> [选项]" \
    '' \
    '必选:' \
    ' --old <ref> 对比基线git 分支名、commit hash、或旧编译器路径' \
    '' \
    '可选:' \
    ' --mode <mode> 对比模式: asm (仅汇编) | run (仅运行) | all (默认)' \
    ' --tests <set> 测试集: functional | performance | all (默认)' \
    ' --no-build 旧编译器已构建好,--old 指向编译器可执行文件路径' \
    ' --keep-worktree 保留旧的 git worktree默认会删除' \
    '' \
    '示例:' \
    " $0 --old feature/mir # 对比 feature/mir 分支" \
    " $0 --old 70234dd # 对比特定 commit" \
    " $0 --old feature/mir --tests performance --mode run # 仅对比性能测试的运行时间" \
    " $0 --old /tmp/old-compiler --no-build # 使用预构建的旧编译器"
  exit 1
}
# ========== Command-line parsing ==========
# Fills OLD_REF, MODE, TEST_SET, NO_BUILD and KEEP_WORKTREE from the arguments.
# Every rejected input prints a message and then the help text via usage(),
# which exits with status 1.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --old | --mode | --tests)
      # These options consume the next argument. Check that it exists first:
      # under `set -u`, reading a missing "$2" would abort the script with a
      # bare "unbound variable" error instead of the help text.
      if [[ $# -lt 2 ]]; then
        echo "错误: 选项 $1 需要一个参数"
        usage
      fi
      case "$1" in
        --old) OLD_REF="$2" ;;
        --mode) MODE="$2" ;;
        --tests) TEST_SET="$2" ;;
      esac
      shift 2
      ;;
    --no-build)
      NO_BUILD=true
      shift
      ;;
    --keep-worktree)
      KEEP_WORKTREE=true
      shift
      ;;
    -h | --help)
      usage
      ;;
    *)
      echo "未知参数: $1"
      usage
      ;;
  esac
done

# --old is the only mandatory option.
if [[ -z "$OLD_REF" ]]; then
  echo "错误: 必须指定 --old"
  usage
fi

# Reject unknown selector values up front; otherwise a typo such as
# "--mode asn" would match none of the later mode checks and the script
# would finish without comparing anything.
case "$MODE" in
  asm | run | all) ;;
  *)
    echo "错误: 无效的 --mode: $MODE"
    usage
    ;;
esac
case "$TEST_SET" in
  functional | performance | all) ;;
  *)
    echo "错误: 无效的 --tests: $TEST_SET"
    usage
    ;;
esac
# ========== Path setup ==========
# Absolute directory that contains this script.
SCRIPT_DIR="$(
  cd "$(dirname "$0")" && pwd
)"
# The project root is the parent of the script directory.
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
# All reports, logs and intermediate files are written below this directory.
RESULT_DIR="${PROJECT_DIR}/compare_result"
# Both are assigned later, once the old compiler has been located or built.
OLD_BUILD_DIR=
OLD_COMPILER=

# Start every run from an empty result directory.
rm -rf "$RESULT_DIR"
mkdir -p "$RESULT_DIR"
# ========== Build the old (baseline) compiler ==========

# Report a failed build step and stop the script.
#   $1 - message to print
#   $2 - number of trailing lines of the build log to show
_old_build_failed() {
  echo "$1"
  tail -n "$2" "$RESULT_DIR/build_old.log"
  exit 1
}

# Resolve --old into a runnable compiler and store its path in OLD_COMPILER.
#
# Globals read:    NO_BUILD, OLD_REF, PROJECT_DIR, RESULT_DIR
# Globals written: OLD_COMPILER, and on the build path also WORKTREE_DIR and
#                  OLD_BUILD_DIR
#
# With NO_BUILD=true, OLD_REF must be an executable file and is used as is.
# Otherwise OLD_REF must name a commit of the repository in PROJECT_DIR; it is
# checked out into a separate worktree, the ANTLR parser is generated with
# java, and the compiler is built with cmake. Build output is collected in
# "$RESULT_DIR/build_old.log". Any failure prints a message and exits 1.
setup_old_compiler() {
  if [[ "$NO_BUILD" == true ]]; then
    # The user supplied the compiler directly. Require a regular file as well
    # as the execute bit: -x alone is also true for a searchable directory.
    if [[ ! -f "$OLD_REF" || ! -x "$OLD_REF" ]]; then
      echo "错误: 旧编译器路径不存在或不可执行: $OLD_REF"
      exit 1
    fi
    OLD_COMPILER="$(realpath "$OLD_REF")"
    echo "使用预构建旧编译器: $OLD_COMPILER"
    return
  fi

  # The reference must resolve to a commit. Git is pointed at PROJECT_DIR
  # explicitly so the result does not depend on the caller's working directory.
  if ! git -C "$PROJECT_DIR" rev-parse --verify --quiet "${OLD_REF}^{commit}" >/dev/null 2>&1; then
    echo "错误: '$OLD_REF' 不是有效的 git 引用(分支/commit或使用 --no-build 指定编译器路径"
    exit 1
  fi

  # Slashes in branch names would otherwise create nested directories.
  WORKTREE_DIR="$PROJECT_DIR/.worktree-old-${OLD_REF//\//-}"
  OLD_BUILD_DIR="$WORKTREE_DIR/build"
  echo "=== 准备旧版编译器 ==="
  echo "Git 引用: $OLD_REF"
  echo "Worktree: $WORKTREE_DIR"

  # Remove a worktree left behind by an earlier run.
  if [[ -d "$WORKTREE_DIR" ]]; then
    echo "移除已有 worktree..."
    git -C "$PROJECT_DIR" worktree remove --force "$WORKTREE_DIR" 2>/dev/null \
      || rm -rf -- "${WORKTREE_DIR:?}"
  fi
  # Drop bookkeeping for worktrees whose directory no longer exists (for
  # example after the rm fallback above), so the path can be registered again.
  git -C "$PROJECT_DIR" worktree prune 2>/dev/null || true

  # --detach checks out the commit without claiming the branch, so this also
  # works when OLD_REF is a branch that is already checked out elsewhere.
  git -C "$PROJECT_DIR" worktree add --detach "$WORKTREE_DIR" "$OLD_REF"
  echo "Worktree 已创建: $WORKTREE_DIR"

  # Generate the ANTLR parser sources; this step starts a fresh build log.
  echo "生成 ANTLR 语法分析器..."
  java -jar "$WORKTREE_DIR/third_party/antlr-4.13.2-complete.jar" \
    -Dlanguage=Cpp \
    -visitor -no-listener \
    -Xexact-output-dir \
    -o "$OLD_BUILD_DIR/generated/antlr4" \
    "$WORKTREE_DIR/src/antlr4/SysY.g4" > "$RESULT_DIR/build_old.log" 2>&1 \
    || _old_build_failed "错误: ANTLR 生成失败,日志:" 20

  # Configure and build, appending to the same log.
  echo "构建旧版编译器..."
  cmake -S "$WORKTREE_DIR" -B "$OLD_BUILD_DIR" -DCMAKE_BUILD_TYPE=Release \
    >> "$RESULT_DIR/build_old.log" 2>&1 \
    || _old_build_failed "错误: 旧版 cmake 配置失败,日志:" 20
  # Use every core reported by nproc; fall back to 4 jobs where it is missing.
  cmake --build "$OLD_BUILD_DIR" -j"$(nproc 2>/dev/null || echo 4)" \
    >> "$RESULT_DIR/build_old.log" 2>&1 \
    || _old_build_failed "错误: 旧版构建失败,日志:" 30

  OLD_COMPILER="$OLD_BUILD_DIR/bin/compiler"
  if [[ ! -x "$OLD_COMPILER" ]]; then
    echo "错误: 旧编译器未生成: $OLD_COMPILER"
    exit 1
  fi
  echo "旧编译器已构建: $OLD_COMPILER"
}
# ========== Make sure the new compiler exists ==========
# Store the absolute path of the new compiler in the global NEW_COMPILER.
# Reads NEW_COMPILER_PATH; prints an error and exits 1 when that path is not
# executable.
setup_new_compiler() {
  if [[ -x "$NEW_COMPILER_PATH" ]]; then
    NEW_COMPILER="$(realpath "$NEW_COMPILER_PATH")"
    echo "新版编译器: $NEW_COMPILER"
    return
  fi

  echo "错误: 新编译器不存在,请先构建: cmake -B build && cmake --build build -j"
  exit 1
}
# ========== Tool checks ==========
# Verify that the external programs needed for the selected options can be
# found with `command -v`. Reads NO_BUILD and MODE.
#   - java is required unless NO_BUILD is true.
#   - a missing aarch64-linux-gnu-gcc always produces a warning.
#   - qemu-aarch64 and aarch64-linux-gnu-gcc are required for MODE run / all.
# A missing required tool prints an error and exits 1.
check_tools() {
  local cross_gcc_found=true
  command -v aarch64-linux-gnu-gcc >/dev/null 2>&1 || cross_gcc_found=false

  # java is only needed when the old compiler has to be built here.
  if [[ "$NO_BUILD" == false ]] && ! command -v java >/dev/null 2>&1; then
    echo "错误: 未找到 java构建时需要 ANTLR 生成语法分析器"
    exit 1
  fi

  if [[ "$cross_gcc_found" == false ]]; then
    echo "警告: 未找到 aarch64-linux-gnu-gcc汇编模式可用但运行模式不可用"
  fi

  # The remaining checks only matter when test programs are executed.
  case "$MODE" in
    run | all) ;;
    *) return 0 ;;
  esac

  if ! command -v qemu-aarch64 >/dev/null 2>&1; then
    echo "错误: 未找到 qemu-aarch64无法运行测试"
    echo " apt install qemu-user (Ubuntu/Debian)"
    exit 1
  fi
  if [[ "$cross_gcc_found" == false ]]; then
    echo "错误: 未找到 aarch64-linux-gnu-gcc无法链接可执行文件"
    exit 1
  fi
  return 0
}
# ========== Collect the test list ==========
# Print the path of every selected test case, one per line, on stdout.
#
# Globals read: PROJECT_DIR, TEST_SET
# Test cases are the *.sy files in test/test_case/functional and/or
# test/test_case/performance below PROJECT_DIR, chosen by TEST_SET
# (functional | performance | all); functional cases are listed first.
#
# When nothing is found, the error message goes to stderr and the function
# exits with status 1. Stdout carries paths only: callers capture it as the
# test list, so a message printed there would be mistaken for a test path.
get_tests() {
  local test_dir="$PROJECT_DIR/test/test_case"
  local -a tests=()
  local suite f

  for suite in functional performance; do
    [[ "$TEST_SET" == "$suite" || "$TEST_SET" == "all" ]] || continue
    for f in "$test_dir/$suite"/*.sy; do
      # An unmatched glob stays as the literal pattern; -f filters it out.
      if [[ -f "$f" ]]; then
        tests+=("$f")
      fi
    done
  done

  if [[ ${#tests[@]} -eq 0 ]]; then
    echo "错误: 没有找到测试用例" >&2
    exit 1
  fi
  printf '%s\n' "${tests[@]}"
}
# ========== Assembly comparison ==========

# Print the number of lines in file $2 that match extended regex $1.
_asm_count_lines() {
  local n
  # `grep -c` prints the count even when it is 0, but then exits with status 1.
  # The status is discarded here; echoing a fallback "0" instead would append a
  # second line to the count that grep already printed.
  n=$(grep -cE -- "$1" "$2" 2>/dev/null || true)
  printf '%s\n' "${n:-0}"
}

# Print the change from $1 (old) to $2 (new) as a percentage with one decimal
# place, or "N/A" when the old value is not positive.
_asm_pct_change() {
  LC_ALL=C awk -v old="$1" -v new="$2" 'BEGIN {
    if (old > 0) printf "%.1f\n", (new - old) * 100 / old
    else print "N/A"
  }' 2>/dev/null || echo "N/A"
}

# Compile one test case with both compilers and record assembly statistics.
#   $1 - path of the .sy test file
#
# Globals read: OLD_COMPILER, NEW_COMPILER, RESULT_DIR
# Files written below "$RESULT_DIR/asm" (STEM = file name without .sy):
#   STEM.old.s / STEM.new.s   assembly from each compiler
#   STEM.diff                 unified diff old -> new
#   STEM.result               one line with nine space-separated fields:
#       stem old_inst new_inst inst_pct old_mem new_mem mem_pct
#       old_branches new_branches
#     or the single word OLD_BUILD_FAIL / NEW_BUILD_FAIL when a compiler
#     invocation fails.
compare_asm() {
  local test_file="$1"
  local stem
  stem=$(basename "$test_file" .sy)
  local out_dir="$RESULT_DIR/asm"
  local old_asm="$out_dir/$stem.old.s"
  local new_asm="$out_dir/$stem.new.s"
  mkdir -p "$out_dir"

  if ! "$OLD_COMPILER" --emit-asm "$test_file" > "$old_asm" 2>/dev/null; then
    echo "OLD_BUILD_FAIL" > "$out_dir/$stem.result"
    return
  fi
  if ! "$NEW_COMPILER" --emit-asm "$test_file" > "$new_asm" 2>/dev/null; then
    echo "NEW_BUILD_FAIL" > "$out_dir/$stem.result"
    return
  fi

  # Instruction lines: leading whitespace, a word, then whitespace. Labels,
  # directives (which start with '.') and blank lines do not match.
  # NOTE(review): a mnemonic with nothing after it on the line (e.g. a bare
  # "ret") does not match either - confirm whether that is intended.
  local inst_re='^\s+\w+\s'
  # Memory accesses: load/store mnemonics.
  local mem_re='\b(ldr|str|ldur|stur|ldp|stp)\b'
  # Branches: "b" or "b.<cond>" followed by whitespace.
  local branch_re='\bb(|\.\w+)\s'

  local old_inst new_inst old_mem new_mem old_branches new_branches
  old_inst=$(_asm_count_lines "$inst_re" "$old_asm")
  new_inst=$(_asm_count_lines "$inst_re" "$new_asm")
  old_mem=$(_asm_count_lines "$mem_re" "$old_asm")
  new_mem=$(_asm_count_lines "$mem_re" "$new_asm")
  old_branches=$(_asm_count_lines "$branch_re" "$old_asm")
  new_branches=$(_asm_count_lines "$branch_re" "$new_asm")

  # Relative change of the instruction and memory-access counts.
  local inst_pct mem_pct
  inst_pct=$(_asm_pct_change "$old_inst" "$new_inst")
  mem_pct=$(_asm_pct_change "$old_mem" "$new_mem")

  # Save the result line.
  echo "$stem $old_inst $new_inst $inst_pct $old_mem $new_mem $mem_pct $old_branches $new_branches" \
    > "$out_dir/$stem.result"

  # diff exits non-zero whenever the files differ; that is expected here.
  diff -u "$old_asm" "$new_asm" > "$out_dir/$stem.diff" 2>/dev/null || true
}
# ========== Run comparison ==========

# Run one executable under qemu-aarch64 and time it.
#   $1 - executable
#   $2 - file fed to the program's stdin; ignored when it does not exist
#   $3 - file that receives the program's stdout
# The program's exit status is written to "$3.rc". Prints the elapsed
# wall-clock time in the form 0m1.234s, or "N/A" if no time was captured.
_run_timed_qemu() {
  local exe="$1" stdin_file="$2" out_file="$3"
  # Fix the report format of the `time` keyword so that parsing it does not
  # depend on a TIMEFORMAT value inherited from the caller's environment.
  local TIMEFORMAT='%3lR'
  local report rc

  # `time` reports on the shell's stderr, so the group's stderr is captured.
  # "|| rc=$?" records a non-zero program status without tripping `set -e`.
  if [[ -f "$stdin_file" ]]; then
    report=$({
      rc=0
      time qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" < "$stdin_file" > "$out_file" 2>/dev/null || rc=$?
      echo "$rc" > "$out_file.rc"
    } 2>&1) || true
  else
    report=$({
      rc=0
      time qemu-aarch64 -L /usr/aarch64-linux-gnu "$exe" > "$out_file" 2>/dev/null || rc=$?
      echo "$rc" > "$out_file.rc"
    } 2>&1) || true
  fi

  # The capture can contain other shell diagnostics besides the timing line;
  # keep only a line that looks like a time, so the caller always receives
  # exactly one whitespace-free token.
  local time_re='^[0-9]+m[0-9]+([.,][0-9]+)?s$'
  local line elapsed="N/A"
  while IFS= read -r line; do
    if [[ "$line" =~ $time_re ]]; then
      elapsed="$line"
    fi
  done <<< "$report"
  printf '%s\n' "$elapsed"
}

# Convert a time such as 1m2.345s to seconds (62.345) and print it.
_elapsed_to_seconds() {
  local t="${1%s}"
  local mins="${t%%m*}" secs="${t#*m}"
  # Some locales print a decimal comma.
  secs="${secs/,/.}"
  LC_ALL=C awk -v m="$mins" -v s="$secs" 'BEGIN { printf "%.3f\n", m * 60 + s }'
}

# Print the contents of output file $1 followed by exit code $2 on a line of
# its own, inserting a newline first when the output does not end with one.
_emit_actual() {
  cat "$1"
  if [[ -s "$1" ]] && (($(tail -c 1 "$1" | wc -l) == 0)); then
    printf '\n'
  fi
  printf '%s\n' "$2"
}

# Build one test case with both compilers, run both programs under
# qemu-aarch64, and record timing and correctness.
#   $1 - path of the .sy test file
#
# Globals read: OLD_COMPILER, NEW_COMPILER, PROJECT_DIR, RESULT_DIR
# Inputs next to the test file (STEM = file name without .sy):
#   STEM.in   optional stdin for the program
#   STEM.out  optional expected output: program stdout followed by exit code
# Files written below "$RESULT_DIR/run": assembly, executables, captured
# output, exit codes, and STEM.result holding one line with eight fields:
#   stem old_time new_time speedup old_match new_match old_rc new_rc
# or a single word (OLD|NEW)_(COMPILE|LINK)_FAIL when a build step fails.
compare_run() {
  local test_file="$1"
  local stem
  stem=$(basename "$test_file" .sy)
  local test_dir
  test_dir=$(dirname "$test_file")
  local run_dir="$RESULT_DIR/run"
  local result_file="$run_dir/$stem.result"
  local stdin_file="$test_dir/$stem.in"
  local expected_file="$test_dir/$stem.out"
  mkdir -p "$run_dir"

  # Compile and link the old version first, then the new one.
  local side compiler tag
  for side in old new; do
    if [[ "$side" == old ]]; then
      compiler="$OLD_COMPILER"
      tag="OLD"
    else
      compiler="$NEW_COMPILER"
      tag="NEW"
    fi
    if ! "$compiler" --emit-asm "$test_file" > "$run_dir/$stem.$side.s" 2>/dev/null; then
      echo "${tag}_COMPILE_FAIL" > "$result_file"
      return
    fi
    if ! aarch64-linux-gnu-gcc -no-pie "$run_dir/$stem.$side.s" \
      -L"$PROJECT_DIR/sylib" -lsysy -static -o "$run_dir/$stem.$side" 2>/dev/null; then
      echo "${tag}_LINK_FAIL" > "$result_file"
      return
    fi
  done

  local old_out="$run_dir/$stem.old.out"
  local new_out="$run_dir/$stem.new.out"

  # Run both programs; an exit code that could not be recorded counts as 1.
  local old_time new_time old_rc new_rc
  old_time=$(_run_timed_qemu "$run_dir/$stem.old" "$stdin_file" "$old_out")
  old_rc=$(cat "$old_out.rc" 2>/dev/null || echo "1")
  new_time=$(_run_timed_qemu "$run_dir/$stem.new" "$stdin_file" "$new_out")
  new_rc=$(cat "$new_out.rc" 2>/dev/null || echo "1")

  # Actual result = program output + exit code, the layout of the expected file.
  local old_actual="$run_dir/$stem.old.actual"
  local new_actual="$run_dir/$stem.new.actual"
  _emit_actual "$old_out" "$old_rc" > "$old_actual"
  _emit_actual "$new_out" "$new_rc" > "$new_actual"

  # Compare against the expected file, ignoring whitespace differences.
  # Without an expected file both sides stay "N".
  local old_match="N" new_match="N"
  if [[ -f "$expected_file" ]]; then
    if diff -w -q "$old_actual" "$expected_file" >/dev/null 2>&1; then
      old_match="Y"
    fi
    if diff -w -q "$new_actual" "$expected_file" >/dev/null 2>&1; then
      new_match="Y"
    fi
  fi

  # Speed-up = old time / new time, when both are known and the new one is > 0.
  local speedup="N/A"
  if [[ "$old_time" != "N/A" && "$new_time" != "N/A" ]]; then
    local old_sec new_sec
    old_sec=$(_elapsed_to_seconds "$old_time" 2>/dev/null || echo 0)
    new_sec=$(_elapsed_to_seconds "$new_time" 2>/dev/null || echo 0)
    speedup=$(LC_ALL=C awk -v o="$old_sec" -v n="$new_sec" 'BEGIN {
      if (n > 0) printf "%.2f\n", o / n
      else print "N/A"
    }' 2>/dev/null || echo "N/A")
  fi

  echo "$stem $old_time $new_time $speedup $old_match $new_match $old_rc $new_rc" \
    > "$result_file"
}
# ========== Main flow ==========

# Remove the temporary worktree of the old revision unless the user asked to
# keep it. Safe to call repeatedly: WORKTREE_DIR is cleared after removal.
_cleanup_old_worktree() {
  if [[ -n "${WORKTREE_DIR:-}" && "${KEEP_WORKTREE:-false}" == false ]]; then
    echo ""
    echo "清理 worktree..."
    git -C "$PROJECT_DIR" worktree remove --force "$WORKTREE_DIR" 2>/dev/null || true
    WORKTREE_DIR=""
  fi
}

# Print the change from total $1 (old) to total $2 (new) as a percentage with
# one decimal place, or "N/A" when the old total is not positive.
_total_pct_change() {
  LC_ALL=C awk -v old="$1" -v new="$2" 'BEGIN {
    if (old > 0) printf "%.1f\n", (new - old) * 100 / old
    else print "N/A"
  }' 2>/dev/null || echo "N/A"
}

# Entry point: check tools, prepare both compilers, run the selected
# comparisons over every test case, print the summary tables and clean up.
# Globals read: OLD_REF, TEST_SET, MODE, RESULT_DIR, PROJECT_DIR,
#               WORKTREE_DIR, KEEP_WORKTREE
main() {
  echo "============================================="
  echo " 寄存器分配编译器对比"
  echo " 旧版: $OLD_REF"
  echo " 测试集: $TEST_SET"
  echo " 模式: $MODE"
  echo "============================================="
  echo ""

  # Safety net: also remove the worktree when the script stops early, for
  # example because the old build or one of the comparison steps failed.
  trap _cleanup_old_worktree EXIT

  check_tools
  setup_old_compiler
  setup_new_compiler

  local -a tests=()
  mapfile -t tests < <(get_tests)
  local total=${#tests[@]}
  # get_tests runs in a subshell, so its failure cannot stop the script by
  # itself; validate what came back before using it.
  if [[ "$total" -eq 0 ]]; then
    echo "错误: 没有找到测试用例"
    exit 1
  fi
  if [[ ! -f "${tests[0]}" ]]; then
    # Not a file path: show whatever get_tests reported and stop.
    printf '%s\n' "${tests[@]}"
    exit 1
  fi
  echo "$total 个测试用例"
  echo ""

  local test_file f result count stem

  # ========== Assembly comparison ==========
  if [[ "$MODE" == "asm" || "$MODE" == "all" ]]; then
    echo "=== 汇编质量对比 ==="
    count=0
    for test_file in "${tests[@]}"; do
      compare_asm "$test_file"
      count=$((count + 1))
      printf "\r 进度: %d/%d" "$count" "$total"
    done
    echo ""

    # Table of per-test assembly statistics.
    echo ""
    printf "%-30s %8s %8s %8s %8s %8s %8s\n" \
      "测试用例" "旧指令数" "新指令数" "变化%" "旧访存" "新访存" "变化%"
    printf "%-30s %8s %8s %8s %8s %8s %8s\n" \
      "------------------------------" "--------" "--------" "--------" "--------" "--------" "--------"

    local old_inst new_inst inst_pct old_mem new_mem mem_pct
    local total_old_inst=0 total_new_inst=0 total_old_mem=0 total_new_mem=0 valid_count=0
    for f in "$RESULT_DIR/asm"/*.result; do
      [[ -f "$f" ]] || continue
      result=$(cat "$f")
      # A result containing FAIL is a failure marker, not a statistics line.
      if [[ "$result" == *"FAIL"* ]]; then
        printf "%-30s %8s\n" "$(basename "$f" .result)" "编译失败"
        continue
      fi
      read -r stem old_inst new_inst inst_pct old_mem new_mem mem_pct _ _ <<< "$result"
      printf "%-30s %8d %8d %7s%% %8d %8d %7s%%\n" \
        "$stem" "$old_inst" "$new_inst" "$inst_pct" "$old_mem" "$new_mem" "$mem_pct"
      total_old_inst=$((total_old_inst + old_inst))
      total_new_inst=$((total_new_inst + new_inst))
      total_old_mem=$((total_old_mem + old_mem))
      total_new_mem=$((total_new_mem + new_mem))
      valid_count=$((valid_count + 1))
    done

    if [[ "$valid_count" -gt 0 ]]; then
      # Overall change, computed from the summed counts.
      local avg_inst_pct avg_mem_pct
      avg_inst_pct=$(_total_pct_change "$total_old_inst" "$total_new_inst")
      avg_mem_pct=$(_total_pct_change "$total_old_mem" "$total_new_mem")
      printf "%-30s %8d %8d %7s%% %8d %8d %7s%%\n" \
        "--- 合计 ---" "$total_old_inst" "$total_new_inst" "$avg_inst_pct" \
        "$total_old_mem" "$total_new_mem" "$avg_mem_pct"
    fi
    echo ""
    echo "详细 diff 文件: $RESULT_DIR/asm/*.diff"
  fi

  # ========== Run comparison ==========
  if [[ "$MODE" == "run" || "$MODE" == "all" ]]; then
    echo "=== 运行结果对比 ==="
    count=0
    for test_file in "${tests[@]}"; do
      compare_run "$test_file"
      count=$((count + 1))
      printf "\r 进度: %d/%d" "$count" "$total"
    done
    echo ""
    echo ""
    printf "%-30s %10s %10s %8s %6s %6s %8s %8s\n" \
      "测试用例" "旧耗时" "新耗时" "加速比" "旧匹配" "新匹配" "旧退出码" "新退出码"
    printf "%-30s %10s %10s %8s %6s %6s %8s %8s\n" \
      "------------------------------" "----------" "----------" "--------" "------" "------" "--------" "--------"

    local old_time new_time speedup old_match new_match old_status new_status
    local pass_old=0 pass_new=0 total_valid=0
    for f in "$RESULT_DIR/run"/*.result; do
      [[ -f "$f" ]] || continue
      result=$(cat "$f")
      # Failure markers are shown verbatim next to the test name.
      if [[ "$result" == *"FAIL"* ]]; then
        printf "%-30s %10s\n" "$(basename "$f" .result)" "$result"
        continue
      fi
      read -r stem old_time new_time speedup old_match new_match old_status new_status <<< "$result"
      printf "%-30s %10s %10s %8s %6s %6s %8s %8s\n" \
        "$stem" "$old_time" "$new_time" "$speedup" "$old_match" "$new_match" "$old_status" "$new_status"
      if [[ "$old_match" == "Y" ]]; then
        pass_old=$((pass_old + 1))
      fi
      if [[ "$new_match" == "Y" ]]; then
        pass_new=$((pass_new + 1))
      fi
      total_valid=$((total_valid + 1))
    done

    if [[ "$total_valid" -gt 0 ]]; then
      echo ""
      printf "输出匹配率: 旧版 %d/%d, 新版 %d/%d\n" "$pass_old" "$total_valid" "$pass_new" "$total_valid"
    fi
  fi

  # ========== Cleanup ==========
  _cleanup_old_worktree
  echo ""
  echo "对比结果保存在: $RESULT_DIR"
}

main "$@"