You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nudt-compiler-cpp/scripts/diff_test_llvm.sh

735 lines
23 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env bash
# Differential testing: this project's compiler vs LLVM/clang, covering both
# a correctness comparison and a performance comparison.
# Usage:
#   ./scripts/diff_test_llvm.sh --baseline            generate the LLVM correctness baseline
#   ./scripts/diff_test_llvm.sh --diff                correctness diff (are the outputs identical?)
#   ./scripts/diff_test_llvm.sh --perf                performance comparison (instruction counts)
#   ./scripts/diff_test_llvm.sh --perf --llvm-opt 2   performance comparison vs clang -O2
#   ./scripts/diff_test_llvm.sh --perf --save-asm     performance comparison, keeping the LLVM assembly
#   ./scripts/diff_test_llvm.sh --perf --llvm-opt 0   compare against clang -O0 (lowest baseline)
set -euo pipefail

# Escape sequences used in the format strings of printf / echo -e below.
NC='\033[0m'
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'

# Locations, all derived from where this script lives.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
PROJECT_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
TEST_ROOT="${PROJECT_ROOT}/2026test" # default, can be overridden with -d
RESULTS_ROOT="$PROJECT_ROOT/2026test_results"
LLVM_BASELINE_DIR="$RESULTS_ROOT/llvm_baseline"
LLVM_ASM_DIR="$RESULTS_ROOT/llvm_asm"
RUNTIME_SRC="$PROJECT_ROOT/sylib/sylib.c"
RUNTIME_OBJ="$PROJECT_ROOT/build/test_runtime/sylib_llvm.o"
COMPILER="$PROJECT_ROOT/build/bin/compiler"

# Toolchain: prefer the cross-prefixed clang, otherwise fall back to the
# system clang (which is later given an explicit --target).
# CLANG stays empty when neither is on PATH.
CLANG_TARGET="aarch64-linux-gnu"
CLANG=""
for cand in aarch64-linux-gnu-clang clang; do
  command -v "$cand" >/dev/null 2>&1 || continue
  CLANG="$cand"
  break
done
QEMU="qemu-aarch64"

# Test categories used by the correctness modes and by the performance mode.
CORRECTNESS_CATS=("functional" "h_functional")
PERF_CATS=("performance")

# Defaults for everything the command line can change.
DO_BASELINE=false
DO_DIFF=false
DO_PERF=false
SAVE_ASM=false
LLVM_OPT_LEVEL=2
MAX_CASES=0
REPORT_FILE=""
JSON_FILE=""
# One job per CPU reported by nproc; 4 when nproc is unavailable.
if ! JOBS=$(nproc 2>/dev/null); then
  JOBS=4
fi
# usage
#   Print the help text to stdout. Does not exit; callers decide the exit code.
#   The documented --llvm-opt default must match the LLVM_OPT_LEVEL default
#   set at the top of the script (2).
usage() {
  cat <<'EOF'
用法: ./scripts/diff_test_llvm.sh [选项]
差分测试：对每个 .sy 用例，用编译器生成汇编和 LLVM/clang 编译后，
比较输出（正确性）或指令数（性能）。
模式（至少选一个）：
--baseline 生成 LLVM 正确性基线（保存输出到 llvm_baseline/
--diff 正确性差分对比（编译器输出 vs LLVM 基线）
--perf 性能对比（编译器 vs LLVM 指令数及比值）
性能对比选项（与 --perf 配合）：
--llvm-opt <N> clang 优化级别 0/1/2/3/s/z默认: 2
--save-asm 保存 LLVM 汇编到 llvm_asm/ 供人工分析
--perf-cats <cats> 性能对比覆盖的类别，逗号分隔
（默认: performance也可加 functional,h_functional
--report <file> 导出性能对比结果到 CSV 文件
--json <file> 导出性能对比结果到 JSON 文件
通用选项：
-n, --max N 最多运行 N 个用例 (0=不限制，默认: 0)
-d, --test-dir DIR 测试用例目录 (默认 2026test)
-j, --jobs N 并行任务数 (默认: nproc设为1恢复串行)
-h, --help 显示此帮助信息
输出目录：
2026test_results/llvm_baseline/ LLVM 正确性基线（输出 + 退出码）
2026test_results/llvm_asm/ LLVM 汇编（--save-asm 时保存）
EOF
}
# ============================================================
# Argument parsing
# ============================================================

# need_value OPTION REMAINING
#   Exit with status 1 and a readable message unless OPTION is followed by a
#   value. REMAINING is the number of arguments left, OPTION included.
#   Without this check a trailing "--llvm-opt" would die on "$2" with an
#   opaque "unbound variable" error under `set -u`.
need_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "错误: 选项 $1 需要一个参数"
    exit 1
  fi
}

# parse_args ARGS...
#   Translate the command line into the DO_* / SAVE_ASM / LLVM_OPT_LEVEL /
#   PERF_CATS / REPORT_FILE / JSON_FILE / MAX_CASES / TEST_ROOT / JOBS globals.
#   Prints usage and exits 1 on an unknown option; exits 0 after --help.
#   Numeric options are validated here and normalised to base 10, so later
#   arithmetic tests never see a non-number or an "08"-style octal literal.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --baseline) DO_BASELINE=true ;;
      --diff) DO_DIFF=true ;;
      --perf) DO_PERF=true ;;
      --save-asm) SAVE_ASM=true ;;
      --llvm-opt)
        need_value "$1" "$#"
        LLVM_OPT_LEVEL="$2"; shift ;;
      --perf-cats)
        need_value "$1" "$#"
        IFS=',' read -ra PERF_CATS <<< "$2"; shift ;;
      --report)
        need_value "$1" "$#"
        REPORT_FILE="$2"; shift ;;
      --json)
        need_value "$1" "$#"
        JSON_FILE="$2"; shift ;;
      -n|--max)
        need_value "$1" "$#"
        if ! [[ "$2" =~ ^[0-9]+$ ]]; then
          echo "错误: --max 需要非负整数"; exit 1
        fi
        MAX_CASES=$((10#$2)); shift ;;
      -d|--test-dir)
        need_value "$1" "$#"
        TEST_ROOT="$2"; shift ;;
      -j|--jobs)
        need_value "$1" "$#"
        # The regex runs first, so the arithmetic only ever sees digits.
        if ! [[ "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 )); then
          echo "错误: --jobs 需要正整数"; exit 1
        fi
        JOBS=$((10#$2)); shift ;;
      -h|--help) usage; exit 0 ;;
      *) echo "未知选项: $1"; usage; exit 1 ;;
    esac
    shift
  done
}

parse_args "$@"
# At least one of the three modes has to be selected.
if [[ "$DO_BASELINE" == false && "$DO_DIFF" == false && "$DO_PERF" == false ]]; then
  echo "错误: 至少需要 --baseline、--diff 或 --perf 之一"
  usage
  exit 1
fi
# The value is pasted verbatim into "-O<level>", and clang only accepts the
# lowercase spellings -Os / -Oz. Fold the uppercase forms that used to pass
# validation so they no longer make every clang compile fail silently.
case "$LLVM_OPT_LEVEL" in
  S) LLVM_OPT_LEVEL=s ;;
  Z) LLVM_OPT_LEVEL=z ;;
esac
if ! [[ "$LLVM_OPT_LEVEL" =~ ^[0-3sz]$ ]]; then
  echo "错误: --llvm-opt 必须是 0/1/2/3/s/z"
  exit 1
fi
# ============================================================
# Pre-flight checks
# ============================================================
# A clang driver must have been detected earlier.
if [[ -z "$CLANG" ]]; then
  echo "错误: 未找到 clang 或 aarch64-linux-gnu-clang"
  echo " 安装: sudo apt install clang 或 sudo apt install clang-18"
  exit 1
fi

# gnu89: SysY sources rely on implicit function declarations
# (starttime/getint and friends).
CLANG_FLAGS="-std=gnu89"
case "$CLANG" in
  aarch64-linux-gnu-clang)
    ;; # cross-prefixed driver: no --target is added
  *)
    # System clang: the cross-compilation target has to be given explicitly.
    CLANG_FLAGS="$CLANG_FLAGS --target=$CLANG_TARGET"
    ;;
esac

# Everything else the run depends on: the emulator, the runtime source and
# the compiler under test.
if ! command -v "$QEMU" >/dev/null 2>&1; then
  echo "未找到 $QEMU"
  exit 1
fi
if [[ ! -f "$RUNTIME_SRC" ]]; then
  echo "未找到 $RUNTIME_SRC"
  exit 1
fi
if [[ ! -f "$COMPILER" ]]; then
  echo "未找到编译器 $COMPILER，请先构建"
  exit 1
fi
echo "LLVM 工具链: $CLANG $CLANG_FLAGS"

# Build the runtime library object when it is missing or older than its source.
mkdir -p "$(dirname "$RUNTIME_OBJ")"
if [[ ! -f "$RUNTIME_OBJ" ]] || [[ "$RUNTIME_SRC" -nt "$RUNTIME_OBJ" ]]; then
  echo "编译运行时库 $RUNTIME_SRC$RUNTIME_OBJ ..."
  # CLANG_FLAGS is deliberately unquoted: it is a space-separated flag list.
  $CLANG $CLANG_FLAGS -O2 -c "$RUNTIME_SRC" -o "$RUNTIME_OBJ"
fi
# ============================================================
# SysY -> C preprocessing (rewrites SysY syntax that clang rejects)
# ============================================================
# preprocess_for_clang SRC DST
#   Copy SRC to DST, rewriting every line-leading
#       const int NAME = VALUE;
#   into
#       #define NAME (VALUE)
#   because C does not accept a `const int` global as an array size.
#
#   - The paths are handed to Python through argv instead of being pasted
#     into the program text, so quotes or backslashes in a path can neither
#     break nor inject code.
#   - VALUE is parenthesised so an expression initialiser (e.g. `1 + 2`) keeps
#     its value when the macro is used inside a larger expression.
#   - Files are read and written as UTF-8 with surrogateescape, so the result
#     does not depend on the locale and undecodable bytes pass through as-is.
#   Returns python3's exit status.
preprocess_for_clang() {
  local src="$1"
  local dst="$2"
  # Single-quoted program (no heredoc) because this function is shipped to
  # worker shells with `export -f`.
  python3 -c '
import re
import sys

src, dst = sys.argv[1], sys.argv[2]
with open(src, encoding="utf-8", errors="surrogateescape") as f:
    content = f.read()
content = re.sub(r"^const int (\w+) = ([^;]+);", r"#define \1 (\2)",
                 content, flags=re.MULTILINE)
with open(dst, "w", encoding="utf-8", errors="surrogateescape") as f:
    f.write(content)
' "$src" "$dst"
}
# ============================================================
# Canonicalised comparison
# ============================================================
# canon_compare EXPECTED ACTUAL
#   Return 0 when the two files are equal after canonicalisation, non-zero
#   otherwise. Canonicalisation strips a trailing CR and trailing whitespace
#   from every line and drops blank lines at the end of the file.
#   diff's own output is discarded; only the exit status matters.
canon_compare() {
  local want_file="$1" got_file="$2"
  # awk filter: hold blank lines back and only emit them once a non-blank
  # line follows, so blank lines at the very end are never printed.
  local drop_trailing_blanks='
    $0 == "" { held++; next }
    { for (; held > 0; held--) print ""; print }'
  local strip_line_ends='s/\r$//; s/[[:space:]]*$//'
  diff -q \
    <(sed "$strip_line_ends" "$want_file" | awk "$drop_trailing_blanks") \
    <(sed "$strip_line_ends" "$got_file" | awk "$drop_trailing_blanks") \
    > /dev/null 2>&1
}
# count_insn ASM_FILE
#   Print the number of lines in ASM_FILE that start with whitespace followed
#   by a lowercase letter. Lines starting in column 0 (labels), blank lines
#   and indented lines starting with "." (directives) do not match.
#   Always prints exactly one integer: 0 when nothing matches or when the
#   file cannot be read.
#
#   grep -c prints "0" itself and exits 1 when nothing matches, so the count
#   is captured first and defaulted only when grep printed nothing; a plain
#   `grep -c ... || echo 0` would emit "0" twice.
count_insn() {
  local asm="$1"
  local count
  count=$(grep -cE '^[[:space:]]+[a-z]' -- "$asm" 2>/dev/null) || true
  printf '%s\n' "${count:-0}"
}
# ============================================================
# Test-case collection
# ============================================================
# collect_cases CATEGORY...
#   Print, one per line and sorted, the path of every regular *.sy file
#   directly inside $TEST_ROOT/<CATEGORY>. Categories without a directory are
#   skipped. Prints nothing at all when no case is found: printing an empty
#   array through printf '%s\n' would emit one blank line, which the caller
#   would then treat as a test case with an empty path.
collect_cases() {
  local cases=()
  local cat dir sy
  for cat in "$@"; do
    dir="$TEST_ROOT/$cat"
    [[ -d "$dir" ]] || continue
    for sy in "$dir"/*.sy; do
      # An unmatched glob stays literal; the -f test filters it out.
      [[ -f "$sy" ]] || continue
      cases+=("$sy")
    done
  done
  [[ ${#cases[@]} -gt 0 ]] || return 0
  printf '%s\n' "${cases[@]}" | sort
}
# load_cases CATEGORY...
#   Fill the global CASES array with the paths printed by collect_cases for
#   the given categories, then keep only the first MAX_CASES entries when
#   MAX_CASES is positive and smaller than the number of cases found.
load_cases() {
  local wanted=("$@")
  local path
  CASES=()
  while IFS= read -r path; do
    CASES[${#CASES[@]}]="$path"
  done < <(collect_cases "${wanted[@]}")
  if (( MAX_CASES > 0 && MAX_CASES < ${#CASES[@]} )); then
    CASES=("${CASES[@]:0:MAX_CASES}")
  fi
}
# ============================================================
# LLVM correctness baseline generation (parallelised)
# ============================================================
# run_baseline_worker IDX SY RESULT_FILE
#   Build one .sy case with clang (after preprocess_for_clang), link it
#   statically against $RUNTIME_OBJ, run it under $QEMU with a 60 s KILL
#   timeout, and store the result under $LLVM_BASELINE_DIR/<category>/:
#     <stem>             the executable
#     <stem>.stdout      raw program output
#     <stem>.actual.out  program output (newline-terminated) followed by a
#                        line holding the exit status
#   <category> is the name of the directory containing SY; stdin comes from
#   <stem>.in next to SY when that file exists, otherwise from /dev/null.
#   RESULT_FILE receives "STATUS=OK|FAIL" and "NAME=<stem>"; FAIL means the
#   clang build failed. IDX is accepted but not used here.
run_baseline_worker() {
  local idx="$1" sy="$2"
  local result_file="$3"

  local case_dir=$(dirname "$sy")
  local category=$(basename "$case_dir")
  local file_name=$(basename "$sy")
  local stem=${file_name%.sy}
  local out_dir="$LLVM_BASELINE_DIR/$category"
  local exe="$out_dir/$stem"
  local stdout_file="$out_dir/$stem.stdout"
  local actual_file="$out_dir/$stem.actual.out"
  local input_file="$case_dir/$stem.in"
  mkdir -p "$out_dir"

  # Build from a preprocessed temporary copy, which is removed again whether
  # or not the build succeeds.
  local c_src=$(mktemp /tmp/clang_baseline_XXXX.sy)
  preprocess_for_clang "$sy" "$c_src"
  local build_rc=0
  $CLANG $CLANG_FLAGS -x c "$c_src" -x none "$RUNTIME_OBJ" -static -o "$exe" -lm 2>/dev/null || build_rc=$?
  rm -f "$c_src"

  local status="FAIL"
  if [[ $build_rc -eq 0 ]]; then
    [[ -f "$input_file" ]] || input_file=/dev/null
    local exit_code=0
    set +e
    timeout --signal=KILL 60 "$QEMU" "$exe" < "$input_file" > "$stdout_file" 2>/dev/null || exit_code=$?
    set -e
    {
      cat "$stdout_file"
      # Terminate the captured output with a newline when it lacks one, so
      # the exit status always sits on a line of its own.
      if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
        printf '\n'
      fi
      printf '%s\n' "$exit_code"
    } > "$actual_file"
    status="OK"
  fi

  printf 'STATUS=%s\nNAME=%s\n' "$status" "$stem" > "$result_file"
}
# run_baseline
#   Generate the LLVM correctness baseline for every case in
#   CORRECTNESS_CATS by calling run_baseline_worker once per case (in
#   parallel through xargs when JOBS > 1 and there is more than one case),
#   then print a per-case status and a summary.
#
#   Work items reach xargs NUL-delimited and the result directory is passed
#   as a positional argument: with the default xargs input parsing, paths
#   containing whitespace or quotes are split or rejected, and splicing the
#   directory into the `bash -c` script breaks on special characters.
#   The result directory is wiped first so files left behind by an
#   interrupted run are never mistaken for fresh results.
run_baseline() {
  load_cases "${CORRECTNESS_CATS[@]}"
  echo ""
  echo "========== 生成 LLVM 正确性基线（${#CASES[@]} 用例）=========="
  local res_dir="$LLVM_BASELINE_DIR/.results"
  rm -rf "$res_dir"
  mkdir -p "$res_dir"

  local i
  if [[ $JOBS -gt 1 && ${#CASES[@]} -gt 1 ]]; then
    export LLVM_BASELINE_DIR RUNTIME_OBJ CLANG CLANG_FLAGS QEMU
    export -f run_baseline_worker preprocess_for_clang
    # In the child: $1 = result directory, $2 = "<index>|<path>".
    for i in "${!CASES[@]}"; do
      printf '%s\0' "$i|${CASES[$i]}"
    done | xargs -0 -n 1 -P "$JOBS" bash -c '
      run_baseline_worker "${2%%|*}" "${2#*|}" "$1/${2%%|*}"
    ' _ "$res_dir"
  else
    for i in "${!CASES[@]}"; do
      run_baseline_worker "$i" "${CASES[$i]}" "$res_dir/$i"
    done
  fi

  # Summary
  local total=0 pass=0 fail=0
  local status name
  for i in "${!CASES[@]}"; do
    total=$((total + 1))
    if [[ -f "$res_dir/$i" ]]; then
      # sed (not grep | cut) so a missing key yields an empty value instead
      # of a failing pipeline, and values containing "=" stay intact.
      status=$(sed -n 's/^STATUS=//p' "$res_dir/$i")
      name=$(sed -n 's/^NAME=//p' "$res_dir/$i")
      if [[ "$status" == "OK" ]]; then
        pass=$((pass + 1))
        printf " [${GREEN}OK${NC}] %-35s (%d/%d)\r" "$name" "$total" "${#CASES[@]}"
      else
        fail=$((fail + 1))
        echo -e " [${RED}FAIL${NC}] $name (clang 编译失败)"
      fi
    else
      # The worker never wrote a result: report it instead of only counting it.
      fail=$((fail + 1))
      echo -e " [${RED}FAIL${NC}] $(basename "${CASES[$i]}" .sy) 超时/崩溃"
    fi
  done
  rm -rf "$res_dir"
  printf '\n'
  echo "基线完成: $pass/$total 成功"
  if [[ $fail -gt 0 ]]; then
    echo -e " ${YELLOW}$fail 个 clang 编译失败（可能使用了 clang 不支持的 SysY 语法）${NC}"
  fi
}
# ============================================================
# Correctness differential comparison
# ============================================================
# run_diff
#   For every case in CORRECTNESS_CATS compare the compiler's recorded output
#   ($RESULTS_ROOT/<cat>/<stem>.actual.out) with the LLVM baseline
#   ($LLVM_BASELINE_DIR/<cat>/<stem>.actual.out) using canon_compare.
#   Cases missing either file are skipped. For each mismatch the first 20
#   lines of both outputs and of their unified diff are printed.
#
#   The previews use `head -n 20 FILE` instead of `cat FILE | head -20`: with
#   `set -o pipefail` the cat side fails with a broken pipe as soon as the
#   file is larger than the pipe buffer, and under `set -e` that aborted the
#   whole run on the first large mismatching output.
run_diff() {
  load_cases "${CORRECTNESS_CATS[@]}"
  echo ""
  echo "========== 正确性差分对比（${#CASES[@]} 用例）=========="
  local total=0 match=0 mismatch=0 skip=0
  local sy dir cat base stem compiler_out llvm_out
  for sy in "${CASES[@]}"; do
    total=$((total + 1))
    dir=$(dirname "$sy")
    cat=$(basename "$dir")
    base=$(basename "$sy")
    stem=${base%.sy}
    compiler_out="$RESULTS_ROOT/$cat/$stem.actual.out"
    llvm_out="$LLVM_BASELINE_DIR/$cat/$stem.actual.out"
    if [[ ! -f "$compiler_out" ]]; then
      echo -e " [${YELLOW}SKIP${NC}] $stem (无编译器输出，先跑 2026test.sh)"
      skip=$((skip + 1))
      continue
    fi
    if [[ ! -f "$llvm_out" ]]; then
      echo -e " [${YELLOW}SKIP${NC}] $stem (无 LLVM 基线，先跑 --baseline)"
      skip=$((skip + 1))
      continue
    fi
    if canon_compare "$compiler_out" "$llvm_out"; then
      match=$((match + 1))
      printf " [${GREEN}MATCH${NC}] %-35s (%d/%d)\r" "$stem" "$total" "${#CASES[@]}"
    else
      mismatch=$((mismatch + 1))
      printf '\n'
      echo -e " [${RED}MISMATCH${NC}] $stem"
      echo " --- 编译器输出 ---"
      head -n 20 "$compiler_out" | sed 's/^/ | /'
      echo " --- clang 输出 ---"
      head -n 20 "$llvm_out" | sed 's/^/ | /'
      echo " --- diff ---"
      # diff exits 1 on differences and may be cut short by head; both are
      # expected here, hence the `|| true`.
      diff -u "$compiler_out" "$llvm_out" | head -n 20 | sed 's/^/ | /' || true
      echo ""
    fi
  done
  printf '\n'
  echo "========== 正确性差分结果 =========="
  echo -e " 匹配: ${GREEN}$match${NC}"
  echo -e " 不匹配: ${RED}$mismatch${NC}"
  if [[ $skip -gt 0 ]]; then
    echo -e " 跳过: ${YELLOW}$skip${NC}"
  fi
  if [[ $mismatch -eq 0 ]]; then
    echo -e "\n${GREEN}全部匹配，编译器输出与 clang 一致${NC}"
  fi
}
# ============================================================
# Performance comparison (parallelised)
# ============================================================
# run_perf_worker IDX SY RESULT_FILE
#   Compile one .sy case to assembly twice, concurrently: with $COMPILER
#   (-S -O, 60 s KILL timeout) and with clang at -O$LLVM_OPT_LEVEL (after
#   preprocess_for_clang). Each output is measured with count_insn.
#   A side counts as failed when its process exits non-zero or its assembly
#   file is empty. When SAVE_ASM is true and clang succeeded, the clang
#   assembly is copied to $LLVM_ASM_DIR/<category>/<opt level>/<stem>.s.
#   RESULT_FILE receives STATUS (OK, COMP_FAIL or LLVM_FAIL; COMP_FAIL wins
#   when both sides failed), STEM, CAT, COMPILER_LINES and LLVM_LINES.
#   IDX is accepted but not used here.
run_perf_worker() {
  local idx="$1" sy="$2" result_file="$3"
  local case_dir=$(dirname "$sy")
  local cat=$(basename "$case_dir")
  local file_name=$(basename "$sy")
  local stem=${file_name%.sy}

  local ours_asm=$(mktemp /tmp/compiler_llvm_XXXX.s)
  local ref_asm=$(mktemp /tmp/llvm_XXXX.s)
  local ours_ok=true ref_ok=true
  local ours_count=0 ref_count=0

  # Start both compilations in the background, then wait for each of them.
  timeout --signal=KILL 60 "$COMPILER" -S -O -o "$ours_asm" "$sy" 2>/dev/null &
  local ours_pid=$!
  local c_src=$(mktemp /tmp/clang_perf_XXXX.sy)
  preprocess_for_clang "$sy" "$c_src"
  $CLANG $CLANG_FLAGS -x c -S "-O${LLVM_OPT_LEVEL}" -o "$ref_asm" "$c_src" 2>/dev/null &
  local ref_pid=$!

  if ! wait $ours_pid 2>/dev/null; then
    ours_ok=false
  fi
  if ! wait $ref_pid 2>/dev/null; then
    ref_ok=false
  fi
  rm -f "$c_src"

  # An empty assembly file counts as a failure even after a zero exit status.
  if [[ $ours_ok == true && -s "$ours_asm" ]]; then
    ours_count=$(count_insn "$ours_asm")
  else
    ours_ok=false
  fi
  if [[ $ref_ok == true && -s "$ref_asm" ]]; then
    ref_count=$(count_insn "$ref_asm")
  else
    ref_ok=false
  fi

  # Keep the LLVM assembly when asked to.
  if [[ "$SAVE_ASM" == true && $ref_ok == true ]]; then
    local keep_dir="$LLVM_ASM_DIR/${cat}/${LLVM_OPT_LEVEL}"
    mkdir -p "$keep_dir"
    cp "$ref_asm" "$keep_dir/${stem}.s"
  fi
  rm -f "$ours_asm" "$ref_asm"

  local verdict
  if [[ $ours_ok != true ]]; then
    verdict="COMP_FAIL"
  elif [[ $ref_ok != true ]]; then
    verdict="LLVM_FAIL"
  else
    verdict="OK"
  fi
  printf 'STATUS=%s\nSTEM=%s\nCAT=%s\nCOMPILER_LINES=%s\nLLVM_LINES=%s\n' \
    "$verdict" "$stem" "$cat" "$ours_count" "$ref_count" \
    > "$result_file"
}
# run_perf
#   Compare instruction counts of the compiler (-O) against clang at
#   -O$LLVM_OPT_LEVEL for every case in PERF_CATS. run_perf_worker is called
#   once per case (in parallel through xargs when JOBS > 1 and there is more
#   than one case); afterwards this prints a per-case table, three top-5
#   lists, overall totals, the geometric mean of the ratios and a score
#   estimate, and exports CSV / JSON when REPORT_FILE / JSON_FILE are set.
#   The local `results` array ("stem|compiler_lines|llvm_lines|ratio") is
#   read by _export_csv / _export_json through dynamic scoping.
#
#   Robustness notes:
#   - Work items reach xargs NUL-delimited and the result directory is a
#     positional argument, so paths with whitespace or quotes survive.
#   - Ratios that are "N/A" (clang count of 0) are kept out of the ranked
#     lists, and every division is guarded so a zero clang total or an "N/A"
#     geometric mean can no longer abort the run with an awk error.
#   - The saved-assembly hint names the layout the worker actually writes:
#     <LLVM_ASM_DIR>/<category>/<opt level>/.
run_perf() {
  load_cases "${PERF_CATS[@]}"
  local llvm_opt="-O${LLVM_OPT_LEVEL}"
  local llvm_label="clang ${llvm_opt}"
  echo ""
  echo "========== 性能对比：编译器 -O vs ${llvm_label}${#CASES[@]} 用例）=========="
  echo ""
  local res_dir="$RESULTS_ROOT/.perf_llvm_results"
  rm -rf "$res_dir"
  mkdir -p "$res_dir"

  # Parallel or serial execution
  local i
  if [[ $JOBS -gt 1 && ${#CASES[@]} -gt 1 ]]; then
    export COMPILER CLANG CLANG_FLAGS LLVM_OPT_LEVEL SAVE_ASM LLVM_ASM_DIR
    export -f run_perf_worker preprocess_for_clang count_insn
    # In the child: $1 = result directory, $2 = "<index>|<path>".
    for i in "${!CASES[@]}"; do
      printf '%s\0' "$i|${CASES[$i]}"
    done | xargs -0 -n 1 -P "$JOBS" bash -c '
      run_perf_worker "${2%%|*}" "${2#*|}" "$1/${2%%|*}"
    ' _ "$res_dir"
  else
    for i in "${!CASES[@]}"; do
      run_perf_worker "$i" "${CASES[$i]}" "$res_dir/$i"
    done
  fi

  # Per-case table
  local compiler_fail=0 llvm_fail=0
  local -a results=() # "stem|compiler_lines|llvm_lines|ratio"
  local rf status stem cl ll ratio flag
  for i in "${!CASES[@]}"; do
    rf="$res_dir/$i"
    if [[ ! -f "$rf" ]]; then
      compiler_fail=$((compiler_fail + 1))
      echo -e " [${RED}FAIL${NC}] $(basename "${CASES[$i]}" .sy) 超时/崩溃"
      continue
    fi
    # sed (not grep | cut) so a missing key yields an empty value instead of
    # a failing pipeline, and values containing "=" stay intact.
    status=$(sed -n 's/^STATUS=//p' "$rf")
    stem=$(sed -n 's/^STEM=//p' "$rf")
    cl=$(sed -n 's/^COMPILER_LINES=//p' "$rf")
    ll=$(sed -n 's/^LLVM_LINES=//p' "$rf")
    case "$status" in
      COMP_FAIL)
        compiler_fail=$((compiler_fail + 1))
        echo -e " [${RED}FAIL${NC}] $stem 编译器编译失败"
        ;;
      LLVM_FAIL)
        llvm_fail=$((llvm_fail + 1))
        echo -e " [${YELLOW}SKIP${NC}] $stem clang 编译失败"
        ;;
      OK)
        flag=""
        if [[ "$ll" -eq 0 ]]; then
          ratio="N/A"
        else
          ratio=$(awk -v c="$cl" -v l="$ll" 'BEGIN { printf "%.2f", c/l }')
          if awk -v r="$ratio" 'BEGIN { exit(r <= 1.5 ? 0 : 1) }'; then
            flag="${GREEN}"
          elif awk -v r="$ratio" 'BEGIN { exit(r <= 3.0 ? 0 : 1) }'; then
            flag="${YELLOW}"
          else
            flag="${RED}"
          fi
        fi
        printf " %-35s 编译器:%5d clang:%5d ${flag}${BOLD}%sx${NC}\n" \
          "$stem" "$cl" "$ll" "$ratio"
        results+=("$stem|$cl|$ll|$ratio")
        ;;
      *)
        # Unreadable or truncated result file: treat like a crashed worker.
        compiler_fail=$((compiler_fail + 1))
        echo -e " [${RED}FAIL${NC}] $(basename "${CASES[$i]}" .sy) 超时/崩溃"
        ;;
    esac
  done
  rm -rf "$res_dir"

  # Summary statistics
  printf '\n'
  echo "========== 性能对比汇总 =========="
  echo ""
  local valid=${#results[@]}
  if [[ $valid -eq 0 ]]; then
    echo "无有效用例"
    return
  fi

  # Top 5 largest gaps (compiler worse than clang)
  echo "--- 差距最大 TOP 5优先优化编译器/clang > 1.0---"
  printf '%s\n' "${results[@]}" | awk -F'|' '$4 != "N/A" && $4+0 > 1.0' | sort -t'|' -k4 -rn | head -5 | while IFS='|' read -r stem cl ll ratio; do
    flag="${RED}"
    if awk -v r="$ratio" 'BEGIN { exit(r <= 1.5 ? 0 : 1) }'; then
      flag="${GREEN}"
    elif awk -v r="$ratio" 'BEGIN { exit(r <= 3.0 ? 0 : 1) }'; then
      flag="${YELLOW}"
    fi
    printf " %-35s 编译器:%5d clang:%5d ${flag}${BOLD}%sx${NC}\n" "$stem" "$cl" "$ll" "$ratio"
  done
  echo ""
  echo "--- 编译器优于 clang TOP 5编译器/clang < 1.0---"
  printf '%s\n' "${results[@]}" | awk -F'|' '$4 != "N/A" && $4+0 < 1.0' | sort -t'|' -k4 -n | head -5 | while IFS='|' read -r stem cl ll ratio; do
    printf " %-35s 编译器:%5d clang:%5d ${GREEN}${BOLD}%sx${NC}\n" "$stem" "$cl" "$ll" "$ratio"
  done
  echo ""
  echo "--- 差距最小 TOP 5最接近 1.0x---"
  printf '%s\n' "${results[@]}" | awk -F'|' '
    $4 != "N/A" {
      ratio = $4 + 0
      dist = (ratio > 1.0) ? (ratio - 1.0) : (1.0 - ratio)
      printf "%s|%s|%s|%s|%f\n", $1, $2, $3, $4, dist
    }' | sort -t'|' -k5 -n | head -5 | while IFS='|' read -r stem cl ll ratio dist; do
    printf " %-35s 编译器:%5d clang:%5d ${GREEN}${BOLD}%sx${NC}\n" "$stem" "$cl" "$ll" "$ratio"
  done
  echo ""

  # Instruction-count totals
  local total_compiler=0 total_llvm=0 r
  for r in "${results[@]}"; do
    IFS='|' read -r _ cl ll _ <<< "$r"
    total_compiler=$((total_compiler + cl))
    total_llvm=$((total_llvm + ll))
  done
  local total_ratio
  total_ratio=$(awk -v c="$total_compiler" -v l="$total_llvm" \
    'BEGIN { if (l > 0) printf "%.2f", c/l; else printf "N/A" }')

  # Geometric mean over the strictly positive ratios ("N/A" evaluates to 0)
  local geo_mean
  geo_mean=$(printf '%s\n' "${results[@]}" | awk -F'|' '
    BEGIN { sum = 0; n = 0 }
    {
      ratio = $4 + 0
      if (ratio > 0) { sum += log(ratio); n++ }
    }
    END {
      if (n > 0) printf "%.2f", exp(sum / n)
      else print "N/A"
    }')

  echo "--- 整体指标 ---"
  printf " 编译器总指令数: %d\n" "$total_compiler"
  printf " %s 总指令数: %d\n" "$llvm_label" "$total_llvm"
  printf " 总指令数比: ${BOLD}%sx${NC}\n" "$total_ratio"
  printf " 几何平均比: ${BOLD}%sx${NC} (越接近 1.0 越接近 %s)\n" "$geo_mean" "$llvm_label"
  printf " 有效用例: %d\n" "$valid"
  if [[ $compiler_fail -gt 0 ]]; then
    printf " 编译器失败: %d\n" "$compiler_fail"
  fi
  if [[ $llvm_fail -gt 0 ]]; then
    printf " clang 失败: %d\n" "$llvm_fail"
  fi
  echo ""

  # Score estimate
  local target_ratio="1.11"
  if [[ "$geo_mean" == "N/A" ]]; then
    echo -e "${YELLOW}几何平均比不可用，跳过性能分预估${NC}"
  elif awk -v gm="$geo_mean" -v tr="$target_ratio" 'BEGIN { exit(gm <= tr ? 0 : 1) }'; then
    echo -e "${GREEN}几何平均比 ${geo_mean}x ≤ ${target_ratio}x性能分预估 ≥90一级水平${NC}"
  else
    local perf_est
    perf_est=$(awk -v gm="$geo_mean" 'BEGIN { printf "%.0f", 100 / gm }')
    echo -e "${YELLOW}几何平均比 ${geo_mean}x > ${target_ratio}x性能分预估 ≈${perf_est}（一级需 ≥90${NC}"
  fi
  if [[ "$SAVE_ASM" == true ]]; then
    echo ""
    echo -e "${CYAN}clang 汇编已保存到 $LLVM_ASM_DIR/<类别>/${LLVM_OPT_LEVEL}/${NC}"
    echo " 可对比分析 clang/LLVM 的优化策略（循环展开、向量化、指令调度等）"
  fi

  # Report export
  if [[ -n "$REPORT_FILE" ]]; then
    _export_csv "$llvm_label" "$LLVM_OPT_LEVEL" "$total_compiler" "$total_llvm" "$geo_mean" "$valid" "$compiler_fail" "$llvm_fail"
    echo ""
    echo -e "${CYAN}CSV 报告已导出到 $REPORT_FILE${NC}"
  fi
  if [[ -n "$JSON_FILE" ]]; then
    _export_json "$llvm_label" "$LLVM_OPT_LEVEL" "$total_compiler" "$total_llvm" "$geo_mean" "$valid" "$compiler_fail" "$llvm_fail"
    echo ""
    echo -e "${CYAN}JSON 报告已导出到 $JSON_FILE${NC}"
  fi
}
# ============================================================
# Export helpers
# ============================================================
# _export_csv LLVM_LABEL LLVM_OPT TOTAL_COMPILER TOTAL_LLVM GEO_MEAN VALID COMPILER_FAIL LLVM_FAIL
#   Write the performance comparison to $REPORT_FILE as CSV: a header row,
#   one row per entry of the caller's `results` array
#   ("stem|compiler|clang|ratio") sorted by ratio in descending order, then
#   a blank line and a block of summary rows.
#   The category column is the first PERF_CATS entry whose directory under
#   $TEST_ROOT contains <stem>.sy (empty when none does). The winner column
#   is "tie" for a ratio of exactly "1.00", "compiler" below 1.0, otherwise
#   "clang". LLVM_LABEL, COMPILER_FAIL and LLVM_FAIL are accepted but do not
#   appear in the file.
_export_csv() {
  local llvm_label="$1" llvm_opt="$2"
  local total_compiler="$3" total_llvm="$4" geo_mean="$5"
  local valid="$6" compiler_fail="$7" llvm_fail="$8"
  local stamp=$(date '+%Y-%m-%d %H:%M:%S')
  local score=$(awk -v gm="$geo_mean" 'BEGIN { printf "%.0f", 100 / gm }')
  {
    echo "test_case,category,compiler_insn,clang_insn,ratio,winner"
    printf '%s\n' "${results[@]}" | sort -t'|' -k4 -rn | while IFS='|' read -r stem cl ll ratio; do
      local cat="" c
      for c in "${PERF_CATS[@]}"; do
        if [[ -f "$TEST_ROOT/$c/${stem}.sy" ]]; then
          cat="$c"
          break
        fi
      done
      local winner
      if [[ "$ratio" == "1.00" ]]; then
        winner="tie"
      elif awk -v r="$ratio" 'BEGIN { exit(r < 1.0 ? 0 : 1) }'; then
        winner="compiler"
      else
        winner="clang"
      fi
      printf '%s\n' "${stem},${cat},${cl},${ll},${ratio},${winner}"
    done
    # Summary block, separated from the data rows by one blank line.
    printf '%s\n' \
      "" \
      "# 汇总,,," \
      "生成时间,,${stamp}" \
      "clang优化级别,,${llvm_opt}" \
      "有效用例,,${valid}" \
      "编译器总指令数,,${total_compiler}" \
      "clang总指令数,,${total_llvm}" \
      "总指令数比,,${total_compiler}/${total_llvm}" \
      "几何平均比,,${geo_mean}" \
      "性能分预估,,${score}"
  } > "$REPORT_FILE"
}
# _export_json LLVM_LABEL LLVM_OPT TOTAL_COMPILER TOTAL_LLVM GEO_MEAN VALID COMPILER_FAIL LLVM_FAIL
#   Write the performance comparison to $JSON_FILE: generation time, clang
#   optimisation level, a summary object and one object per entry of the
#   caller's `results` array ("stem|compiler|clang|ratio").
#
#   run_perf can produce the placeholder "N/A" both for a per-case ratio
#   (clang count of 0) and for the geometric mean. Those values are written
#   as JSON null (with winner "n/a") instead of crashing the export in
#   float(); the score estimate is likewise null when the geometric mean is
#   not a positive number. LLVM_LABEL is accepted but not written.
_export_json() {
  local llvm_label="$1" llvm_opt="$2"
  local total_compiler="$3" total_llvm="$4" geo_mean="$5"
  local valid="$6" compiler_fail="$7" llvm_fail="$8"
  local now perf_est
  # `date -Iseconds` is a GNU extension; fall back to an explicit format.
  now=$(date -Iseconds 2>/dev/null) || now=$(date '+%Y-%m-%dT%H:%M:%S%z')
  perf_est=$(awk -v gm="$geo_mean" 'BEGIN { if (gm + 0 > 0) printf "%.0f", 100 / gm }')
  python3 - "$JSON_FILE" "$now" "$llvm_opt" "$valid" \
    "$total_compiler" "$total_llvm" "$geo_mean" "$perf_est" \
    "$compiler_fail" "$llvm_fail" \
    "${results[@]}" <<'PY'
import json
import sys


def to_float(text):
    """Return text as a float, or None for placeholders such as "N/A" or ""."""
    try:
        return float(text)
    except ValueError:
        return None


def winner_of(ratio):
    """Name the side with fewer instructions; "n/a" when there is no ratio."""
    if ratio is None:
        return "n/a"
    if ratio < 1.0:
        return "compiler"
    return "tie" if ratio == 1.0 else "clang"


outfile = sys.argv[1]
report = {
    "generated_at": sys.argv[2],
    "clang_opt_level": str(sys.argv[3]),
    "summary": {
        "valid_cases": int(sys.argv[4]),
        "total_compiler_insn": int(sys.argv[5]),
        "total_clang_insn": int(sys.argv[6]),
        "geometric_mean_ratio": to_float(sys.argv[7]),
        "estimated_performance_score": to_float(sys.argv[8]),
        "compiler_fail": int(sys.argv[9]),
        "clang_fail": int(sys.argv[10]),
    },
    "cases": []
}
for r in sys.argv[11:]:
    stem, cl, ll, ratio = r.split('|')
    rv = to_float(ratio)
    report["cases"].append({
        "test_case": stem,
        "compiler_insn": int(cl),
        "clang_insn": int(ll),
        "ratio": rv,
        "winner": winner_of(rv)
    })
with open(outfile, 'w') as f:
    json.dump(report, f, ensure_ascii=False, indent=2)
PY
}
# ============================================================
# Entry point
# ============================================================
# Run each selected mode, always in the order baseline -> diff -> perf.
# `case` is used rather than `[[ ... ]] && fn` so that a mode that is
# switched off leaves a zero exit status behind.
case "$DO_BASELINE" in
  true) run_baseline ;;
esac
case "$DO_DIFF" in
  true) run_diff ;;
esac
case "$DO_PERF" in
  true) run_perf ;;
esac