#!/usr/bin/env bash
#
# scripts/diff_test_llvm.sh
#
# Differential testing of the project compiler against LLVM/clang. Supports a
# correctness comparison (program output) and a size comparison (assembly
# line counts, reported as "instruction counts").
#
# Usage:
#   ./scripts/diff_test_llvm.sh --baseline              generate the LLVM correctness baseline
#   ./scripts/diff_test_llvm.sh --diff                  compare compiler output against the baseline
#   ./scripts/diff_test_llvm.sh --perf                  compare assembly sizes
#   ./scripts/diff_test_llvm.sh --perf --llvm-opt 2     compare against clang -O2
#   ./scripts/diff_test_llvm.sh --perf --save-asm       compare and keep the clang assembly
#   ./scripts/diff_test_llvm.sh --perf --llvm-opt 0     compare against clang -O0 (lowest baseline)
#
# Requires: clang (or aarch64-linux-gnu-clang), qemu-aarch64, python3,
# GNU coreutils (timeout, mktemp with suffix, date -I) and xargs with -0/-P.

set -euo pipefail

# ANSI colours, stored as real escape bytes so they can be printed with '%s'.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly CYAN=$'\033[0;36m'
readonly BOLD=$'\033[1m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m'

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TEST_ROOT="$PROJECT_ROOT/2026test"
RESULTS_ROOT="$PROJECT_ROOT/2026test_results"
LLVM_BASELINE_DIR="$RESULTS_ROOT/llvm_baseline"
LLVM_ASM_DIR="$RESULTS_ROOT/llvm_asm"
RUNTIME_SRC="$PROJECT_ROOT/sylib/sylib.c"
RUNTIME_OBJ="$PROJECT_ROOT/build/test_runtime/sylib_llvm.o"
COMPILER="$PROJECT_ROOT/build/bin/compiler"

# Locate clang: prefer the cross compiler, otherwise the system clang (which
# is later given an explicit --target).
CLANG=""
CLANG_TARGET="aarch64-linux-gnu"
for cand in aarch64-linux-gnu-clang clang; do
  if command -v "$cand" >/dev/null 2>&1; then
    CLANG="$cand"
    break
  fi
done

QEMU="qemu-aarch64"

CORRECTNESS_CATS=("functional" "h_functional")
PERF_CATS=("performance")
CASES=()
DO_BASELINE=false
DO_DIFF=false
DO_PERF=false
SAVE_ASM=false
LLVM_OPT_LEVEL=3
MAX_CASES=0
REPORT_FILE=""
JSON_FILE=""
JOBS=$(nproc 2>/dev/null || echo 4)

# Print the help text to stdout.
usage() {
  cat <<'EOF'
用法: ./scripts/diff_test_llvm.sh [选项]

差分测试:对每个 .sy 用例,用编译器生成汇编和 LLVM/clang 编译后,
比较输出(正确性)或指令数(性能)。

模式(至少选一个):
  --baseline          生成 LLVM 正确性基线(保存输出到 llvm_baseline/)
  --diff              正确性差分对比(编译器输出 vs LLVM 基线)
  --perf              性能对比(编译器 vs LLVM 指令数及比值)

性能对比选项(与 --perf 配合):
  --llvm-opt LEVEL    clang 优化级别 0/1/2/3/s/z(默认: 3)
  --save-asm          保存 LLVM 汇编到 llvm_asm/ 供人工分析
  --perf-cats LIST    性能对比覆盖的类别,逗号分隔
                      (默认: performance,也可加 functional,h_functional)
  --report FILE       导出性能对比结果到 CSV 文件
  --json FILE         导出性能对比结果到 JSON 文件

通用选项:
  -n, --max N         最多运行 N 个用例 (0=不限制,默认: 0)
  -j, --jobs N        并行任务数 (默认: nproc,设为1恢复串行)
  -h, --help          显示此帮助信息

输出目录:
  2026test_results/llvm_baseline/   LLVM 正确性基线(输出 + 退出码)
  2026test_results/llvm_asm/        LLVM 汇编(--save-asm 时保存)
EOF
}

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort unless the option in $1 is followed by a value.
# Call as: require_value "$@" (with the option still in $1).
require_value() {
  (( $# >= 2 )) || die "错误: $1 需要一个参数值"
}

# ============================================================
# Argument parsing
# ============================================================
while (( $# > 0 )); do
  case "$1" in
    --baseline) DO_BASELINE=true ;;
    --diff) DO_DIFF=true ;;
    --perf) DO_PERF=true ;;
    --save-asm) SAVE_ASM=true ;;
    --llvm-opt)
      require_value "$@"
      LLVM_OPT_LEVEL="$2"
      shift
      ;;
    --perf-cats)
      require_value "$@"
      IFS=',' read -ra PERF_CATS <<< "$2"
      (( ${#PERF_CATS[@]} > 0 )) || die "错误: --perf-cats 不能为空"
      shift
      ;;
    --report)
      require_value "$@"
      REPORT_FILE="$2"
      shift
      ;;
    --json)
      require_value "$@"
      JSON_FILE="$2"
      shift
      ;;
    -n | --max)
      require_value "$@"
      [[ "$2" =~ ^[0-9]+$ ]] || die "错误: --max 需要非负整数"
      # Force base 10 so a value such as "08" is not parsed as octal.
      MAX_CASES=$((10#$2))
      shift
      ;;
    -j | --jobs)
      require_value "$@"
      [[ "$2" =~ ^[1-9][0-9]*$ ]] || die "错误: --jobs 需要正整数"
      JOBS="$2"
      # The value must be consumed here; without this shift it was re-read
      # as the next option and rejected as unknown.
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      printf '未知选项: %s\n' "$1" >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done

if [[ "$DO_BASELINE" == false && "$DO_DIFF" == false && "$DO_PERF" == false ]]; then
  echo "错误: 至少需要 --baseline、--diff 或 --perf 之一" >&2
  usage >&2
  exit 1
fi

# Validate the optimisation level; upper-case S/Z are accepted and normalised
# to the lower-case spelling documented in the help text.
case "$LLVM_OPT_LEVEL" in
  [0-3sz]) ;;
  S) LLVM_OPT_LEVEL=s ;;
  Z) LLVM_OPT_LEVEL=z ;;
  *) die "错误: --llvm-opt 必须是 0/1/2/3/s/z" ;;
esac

# ============================================================
# Pre-flight checks
# ============================================================
if [[ -z "$CLANG" ]]; then
  die "错误: 未找到 clang 或 aarch64-linux-gnu-clang
 安装: sudo apt install clang 或 sudo apt install clang-18"
fi

# CLANG_FLAGS stays a plain string (not an array) because it has to be
# exported to the worker shells started by xargs; it is expanded unquoted on
# purpose wherever clang is invoked.
CLANG_FLAGS="-std=gnu89" # SysY relies on implicit function declarations (starttime/getint, ...)
if [[ "$CLANG" != "aarch64-linux-gnu-clang" ]]; then
  # System clang: a cross-compilation target must be given explicitly.
  CLANG_FLAGS="$CLANG_FLAGS --target=$CLANG_TARGET"
fi

command -v "$QEMU" >/dev/null 2>&1 || die "未找到 $QEMU"
# python3 is needed by preprocess_for_clang and the JSON export.
command -v python3 >/dev/null 2>&1 || die "未找到 python3"
[[ -f "$RUNTIME_SRC" ]] || die "未找到 $RUNTIME_SRC"
[[ -f "$COMPILER" ]] || die "未找到编译器 $COMPILER,请先构建"

echo "LLVM 工具链: $CLANG $CLANG_FLAGS"

# Build the runtime library object if it is missing or older than its source.
mkdir -p "$(dirname "$RUNTIME_OBJ")"
if [[ ! -f "$RUNTIME_OBJ" || "$RUNTIME_SRC" -nt "$RUNTIME_OBJ" ]]; then
  echo "编译运行时库 $RUNTIME_SRC → $RUNTIME_OBJ ..."
  # shellcheck disable=SC2086  # CLANG_FLAGS is split into words on purpose
  "$CLANG" $CLANG_FLAGS -O2 -c "$RUNTIME_SRC" -o "$RUNTIME_OBJ"
fi

# ============================================================
# SysY -> C preprocessing (rewrites SysY syntax clang rejects)
# ============================================================

# Copy the SysY source $1 to $2, turning every line-leading
# "const int X = V;" into "#define X V" (C does not accept such constants as
# array sizes). The paths are passed through argv, never spliced into the
# Python source, so quotes in a path cannot break or inject code.
# NOTE(review): a multi-declarator line ("const int a = 1, b = 2;") is not
# handled by this single regex.
preprocess_for_clang() {
  local src="$1" dst="$2"
  python3 -c '
import re, sys

src, dst = sys.argv[1], sys.argv[2]
with open(src) as f:
    content = f.read()
content = re.sub(r"^const int (\w+) = ([^;]+);", r"#define \1 \2",
                 content, flags=re.MULTILINE)
with open(dst, "w") as f:
    f.write(content)
' "$src" "$dst"
}

# ============================================================
# Normalised comparison
# ============================================================

# Return 0 when files $1 and $2 are equal after stripping CRs, trailing
# whitespace on every line, and trailing blank lines; non-zero otherwise.
canon_compare() {
  local expected="$1" actual="$2"
  diff -q \
    <(sed 's/\r$//; s/[[:space:]]*$//' "$expected" \
      | awk '{lines[NR]=$0} END{last=NR; while(last>0&&lines[last]=="")last--; for(i=1;i<=last;i++)print lines[i]}') \
    <(sed 's/\r$//; s/[[:space:]]*$//' "$actual" \
      | awk '{lines[NR]=$0} END{last=NR; while(last>0&&lines[last]=="")last--; for(i=1;i<=last;i++)print lines[i]}') \
    >/dev/null 2>&1
}

# ============================================================
# Test case collection
# ============================================================

# Print, sorted and one per line, every *.sy file under $TEST_ROOT/<category>
# for the categories given as arguments. Prints nothing when none exist.
collect_cases() {
  local category dir sy
  local -a found=()
  for category in "$@"; do
    dir="$TEST_ROOT/$category"
    [[ -d "$dir" ]] || continue
    for sy in "$dir"/*.sy; do
      [[ -f "$sy" ]] || continue
      found+=("$sy")
    done
  done
  # Without this guard an empty list printed one blank line, which the caller
  # then treated as a test case with an empty path.
  if (( ${#found[@]} > 0 )); then
    printf '%s\n' "${found[@]}" | sort
  fi
}

# Fill the global CASES array with the cases of the given categories,
# truncated to the first MAX_CASES entries when MAX_CASES > 0.
load_cases() {
  local line
  CASES=()
  while IFS= read -r line; do
    CASES+=("$line")
  done < <(collect_cases "$@")

  if (( MAX_CASES > 0 && MAX_CASES < ${#CASES[@]} )); then
    CASES=("${CASES[@]:0:$MAX_CASES}")
  fi
}

# Print the value of the first "KEY=value" line for KEY ($2) in the worker
# result file $1. Everything after the first '=' is kept, so values that
# contain '=' survive. Prints nothing when the key is absent.
result_field() {
  local file="$1" key="$2" line
  while IFS= read -r line; do
    if [[ "$line" == "$key="* ]]; then
      printf '%s\n' "${line#*=}"
      return 0
    fi
  done < "$file"
  return 0
}

# Run worker function $1 once per entry of CASES, as
#   <worker> <index> <path> <res_dir>/<index>
# with res_dir = $2. Uses JOBS parallel shells when JOBS > 1 and there is
# more than one case, otherwise runs in-process.
dispatch_cases() {
  local worker="$1" res_dir="$2"
  local i

  if (( JOBS > 1 && ${#CASES[@]} > 1 )); then
    # Worker shells are fresh bash processes: everything they need has to be
    # exported.
    export LLVM_BASELINE_DIR LLVM_ASM_DIR RUNTIME_OBJ COMPILER CLANG \
      CLANG_FLAGS QEMU LLVM_OPT_LEVEL SAVE_ASM
    export -f "$worker" preprocess_for_clang

    # Index and path travel as NUL-delimited items (xargs -0 -n 2) and the
    # result directory as a positional argument, so whitespace or quotes in
    # paths are passed through untouched.
    # In the child: $1 = worker, $2 = res_dir, $3 = index, $4 = path.
    # "|| true": a failing case shows up as a missing or FAIL result file and
    # is reported in the summary; it must not abort the whole run.
    for i in "${!CASES[@]}"; do
      printf '%s\0%s\0' "$i" "${CASES[$i]}"
    done \
      | xargs -0 -n 2 -P "$JOBS" bash -c '"$1" "$3" "$4" "$2/$3"' _ "$worker" "$res_dir" \
      || true
  else
    for i in "${!CASES[@]}"; do
      "$worker" "$i" "${CASES[$i]}" "$res_dir/$i"
    done
  fi
}

# ============================================================
# LLVM correctness baseline (parallelised)
# ============================================================

# Build one case with clang, run it under qemu and store
# "<stdout>\n<exit code>\n" in $LLVM_BASELINE_DIR/<category>/<stem>.actual.out.
# Arguments: $1 case index (unused here), $2 path of the .sy file,
#            $3 result file that receives STATUS= (OK/FAIL) and NAME= lines.
run_baseline_worker() {
  local sy="$2" result_file="$3"
  local dir category base stem out_dir exe stdout_file actual_file stdin_file clang_src

  dir=$(dirname "$sy")
  category=$(basename "$dir")
  base=$(basename "$sy")
  stem=${base%.sy}
  out_dir="$LLVM_BASELINE_DIR/$category"
  exe="$out_dir/$stem"
  stdout_file="$out_dir/$stem.stdout"
  actual_file="$out_dir/$stem.actual.out"
  stdin_file="$dir/$stem.in"
  [[ -f "$stdin_file" ]] || stdin_file=/dev/null

  mkdir -p "$out_dir"
  clang_src=$(mktemp /tmp/clang_baseline_XXXXXX.sy)

  local status="FAIL" exit_code=0
  # shellcheck disable=SC2086  # CLANG_FLAGS is split into words on purpose
  if preprocess_for_clang "$sy" "$clang_src" \
    && "$CLANG" $CLANG_FLAGS -x c "$clang_src" -x none "$RUNTIME_OBJ" \
      -static -o "$exe" -lm 2>/dev/null; then
    # The program's own exit status is part of the baseline, so a non-zero
    # status is recorded rather than treated as an error.
    timeout --signal=KILL 60 "$QEMU" "$exe" < "$stdin_file" > "$stdout_file" 2>/dev/null \
      || exit_code=$?

    {
      cat "$stdout_file"
      # Make sure the exit code starts on its own line.
      if [[ -s "$stdout_file" ]] && (( $(tail -c 1 "$stdout_file" | wc -l) == 0 )); then
        printf '\n'
      fi
      printf '%s\n' "$exit_code"
    } > "$actual_file"
    status="OK"
  fi
  rm -f -- "$clang_src"

  printf 'STATUS=%s\nNAME=%s\n' "$status" "$stem" > "$result_file"
}

# Generate the clang baseline for every correctness case and print a summary.
run_baseline() {
  load_cases "${CORRECTNESS_CATS[@]}"
  echo ""
  echo "========== 生成 LLVM 正确性基线(${#CASES[@]} 用例)=========="
  if (( ${#CASES[@]} == 0 )); then
    printf '%s未找到任何测试用例%s\n' "$YELLOW" "$NC"
    return 0
  fi

  local res_dir="$LLVM_BASELINE_DIR/.results"
  # Drop results left behind by an interrupted run so they cannot be counted.
  rm -rf -- "$res_dir"
  mkdir -p "$res_dir"

  dispatch_cases run_baseline_worker "$res_dir"

  # Summary
  local total=0 pass=0 fail=0
  local i rf status name
  for i in "${!CASES[@]}"; do
    total=$((total + 1))
    rf="$res_dir/$i"
    if [[ ! -f "$rf" ]]; then
      fail=$((fail + 1))
      printf ' [%sFAIL%s] %s (无结果文件,worker 异常退出)\n' \
        "$RED" "$NC" "$(basename "${CASES[$i]}" .sy)"
      continue
    fi

    status=$(result_field "$rf" STATUS)
    name=$(result_field "$rf" NAME)
    if [[ "$status" == "OK" ]]; then
      pass=$((pass + 1))
      printf ' [%sOK%s] %-35s (%d/%d)\r' "$GREEN" "$NC" "$name" "$total" "${#CASES[@]}"
    else
      fail=$((fail + 1))
      printf ' [%sFAIL%s] %s (clang 编译失败)\n' "$RED" "$NC" "$name"
    fi
  done

  rm -rf -- "$res_dir"
  printf '\n'
  echo "基线完成: $pass/$total 成功"
  if (( fail > 0 )); then
    printf ' %s%d 个 clang 编译失败(可能使用了 clang 不支持的 SysY 语法)%s\n' \
      "$YELLOW" "$fail" "$NC"
  fi
}

# ============================================================
# Correctness diff
# ============================================================

# Compare, per correctness case, the compiler's recorded output
# ($RESULTS_ROOT/<category>/<stem>.actual.out) with the clang baseline and
# print matches, mismatches (with excerpts) and skipped cases.
run_diff() {
  load_cases "${CORRECTNESS_CATS[@]}"
  echo ""
  echo "========== 正确性差分对比(${#CASES[@]} 用例)=========="
  if (( ${#CASES[@]} == 0 )); then
    printf '%s未找到任何测试用例%s\n' "$YELLOW" "$NC"
    return 0
  fi

  local total=0 match=0 mismatch=0 skip=0
  local sy dir category base stem compiler_out llvm_out

  for sy in "${CASES[@]}"; do
    total=$((total + 1))

    dir=$(dirname "$sy")
    category=$(basename "$dir")
    base=$(basename "$sy")
    stem=${base%.sy}

    compiler_out="$RESULTS_ROOT/$category/$stem.actual.out"
    llvm_out="$LLVM_BASELINE_DIR/$category/$stem.actual.out"

    if [[ ! -f "$compiler_out" ]]; then
      printf ' [%sSKIP%s] %s (无编译器输出,先跑 2026test.sh)\n' "$YELLOW" "$NC" "$stem"
      skip=$((skip + 1))
      continue
    fi

    if [[ ! -f "$llvm_out" ]]; then
      printf ' [%sSKIP%s] %s (无 LLVM 基线,先跑 --baseline)\n' "$YELLOW" "$NC" "$stem"
      skip=$((skip + 1))
      continue
    fi

    if canon_compare "$compiler_out" "$llvm_out"; then
      match=$((match + 1))
      printf ' [%sMATCH%s] %-35s (%d/%d)\r' "$GREEN" "$NC" "$stem" "$total" "${#CASES[@]}"
    else
      mismatch=$((mismatch + 1))
      printf '\n'
      printf ' [%sMISMATCH%s] %s\n' "$RED" "$NC" "$stem"
      # head reads the files directly: "cat file | head" could kill cat with
      # SIGPIPE on large outputs, which aborts the script under pipefail.
      echo " --- 编译器输出 ---"
      head -n 20 -- "$compiler_out" | sed 's/^/ | /'
      echo " --- clang 输出 ---"
      head -n 20 -- "$llvm_out" | sed 's/^/ | /'
      echo " --- diff ---"
      # diff exits 1 when the files differ, which is the expected case here.
      diff -u -- "$compiler_out" "$llvm_out" | head -n 20 | sed 's/^/ | /' || true
      echo ""
    fi
  done

  printf '\n'
  echo "========== 正确性差分结果 =========="
  printf ' 匹配: %s%d%s\n' "$GREEN" "$match" "$NC"
  printf ' 不匹配: %s%d%s\n' "$RED" "$mismatch" "$NC"
  if (( skip > 0 )); then
    printf ' 跳过: %s%d%s\n' "$YELLOW" "$skip" "$NC"
  fi

  if (( mismatch == 0 )); then
    printf '\n%s全部匹配,编译器输出与 clang 一致%s\n' "$GREEN" "$NC"
  fi
}

# ============================================================
# Performance comparison (parallelised)
# ============================================================

# Generate assembly for one case with both the compiler (-S -O) and clang
# (-S -O<LLVM_OPT_LEVEL>) and record the line count of each file.
# Arguments: $1 case index (unused here), $2 path of the .sy file,
#            $3 result file that receives STATUS= (OK/COMP_FAIL/LLVM_FAIL),
#               STEM=, CAT=, COMPILER_LINES= and LLVM_LINES= lines.
# NOTE(review): the counts are `wc -l` of the whole .s file, so directives,
# labels and comments are included; reports call them "instruction counts".
run_perf_worker() {
  local sy="$2" result_file="$3"
  local dir category base stem compiler_asm llvm_asm clang_src save_dir

  dir=$(dirname "$sy")
  category=$(basename "$dir")
  base=$(basename "$sy")
  stem=${base%.sy}

  compiler_asm=$(mktemp /tmp/compiler_llvm_XXXXXX.s)
  llvm_asm=$(mktemp /tmp/llvm_XXXXXX.s)
  clang_src=$(mktemp /tmp/clang_perf_XXXXXX.sy)

  # Assembly from the project compiler.
  local comp_ok=true compiler_lines=0
  if timeout --signal=KILL 60 "$COMPILER" -S -O -o "$compiler_asm" "$sy" 2>/dev/null; then
    compiler_lines=$(wc -l < "$compiler_asm")
  else
    comp_ok=false
  fi

  # Assembly from clang.
  local llvm_ok=true llvm_lines=0
  # shellcheck disable=SC2086  # CLANG_FLAGS is split into words on purpose
  if preprocess_for_clang "$sy" "$clang_src" \
    && "$CLANG" $CLANG_FLAGS -x c -S "-O${LLVM_OPT_LEVEL}" -o "$llvm_asm" "$clang_src" 2>/dev/null; then
    llvm_lines=$(wc -l < "$llvm_asm")
  else
    llvm_ok=false
  fi

  # Keep the clang assembly when asked to.
  if [[ "$SAVE_ASM" == true && "$llvm_ok" == true ]]; then
    save_dir="$LLVM_ASM_DIR/${category}/${LLVM_OPT_LEVEL}"
    mkdir -p "$save_dir"
    cp -- "$llvm_asm" "$save_dir/${stem}.s"
  fi

  rm -f -- "$clang_src" "$compiler_asm" "$llvm_asm"

  # A compiler failure takes precedence over a clang failure.
  local status="OK"
  if [[ "$comp_ok" != true ]]; then
    status="COMP_FAIL"
  elif [[ "$llvm_ok" != true ]]; then
    status="LLVM_FAIL"
  fi

  printf 'STATUS=%s\nSTEM=%s\nCAT=%s\nCOMPILER_LINES=%s\nLLVM_LINES=%s\n' \
    "$status" "$stem" "$category" "$compiler_lines" "$llvm_lines" \
    > "$result_file"
}

# Print the colour for a compiler/clang ratio: green up to 1.5, yellow up to
# 3.0, red above.
ratio_color() {
  case "$(awk -v r="$1" 'BEGIN { print ((r <= 1.5) ? "g" : ((r <= 3.0) ? "y" : "r")) }')" in
    g) printf '%s' "$GREEN" ;;
    y) printf '%s' "$YELLOW" ;;
    *) printf '%s' "$RED" ;;
  esac
}

# Print one per-case line: stem ($1), compiler count ($2), clang count ($3),
# ratio ($4), shown in colour $5.
print_perf_line() {
  printf ' %-35s 编译器:%5d clang:%5d %s%s%sx%s\n' \
    "$1" "$2" "$3" "$5" "$BOLD" "$4" "$NC"
}

# Print 100 / geometric-mean ($1) rounded to an integer, or "N/A" when the
# mean is not a positive number (avoids a division by zero in awk).
estimate_score() {
  awk -v gm="$1" 'BEGIN { if (gm + 0 > 0) printf "%.0f", 100 / gm; else printf "N/A" }'
}

# Compare assembly sizes for every case in PERF_CATS, print per-case lines,
# rankings and aggregate figures, and export CSV/JSON reports when
# REPORT_FILE / JSON_FILE are set.
run_perf() {
  load_cases "${PERF_CATS[@]}"

  local llvm_opt="-O${LLVM_OPT_LEVEL}"
  local llvm_label="clang ${llvm_opt}"

  echo ""
  echo "========== 性能对比:编译器 -O vs ${llvm_label}(${#CASES[@]} 用例)=========="
  echo ""
  if (( ${#CASES[@]} == 0 )); then
    printf '%s未找到任何测试用例%s\n' "$YELLOW" "$NC"
    return 0
  fi

  local res_dir="$RESULTS_ROOT/.perf_llvm_results"
  rm -rf -- "$res_dir"
  mkdir -p "$res_dir"

  dispatch_cases run_perf_worker "$res_dir"

  # Collect the per-case results.
  local compiler_fail=0 llvm_fail=0
  # Entries are "stem|compiler_lines|llvm_lines|ratio". _export_csv and
  # _export_json read this array through bash's dynamic scoping.
  local -a results=()
  local i rf status stem cl ll ratio flag

  for i in "${!CASES[@]}"; do
    rf="$res_dir/$i"

    if [[ ! -f "$rf" ]]; then
      compiler_fail=$((compiler_fail + 1))
      printf ' [%sFAIL%s] %s 超时/崩溃\n' "$RED" "$NC" "$(basename "${CASES[$i]}" .sy)"
      continue
    fi

    status=$(result_field "$rf" STATUS)
    stem=$(result_field "$rf" STEM)
    cl=$(result_field "$rf" COMPILER_LINES)
    ll=$(result_field "$rf" LLVM_LINES)

    case "$status" in
      COMP_FAIL)
        compiler_fail=$((compiler_fail + 1))
        printf ' [%sFAIL%s] %s 编译器编译失败\n' "$RED" "$NC" "$stem"
        ;;
      LLVM_FAIL)
        llvm_fail=$((llvm_fail + 1))
        printf ' [%sSKIP%s] %s clang 编译失败\n' "$YELLOW" "$NC" "$stem"
        ;;
      OK)
        flag=""
        if (( ll == 0 )); then
          ratio="N/A"
        else
          ratio=$(awk -v c="$cl" -v l="$ll" 'BEGIN { printf "%.2f", c/l }')
          flag=$(ratio_color "$ratio")
        fi
        print_perf_line "$stem" "$cl" "$ll" "$ratio" "$flag"
        results+=("$stem|$cl|$ll|$ratio")
        ;;
    esac
  done

  rm -rf -- "$res_dir"

  # Summary statistics
  printf '\n'
  echo "========== 性能对比汇总 =========="
  echo ""

  local valid=${#results[@]}
  if (( valid == 0 )); then
    echo "无有效用例"
    return
  fi

  # LC_ALL=C on every sort: the ratios use '.' as the decimal separator.

  # Top 5 largest gaps (compiler worse than clang).
  echo "--- 差距最大 TOP 5(优先优化,编译器/clang > 1.0)---"
  printf '%s\n' "${results[@]}" \
    | awk -F'|' '$4 + 0 > 1.0' \
    | LC_ALL=C sort -t'|' -k4,4 -rn \
    | head -n 5 \
    | while IFS='|' read -r stem cl ll ratio; do
      print_perf_line "$stem" "$cl" "$ll" "$ratio" "$(ratio_color "$ratio")"
    done

  echo ""
  echo "--- 编译器优于 clang TOP 5(编译器/clang < 1.0)---"
  printf '%s\n' "${results[@]}" \
    | awk -F'|' '$4 + 0 < 1.0' \
    | LC_ALL=C sort -t'|' -k4,4 -n \
    | head -n 5 \
    | while IFS='|' read -r stem cl ll ratio; do
      print_perf_line "$stem" "$cl" "$ll" "$ratio" "$GREEN"
    done

  echo ""
  echo "--- 差距最小 TOP 5(最接近 1.0x)---"
  printf '%s\n' "${results[@]}" \
    | awk -F'|' '
      {
        ratio = $4 + 0
        dist = (ratio > 1.0) ? (ratio - 1.0) : (1.0 - ratio)
        printf "%s|%s|%s|%s|%f\n", $1, $2, $3, $4, dist
      }' \
    | LC_ALL=C sort -t'|' -k5,5 -n \
    | head -n 5 \
    | while IFS='|' read -r stem cl ll ratio _; do
      print_perf_line "$stem" "$cl" "$ll" "$ratio" "$GREEN"
    done

  echo ""

  # Totals over all valid cases.
  local total_compiler=0 total_llvm=0 r
  for r in "${results[@]}"; do
    IFS='|' read -r _ cl ll _ <<< "$r"
    total_compiler=$((total_compiler + cl))
    total_llvm=$((total_llvm + ll))
  done

  # Ratio of the totals; guarded because clang's total can be 0.
  local total_ratio="N/A"
  if (( total_llvm > 0 )); then
    total_ratio=$(awk -v c="$total_compiler" -v l="$total_llvm" 'BEGIN { printf "%.2f", c/l }')
  fi

  # Geometric mean over the cases with a positive ratio.
  local geo_mean
  geo_mean=$(printf '%s\n' "${results[@]}" | awk -F'|' '
    BEGIN { sum = 0; n = 0 }
    {
      ratio = $4 + 0
      if (ratio > 0) { sum += log(ratio); n++ }
    }
    END {
      if (n > 0) printf "%.2f", exp(sum / n)
      else print "N/A"
    }')

  echo "--- 整体指标 ---"
  printf ' 编译器总指令数: %d\n' "$total_compiler"
  printf ' %s 总指令数: %d\n' "$llvm_label" "$total_llvm"
  printf ' 总指令数比: %s%sx%s\n' "$BOLD" "$total_ratio" "$NC"
  printf ' 几何平均比: %s%sx%s (越接近 1.0 越接近 %s)\n' "$BOLD" "$geo_mean" "$NC" "$llvm_label"
  printf ' 有效用例: %d\n' "$valid"
  if (( compiler_fail > 0 )); then
    printf ' 编译器失败: %d\n' "$compiler_fail"
  fi
  if (( llvm_fail > 0 )); then
    printf ' clang 失败: %d\n' "$llvm_fail"
  fi

  echo ""

  # Rough score estimate derived from the geometric mean.
  local target_ratio="1.11" perf_est
  if [[ "$geo_mean" == "N/A" ]]; then
    printf '%s几何平均比不可用,无法估算性能分%s\n' "$YELLOW" "$NC"
  elif awk -v gm="$geo_mean" -v tr="$target_ratio" 'BEGIN { exit(gm <= tr ? 0 : 1) }'; then
    printf '%s几何平均比 %sx ≤ %sx,性能分预估 ≥90(一级水平)%s\n' \
      "$GREEN" "$geo_mean" "$target_ratio" "$NC"
  else
    perf_est=$(estimate_score "$geo_mean")
    printf '%s几何平均比 %sx > %sx,性能分预估 ≈%s(一级需 ≥90)%s\n' \
      "$YELLOW" "$geo_mean" "$target_ratio" "$perf_est" "$NC"
  fi

  if [[ "$SAVE_ASM" == true ]]; then
    echo ""
    # The worker stores files under <category>/<opt level>/; the message now
    # names that layout.
    printf '%sclang 汇编已保存到 %s/<类别>/%s/%s\n' \
      "$CYAN" "$LLVM_ASM_DIR" "$LLVM_OPT_LEVEL" "$NC"
    echo " 可对比分析 clang/LLVM 的优化策略(循环展开、向量化、指令调度等)"
  fi

  # Report export
  if [[ -n "$REPORT_FILE" ]]; then
    _export_csv "$llvm_label" "$LLVM_OPT_LEVEL" "$total_compiler" "$total_llvm" \
      "$geo_mean" "$valid" "$compiler_fail" "$llvm_fail"
    echo ""
    printf '%sCSV 报告已导出到 %s%s\n' "$CYAN" "$REPORT_FILE" "$NC"
  fi

  if [[ -n "$JSON_FILE" ]]; then
    _export_json "$llvm_label" "$LLVM_OPT_LEVEL" "$total_compiler" "$total_llvm" \
      "$geo_mean" "$valid" "$compiler_fail" "$llvm_fail"
    echo ""
    printf '%sJSON 报告已导出到 %s%s\n' "$CYAN" "$JSON_FILE" "$NC"
  fi
}

# ============================================================
# Export helpers
# ============================================================

# Write the per-case results and the summary to $REPORT_FILE as CSV.
# Arguments: $1 clang label, $2 clang opt level, $3 compiler total,
#            $4 clang total, $5 geometric mean, $6 valid case count,
#            $7 compiler failures, $8 clang failures.
# Reads the `results` array of the calling run_perf (dynamic scoping).
_export_csv() {
  local llvm_label="$1" llvm_opt="$2"
  local total_compiler="$3" total_llvm="$4" geo_mean="$5"
  local valid="$6" compiler_fail="$7" llvm_fail="$8"

  local now perf_est
  now=$(date '+%Y-%m-%d %H:%M:%S')
  perf_est=$(estimate_score "$geo_mean")

  {
    echo "test_case,category,compiler_insn,clang_insn,ratio,winner"
    printf '%s\n' "${results[@]}" \
      | LC_ALL=C sort -t'|' -k4,4 -rn \
      | while IFS='|' read -r stem cl ll ratio; do
        # The category is recovered by looking for the source file.
        local category="" c
        for c in "${PERF_CATS[@]}"; do
          if [[ -f "$TEST_ROOT/$c/${stem}.sy" ]]; then
            category="$c"
            break
          fi
        done
        local winner="clang"
        if awk -v r="$ratio" 'BEGIN { exit(r < 1.0 ? 0 : 1) }'; then winner="compiler"; fi
        if [[ "$ratio" == "1.00" ]]; then winner="tie"; fi
        echo "${stem},${category},${cl},${ll},${ratio},${winner}"
      done
    echo ""
    echo "# 汇总,,,"
    echo "生成时间,,${now}"
    echo "clang优化级别,,${llvm_opt}"
    echo "有效用例,,${valid}"
    echo "编译器总指令数,,${total_compiler}"
    echo "clang总指令数,,${total_llvm}"
    echo "总指令数比,,${total_compiler}/${total_llvm}"
    echo "几何平均比,,${geo_mean}"
    echo "性能分预估,,${perf_est}"
  } > "$REPORT_FILE"
}

# Write the per-case results and the summary to $JSON_FILE as JSON.
# Arguments and the use of `results` are the same as for _export_csv.
# Values that are not numbers ("N/A") are emitted as JSON null.
_export_json() {
  local llvm_label="$1" llvm_opt="$2"
  local total_compiler="$3" total_llvm="$4" geo_mean="$5"
  local valid="$6" compiler_fail="$7" llvm_fail="$8"

  local now perf_est
  now=$(date -Iseconds)
  perf_est=$(estimate_score "$geo_mean")

  python3 - "$JSON_FILE" "$now" "$llvm_opt" "$valid" \
    "$total_compiler" "$total_llvm" "$geo_mean" "$perf_est" \
    "$compiler_fail" "$llvm_fail" \
    "${results[@]}" <<'PY'
import sys, json


def num(text):
    """Return text as a float, or None when it is not a number (e.g. "N/A")."""
    try:
        return float(text)
    except ValueError:
        return None


outfile = sys.argv[1]
report = {
    "generated_at": sys.argv[2],
    "clang_opt_level": str(sys.argv[3]),
    "summary": {
        "valid_cases": int(sys.argv[4]),
        "total_compiler_insn": int(sys.argv[5]),
        "total_clang_insn": int(sys.argv[6]),
        "geometric_mean_ratio": num(sys.argv[7]),
        "estimated_performance_score": num(sys.argv[8]),
        "compiler_fail": int(sys.argv[9]),
        "clang_fail": int(sys.argv[10]),
    },
    "cases": []
}

for r in sys.argv[11:]:
    # rsplit keeps a stem that itself contains '|' in one piece.
    stem, cl, ll, ratio = r.rsplit('|', 3)
    rv = num(ratio)
    if rv is None:
        winner = "n/a"
    elif rv < 1.0:
        winner = "compiler"
    elif rv == 1.0:
        winner = "tie"
    else:
        winner = "clang"
    report["cases"].append({
        "test_case": stem,
        "compiler_insn": int(cl),
        "clang_insn": int(ll),
        "ratio": rv,
        "winner": winner
    })

with open(outfile, 'w') as f:
    json.dump(report, f, ensure_ascii=False, indent=2)
PY
}

# ============================================================
# Execution
# ============================================================
if [[ "$DO_BASELINE" == true ]]; then
  run_baseline
fi

if [[ "$DO_DIFF" == true ]]; then
  run_diff
fi

if [[ "$DO_PERF" == true ]]; then
  run_perf
fi