|
|
#!/usr/bin/env bash
# Differential testing: this project's compiler vs LLVM/clang, with both a
# correctness comparison and a performance comparison.
#
# Usage:
#   ./scripts/diff_test_llvm.sh --baseline             generate the LLVM correctness baseline
#   ./scripts/diff_test_llvm.sh --diff                 correctness diff (are the outputs identical?)
#   ./scripts/diff_test_llvm.sh --perf                 performance comparison (instruction counts)
#   ./scripts/diff_test_llvm.sh --perf --llvm-opt 2    performance comparison vs clang -O2
#   ./scripts/diff_test_llvm.sh --perf --save-asm      performance comparison, keep the LLVM assembly
#   ./scripts/diff_test_llvm.sh --perf --llvm-opt 0    compare against clang -O0 (lowest baseline)

set -euo pipefail

# ANSI colour escapes, stored un-expanded; `echo -e` / printf formats expand them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
BLUE='\033[0;34m'
NC='\033[0m'

# Project layout, resolved relative to the location of this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TEST_ROOT="$PROJECT_ROOT/2026test" # default; -d overrides it
RESULTS_ROOT="$PROJECT_ROOT/2026test_results"
LLVM_BASELINE_DIR="$RESULTS_ROOT/llvm_baseline"
LLVM_ASM_DIR="$RESULTS_ROOT/llvm_asm"
RUNTIME_SRC="$PROJECT_ROOT/sylib/sylib.c"
RUNTIME_OBJ="$PROJECT_ROOT/build/test_runtime/sylib_llvm.o"
COMPILER="$PROJECT_ROOT/build/bin/compiler"

# Locate clang: prefer the cross compiler, otherwise fall back to the system
# clang (which is later given an explicit --target).
CLANG=""
CLANG_TARGET="aarch64-linux-gnu"
for cand in aarch64-linux-gnu-clang clang; do
  command -v "$cand" >/dev/null 2>&1 || continue
  CLANG="$cand"
  break
done

QEMU="qemu-aarch64"

# Test categories and option defaults (the argument parser may override these).
CORRECTNESS_CATS=("functional" "h_functional")
PERF_CATS=("performance")
DO_BASELINE=false
DO_DIFF=false
DO_PERF=false
SAVE_ASM=false
LLVM_OPT_LEVEL=2
MAX_CASES=0
REPORT_FILE=""
JSON_FILE=""
# Default parallelism: CPU count, or 4 when nproc is unavailable.
if ! JOBS=$(nproc 2>/dev/null); then
  JOBS=4
fi
|
|
|
|
|
|
#######################################
# Print the command-line help text to stdout.
# The heredoc is quoted, so nothing inside it is expanded; the documented
# --llvm-opt default must therefore be kept in sync by hand with the
# LLVM_OPT_LEVEL default assigned at the top of the script (2).
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  cat <<'EOF'
用法: ./scripts/diff_test_llvm.sh [选项]

差分测试:对每个 .sy 用例,用编译器生成汇编和 LLVM/clang 编译后,
比较输出(正确性)或指令数(性能)。

模式(至少选一个):
--baseline 生成 LLVM 正确性基线(保存输出到 llvm_baseline/)
--diff 正确性差分对比(编译器输出 vs LLVM 基线)
--perf 性能对比(编译器 vs LLVM 指令数及比值)

性能对比选项(与 --perf 配合):
--llvm-opt <N> clang 优化级别 0/1/2/3/s/z(默认: 2)
--save-asm 保存 LLVM 汇编到 llvm_asm/ 供人工分析
--perf-cats <cats> 性能对比覆盖的类别,逗号分隔
(默认: performance,也可加 functional,h_functional)
--report <file> 导出性能对比结果到 CSV 文件
--json <file> 导出性能对比结果到 JSON 文件

通用选项:
-n, --max N 最多运行 N 个用例 (0=不限制,默认: 0)
-d, --test-dir DIR 测试用例目录 (默认 2026test)
-j, --jobs N 并行任务数 (默认: nproc,设为1恢复串行)
-h, --help 显示此帮助信息

输出目录:
2026test_results/llvm_baseline/ LLVM 正确性基线(输出 + 退出码)
2026test_results/llvm_asm/ LLVM 汇编(--save-asm 时保存)
EOF
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 参数解析
|
|
|
# ============================================================
|
|
|
#######################################
# Abort with a readable message when an option that takes a value is the last
# word on the command line. Without this, reading "$2" under `set -u` kills
# the script with a bare "unbound variable" error.
# Arguments: $1 - option as typed
#            $2 - number of words still on the command line (including $1)
#######################################
_require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "错误: 选项 $1 需要一个参数"
    usage
    exit 1
  fi
}

# Walk the command line; value-taking options consume one extra word.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --baseline) DO_BASELINE=true ;;
    --diff) DO_DIFF=true ;;
    --perf) DO_PERF=true ;;
    --save-asm) SAVE_ASM=true ;;
    --llvm-opt)
      _require_value "$1" "$#"
      LLVM_OPT_LEVEL="$2"
      shift
      ;;
    --perf-cats)
      _require_value "$1" "$#"
      # Comma-separated list -> array of category names.
      IFS=',' read -ra PERF_CATS <<< "$2"
      shift
      ;;
    --report)
      _require_value "$1" "$#"
      REPORT_FILE="$2"
      shift
      ;;
    --json)
      _require_value "$1" "$#"
      JSON_FILE="$2"
      shift
      ;;
    -n|--max)
      _require_value "$1" "$#"
      MAX_CASES="$2"
      shift
      ;;
    -d|--test-dir)
      _require_value "$1" "$#"
      TEST_ROOT="$2"
      shift
      ;;
    -j|--jobs)
      _require_value "$1" "$#"
      JOBS="$2"
      shift
      # Positive decimal integer only; a leading zero would be read as octal
      # by the numeric comparisons performed later.
      if ! [[ "$JOBS" =~ ^[1-9][0-9]*$ ]]; then
        echo "错误: --jobs 需要正整数"; exit 1
      fi
      ;;
    -h|--help) usage; exit 0 ;;
    *) echo "未知选项: $1"; usage; exit 1 ;;
  esac
  shift
done

if [[ "$DO_BASELINE" == false && "$DO_DIFF" == false && "$DO_PERF" == false ]]; then
  echo "错误: 至少需要 --baseline、--diff 或 --perf 之一"
  usage
  exit 1
fi

# MAX_CASES is used in numeric comparisons when truncating the case list.
if ! [[ "$MAX_CASES" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "错误: --max 需要非负整数"
  exit 1
fi

if ! [[ "$LLVM_OPT_LEVEL" =~ ^[0-3sSzZ]$ ]]; then
  echo "错误: --llvm-opt 必须是 0/1/2/3/s/z"
  exit 1
fi
# The level is pasted verbatim after "-O"; clang documents the size levels in
# lowercase (-Os / -Oz), so fold the accepted uppercase spellings.
case "$LLVM_OPT_LEVEL" in
  S) LLVM_OPT_LEVEL=s ;;
  Z) LLVM_OPT_LEVEL=z ;;
esac
|
|
|
|
|
|
# ============================================================
|
|
|
# 预检
|
|
|
# ============================================================
|
|
|
# ---- Preflight: make sure every external tool and input exists up front ----
if [[ -z "$CLANG" ]]; then
  echo "错误: 未找到 clang 或 aarch64-linux-gnu-clang"
  echo " 安装: sudo apt install clang 或 sudo apt install clang-18"
  exit 1
fi

# SysY sources rely on implicit function declarations (starttime/getint etc.),
# hence gnu89. A plain system clang additionally needs an explicit cross target.
CLANG_FLAGS="-std=gnu89"
if [[ "$CLANG" != "aarch64-linux-gnu-clang" ]]; then
  CLANG_FLAGS="$CLANG_FLAGS --target=$CLANG_TARGET"
fi

command -v "$QEMU" >/dev/null 2>&1 || { echo "未找到 $QEMU"; exit 1; }
# python3 (source preprocessing / JSON export) and timeout (bounded runs) are
# invoked by the workers; check them here so a missing tool is reported once
# instead of surfacing as a failure of every single test case.
for _tool in python3 timeout; do
  command -v "$_tool" >/dev/null 2>&1 || { echo "未找到 $_tool"; exit 1; }
done
unset _tool
[[ -f "$RUNTIME_SRC" ]] || { echo "未找到 $RUNTIME_SRC"; exit 1; }
[[ -f "$COMPILER" ]] || { echo "未找到编译器 $COMPILER,请先构建"; exit 1; }

echo "LLVM 工具链: $CLANG $CLANG_FLAGS"

# Build the runtime library object, only when it is missing or older than its source.
mkdir -p "$(dirname "$RUNTIME_OBJ")"
if [[ ! -f "$RUNTIME_OBJ" || "$RUNTIME_SRC" -nt "$RUNTIME_OBJ" ]]; then
  echo "编译运行时库 $RUNTIME_SRC → $RUNTIME_OBJ ..."
  # CLANG_FLAGS is a space-separated flag string and must word-split here.
  # shellcheck disable=SC2086
  $CLANG $CLANG_FLAGS -O2 -c "$RUNTIME_SRC" -o "$RUNTIME_OBJ"
fi
|
|
|
|
|
|
# ============================================================
|
|
|
# SysY → C 预处理(处理 clang 不支持的 SysY 语法)
|
|
|
# ============================================================
|
|
|
#######################################
# Rewrite a SysY source file into something clang accepts as C.
# Every line-leading `const int NAME = VALUE;` becomes `#define NAME (VALUE)`,
# because C does not accept a const int global as an array size.
# The value is parenthesised so an expression initialiser (e.g. `1 + 2`) keeps
# its precedence at every use site. Paths are handed to Python through argv
# rather than spliced into the program text, so quotes or other special
# characters in a path cannot break (or inject into) the Python code.
# Arguments: $1 - input .sy path
#            $2 - output path (overwritten)
# Returns:   python3's exit status
#######################################
preprocess_for_clang() {
  local src="$1"
  local dst="$2"
  python3 -c '
import re
import sys

src, dst = sys.argv[1], sys.argv[2]
with open(src) as f:
    content = f.read()
content = re.sub(r"^const int (\w+) = ([^;]+);", r"#define \1 (\2)",
                 content, flags=re.MULTILINE)
with open(dst, "w") as f:
    f.write(content)
' "$src" "$dst"
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 规范化比较
|
|
|
# ============================================================
|
|
|
#######################################
# Compare two text files after canonicalisation.
# Each side has a trailing CR and trailing whitespace stripped from every
# line, and blank lines at the end of the file dropped, before `diff -q`.
# Arguments: $1 - expected file, $2 - actual file
# Outputs:   nothing (all output is discarded)
# Returns:   diff's status: 0 when the canonical forms are identical
#######################################
canon_compare() {
  local expected="$1" actual="$2"
  # Per-line cleanup: remove a trailing CR, then any trailing whitespace.
  local strip_eol='s/\r$//; s/[[:space:]]*$//'
  # Buffer all lines, then print everything up to the last non-empty one.
  local drop_tail='{lines[NR]=$0} END{last=NR; while(last>0&&lines[last]=="")last--; for(i=1;i<=last;i++)print lines[i]}'

  diff -q \
    <(sed "$strip_eol" "$expected" | awk "$drop_tail") \
    <(sed "$strip_eol" "$actual" | awk "$drop_tail") \
    > /dev/null 2>&1
}
|
|
|
|
|
|
# 统计 AArch64 汇编中的实际指令行数(排除伪指令、标签、空行)
|
|
|
#######################################
# Count instruction lines in an assembly file: lines that begin with
# whitespace followed by a lowercase letter (so labels, blank lines and
# directives starting with '.' are not counted).
# Always prints exactly one integer. `grep -c` already prints "0" when
# nothing matches but exits 1, so the count is captured and defaulted here
# rather than appending a second "0" with `|| echo 0`.
# Arguments: $1 - assembly file path
# Outputs:   the count on stdout (0 if the file is missing or unreadable)
#######################################
count_insn() {
  local asm="$1"
  local n
  n=$(grep -cE '^[[:space:]]+[a-z]' "$asm" 2>/dev/null) || true
  printf '%s\n' "${n:-0}"
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 收集用例
|
|
|
# ============================================================
|
|
|
#######################################
# List every .sy test case found directly inside the given category
# directories under TEST_ROOT, one path per line, sorted.
# Prints nothing at all when no case exists; the unconditional
# `printf '%s\n'` on an empty array used to emit one blank line, which the
# caller then loaded as a phantom empty test case.
# Globals:   TEST_ROOT (read)
# Arguments: category directory names
# Outputs:   sorted case paths on stdout
#######################################
collect_cases() {
  local cases=()
  local cat dir sy
  for cat in "$@"; do
    dir="$TEST_ROOT/$cat"
    [[ -d "$dir" ]] || continue
    for sy in "$dir"/*.sy; do
      # An unmatched glob stays literal; skip anything that is not a file.
      [[ -f "$sy" ]] || continue
      cases+=("$sy")
    done
  done

  if [[ ${#cases[@]} -eq 0 ]]; then
    return 0
  fi
  printf '%s\n' "${cases[@]}" | sort
}
|
|
|
|
|
|
#######################################
# Fill the global CASES array with the test cases of the given categories
# (as listed by collect_cases), keeping only the first MAX_CASES entries
# when MAX_CASES is positive and smaller than the number found.
# Globals:   CASES (written), MAX_CASES (read)
# Arguments: category directory names
#######################################
load_cases() {
  local cats=("$@")
  local path

  CASES=()
  while IFS= read -r path; do
    CASES+=("$path")
  done < <(collect_cases "${cats[@]}")

  # MAX_CASES == 0 means "no limit".
  if (( MAX_CASES > 0 && MAX_CASES < ${#CASES[@]} )); then
    CASES=("${CASES[@]:0:MAX_CASES}")
  fi
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 生成 LLVM 正确性基线(并行化)
|
|
|
# ============================================================
|
|
|
#######################################
# Build one test case with clang, run it under qemu and record its output as
# the LLVM baseline.
# Writes <out_dir>/<stem>.stdout (raw stdout) and <out_dir>/<stem>.actual.out
# (stdout, newline-terminated, followed by the exit code on its own line),
# where out_dir is LLVM_BASELINE_DIR/<category directory name>.
# Any baseline left over from a previous run is removed first, so a case that
# no longer compiles cannot leave a stale .actual.out behind for --diff.
# Globals:   LLVM_BASELINE_DIR, RUNTIME_OBJ, CLANG, CLANG_FLAGS, QEMU (read)
# Arguments: $1 - case index (unused here; kept for the caller's convention)
#            $2 - path of the .sy source
#            $3 - result file to write (STATUS=OK|FAIL and NAME=<stem>)
#######################################
run_baseline_worker() {
  local idx="$1" sy="$2"
  local result_file="$3"

  local dir cat base stem
  dir=$(dirname "$sy")
  cat=$(basename "$dir")
  base=$(basename "$sy")
  stem=${base%.sy}
  local out_dir="$LLVM_BASELINE_DIR/$cat"
  local exe="$out_dir/$stem"
  local stdout_file="$out_dir/$stem.stdout"
  local actual_file="$out_dir/$stem.actual.out"
  local stdin_file="$dir/$stem.in"

  mkdir -p "$out_dir"
  rm -f -- "$actual_file"

  local status="FAIL"
  local clang_src=""
  # A failure of mktemp, of the preprocessing step or of clang itself all
  # count as FAIL for this case instead of aborting the whole run.
  # CLANG_FLAGS is a space-separated flag string and must word-split.
  # shellcheck disable=SC2086
  if clang_src=$(mktemp /tmp/clang_baseline_XXXX.sy) \
    && preprocess_for_clang "$sy" "$clang_src" \
    && $CLANG $CLANG_FLAGS -x c "$clang_src" -x none "$RUNTIME_OBJ" -static -o "$exe" -lm 2>/dev/null; then
    # Feed the case's .in file when it exists, otherwise an empty stdin.
    local input="/dev/null"
    if [[ -f "$stdin_file" ]]; then
      input="$stdin_file"
    fi

    # A non-zero exit (including 137 after the KILL timeout) is data, not an error.
    local exit_code=0
    timeout --signal=KILL 60 "$QEMU" "$exe" < "$input" > "$stdout_file" 2>/dev/null || exit_code=$?

    {
      cat "$stdout_file"
      # Terminate the program output with a newline if it lacks one, so the
      # exit code always sits on its own line.
      if [[ -s "$stdout_file" ]] && [[ "$(tail -c 1 "$stdout_file" | wc -l)" -eq 0 ]]; then
        printf '\n'
      fi
      printf '%s\n' "$exit_code"
    } > "$actual_file"
    status="OK"
  fi

  if [[ -n "$clang_src" ]]; then
    rm -f -- "$clang_src"
  fi

  printf 'STATUS=%s\nNAME=%s\n' "$status" "$stem" > "$result_file"
}
|
|
|
|
|
|
#######################################
# Generate the LLVM correctness baseline for every case in CORRECTNESS_CATS,
# in parallel when JOBS > 1 and there is more than one case, then print a
# summary.
# Parallel dispatch details:
#   * queue entries are NUL-delimited, so xargs neither word-splits paths that
#     contain spaces nor interprets quotes/backslashes in them;
#   * the result directory is passed as an argument instead of being spliced
#     into the `bash -c` program text;
#   * a failing worker must not abort the script through pipefail/errexit —
#     its missing result file is counted as a failure in the summary instead.
# Globals:   CORRECTNESS_CATS, CASES, JOBS, LLVM_BASELINE_DIR, colour vars (read)
# Outputs:   progress and summary on stdout
#######################################
run_baseline() {
  load_cases "${CORRECTNESS_CATS[@]}"
  echo ""
  echo "========== 生成 LLVM 正确性基线(${#CASES[@]} 用例)=========="

  # Start from an empty result directory so leftovers of an interrupted run
  # are never mistaken for fresh results.
  local res_dir="$LLVM_BASELINE_DIR/.results"
  rm -rf "$res_dir"
  mkdir -p "$res_dir"

  local i
  if [[ $JOBS -gt 1 && ${#CASES[@]} -gt 1 ]]; then
    # Child shells started by xargs need the worker functions and their globals.
    export LLVM_BASELINE_DIR RUNTIME_OBJ CLANG CLANG_FLAGS QEMU
    export -f run_baseline_worker preprocess_for_clang

    local -a queue=()
    for i in "${!CASES[@]}"; do
      queue+=("$i|${CASES[$i]}")
    done

    # In the child: $1 = result dir, $2 = "<index>|<path>".
    printf '%s\0' "${queue[@]}" | xargs -0 -n 1 -P "$JOBS" bash -c '
      idx=${2%%|*}
      sy=${2#*|}
      run_baseline_worker "$idx" "$sy" "$1/$idx"
    ' _ "$res_dir" || true
  else
    for i in "${!CASES[@]}"; do
      run_baseline_worker "$i" "${CASES[$i]}" "$res_dir/$i"
    done
  fi

  # Summarise the per-case result files.
  local total=0 pass=0 fail=0
  local status name
  for i in "${!CASES[@]}"; do
    total=$((total + 1))
    if [[ -f "$res_dir/$i" ]]; then
      status=$(sed -n 's/^STATUS=//p' "$res_dir/$i")
      name=$(sed -n 's/^NAME=//p' "$res_dir/$i")
      if [[ "$status" == "OK" ]]; then
        pass=$((pass + 1))
        # \r keeps successful cases on a single, continuously updated line.
        printf " [${GREEN}OK${NC}] %-35s (%d/%d)\r" "$name" "$total" "${#CASES[@]}"
      else
        fail=$((fail + 1))
        echo -e " [${RED}FAIL${NC}] $name (clang 编译失败)"
      fi
    else
      # The worker died before writing its result file.
      fail=$((fail + 1))
      echo -e " [${RED}FAIL${NC}] $(basename "${CASES[$i]}" .sy) (无结果文件)"
    fi
  done

  rm -rf "$res_dir"
  printf '\n'
  echo "基线完成: $pass/$total 成功"
  if [[ $fail -gt 0 ]]; then
    echo -e " ${YELLOW}$fail 个 clang 编译失败(可能使用了 clang 不支持的 SysY 语法)${NC}"
  fi
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 正确性差分对比
|
|
|
# ============================================================
|
|
|
#######################################
# Correctness diff: for every case in CORRECTNESS_CATS compare the compiler's
# recorded output (RESULTS_ROOT/<cat>/<stem>.actual.out) with the LLVM
# baseline (LLVM_BASELINE_DIR/<cat>/<stem>.actual.out) via canon_compare.
# Cases missing either file are skipped. For a mismatch, the first 20 lines
# of both outputs and of their unified diff are shown.
# The previews read the files with `head` directly: piping `cat` into `head`
# lets cat die of SIGPIPE on large outputs, which under `pipefail` + `errexit`
# terminated the whole script in the middle of the report.
# Globals:   CORRECTNESS_CATS, CASES, RESULTS_ROOT, LLVM_BASELINE_DIR,
#            colour vars (read)
# Outputs:   per-case status and a summary on stdout
#######################################
run_diff() {
  load_cases "${CORRECTNESS_CATS[@]}"
  echo ""
  echo "========== 正确性差分对比(${#CASES[@]} 用例)=========="

  local total=0 match=0 mismatch=0 skip=0
  local sy dir cat base stem compiler_out llvm_out

  for sy in "${CASES[@]}"; do
    total=$((total + 1))

    dir=$(dirname "$sy")
    cat=$(basename "$dir")
    base=$(basename "$sy")
    stem=${base%.sy}

    compiler_out="$RESULTS_ROOT/$cat/$stem.actual.out"
    llvm_out="$LLVM_BASELINE_DIR/$cat/$stem.actual.out"

    if [[ ! -f "$compiler_out" ]]; then
      echo -e " [${YELLOW}SKIP${NC}] $stem (无编译器输出,先跑 2026test.sh)"
      skip=$((skip + 1))
      continue
    fi

    if [[ ! -f "$llvm_out" ]]; then
      echo -e " [${YELLOW}SKIP${NC}] $stem (无 LLVM 基线,先跑 --baseline)"
      skip=$((skip + 1))
      continue
    fi

    if canon_compare "$compiler_out" "$llvm_out"; then
      match=$((match + 1))
      # \r keeps matching cases on a single, continuously updated line.
      printf " [${GREEN}MATCH${NC}] %-35s (%d/%d)\r" "$stem" "$total" "${#CASES[@]}"
    else
      mismatch=$((mismatch + 1))
      printf '\n'
      echo -e " [${RED}MISMATCH${NC}] $stem"
      echo " --- 编译器输出 ---"
      head -n 20 -- "$compiler_out" | sed 's/^/ | /'
      echo " --- clang 输出 ---"
      head -n 20 -- "$llvm_out" | sed 's/^/ | /'
      echo " --- diff ---"
      # diff exits 1 on differences (and may be cut short by head); tolerate both.
      diff -u <(cat "$compiler_out") <(cat "$llvm_out") | head -n 20 | sed 's/^/ | /' || true
      echo ""
    fi
  done

  printf '\n'
  echo "========== 正确性差分结果 =========="
  echo -e " 匹配: ${GREEN}$match${NC}"
  echo -e " 不匹配: ${RED}$mismatch${NC}"
  if [[ $skip -gt 0 ]]; then
    echo -e " 跳过: ${YELLOW}$skip${NC}"
  fi

  if [[ $mismatch -eq 0 ]]; then
    echo -e "\n${GREEN}全部匹配,编译器输出与 clang 一致${NC}"
  fi
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 性能对比(并行化)
|
|
|
# ============================================================
|
|
|
#######################################
# Compile one test case with both the project compiler (-S -O) and clang
# (-S -O<LLVM_OPT_LEVEL>), count the instruction lines of each assembly file
# and write a KEY=VALUE result file.
# The two compilations run concurrently as background jobs. STATUS is OK when
# both produced a non-empty assembly file, COMP_FAIL when the project
# compiler failed, otherwise LLVM_FAIL. With SAVE_ASM=true a successful clang
# assembly is copied to LLVM_ASM_DIR/<category>/<opt level>/<stem>.s.
# Globals:   COMPILER, CLANG, CLANG_FLAGS, LLVM_OPT_LEVEL, SAVE_ASM,
#            LLVM_ASM_DIR (read)
# Arguments: $1 - case index (unused here; kept for the caller's convention)
#            $2 - path of the .sy source
#            $3 - result file to write
#######################################
run_perf_worker() {
  local idx="$1" sy="$2" result_file="$3"

  local dir=$(dirname "$sy")
  local cat=$(basename "$dir")
  local base=$(basename "$sy")
  local stem=${base%.sy}

  local compiler_asm=$(mktemp /tmp/compiler_llvm_XXXX.s)
  local llvm_asm=$(mktemp /tmp/llvm_XXXX.s)

  local comp_ok=true llvm_ok=true
  local compiler_lines=0 llvm_lines=0

  # Start the project compiler first; clang is launched once its input has
  # been preprocessed, and both are reaped below.
  timeout --signal=KILL 60 "$COMPILER" -S -O -o "$compiler_asm" "$sy" 2>/dev/null &
  local comp_pid=$!

  local clang_src=$(mktemp /tmp/clang_perf_XXXX.sy)
  preprocess_for_clang "$sy" "$clang_src"
  $CLANG $CLANG_FLAGS -x c -S "-O${LLVM_OPT_LEVEL}" -o "$llvm_asm" "$clang_src" 2>/dev/null &
  local clang_pid=$!

  if ! wait $comp_pid 2>/dev/null; then
    comp_ok=false
  fi
  if ! wait $clang_pid 2>/dev/null; then
    llvm_ok=false
  fi
  rm -f "$clang_src"

  # A zero exit status with an empty assembly file still counts as a failure.
  if ! $comp_ok || [[ ! -s "$compiler_asm" ]]; then
    comp_ok=false
  else
    compiler_lines=$(count_insn "$compiler_asm")
  fi

  if ! $llvm_ok || [[ ! -s "$llvm_asm" ]]; then
    llvm_ok=false
  else
    llvm_lines=$(count_insn "$llvm_asm")
  fi

  # Optionally keep the clang assembly for manual inspection.
  if [[ "$SAVE_ASM" == true && "$llvm_ok" == true ]]; then
    local save_dir="$LLVM_ASM_DIR/${cat}/${LLVM_OPT_LEVEL}"
    mkdir -p "$save_dir"
    cp "$llvm_asm" "$save_dir/${stem}.s"
  fi

  rm -f "$compiler_asm" "$llvm_asm"

  # A compiler failure takes precedence over a clang failure.
  local outcome
  if $comp_ok && $llvm_ok; then
    outcome="OK"
  elif ! $comp_ok; then
    outcome="COMP_FAIL"
  else
    outcome="LLVM_FAIL"
  fi

  printf 'STATUS=%s\nSTEM=%s\nCAT=%s\nCOMPILER_LINES=%s\nLLVM_LINES=%s\n' \
    "$outcome" "$stem" "$cat" "$compiler_lines" "$llvm_lines" \
    > "$result_file"
}
|
|
|
|
|
|
#######################################
# Performance comparison: run run_perf_worker for every case in PERF_CATS
# (in parallel when JOBS > 1 and there is more than one case), print the
# per-case instruction-count ratio compiler/clang, the top-5 lists, the
# overall totals and geometric mean, and optionally export CSV/JSON reports.
# Robustness notes:
#   * the parallel queue is NUL-delimited and the result directory is passed
#     as an argument, so paths with spaces or quotes survive xargs/bash -c;
#   * a failing worker does not abort the script; its missing result file is
#     reported in the summary loop;
#   * ratios are "N/A" when clang produced 0 instruction lines; such rows are
#     kept out of the top-5 lists, and every division is guarded against a
#     zero or N/A denominator.
# Globals:   PERF_CATS, CASES, JOBS, RESULTS_ROOT, LLVM_OPT_LEVEL, SAVE_ASM,
#            LLVM_ASM_DIR, REPORT_FILE, JSON_FILE, colour vars (read)
# Outputs:   report on stdout; `results` stays visible to the export helpers
#            through dynamic scoping
#######################################
run_perf() {
  load_cases "${PERF_CATS[@]}"

  local llvm_opt="-O${LLVM_OPT_LEVEL}"
  local llvm_label="clang ${llvm_opt}"

  echo ""
  echo "========== 性能对比:编译器 -O vs ${llvm_label}(${#CASES[@]} 用例)=========="
  echo ""

  local res_dir="$RESULTS_ROOT/.perf_llvm_results"
  rm -rf "$res_dir"
  mkdir -p "$res_dir"

  # Run the workers, in parallel or serially.
  local i
  if [[ $JOBS -gt 1 && ${#CASES[@]} -gt 1 ]]; then
    # Child shells started by xargs need the worker functions and their globals.
    export COMPILER CLANG CLANG_FLAGS LLVM_OPT_LEVEL SAVE_ASM LLVM_ASM_DIR
    export -f run_perf_worker preprocess_for_clang count_insn

    local -a queue=()
    for i in "${!CASES[@]}"; do
      queue+=("$i|${CASES[$i]}")
    done

    # In the child: $1 = result dir, $2 = "<index>|<path>".
    printf '%s\0' "${queue[@]}" | xargs -0 -n 1 -P "$JOBS" bash -c '
      idx=${2%%|*}
      sy=${2#*|}
      run_perf_worker "$idx" "$sy" "$1/$idx"
    ' _ "$res_dir" || true
  else
    for i in "${!CASES[@]}"; do
      run_perf_worker "$i" "${CASES[$i]}" "$res_dir/$i"
    done
  fi

  # Collect per-case results.
  local compiler_fail=0 llvm_fail=0
  local -a results=() # "stem|compiler_lines|llvm_lines|ratio"
  local rf status stem cl ll ratio flag

  for i in "${!CASES[@]}"; do
    rf="$res_dir/$i"

    if [[ ! -f "$rf" ]]; then
      compiler_fail=$((compiler_fail + 1))
      echo -e " [${RED}FAIL${NC}] $(basename "${CASES[$i]}" .sy) 超时/崩溃"
      continue
    fi

    status=$(sed -n 's/^STATUS=//p' "$rf")
    stem=$(sed -n 's/^STEM=//p' "$rf")
    cl=$(sed -n 's/^COMPILER_LINES=//p' "$rf")
    ll=$(sed -n 's/^LLVM_LINES=//p' "$rf")

    case "$status" in
      COMP_FAIL)
        compiler_fail=$((compiler_fail + 1))
        echo -e " [${RED}FAIL${NC}] $stem 编译器编译失败"
        ;;
      LLVM_FAIL)
        llvm_fail=$((llvm_fail + 1))
        echo -e " [${YELLOW}SKIP${NC}] $stem clang 编译失败"
        ;;
      OK)
        flag=""
        if [[ "$ll" -eq 0 ]]; then
          ratio="N/A"
        else
          ratio=$(awk -v c="$cl" -v l="$ll" 'BEGIN { printf "%.2f", c/l }')
        fi

        # Colour by ratio: <= 1.5 green, <= 3.0 yellow, otherwise red.
        if [[ "$ratio" != "N/A" ]]; then
          if awk -v r="$ratio" 'BEGIN { exit(r <= 1.5 ? 0 : 1) }'; then
            flag="${GREEN}"
          elif awk -v r="$ratio" 'BEGIN { exit(r <= 3.0 ? 0 : 1) }'; then
            flag="${YELLOW}"
          else
            flag="${RED}"
          fi
        fi

        printf " %-35s 编译器:%5d clang:%5d ${flag}${BOLD}%sx${NC}\n" \
          "$stem" "$cl" "$ll" "$ratio"

        results+=("$stem|$cl|$ll|$ratio")
        ;;
    esac
  done

  rm -rf "$res_dir"

  # Summary statistics.
  printf '\n'
  echo "========== 性能对比汇总 =========="
  echo ""

  local valid=${#results[@]}

  if [[ $valid -eq 0 ]]; then
    echo "无有效用例"
    return
  fi

  # Top 5 largest gaps (compiler worse than clang).
  echo "--- 差距最大 TOP 5(优先优化,编译器/clang > 1.0)---"
  printf '%s\n' "${results[@]}" | awk -F'|' '$4+0 > 1.0' | sort -t'|' -k4 -rn | head -5 | while IFS='|' read -r stem cl ll ratio; do
    local flag="${RED}"
    if awk -v r="$ratio" 'BEGIN { exit(r <= 1.5 ? 0 : 1) }'; then flag="${GREEN}"
    elif awk -v r="$ratio" 'BEGIN { exit(r <= 3.0 ? 0 : 1) }'; then flag="${YELLOW}"; fi
    printf " %-35s 编译器:%5d clang:%5d ${flag}${BOLD}%sx${NC}\n" "$stem" "$cl" "$ll" "$ratio"
  done

  echo ""
  echo "--- 编译器优于 clang TOP 5(编译器/clang < 1.0)---"
  # "N/A" would evaluate to 0 in awk and be ranked as a win; exclude it.
  printf '%s\n' "${results[@]}" | awk -F'|' '$4 != "N/A" && $4+0 < 1.0' | sort -t'|' -k4 -n | head -5 | while IFS='|' read -r stem cl ll ratio; do
    printf " %-35s 编译器:%5d clang:%5d ${GREEN}${BOLD}%sx${NC}\n" "$stem" "$cl" "$ll" "$ratio"
  done

  echo ""
  echo "--- 差距最小 TOP 5(最接近 1.0x)---"
  printf '%s\n' "${results[@]}" | awk -F'|' '
    $4 != "N/A" {
      ratio = $4 + 0
      dist = (ratio > 1.0) ? (ratio - 1.0) : (1.0 - ratio)
      printf "%s|%s|%s|%s|%f\n", $1, $2, $3, $4, dist
    }' | sort -t'|' -k5 -n | head -5 | while IFS='|' read -r stem cl ll ratio dist; do
    printf " %-35s 编译器:%5d clang:%5d ${GREEN}${BOLD}%sx${NC}\n" "$stem" "$cl" "$ll" "$ratio"
  done

  echo ""

  # Instruction totals over all valid cases.
  local total_compiler=0 total_llvm=0
  local r
  for r in "${results[@]}"; do
    IFS='|' read -r _ cl ll _ <<< "$r"
    total_compiler=$((total_compiler + cl))
    total_llvm=$((total_llvm + ll))
  done

  local total_ratio="N/A"
  if [[ $total_llvm -gt 0 ]]; then
    total_ratio=$(awk -v c="$total_compiler" -v l="$total_llvm" 'BEGIN { printf "%.2f", c/l }')
  fi

  # Geometric mean of the per-case ratios (rows with ratio <= 0 or N/A are skipped).
  local geo_mean
  geo_mean=$(printf '%s\n' "${results[@]}" | awk -F'|' '
    BEGIN { sum = 0; n = 0 }
    {
      ratio = $4 + 0
      if (ratio > 0) { sum += log(ratio); n++ }
    }
    END {
      if (n > 0) printf "%.2f", exp(sum / n)
      else print "N/A"
    }')

  echo "--- 整体指标 ---"
  printf " 编译器总指令数: %d\n" "$total_compiler"
  printf " %s 总指令数: %d\n" "$llvm_label" "$total_llvm"
  printf " 总指令数比: ${BOLD}%sx${NC}\n" "$total_ratio"
  printf " 几何平均比: ${BOLD}%sx${NC} (越接近 1.0 越接近 %s)\n" "$geo_mean" "$llvm_label"
  printf " 有效用例: %d\n" "$valid"
  if [[ $compiler_fail -gt 0 ]]; then
    printf " 编译器失败: %d\n" "$compiler_fail"
  fi
  if [[ $llvm_fail -gt 0 ]]; then
    printf " clang 失败: %d\n" "$llvm_fail"
  fi

  echo ""

  # Score estimate: at or below target_ratio counts as >= 90, otherwise 100 / geo_mean.
  local target_ratio="1.11"
  if [[ "$geo_mean" == "N/A" ]]; then
    echo -e "${YELLOW}几何平均比不可用,无法估算性能分${NC}"
  elif awk -v gm="$geo_mean" -v tr="$target_ratio" 'BEGIN { exit(gm <= tr ? 0 : 1) }'; then
    echo -e "${GREEN}几何平均比 ${geo_mean}x ≤ ${target_ratio}x,性能分预估 ≥90(一级水平)${NC}"
  else
    local perf_est
    perf_est=$(awk -v gm="$geo_mean" 'BEGIN { printf "%.0f", 100 / gm }')
    echo -e "${YELLOW}几何平均比 ${geo_mean}x > ${target_ratio}x,性能分预估 ≈${perf_est}(一级需 ≥90)${NC}"
  fi

  if [[ "$SAVE_ASM" == true ]]; then
    echo ""
    # Matches the layout the worker writes: <LLVM_ASM_DIR>/<category>/<opt level>/.
    echo -e "${CYAN}clang 汇编已保存到 $LLVM_ASM_DIR/<类别>/${LLVM_OPT_LEVEL}/${NC}"
    echo " 可对比分析 clang/LLVM 的优化策略(循环展开、向量化、指令调度等)"
  fi

  # Export reports.
  if [[ -n "$REPORT_FILE" ]]; then
    _export_csv "$llvm_label" "$LLVM_OPT_LEVEL" "$total_compiler" "$total_llvm" "$geo_mean" "$valid" "$compiler_fail" "$llvm_fail"
    echo ""
    echo -e "${CYAN}CSV 报告已导出到 $REPORT_FILE${NC}"
  fi

  if [[ -n "$JSON_FILE" ]]; then
    _export_json "$llvm_label" "$LLVM_OPT_LEVEL" "$total_compiler" "$total_llvm" "$geo_mean" "$valid" "$compiler_fail" "$llvm_fail"
    echo ""
    echo -e "${CYAN}JSON 报告已导出到 $JSON_FILE${NC}"
  fi
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 导出函数
|
|
|
# ============================================================
|
|
|
#######################################
# Write the performance comparison to REPORT_FILE as CSV: one row per case
# (sorted by ratio, descending) followed by a summary section.
# Reads the caller's `results` array ("stem|compiler|clang|ratio" entries)
# through dynamic scoping. The score estimate is 100 / geo_mean and is
# reported as N/A when the geometric mean is not a positive number, instead
# of dividing by zero. Rows whose ratio is N/A get winner "n/a".
# Globals:   results, PERF_CATS, TEST_ROOT, REPORT_FILE (read)
# Arguments: $1 llvm label (unused), $2 clang opt level, $3 compiler total,
#            $4 clang total, $5 geometric mean, $6 valid cases,
#            $7 compiler failures (unused), $8 clang failures (unused)
#######################################
_export_csv() {
  local llvm_label="$1" llvm_opt="$2"
  local total_compiler="$3" total_llvm="$4" geo_mean="$5"
  local valid="$6" compiler_fail="$7" llvm_fail="$8"

  local now
  now=$(date '+%Y-%m-%d %H:%M:%S')

  local perf_est="N/A"
  if [[ "$geo_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] \
    && awk -v gm="$geo_mean" 'BEGIN { exit(gm > 0 ? 0 : 1) }'; then
    perf_est=$(awk -v gm="$geo_mean" 'BEGIN { printf "%.0f", 100 / gm }')
  fi

  {
    echo "test_case,category,compiler_insn,clang_insn,ratio,winner"
    printf '%s\n' "${results[@]}" | sort -t'|' -k4 -rn | while IFS='|' read -r stem cl ll ratio; do
      # Recover the category by locating the case file under TEST_ROOT.
      local cat="" c
      for c in "${PERF_CATS[@]}"; do
        if [[ -f "$TEST_ROOT/$c/${stem}.sy" ]]; then
          cat="$c"
          break
        fi
      done

      local winner="clang"
      if [[ "$ratio" == "N/A" ]]; then
        winner="n/a"
      elif [[ "$ratio" == "1.00" ]]; then
        winner="tie"
      elif awk -v r="$ratio" 'BEGIN { exit(r < 1.0 ? 0 : 1) }'; then
        winner="compiler"
      fi
      echo "${stem},${cat},${cl},${ll},${ratio},${winner}"
    done
    echo ""
    echo "# 汇总,,,"
    echo "生成时间,,${now}"
    echo "clang优化级别,,${llvm_opt}"
    echo "有效用例,,${valid}"
    echo "编译器总指令数,,${total_compiler}"
    echo "clang总指令数,,${total_llvm}"
    echo "总指令数比,,${total_compiler}/${total_llvm}"
    echo "几何平均比,,${geo_mean}"
    echo "性能分预估,,${perf_est}"
  } > "$REPORT_FILE"
}
|
|
|
|
|
|
#######################################
# Write the performance comparison to JSON_FILE.
# Reads the caller's `results` array ("stem|compiler|clang|ratio" entries)
# through dynamic scoping and passes everything to an embedded Python program
# via argv. Values that are not numeric ("N/A" ratio or geometric mean, and
# the score estimate derived from it) are emitted as JSON null instead of
# crashing the export with a ValueError.
# Globals:   results, JSON_FILE (read)
# Arguments: $1 llvm label (unused), $2 clang opt level, $3 compiler total,
#            $4 clang total, $5 geometric mean, $6 valid cases,
#            $7 compiler failures, $8 clang failures
#######################################
_export_json() {
  local llvm_label="$1" llvm_opt="$2"
  local total_compiler="$3" total_llvm="$4" geo_mean="$5"
  local valid="$6" compiler_fail="$7" llvm_fail="$8"

  local now
  now=$(date -Iseconds)

  # Score estimate is 100 / geo_mean; only computed for a positive number.
  local perf_est="N/A"
  if [[ "$geo_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] \
    && awk -v gm="$geo_mean" 'BEGIN { exit(gm > 0 ? 0 : 1) }'; then
    perf_est=$(awk -v gm="$geo_mean" 'BEGIN { printf "%.0f", 100 / gm }')
  fi

  python3 - "$JSON_FILE" "$now" "$llvm_opt" "$valid" \
    "$total_compiler" "$total_llvm" "$geo_mean" "$perf_est" \
    "$compiler_fail" "$llvm_fail" \
    "${results[@]}" <<'PY'
import json
import sys


def to_float(text):
    """Return text as a float, or None when it is not numeric (e.g. "N/A")."""
    try:
        return float(text)
    except ValueError:
        return None


outfile = sys.argv[1]
report = {
    "generated_at": sys.argv[2],
    "clang_opt_level": str(sys.argv[3]),
    "summary": {
        "valid_cases": int(sys.argv[4]),
        "total_compiler_insn": int(sys.argv[5]),
        "total_clang_insn": int(sys.argv[6]),
        "geometric_mean_ratio": to_float(sys.argv[7]),
        "estimated_performance_score": to_float(sys.argv[8]),
        "compiler_fail": int(sys.argv[9]),
        "clang_fail": int(sys.argv[10]),
    },
    "cases": [],
}

for r in sys.argv[11:]:
    stem, cl, ll, ratio = r.split('|')
    rv = to_float(ratio)
    if rv is None:
        winner = "n/a"
    elif rv < 1.0:
        winner = "compiler"
    elif rv == 1.0:
        winner = "tie"
    else:
        winner = "clang"
    report["cases"].append({
        "test_case": stem,
        "compiler_insn": int(cl),
        "clang_insn": int(ll),
        "ratio": rv,
        "winner": winner,
    })

with open(outfile, 'w') as f:
    json.dump(report, f, ensure_ascii=False, indent=2)
PY
}
|
|
|
|
|
|
# ============================================================
|
|
|
# 执行
|
|
|
# ============================================================
|
|
|
# Run every requested mode, always in the order baseline -> diff -> perf.
# `case` with no matching arm is a no-op with status 0, like a false `if`.
case "$DO_BASELINE" in
  true) run_baseline ;;
esac

case "$DO_DIFF" in
  true) run_diff ;;
esac

case "$DO_PERF" in
  true) run_perf ;;
esac
|