diff --git a/.claude/hooks/block-destructive.sh b/.claude/hooks/block-destructive.sh
new file mode 100755
index 00000000..28695fdd
--- /dev/null
+++ b/.claude/hooks/block-destructive.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# PreToolUse hook for the Bash tool: refuses a few destructive commands
+# (recursive rm of / or /*, git reset --hard, force push to master/main).
+#
+# Input:  hook payload as JSON on stdin; only tool_input.command is read.
+# Output: on a match, one JSON object on stdout whose
+#         hookSpecificOutput.permissionDecision is "deny". On no match nothing
+#         is printed, which leaves the decision to the normal permission flow.
+# Limits: pattern matching on the raw command text; it is a guard rail, not a
+#         sandbox. A force push that names no branch is not detected.
+set -euo pipefail
+
+# Read the hook input (JSON) from stdin.
+input=$(cat)
+
+# Extract the command. Best effort: if python3 is missing or the payload is
+# not the expected JSON, the command is treated as empty and nothing is blocked.
+cmd=$(printf '%s' "$input" | python3 -c "import sys,json; print(json.load(sys.stdin).get('tool_input',{}).get('command',''))" 2>/dev/null || echo "")
+
+# Building blocks (POSIX ERE) for the patterns below.
+readonly START='(^|[^[:alnum:]_-])'    # so "legit" or "--rm" do not match
+readonly END='([^[:alnum:]_./-]|$)'    # so "maintenance" or "/tmp" do not match
+readonly ARGS='([^;&|]*[[:space:]])?'  # more words of the same simple command
+readonly OPT='[[:space:]]+-[^[:space:]]+'
+readonly FORCE='(--force[^[:space:];&|]*|-[[:alpha:]]*f[[:alpha:]]*)'
+readonly BRANCH='(refs/heads/)?(master|main)'
+readonly TO_BRANCH="([^;&|]*[[:space:]:+])?${BRANCH}"
+readonly PUSH="${START}git[[:space:]]${ARGS}push[[:space:]]"
+
+# Succeeds when the command matches the ERE in $1. A here-string replaces
+# `echo | grep -q` so an early grep exit cannot fail the pipeline under pipefail.
+matches() {
+  grep -qE -- "$1" <<<"$cmd"
+}
+
+# Prints a deny verdict carrying the reason in $1 and ends the hook.
+deny() {
+  printf '{"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": "deny", "permissionDecisionReason": "%s"}}\n' "$1"
+  exit 0
+}
+
+# 1. rm with a recursive flag (-rf, -fr, -r -f, -R, --recursive) aimed at / or /*
+if matches "${START}rm(${OPT})*[[:space:]]+(-[[:alpha:]]*[rR][[:alpha:]]*|--recursive)(${OPT})*[[:space:]]+/([*]|${END})"; then
+  deny '危险操作：rm -rf / 被阻止。如需删除构建产物，请使用更精确的路径。'
+fi
+
+# 2. git reset --hard (would discard uncommitted work), flag in any position
+if matches "${START}git[[:space:]]${ARGS}reset[[:space:]]${ARGS}--hard${END}"; then
+  deny '危险操作：git reset --hard 被阻止。请先确认所有改动已提交或使用 git stash。'
+fi
+
+# 3. force push to master/main: force flag before or after the branch, or a
+#    "+" refspec
+if matches "${PUSH}${ARGS}${FORCE}[[:space:]]${TO_BRANCH}${END}" \
+  || matches "${PUSH}${TO_BRANCH}[[:space:]]${ARGS}${FORCE}${END}" \
+  || matches "${PUSH}${ARGS}[+]([^[:space:];&|]*:)?${BRANCH}${END}"; then
+  deny '禁止 force push 到 master/main 分支。请使用正常 push 或创建新分支。'
+fi
+
+# No match: print nothing so the normal permission flow handles the command.
+# (An explicit "allow" verdict would approve every Bash command outright.)
diff --git a/.claude/hooks/spec-reminder.sh b/.claude/hooks/spec-reminder.sh
new file mode 100755
index 00000000..2f008613
--- /dev/null
+++ b/.claude/hooks/spec-reminder.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# SessionStart hook: prints a reminder of the key project rules for the new session.
+# The output is one JSON object on stdout; the reminder text is the hookSpecificOutput.additionalContext string.
+cat << 'HEREDOC_END'
+{
+  "continue": true,
+  "hookSpecificOutput": {
+    "hookEventName": "SessionStart",
+    "additionalContext": "## 本次会话开发规范提醒\n\n遵循 CLAUDE.md 中的七个模块规范。关键要点：\n\n1. **安全第一**：任何优化必须通用，不得针对特定测试用例；段错误高危区（寄存器分配合并后重算degree、大栈帧用movz/movk、大函数限制spill轮次≤3）\n2. **性能北极星**：假设→测量基线→实现→正确性验证→性能测量→决策。修改前跑 ./count_asm.sh\n3. **快门禁（每次commit前）**：./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x\n4. **MCP铁律**：查符号用codegraph，搜字面量才用grep；改代码前先用codegraph_impact评估影响\n5. **小diff原则**：每次变更≤200行，大改动分步进行\n"
+  }
+}
+HEREDOC_END
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 00000000..f6981373
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,40 @@
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash .claude/hooks/spec-reminder.sh",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash .claude/hooks/block-destructive.sh",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "matcher": "",
+        "hooks": [
+          {
+            "type": "prompt",
+            "prompt": "在结束本次会话之前，请确认以下事项：\n1. 快门禁是否通过？（./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x）\n2. 指令数是否有退化？（如有优化改动，跑 ./count_asm.sh）\n3. 是否有未提交的改动需要处理？\n4. 如有重要经验教训，是否已写入 memory？\n\n请简要回答每个问题（是/否/不适用），然后正常结束。",
+            "timeout": 15
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/CLAUDE.md b/CLAUDE.md
index cbd782bc..2ee16d99 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,133 +2,322 @@
 
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 
-## Project Overview
+## 语言规范
 
-SysY 编译器课程实验 — a progressive compiler (Lab1–Lab6) for the SysY language (a C subset) targeting ARM64/AArch64. Built with C++17, CMake, and ANTLR4.
+主要使用中文交流、写注释、写文档和 commit message。代码标识符（变量名、函数名、类名）和文件名使用英文。
 
-## Build Commands
+## 项目概述
+
+SysY → ARM64/AArch64 编译器，CMake + C++17 + ANTLR 4.13.2。2026 编译系统设计赛（华为毕昇杯）ARM 赛道参赛项目。
+
+## 构建
 
-### Prerequisites
 ```bash
-# Install dependencies (Ubuntu 22.04 / WSL)
-sudo apt install -y build-essential cmake git openjdk-11-jre llvm clang gcc-aarch64-linux-gnu qemu-user
+# 首次：生成 ANTLR Lexer/Parser
+mkdir -p build/generated/antlr4
+java -jar third_party/antlr-4.13.2-complete.jar -Dlanguage=Cpp -visitor -no-listener \
+  -Xexact-output-dir -o build/generated/antlr4 src/antlr4/SysY.g4
+
+# 全量构建
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF
+cmake --build build -j "$(nproc)"
 ```
 
-### Generate ANTLR Lexer/Parser (required before first build)
+可执行文件：`./build/bin/compiler`
+
+## 编译器 CLI（比赛格式）
+
 ```bash
-mkdir -p build/generated/antlr4
-java -jar third_party/antlr-4.13.2-complete.jar \
-  -Dlanguage=Cpp -visitor -no-listener -Xexact-output-dir \
-  -o build/generated/antlr4 src/antlr4/SysY.g4
+compiler -S -o output.s input.sy       # 汇编输出（比赛标准）
+compiler -S -o output.s input.sy -O    # 带优化
+compiler --emit-ir input.sy            # 打印 IR
+compiler --emit-parse-tree input.sy    # 打印语法树
+```
+
+## 架构
+
+编译管线：`SysY → [frontend] ANTLR 语法树 → [sem] 语义分析 → [irgen] IR 生成 → [ir/passes] IR 优化 → [mir/Lowering] MIR 降级 → [mir/RegAlloc] 寄存器分配 → [mir/FrameLowering] 栈帧 → [mir/peephole] 窥孔 → [mir/AsmPrinter] AArch64 汇编`
+
+源码目录：
+- `src/frontend/` — ANTLR 驱动、语法树打印
+- `src/sem/` — Sema、SymbolTable、ConstEval
+- `src/irgen/` — 语法树 → LLVM IR 翻译
+- `src/ir/` — IR 数据结构（Module→Function→BasicBlock→Instruction），passes/ 含 Mem2Reg/CFGSimplify/ConstFold/ConstProp/DCE/CSE/LICM
+- `src/mir/` — 机器 IR（MachineModule→MachineFunction→MachineBasicBlock→MachineInstr），Lowering/RegAlloc/FrameLowering/AsmPrinter/Peephole
+- `src/include/` — 各模块头文件
+
+关键设计：IR 类型支持 void/i1/i32/float/i32*/float*；MIR 操作数为 PhysReg/VReg/Imm/FrameIndex/Label/Symbol；`-O` 触发所有 IR pass；GP 可分配集含 x16/x17；xzr/wzr 为零寄存器，sp 为栈指针。
+
+---
+
+# 一、安全第一：编译器优化的生存法则
+
+## 竞赛红线（零容忍，违反即取消成绩）
+
+1. **禁止投机优化**：不得识别特定函数名、字符串、输入特征来激活优化
+2. **禁止硬编码结果**：不得对计算结果预设答案
+3. **禁止依赖 UB**：不得假设数组不越界、除法不溢出等
+4. **优化必须通用**：对所有合法 SysY2026 程序语义保持
+
+## 段错误预防（从历史故障中提炼的高危区）
+
+| 区域 | 常见根因 | 预防规则 |
+|------|----------|----------|
+| 寄存器分配 | 合并后颜色/degree 不一致 | 合并后总是重算 degree；不修改遍历中的容器 |
+| 栈帧 | 大偏移量（>12KB）ldr/str 溢出 | 大栈帧必须用 movz/movk 合成偏移 |
+| spill | 无限 spill 循环 | 大函数（>120 vregs）限制 spill 轮次 ≤3 |
+| 活跃合并 | 干涉图边不完整 | Briggs 保守测试：邻居 degree≥K 计数 < K |
+| 迭代器 | move_adj 自环导致失效 | 合并前检查 u != v，清理后验证 |
+
+## 优化安全性自检清单
+
+实现任何优化变换时必须确认：
+- [ ] 对所有合法 SysY2026 程序，语义是否保持？
+- [ ] 副作用顺序是否保持？（Load/Store/Call 不能重排跨越彼此）
+- [ ] 浮点语义是否保持？（不能随意重关联）
+- [ ] 是否有针对特定测试用例的投机判断？（有则违规）
+
+## 修改范围限制
+
+- 一次只改一个 pass 或一个模块
+- 寄存器分配和 IR 优化不能混在一次改动中
+- 改 MIR 层前先确认 IR 层正确性基线
+
+---
+
+# 二、性能北极星：指令数驱动的开发循环
+
+## 核心循环（不可跳步）
+
 ```
+假设 → 测量基线 → 实现 → 正确性验证 → 性能测量 → 决策
+  ↑                                                      |
+  └──────────── 回退或调整 ←─────────────────────────────┘
+```
+
+| 步骤 | 命令/动作 |
+|------|-----------|
+| 假设 | 写一句话：「改 X，预期指令数减少 Y 条」 |
+| 基线 | `./count_asm.sh` |
+| 实现 | 写代码 |
+| 正确性 | `./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x` |
+| 性能 | `./count_asm.sh` |
+| 决策 | 对比基线，判断合并/调整/回退 |
+
+## 指令数作为核心指标
+
+指令数（`wc -l` 汇编）无测量噪声、可复现。目标硬件 Cortex-A53 为顺序核，指令数与运行时间相关性较高。指令数减少是积极信号但非绝对保证。
+
+## 性能退化门禁
+
+| 规模 | 动作 |
+|------|------|
+| 大面积退化（>5 用例） | **阻止合并**，分析根因 |
+| 少量退化（3-5 用例，<5%） | 标记关注，分析根因，记录到 commit |
+| 零散退化（1-2 用例，<2%） | 可容忍，注明到 commit |
+| 零退化 + 改善 | **理想**，更新基线，合并 |
+
+## 基线管理
+
+`指令数基线.md` 记录历史最低值，`count_asm.sh` 自动维护——新值更低时才更新。性能改善 commit 须附带基线更新。
+
+---
+
+# 三、AI 协作协议
+
+## 任务规模 → 流程映射
+
+| 规模 | 特征 | 流程 |
+|------|------|------|
+| 轻量 | 单文件、阈值调整、明显 bug（<30 分钟） | 直接改 → 快门禁 → commit |
+| 中量 | 跨文件、新 pass、算法改动（1-4 小时） | brainstorming → 计划 → 实现 → 快门禁 → 中门禁 |
+| 重量 | 寄存器分配重构、IR 框架（多日） | office-hours → GSD 全流程 → 全门禁 → extract-learnings |
+
+## 提示词模式
+
+- **先说为什么再说做什么**：告诉 AI 目标（「减少 spill 代码量」）而非操作（「修改 spillCost 函数」）
+- **先计划后代码**：要求 AI 先提变更计划，审查通过后再写代码。即使是「简单」改动
+- **显式约束前置**：一次性给出所有约束（红线、修改范围、语义安全要求）
+- **小 diff（≤200 行）**：大改动分步进行；大 diff 的审查成本超过生成收益
+
+## AI 不能做的事
+
+- **不能自行验证自己生成的代码**——必须通过编译器运行和测试脚本等外部工具验证
+- **不能决定核心算法设计**——可以建议和实现，最终设计由人审查
+- **不能跳过门禁**——任何改动都必须过测试
+- **AI 生成的测试不能作为唯一的正确性验证**——需要独立的外部 oracle（.out 对比）
+
+## DeepSeek 特化策略
+
+- **开启 thinking mode**：复杂推理任务使用 `reasoning_effort: max`
+- **利用上下文缓存**：同一文件反复读取几乎免费，大胆多次参考已有代码
+- **分割大 prompt**：DeepSeek 在独立小任务上表现最好
+- **C++ 代码 double-check**：DeepSeek 在系统编程上偏弱，指针/迭代器/内存操作需额外验证
+
+## MCP 使用铁律
+
+| 场景 | 工具 | 不要 |
+|------|------|------|
+| 查找符号 | `codegraph_search` | 不要 grep |
+| 调用关系 | `codegraph_callers/callees` | 不要手动 Read 追踪 |
+| 改动影响 | `codegraph_impact` | 不要猜测 |
+| 代码区探索 | `codegraph_explore`（一次） | 不要逐个 codegraph_node |
+| 字面量搜索 | `grep` | 不要用 codegraph 搜 |
+| PR 管理 | `gh create_pr/create_review` | 不要手动 |
+
+---
+
+# 四、三层门禁
+
+## 数据集优先级
+
+- **一级**：`2026test/`（竞赛官方，有 .out，每次必跑）
+- **二级**：`test_merged/`、`testdata2022/`、`testdata2024/`（历史回归，关键节点跑）
+
+## 快门禁（每次 commit 前，~2 分钟）
 
-### Lab1 (frontend only — parse tree printing)
 ```bash
-cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON
-cmake --build build -j "$(nproc)"
-./build/bin/compiler --emit-parse-tree test/test_case/functional/simple_add.sy
+./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x
 ```
 
-### Full build (all labs, including IR gen, optimization, and codegen)
+通过标准：0 失败。失败则阻止 commit。
+
+## 中门禁（每次 merge 前，~10 分钟）
+
 ```bash
-cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=OFF
-cmake --build build -j "$(nproc)"
+./2026test.sh -c functional -x
+./2026test.sh -c h_functional -x
+./2026test.sh -c performance -x
+./count_asm.sh
 ```
 
-## Compiler Usage
+通过标准：0 功能失败 + 指令数无大面积退化。
+
+## 全门禁（关键节点，~30 分钟）
 
 ```bash
-# Competition format: compile to assembly
-./build/bin/compiler -S -o output.s input.sy
+./2026test.sh                    # 全量
+# 历史数据集回归
+shopt -s globstar; for f in test_merged/**/*.sy; do
+  ./scripts/verify_asm.sh "$f" --run -O || echo "FAIL: $f"
+done
+```
+
+触发条件：寄存器分配重构、IR 框架改动、赛前验证。
+
+## 门禁纪律
+
+- 绝不跳过门禁——改动能通过门禁才是真的「完成」
+- 功能测试失败时绝不进入性能测量——先修正确性
+- 门禁失败后修复再跑——不允许「先合并后修复」
+
+---
+
+# 五、代码与 Git 规范
 
-# With optimization
-./build/bin/compiler -S -o output.s input.sy -O1
+## 命名
 
-# Emit IR
-./build/bin/compiler --emit-ir input.sy
+- 变量：`snake_case`；函数/类：`PascalCase`；成员变量：`snake_case_`
+- 标识符和文件名用英文
 
-# Single test: compile, link, run with QEMU, and compare output
-./scripts/verify_asm.sh test/test_case/functional/simple_add.sy --run
+## 中文规范
 
-# Same for IR path (uses llc + clang to compile/run)
-./scripts/verify_ir.sh test/test_case/functional/simple_add.sy --run
+注释、commit message、文档、错误信息用中文。IR/MIR 调试打印用英文。
+
+```cpp
+// 使用 Briggs 保守测试而非 George，因为 O(k) vs O(k^2)
+if (HasMovePair(u)) ReactivatePairs(u);
 ```
 
-### CLI Options
-| Flag | Effect |
-|------|--------|
-| `-S` | Emit assembly (default when no mode specified) |
-| `-o <file>` | Output file path |
-| `-O`, `-O1`, `-O2`, `-O3` | Optimization level |
-| `--emit-parse-tree` | Print ANTLR parse tree |
-| `--emit-ir` | Print LLVM-style IR |
-| `--emit-asm` | Print AArch64 assembly |
-| `-h`, `--help` | Show help |
+## Git
 
-## Testing
+格式：`<type>(<scope>): <中文简述>`。一 commit 一逻辑变更。不提交编译或测试失败的代码。功能分支开发，master 保护。
 
-### Main test harness
-```bash
-# Run all 2026test functional tests with optimization
-./2026test.sh
+---
+
+# 六、自进化机制
+
+## 触发条件
 
-# Functional tests only, max 10 cases, stop on first failure
-./2026test.sh -c functional -n 10 -x
+1. **故障驱动**：段错误、性能退化、测试失败 → 诊断根因 → 可预防则更新本规范
+2. **成功固化**：新优化方法被验证有效 → 记录模式
+3. **定期审查**：每两周 review 本规范，删除过时规则，强化有效规则
 
-# Without optimization
-./2026test.sh -O0
+## 操作流程
+
+```
+故障发生 → 根因分析（中文 ≤100 字）
+         → 可预防？
+            ├─ 是 → 更新 CLAUDE.md 或 memory
+            └─ 否 → 只记录
 ```
 
-### Legacy test scripts
-```bash
-./test1.sh   # Lab1: syntax tree
-./test2.sh   # Lab2: IR generation
-./test3.sh   # Lab3: assembly generation
-./test4.sh   # Lab4: scalar optimization
-./test5.sh   # Lab5: register allocation
-```
-
-## Architecture
-
-### Compiler Pipeline
-```
-SysY source (.sy) → ANTLR Lexer/Parser → AST (ANTLR parse tree)
-  → Sema (name resolution, type checking) → IR (LLVM-style, load/store form)
-  → IR Passes (Mem2Reg → LICM → ConstFold/Prop/DCE/CFG/CSE) → MIR (machine IR, AArch64)
-  → RegAlloc → FrameLowering → Peephole → AArch64 Assembly output
-```
-
-### Source Layout
-
-| Directory | Purpose |
-|-----------|---------|
-| `src/antlr4/SysY.g4` | ANTLR grammar for SysY language |
-| `src/frontend/` | ANTLR driver + syntax tree printer (Lab1) |
-| `src/sem/` | Semantic analysis: name binding, scope, type checking (Lab2 prep) |
-| `src/irgen/` | IR generation via ANTLR visitor: Decl, Exp, Stmt, Func (Lab2) |
-| `src/ir/` | LLVM-style IR: Value/User/Use hierarchy, Module/Function/BasicBlock, IRBuilder (Lab2) |
-| `src/ir/passes/` | Scalar optimizations (Lab4): Mem2Reg, ConstFold, ConstProp, DCE, CFGSimplify, CSE, LICM |
-| `src/ir/analysis/` | DominatorTree, LoopInfo |
-| `src/mir/` | Machine IR + AArch64 backend (Lab3, Lab5): Lowering (IR→MIR), RegAlloc, FrameLowering, AsmPrinter |
-| `src/mir/passes/` | MIR peephole pass |
-| `src/utils/` | CLI argument parsing, logging |
-| `src/include/` | **Build-time headers.** At build time CMake adds `src/include` as include path. |
-| `include/` | **Platform-provided headers.** Gitignored — supplied externally by grading platform. Mirrors `src/include/`. |
-| `sylib/` | SysY runtime library (sylib.c), linked into final executables |
-| `scripts/` | verify_asm.sh, verify_ir.sh — single-case test helpers |
-| `third_party/` | ANTLR jar + antlr4-runtime sources |
-| `test/test_case/` | Reference test cases with expected outputs |
-
-### Key Design Patterns
-
-- **IR IR**: Lightweight LLVM-style IR with `Value → User → Instruction` class hierarchy and def-use chains via `Use` objects. `IRBuilder` appends instructions to a `BasicBlock`. The IR starts in load/store (alloca-based) form; `Mem2Reg` promotes allocas to SSA phi nodes.
-- **MIR IR**: Lower-level, three-address machine IR using AArch64 opcodes and a union-like `Operand` (reg, vreg, imm, frame index, label, symbol). Purely a data container — no SSA, no def-use analysis.
-- **ANTLR Visitor**: `IRGenImpl` extends the ANTLR-generated `SysYBaseVisitor` to walk the parse tree and emit IR. Each `visit*` method returns `std::any` (typically `ir::Value*`).
-- **Pass infrastructure**: Each IR pass is a standalone function (`RunMem2Reg`, `RunDCE`, etc.) taking a `Module&`. `PassManager` and `PassManagerModule` orchestrate them with fixed-point iteration (serialize, compare, re-run until convergence).
-
-### Important Notes
-
-- The `#if COMPILER_PARSE_ONLY` macro in `main.cpp` guards all code beyond Lab1. The CMake option `COMPILER_PARSE_ONLY` controls whether `sem`, `irgen`, and `mir` subdirectories are built.
-- `include/` is in `.gitignore` and absent from the build include path. It is provided externally by the grading platform. When editing headers, work in `src/include/`.
-- The grammar `SysY.g4` defines the SysY language subset: `int`/`float`/`void` types, arrays, `const`, `if`/`else`/`while`/`break`/`continue`/`return`, and C-like expressions including logical short-circuit `&&`/`||`.
-- Commit message convention: `<type>(<scope>): <subject>` where type ∈ {feat, fix, refactor, docs, test, chore} and scope ∈ {frontend, irgen, backend, test, doc}.
+## 有效性度量
+
+- 同类 bug 是否再次出现
+- 门禁是否在 commit 前拦截了问题
+- 指令数趋势是否持续改善
+
+---
+
+# 七、Skill & MCP 最优编排
+
+## 仅保留有用的工具
+
+本项目是 C++ 编译器，以下可用工具**有价值**：
+- **MCP**：codegraph（代码智能）、github（PR/issue）
+- **流程 skill**：brainstorming、writing-plans、TDD、verification-before-completion、systematic-debugging
+- **门禁 skill**：gsd-code-review、review
+- **战略 skill**（仅多日架构决策）：gsd-discuss/plan/execute-phase、office-hours、gsd-extract-learnings、ship
+
+以下**忽略**：qa/browse/benchmark/canary（Web）、figma/react-bits/design-shotgun（前端）、cso/security-review（Web 安全）
+
+## 三种模式
+
+### 快刀（<30 分钟）
+
+```
+codegraph_context → 直接改 → 快门禁 → commit
+```
+
+只用 Superpowers 战术层。示例：调阈值、修明显 bug。
+
+### 标准优化（1-4 小时）
+
+```
+brainstorming → writing-plans → TDD → 快门禁 → gsd-code-review → 中门禁 → commit
+```
+
+**brainstorming 是强制的**——「太简单不需要设计」是反模式。
+
+### 重器（多日）
+
+```
+office-hours → gsd-discuss → gsd-plan → gsd-execute（内含 TDD 循环）
+  → 全门禁 → gsd-code-review → gsd-extract-learnings → ship
+```
+
+只在 3-4 个关键架构点使用。编译器反馈循环很快，偏向执行而非过度计划。
+
+## 最高 ROI 组合
+
+```
+bug 发现 → codegraph_impact（影响范围）
+        → gh create_issue（用 impact 结果作 body）
+        → brainstorming（设计方案）
+        → TDD（先写会失败的测试）
+        → 实现 → verification（快门禁）
+        → gh create_pr
+```
+
+每一步都有外部信号验证，避免 AI 自说自话。
+
+## 自进化闭环
+
+```
+gsd-extract-learnings → 更新 CLAUDE.md/memory
+     ↑                        ↓
+ 全门禁验证              下次任务自动加载
+     ↑                        ↓
+  实现优化              AI 知道上次的教训
+```
diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp
index 679eb75c..1252c6be 100644
--- a/src/mir/Lowering.cpp
+++ b/src/mir/Lowering.cpp
@@ -115,6 +115,24 @@ namespace mir
       }
     }
 
+    // Returns the condition code to use after the two compare operands are swapped (a < b becomes b > a).
+    static CondCode SwapCondCode(CondCode cond)
+    {
+      switch (cond)
+      {
+      case CondCode::LT:
+        return CondCode::GT;
+      case CondCode::LE:
+        return CondCode::GE;
+      case CondCode::GT:
+        return CondCode::LT;
+      case CondCode::GE:
+        return CondCode::LE;
+      default:
+        return cond; // EQ/NE are symmetric; any other code is also returned unchanged
+      }
+    }
+
     static PhysReg GetArgWReg(size_t index)
     {
       static const PhysReg regs[] = {
@@ -338,16 +356,43 @@ namespace mir
       {
         if (IsIntegerCompareOpcode(bin->GetOpcode()))
         {
-          int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
-                                scalar_slots, array_slots, block);
-          int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
-                                scalar_slots, array_slots, block);
-          block.Append(Opcode::CmpRR,
-                       {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+          // 常量折叠到 CmpImm，消除冗余 MovImm
+          int lhs_imm, rhs_imm;
+          bool lhs_const = TryGetConstantInt(bin->GetLhs(), lhs_imm);
+          bool rhs_const = TryGetConstantInt(bin->GetRhs(), rhs_imm);
+          auto imm_fits = [](int imm) { return imm >= 0 && imm <= 4095; };
+
+          CondCode cond = GetCondCodeForCompareOpcode(bin->GetOpcode());
+
+          if (rhs_const && imm_fits(rhs_imm))
+          {
+            int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
+                                   scalar_slots, array_slots, block);
+            block.Append(Opcode::CmpImm,
+                         {Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
+          }
+          else if (lhs_const && imm_fits(lhs_imm))
+          {
+            int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
+                                   scalar_slots, array_slots, block);
+            block.Append(Opcode::CmpImm,
+                         {Operand::VReg(rhs, VRegClass::Int), Operand::Imm(lhs_imm)});
+            cond = SwapCondCode(cond);
+          }
+          else
+          {
+            int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
+                                   scalar_slots, array_slots, block);
+            int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
+                                   scalar_slots, array_slots, block);
+            block.Append(Opcode::CmpRR,
+                         {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+          }
+
           int dst = function.CreateVReg(VRegClass::Int);
           block.Append(Opcode::CSet,
                        {Operand::VReg(dst, VRegClass::Int),
-                        Operand::Imm(static_cast<int>(GetCondCodeForCompareOpcode(bin->GetOpcode())))});
+                        Operand::Imm(static_cast<int>(cond))});
           value_vregs[value] = dst;
           return dst;
         }
@@ -958,12 +1003,29 @@ namespace mir
         return;
       }
 
-      int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
-                            scalar_slots, array_slots, block);
-      int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
-                            scalar_slots, array_slots, block);
-      block.Append(Opcode::CmpRR,
-                   {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+      // Fold a constant right-hand operand into CmpImm when it fits 0..4095.
+      // A constant left-hand operand is deliberately not folded here: that
+      // requires swapping the operands, and the visible code never hands the
+      // mirrored condition code (see SwapCondCode) to whoever consumes the
+      // flags, so LT/LE/GT/GE would be tested the wrong way round.
+      // NOTE(review): assumes the caller derives the condition from the opcode.
+      int rhs_imm = 0;
+      const bool fold_rhs = TryGetConstantInt(bin.GetRhs(), rhs_imm) &&
+                            rhs_imm >= 0 && rhs_imm <= 4095;
+      int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
+                             scalar_slots, array_slots, block);
+      if (fold_rhs)
+      {
+        block.Append(Opcode::CmpImm,
+                     {Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
+      }
+      else
+      {
+        int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
+                               scalar_slots, array_slots, block);
+        block.Append(Opcode::CmpRR,
+                     {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+      }
     }
 
     static bool TryEmitCondValueToFlags(const ir::Value *value,
 
     static bool TryEmitCondValueToFlags(const ir::Value *value,
diff --git a/指令数基线.md b/指令数基线.md
index 37066c6c..29f24025 100644
--- a/指令数基线.md
+++ b/指令数基线.md
@@ -20,66 +20,66 @@
 
 | 测试集标识 | 基线(行) |
 |---|---|
-| performance/01_mm1 | 310 |
-| performance/01_mm2 | 310 |
-| performance/01_mm3 | 310 |
-| performance/03_sort1 | 640 |
-| performance/03_sort2 | 640 |
-| performance/03_sort3 | 640 |
-| performance/conv2d-1 | 629 |
-| performance/conv2d-2 | 629 |
-| performance/conv2d-3 | 629 |
-| performance/crc1 | 290 |
-| performance/crc2 | 290 |
-| performance/crc3 | 290 |
-| performance/crypto-1 | 1949 |
-| performance/crypto-2 | 1949 |
-| performance/crypto-3 | 1949 |
-| performance/fft0 | 605 |
-| performance/fft1 | 605 |
-| performance/fft2 | 605 |
-| performance/h-1-01 | 158 |
-| performance/h-1-02 | 158 |
-| performance/h-1-03 | 158 |
-| performance/h-10-01 | 329 |
-| performance/h-10-02 | 329 |
-| performance/h-10-03 | 329 |
+| performance/01_mm1 | 309 |
+| performance/01_mm2 | 309 |
+| performance/01_mm3 | 309 |
+| performance/03_sort1 | 641 |
+| performance/03_sort2 | 641 |
+| performance/03_sort3 | 641 |
+| performance/conv2d-1 | 656 |
+| performance/conv2d-2 | 656 |
+| performance/conv2d-3 | 656 |
+| performance/crc1 | 279 |
+| performance/crc2 | 279 |
+| performance/crc3 | 279 |
+| performance/crypto-1 | 1926 |
+| performance/crypto-2 | 1926 |
+| performance/crypto-3 | 1926 |
+| performance/fft0 | 597 |
+| performance/fft1 | 597 |
+| performance/fft2 | 597 |
+| performance/h-1-01 | 157 |
+| performance/h-1-02 | 157 |
+| performance/h-1-03 | 157 |
+| performance/h-10-01 | 328 |
+| performance/h-10-02 | 328 |
+| performance/h-10-03 | 328 |
 | performance/h-4-01 | 163 |
 | performance/h-4-02 | 163 |
 | performance/h-4-03 | 163 |
-| performance/h-5-01 | 352 |
-| performance/h-5-02 | 352 |
-| performance/h-5-03 | 352 |
-| performance/h-8-01 | 407 |
-| performance/h-8-02 | 407 |
-| performance/h-8-03 | 407 |
-| performance/h-9-01 | 227 |
-| performance/h-9-02 | 227 |
-| performance/h-9-03 | 227 |
-| performance/huffman-01 | 829 |
-| performance/huffman-02 | 829 |
-| performance/huffman-03 | 829 |
+| performance/h-5-01 | 341 |
+| performance/h-5-02 | 341 |
+| performance/h-5-03 | 341 |
+| performance/h-8-01 | 411 |
+| performance/h-8-02 | 411 |
+| performance/h-8-03 | 411 |
+| performance/h-9-01 | 224 |
+| performance/h-9-02 | 224 |
+| performance/h-9-03 | 224 |
+| performance/huffman-01 | 792 |
+| performance/huffman-02 | 792 |
+| performance/huffman-03 | 792 |
 | performance/knapsack_naive-1 | 167 |
 | performance/knapsack_naive-2 | 167 |
 | performance/knapsack_naive-3 | 167 |
-| performance/many_mat_cal-1 | 432 |
-| performance/many_mat_cal-2 | 432 |
-| performance/many_mat_cal-3 | 432 |
-| performance/matmul1 | 366 |
-| performance/matmul2 | 366 |
-| performance/matmul3 | 366 |
-| performance/optimization_scheduling1 | 122 |
-| performance/optimization_scheduling2 | 122 |
-| performance/optimization_scheduling3 | 122 |
-| performance/shuffle0 | 472 |
-| performance/shuffle1 | 472 |
-| performance/shuffle2 | 472 |
-| performance/sl1 | 264 |
-| performance/sl2 | 264 |
-| performance/sl3 | 264 |
-| performance/transpose0 | 207 |
-| performance/transpose1 | 207 |
-| performance/transpose2 | 207 |
+| performance/many_mat_cal-1 | 434 |
+| performance/many_mat_cal-2 | 434 |
+| performance/many_mat_cal-3 | 434 |
+| performance/matmul1 | 379 |
+| performance/matmul2 | 379 |
+| performance/matmul3 | 379 |
+| performance/optimization_scheduling1 | 116 |
+| performance/optimization_scheduling2 | 116 |
+| performance/optimization_scheduling3 | 116 |
+| performance/shuffle0 | 471 |
+| performance/shuffle1 | 471 |
+| performance/shuffle2 | 471 |
+| performance/sl1 | 261 |
+| performance/sl2 | 261 |
+| performance/sl3 | 261 |
+| performance/transpose0 | 204 |
+| performance/transpose1 | 204 |
+| performance/transpose2 | 204 |
 
 ## 统计