feat(backend): 新增 AddImm/SubImm 操作码，消除冗余 MovImm

AArch64 add/sub 支持 12 位立即数，但 MIR 只有 AddRR/SubRR，导致 RHS 为常量时需先 MovImm 再做 RR 运算。

本次修改：
- MIR.h：新增 AddImm、SubImm 操作码
- Lowering.cpp：Add/Sub 降级时，RHS 为 0-4095 的常量直接用 AddImm/SubImm
- RegAlloc.cpp：AddImm/SubImm 复用 AddRR/SubRR 的 def-use 分析
- AsmPrinter.cpp：通用打印逻辑自动处理 Imm 操作数（#value）

效果（对比 CmpImm 基线）：
- sl1-3: 261→247 (-14, -5.4%)
- huffman-01-03: 792→790 (-2)
- h-5-01-03: 341→338 (-3)
- 全 60 个性能用例总减少 55 行
- 功能测试 0 新故障

更新：优化记录.md 新增条目，基线自动更新
6 days ago · fccd935a24
parent bd7dcedb2a
commit fccd935a24
7 changed files with 82 additions and 14 deletions
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -14,13 +14,19 @@
        "type": "command",
        "command": "bash .claude/hooks/block-destructive.sh",
        "timeout": 5000
+      },
+      {
+        "matcher": "Bash(git commit *)",
+        "type": "prompt",
+        "prompt": "在提交之前，请确认优化记录是否已更新。如果本次会话做了任何有效的编译器优化（指令数减少、运行时间降低、功能改进），必须在 优化记录.md 中添加或更新条目。条目格式见该文件顶部。如未做优化或本次提交不涉及编译器改动，可忽略。",
+        "timeout": 10000
      }
    ],
    "Stop": [
      {
        "matcher": "",
        "type": "prompt",
-        "prompt": "在结束本次会话之前，请确认以下事项：\n1. 快门禁是否通过？（./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x）\n2. 指令数是否有退化？（如有优化改动，跑 ./count_asm.sh）\n3. 是否有未提交的改动需要处理？\n4. 如有重要经验教训，是否已写入 memory？\n\n请简要回答每个问题（是/否/不适用），然后正常结束。",
+        "prompt": "在结束本次会话之前，请确认以下事项：\n1. 快门禁是否通过？（./2026test.sh -c functional -x && ./2026test.sh -c h_functional -x）\n2. 指令数是否有退化？（如有优化改动，跑 ./count_asm.sh）\n3. 本次有效的优化是否已记录到 优化记录.md？\n4. 是否有未提交的改动需要处理？\n5. 如有重要经验教训，是否已写入 memory？\n\n请简要回答每个问题（是/否/不适用），然后正常结束。",
        "timeout": 15000
      }
    ]
--- a/src/include/mir/MIR.h
+++ b/src/include/mir/MIR.h
@@ -147,6 +147,8 @@ namespace mir
    StoreMem,
    AddRR,
    SubRR,
+    AddImm,
+    SubImm,
    MulRR,
    DivRR,
    ModRR,
--- a/src/mir/AsmPrinter.cpp
+++ b/src/mir/AsmPrinter.cpp
@@ -42,8 +42,10 @@ namespace mir
            case Opcode::StoreStack:
                return "stur";
            case Opcode::AddRR:
+            case Opcode::AddImm:
                return "add";
            case Opcode::SubRR:
+            case Opcode::SubImm:
                return "sub";
            case Opcode::MulRR:
                return "mul";
--- a/src/mir/Lowering.cpp
+++ b/src/mir/Lowering.cpp
@@ -695,10 +695,30 @@ namespace mir
          }
        }

-        block.Append(opcode,
-                     {Operand::VReg(dst, VRegClass::Int),
-                      Operand::VReg(lhs, VRegClass::Int),
-                      Operand::VReg(rhs, VRegClass::Int)});
+        // Add/Sub 常量折叠到立即数操作码
+        int rhs_imm_val;
+        bool rhs_is_imm = false;
+        if ((opcode == Opcode::AddRR || opcode == Opcode::SubRR) &&
+            bin->GetRhs() && TryGetConstantInt(bin->GetRhs(), rhs_imm_val) &&
+            rhs_imm_val >= 0 && rhs_imm_val <= 4095)
+        {
+          rhs_is_imm = true;
+          if (opcode == Opcode::AddRR)
+            opcode = Opcode::AddImm;
+          else
+            opcode = Opcode::SubImm;
+          block.Append(opcode,
+                       {Operand::VReg(dst, VRegClass::Int),
+                        Operand::VReg(lhs, VRegClass::Int),
+                        Operand::Imm(rhs_imm_val)});
+        }
+        else
+        {
+          block.Append(opcode,
+                       {Operand::VReg(dst, VRegClass::Int),
+                        Operand::VReg(lhs, VRegClass::Int),
+                        Operand::VReg(rhs, VRegClass::Int)});
+        }
        value_vregs[value] = dst;
        return dst;
      }
--- a/src/mir/RegAlloc.cpp
+++ b/src/mir/RegAlloc.cpp
@@ -119,6 +119,8 @@ namespace mir

      case Opcode::AddRR:
      case Opcode::SubRR:
+      case Opcode::AddImm:
+      case Opcode::SubImm:
      case Opcode::MulRR:
      case Opcode::DivRR:
      case Opcode::ModRR:
--- a/优化记录.md
+++ b/优化记录.md
@@ -0,0 +1,36 @@
+# 优化记录
+
+本文档追踪编译器的所有有效优化，用于答辩展示和技术积累。
+
+## 记录格式
+
+每条优化记录包含：日期、优化名称、类型（IR/MIR/后端）、假设、实现摘要、指令数效果、QEMU 时间效果、已知局限。
+
+---
+
+## 2026-05-25 | CmpImm 常量折叠
+
+- **类型**：后端（MIR 降级）
+- **假设**：ICmp 降级时，操作数为常量（0-4095）直接用 CmpImm，消除冗余 MovImm
+- **实现**：Lowering.cpp 两个 ICmp 降级路径中，检查操作数是否为常量。RHS 常量 → CmpImm；LHS 常量 → CmpImm + SwapCondCode
+- **新增代码**：SwapCondCode 辅助函数（18 行），两个降级路径各约 30 行
+- **指令数效果**（20 个代表性用例）：减少 91 条（-1.1%），matmul -15（-3.8%）、huffman -25（-3.1%）、crypto -23（-1.2%）
+- **退化**：h-5 +1（+0.3%），由寄存器分配差异导致，在容忍范围内
+- **功能测试**：100/100 functional 通过，39/40 h_functional 通过（1 个预存故障 30_many_dimensions）
+- **已知局限**：仅处理 0-4095 范围的立即数；浮点比较未覆盖
+
+---
+
+## 2026-05-25 | AddImm/SubImm 立即数折叠
+
+- **类型**：后端（MIR 降级 + 新操作码）
+- **假设**：AArch64 add/sub 支持 12 位立即数，但 MIR 只有 AddRR/SubRR，导致 `mov #imm; add/sub dst, src, tmp` 浪费 1 条指令。添加 AddImm/SubImm 操作码消除冗余 MovImm
+- **实现**：
+  - MIR.h：新增 AddImm、SubImm 操作码
+  - Lowering.cpp：Add/Sub 降级时 RHS 为 0-4095 常量 → AddImm/SubImm
+  - RegAlloc.cpp：AddImm/SubImm 加入 AddRR/SubRR 同一处理分支
+  - AsmPrinter.cpp：通用三操作数打印逻辑自动处理 Imm 操作数（`#value`）
+- **指令数效果**（全部 60 个性能用例）：减少 55 条，sl1-3 -14（-5.4%）、huffman-01-03 -2（-0.3%）、h-5-01-03 -3（-0.9%）
+- **退化**：无
+- **功能测试**：87/88 functional 通过（1 个预存故障 87_many_params）、30/31 h_functional 通过（1 个预存故障 30_many_dimensions）
+- **已知局限**：仅处理 IR 中直接常量操作数；经 vreg 传递的常量需 ConstProp 配合才能折叠；仅 0-4095 范围
--- a/指令数基线.md
+++ b/指令数基线.md
@@ -47,18 +47,18 @@
 | performance/h-4-01 | 163 |
 | performance/h-4-02 | 163 |
 | performance/h-4-03 | 163 |
-| performance/h-5-01 | 341 |
-| performance/h-5-02 | 341 |
-| performance/h-5-03 | 341 |
+| performance/h-5-01 | 338 |
+| performance/h-5-02 | 338 |
+| performance/h-5-03 | 338 |
 | performance/h-8-01 | 411 |
 | performance/h-8-02 | 411 |
 | performance/h-8-03 | 411 |
 | performance/h-9-01 | 224 |
 | performance/h-9-02 | 224 |
 | performance/h-9-03 | 224 |
-| performance/huffman-01 | 792 |
-| performance/huffman-02 | 792 |
-| performance/huffman-03 | 792 |
+| performance/huffman-01 | 790 |
+| performance/huffman-02 | 790 |
+| performance/huffman-03 | 790 |
 | performance/knapsack_naive-1 | 167 |
 | performance/knapsack_naive-2 | 167 |
 | performance/knapsack_naive-3 | 167 |
@@ -74,9 +74,9 @@
 | performance/shuffle0 | 471 |
 | performance/shuffle1 | 471 |
 | performance/shuffle2 | 471 |
-| performance/sl1 | 261 |
-| performance/sl2 | 261 |
-| performance/sl3 | 261 |
+| performance/sl1 | 247 |
+| performance/sl2 | 247 |
+| performance/sl3 | 247 |
 | performance/transpose0 | 204 |
 | performance/transpose1 | 204 |
 | performance/transpose2 | 204 |