diff --git a/src/mir/Lowering.cpp b/src/mir/Lowering.cpp
index d63bc9c..9c1cc5b 100644
--- a/src/mir/Lowering.cpp
+++ b/src/mir/Lowering.cpp
@@ -160,29 +160,21 @@ void EmitMovImm64(int vreg, uint64_t imm, MachineBasicBlock& block) {
 // ========== 核心：将 IR Value 转换为虚拟寄存器 ==========
 int EmitValueToVReg(const ir::Value* value, VRegContext& ctx,
                     MachineBasicBlock& block, MachineFunction& function) {
-  // 已经映射的值直接返回
-  if (ctx.HasVReg(value)) {
-    return ctx.GetVReg(value);
-  }
-
-  // 整数常量
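+  // Integer constant: emit a fresh MovImm at every use instead of caching the vreg.
+  // A cached vreg is defined only in the block that first materialized it; a use on a
+  // control-flow path that bypasses that block would read a stale register value.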
   if (auto* constant = dynamic_cast<const ir::ConstantInt*>(value)) {
     uint32_t imm = static_cast<uint32_t>(constant->GetValue());
     int vreg = ctx.NewVReg(VRegType::kInt32);
     EmitMovImm(vreg, imm, block);
-    ctx.SetVReg(value, vreg);
     return vreg;
   }
 
-  // 浮点常量（需要经过栈槽：整数bit→栈→浮点load）
+  // 浮点常量：每次都走栈槽加载（不缓存）
   if (auto* fconstant = dynamic_cast<const ir::ConstantFloat*>(value)) {
     float fval = fconstant->GetValue();
     uint32_t bits = FloatToBits(fval);
     int slot = function.CreateFrameIndex(4);
-    EmitMovImm(ctx.NewVReg(VRegType::kInt32), bits, block);
-    // 上面生成了一个新 vreg 来承载 bit pattern，直接用那个 vreg store 到栈
-    // 但我们需要拿到它的编号...上面的 EmitMovImm 内部分配了 vreg，不方便取回。
-    // 简化处理：用一个临时 vreg
     int tmp = ctx.NewVReg(VRegType::kInt32);
     EmitMovImm(tmp, bits, block);
     auto& s = block.Append(Opcode::StoreStack,
@@ -192,22 +184,22 @@ int EmitValueToVReg(const ir::Value* value, VRegContext& ctx,
     auto& l = block.Append(Opcode::LoadStack,
         {Operand::VReg(fvreg), Operand::FrameIndex(slot)});
     l.AddDef(fvreg);
-    ctx.SetVReg(value, fvreg);
     return fvreg;
   }
 
-  // 零常量 / 聚合零
+  // 零常量 / 聚合零：每次都生成新 MovImm（不缓存）
   if (dynamic_cast<const ir::ConstantZero*>(value) ||
       dynamic_cast<const ir::ConstantAggregateZero*>(value)) {
     int vreg = ctx.NewVReg(VRegType::kInt32);
     auto& instr = block.Append(Opcode::MovImm,
         {Operand::VReg(vreg), Operand::Imm(0)});
     instr.AddDef(vreg);
-    ctx.SetVReg(value, vreg);
     return vreg;
   }
 
-  // 全局变量：生成地址到 64 位 vreg
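+  // Global variable: emit a fresh adrp+add pair at every use instead of caching.
+  // As with constants, a cached address vreg is defined only in the block that first
+  // materialized it, so a use on a path that bypasses that block would read stale data.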
   if (auto* global = dynamic_cast<const ir::GlobalValue*>(value)) {
     int vreg = ctx.NewVReg(VRegType::kInt64);
     auto& i1 = block.Append(Opcode::Adrp,
@@ -217,10 +209,14 @@ int EmitValueToVReg(const ir::Value* value, VRegContext& ctx,
         {Operand::VReg(vreg), Operand::VReg(vreg), Operand::Label(global->GetName())});
     i2.AddDef(vreg);
     i2.AddUse(vreg);
-    ctx.SetVReg(value, vreg);
     return vreg;
   }
 
+  // 非常量值：检查是否已有映射
+  if (ctx.HasVReg(value)) {
+    return ctx.GetVReg(value);
+  }
+
   // 未找到
   std::string name = value->GetName();
   if (name.empty()) name = "(anonymous)";
@@ -1073,9 +1069,9 @@ std::unique_ptr<MachineFunction> LowerFunction(const ir::Function& func) {
     auto& insts = mirBB->GetInstructions();
     if (insts.empty()) continue;
 
-    const auto& last = insts.back();
-    if (last.GetOpcode() == Opcode::B) {
-      for (const auto& op : last.GetOperands()) {
+    // 辅助：从指令中提取 Label 操作数并添加边
+    auto addLabelSuccessors = [&](const MachineInstr& inst) {
+      for (const auto& op : inst.GetOperands()) {
         if (op.GetKind() == Operand::Kind::Label) {
           MachineBasicBlock* target = machineFunc->GetBlockByName(op.GetLabel());
           if (target) {
@@ -1084,39 +1080,10 @@ std::unique_ptr<MachineFunction> LowerFunction(const ir::Function& func) {
           }
         }
       }
-    } else if (last.GetOpcode() == Opcode::BCond) {
-      // BCond 之后如果有 B，则有两个后继
-      // 遍历该块倒数第二条开始找目标
-      for (const auto& op : last.GetOperands()) {
-        if (op.GetKind() == Operand::Kind::Label) {
-          MachineBasicBlock* target = machineFunc->GetBlockByName(op.GetLabel());
-          if (target) {
-            mirBB->AddSuccessor(target);
-            target->AddPredecessor(mirBB);
-          }
-        }
-      }
-      // 查找倒数第二条 B 指令的目标
-      if (insts.size() >= 2) {
-        const auto& prev = insts[insts.size() - 2];
-        if (prev.GetOpcode() == Opcode::B) {
-          for (const auto& op : prev.GetOperands()) {
-            if (op.GetKind() == Operand::Kind::Label) {
-              MachineBasicBlock* target = machineFunc->GetBlockByName(op.GetLabel());
-              if (target) {
-                mirBB->AddSuccessor(target);
-                target->AddPredecessor(mirBB);
-              }
-            }
-          }
-        }
-      }
-    } else if (last.GetOpcode() == Opcode::Ret) {
-      // Ret 无后继
-    } else {
-      // 非终结指令：fall-through 到下一个基本块（如果有）
-      // 查找基本块列表中的下一个
-      bool found = false;
+    };
+
+    // 辅助：添加 fallthrough 到下一个 IR 基本块
+    auto addFallthrough = [&]() {
       for (size_t i = 0; i + 1 < func.GetBlocks().size(); ++i) {
         if (func.GetBlocks()[i].get() == bb.get()) {
           const auto* nextBB = func.GetBlocks()[i + 1].get();
@@ -1125,10 +1092,39 @@ std::unique_ptr<MachineFunction> LowerFunction(const ir::Function& func) {
             mirBB->AddSuccessor(nextMIR);
             nextMIR->AddPredecessor(mirBB);
           }
-          found = true;
           break;
         }
       }
+    };
+
+    const auto& last = insts.back();
+    if (last.GetOpcode() == Opcode::B) {
+      // B 为最后一条指令：添加 B 的目标
+      addLabelSuccessors(last);
+      // 若倒数第二条是 BCond，则 BCond 的目标也是后继（BCond; B 模式）
+      if (insts.size() >= 2) {
+        const auto& prev = insts[insts.size() - 2];
+        if (prev.GetOpcode() == Opcode::BCond) {
+          addLabelSuccessors(prev);
+        }
+      }
+    } else if (last.GetOpcode() == Opcode::BCond) {
+      // BCond 为最后一条指令：添加 BCond 目标 + fallthrough
+      addLabelSuccessors(last);
+      // 若倒数第二条是 B（罕见：B; BCond），也添加 B 目标
+      if (insts.size() >= 2) {
+        const auto& prev = insts[insts.size() - 2];
+        if (prev.GetOpcode() == Opcode::B) {
+          addLabelSuccessors(prev);
+        }
+      }
+      // BCond 之后无 B，则 false 分支 fallthrough
+      addFallthrough();
+    } else if (last.GetOpcode() == Opcode::Ret) {
+      // Ret 无后继
+    } else {
+      // 非终结指令：fall-through 到下一个基本块
+      addFallthrough();
     }
   }
 
diff --git a/src/mir/RegAlloc.cpp b/src/mir/RegAlloc.cpp
index a4b842d..867476b 100644
--- a/src/mir/RegAlloc.cpp
+++ b/src/mir/RegAlloc.cpp
@@ -27,25 +27,23 @@ struct LiveInterval {
 };
 
 // ========== 可分配物理寄存器池 ==========
-// 整数 32位：callee-saved 优先（W19-W28），然后 caller-saved（W8-W13）
-// W14, W15 保留为 spill scratch
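+// Only callee-saved registers are allocated: W19-W28 (10), X19-X28 (10), S8-S13 (6).
+// Caller-saved registers (W0-W18, X0-X18, S0-S7, S16-S31) do not survive a function
+// call, and this allocator does not spill around call sites, so a value kept in one
+// of them across a call could be clobbered by the callee.
+// W14, W15 / X14, X15 / S14, S15 are reserved as spill scratch registers.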
 const PhysReg kGPR32Pool[] = {
   PhysReg::W19, PhysReg::W20, PhysReg::W21, PhysReg::W22,
   PhysReg::W23, PhysReg::W24, PhysReg::W25, PhysReg::W26,
   PhysReg::W27, PhysReg::W28,
-  PhysReg::W8, PhysReg::W9, PhysReg::W10, PhysReg::W11,
-  PhysReg::W12, PhysReg::W13,
 };
 constexpr int kNumGPR32 = sizeof(kGPR32Pool) / sizeof(kGPR32Pool[0]);
 
-// 整数 64位：callee-saved 优先（X19-X28），然后 caller-saved（X8-X13）
-// X14, X15 保留为 spill scratch
+// 整数 64位：仅 callee-saved（X19-X28）
 const PhysReg kGPR64Pool[] = {
   PhysReg::X19, PhysReg::X20, PhysReg::X21, PhysReg::X22,
   PhysReg::X23, PhysReg::X24, PhysReg::X25, PhysReg::X26,
   PhysReg::X27, PhysReg::X28,
-  PhysReg::X8, PhysReg::X9, PhysReg::X10, PhysReg::X11,
-  PhysReg::X12, PhysReg::X13,
 };
 constexpr int kNumGPR64 = sizeof(kGPR64Pool) / sizeof(kGPR64Pool[0]);
 
@@ -220,14 +218,29 @@ std::vector<LiveInterval> ComputeLiveIntervals(MachineFunction& function) {
   }
 
   // 生成 LiveInterval
-  // 注意：不将 liveIn 扩展到整个基本块的每个位置，因为线性扫描只需要
-  // [start, end] 区间。扩展会导致过长的活跃区间，造成不必要的 spill。
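+  // Widen each interval to cover every block where the vreg is live-in or live-out:
+  // start is lowered to that block's startIdx and end is raised to its endIdx.
+  // Def/use positions alone are not enough: a value live-in to a loop block B can have
+  // its last linear use in an earlier block, so a vreg defined after the back edge could
+  // share its physical register and clobber it (seen in graphColoring: &i hit by sxtw).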
   std::vector<LiveInterval> intervals;
   for (int vreg : allVRegs) {
     auto it = vregPositions.find(vreg);
     if (it == vregPositions.end() || it->second.empty()) continue;
     int start = *it->second.begin();
     int end = *it->second.rbegin();
+
+    for (const auto& bb : blocks) {
+      auto& info = blockInfo[bb.get()];
+      if (info.startIdx == 0 && info.endIdx == 0) continue;
+      bool isLiveIn  = liveIn[bb.get()].count(vreg) != 0;
+      bool isLiveOut = liveOut[bb.get()].count(vreg) != 0;
+      if (isLiveIn || isLiveOut) {
+        if (info.startIdx < start) start = info.startIdx;
+        if (info.endIdx > end) end = info.endIdx;
+      }
+    }
+
     VRegClass rc = InferVRegClass(vreg, function);
     intervals.emplace_back(vreg, start, end, rc);
   }