线性扫描改图着色

feat(ra)进行不同版本的性能比较
feat(ra)通过测试
13 changed files with 2634 additions and 1282 deletions
--- a/.gitignore
+++ b/.gitignore
@ -74,4 +74,5 @@ test/test_result/
 # =========================
 result.txt
 build.sh
-gdb.sh
+gdb.sh
+compare_result/
--- a/include/mir/MIR.h
+++ b/include/mir/MIR.h
@ -1,9 +1,12 @@
 #pragma once

+#include <cstdint>
 #include <initializer_list>
 #include <iosfwd>
 #include <memory>
+#include <set>
 #include <string>
+#include <unordered_map>
 #include <vector>

 namespace ir {
@ -148,9 +151,10 @@ enum class Opcode {
 // ========== 操作数类 ==========
 class Operand {
 public:
-  enum class Kind { Reg, Imm, FrameIndex, Cond, Label };
+  enum class Kind { Reg, VReg, Imm, FrameIndex, Cond, Label };

  static Operand Reg(PhysReg reg);
+  static Operand VReg(int id);
  static Operand Imm(int value);
  static Operand FrameIndex(int index);
  static Operand Cond(CondCode cc);
@ -158,11 +162,15 @@ class Operand {

  Kind GetKind() const { return kind_; }
  PhysReg GetReg() const { return reg_; }
+  int GetVReg() const { return imm_; }
  int GetImm() const { return imm_; }
  int GetFrameIndex() const { return imm_; }
  CondCode GetCondCode() const { return cc_; }
  const std::string& GetLabel() const { return label_; }

+  bool IsVReg() const { return kind_ == Kind::VReg; }
+  bool IsPhysReg() const { return kind_ == Kind::Reg; }
+
 private:
  Operand(Kind kind, PhysReg reg, int imm, CondCode cc, const std::string& label);

@ -180,10 +188,28 @@ class MachineInstr {

  Opcode GetOpcode() const { return opcode_; }
  const std::vector<Operand>& GetOperands() const { return operands_; }
+  std::vector<Operand>& GetOperands() { return operands_; }
+
+  // def/use 信息（用于活跃性分析）
+  const std::vector<int>& GetDefs() const { return defs_; }
+  const std::vector<int>& GetUses() const { return uses_; }
+  std::vector<int>& GetDefs() { return defs_; }
+  std::vector<int>& GetUses() { return uses_; }
+  void AddDef(int vreg) { defs_.push_back(vreg); }
+  void AddUse(int vreg) { uses_.push_back(vreg); }
+
+  // 指令分类
+  bool IsCall() const { return opcode_ == Opcode::Call; }
+  // True for the opcodes that end a basic block here: B, BCond and Ret.
+  bool IsTerminator() const {
+    switch (opcode_) {
+      case Opcode::B:
+      case Opcode::BCond:
+      case Opcode::Ret:
+        return true;
+      default:
+        return false;
+    }
+  }
+  bool IsMove() const { return opcode_ == Opcode::MovReg; }

 private:
  Opcode opcode_;
  std::vector<Operand> operands_;
+  std::vector<int> defs_;
+  std::vector<int> uses_;
 };

 // ========== 栈槽结构 ==========
@ -211,10 +237,15 @@ class MachineBasicBlock {
  const std::vector<MachineBasicBlock*>& GetSuccessors() const { return successors_; }
  void AddSuccessor(MachineBasicBlock* succ) { successors_.push_back(succ); }

+  std::vector<MachineBasicBlock*>& GetPredecessors() { return predecessors_; }
+  const std::vector<MachineBasicBlock*>& GetPredecessors() const { return predecessors_; }
+  void AddPredecessor(MachineBasicBlock* pred) { predecessors_.push_back(pred); }
+
 private:
  std::string name_;
  std::vector<MachineInstr> instructions_;
  std::vector<MachineBasicBlock*> successors_;
+  std::vector<MachineBasicBlock*> predecessors_;
 };

 // ========== MIR 函数 ==========
@ -223,39 +254,69 @@ class MachineFunction {
  explicit MachineFunction(std::string name);

  const std::string& GetName() const { return name_; }
-  
+
  // 基本块管理
  MachineBasicBlock& GetEntry() { return entry_; }
  const MachineBasicBlock& GetEntry() const { return entry_; }

-  std::vector<std::unique_ptr<MachineBasicBlock>>& GetBasicBlocks() { 
-    return basic_blocks_; 
+  std::vector<std::unique_ptr<MachineBasicBlock>>& GetBasicBlocks() {
+    return basic_blocks_;
  }
  const std::vector<std::unique_ptr<MachineBasicBlock>>& GetBasicBlocks() const {
    return basic_blocks_;
  }
-  
+
  void AddBasicBlock(std::unique_ptr<MachineBasicBlock> bb) {
    basic_blocks_.push_back(std::move(bb));
  }

+  // Linear search of basic_blocks_ for the first block whose name equals
+  // `name`; returns nullptr when none matches. Only basic_blocks_ is scanned;
+  // the separate entry_ member is not consulted here.
+  MachineBasicBlock* GetBlockByName(const std::string& name) {
+    for (const auto& block : basic_blocks_) {
+      if (block->GetName() != name) continue;
+      return block.get();
+    }
+    return nullptr;
+  }
+
  // 栈槽管理
  int CreateFrameIndex(int size = 4);
  FrameSlot& GetFrameSlot(int index);
  const FrameSlot& GetFrameSlot(int index) const;
  std::vector<FrameSlot>& GetFrameSlots() { return frame_slots_; }
  const std::vector<FrameSlot>& GetFrameSlots() const { return frame_slots_; }
-  
+
  // 栈帧大小
  int GetFrameSize() const { return frame_size_; }
  void SetFrameSize(int size) { frame_size_ = size; }

+  // callee-saved 寄存器管理
+  void MarkCalleeSaved(PhysReg reg) { used_callee_saved_regs_.insert(reg); }
+  const std::set<PhysReg>& GetCalleeSavedRegs() const { return used_callee_saved_regs_; }
+  // True when `reg` was previously recorded in used_callee_saved_regs_.
+  bool IsCalleeSavedUsed(PhysReg reg) const {
+    return used_callee_saved_regs_.find(reg) != used_callee_saved_regs_.end();
+  }
+
+  // spill 槽管理
+  int CreateSpillSlot(int size = 4);
+  bool IsSpillSlot(int index) const;
+
+  // vreg 类型管理（由 Lowering 填充，RA 使用）
+  enum class VRegType : uint8_t { kInt32 = 0, kInt64 = 1, kFloat32 = 2 };
+  void SetVRegType(int vreg, VRegType type) { vreg_types_[vreg] = type; }
+  // Type recorded for `vreg`; falls back to kInt32 when no type was set.
+  VRegType GetVRegType(int vreg) const {
+    const auto found = vreg_types_.find(vreg);
+    if (found == vreg_types_.end()) return VRegType::kInt32;
+    return found->second;
+  }
+  bool HasVRegType(int vreg) const { return vreg_types_.count(vreg) > 0; }
+
 private:
  std::string name_;
  MachineBasicBlock entry_;
  std::vector<std::unique_ptr<MachineBasicBlock>> basic_blocks_;
  std::vector<FrameSlot> frame_slots_;
+  std::set<int> spill_slot_indices_;
  int frame_size_ = 0;
+  std::set<PhysReg> used_callee_saved_regs_;
+  std::unordered_map<int, VRegType> vreg_types_;
 };

 // ========== MIR 模块 ==========
@ -324,12 +385,9 @@ class MachineModule {
 };

 // ========== 后端流程函数 ==========
-/* std::unique_ptr<MachineFunction> LowerToMIR(const ir::Module& module);
-void RunRegAlloc(MachineFunction& function);
-void RunFrameLowering(MachineFunction& function);
-void PrintAsm(const MachineFunction& function, std::ostream& os); */
 std::unique_ptr<MachineModule> LowerToMIR(const ir::Module& module);
 void RunRegAlloc(MachineModule& module);
+void RunMIRPasses(MachineModule& module);
 void RunFrameLowering(MachineModule& module);
 void PrintAsm(const MachineModule& module, std::ostream& os);

--- a/scripts/compare_ra.sh
+++ b/scripts/compare_ra.sh
@ -0,0 +1,464 @@
+#!/usr/bin/env bash
+# compare_ra.sh — 对比寄存器分配（新版）与旧版编译器的汇编质量和运行性能
+#
+# 用法:
+#   ./scripts/compare_ra.sh --old feature/mir                          # 对比 feature/mir 分支
+#   ./scripts/compare_ra.sh --old 70234dd                              # 对比指定 commit
+#   ./scripts/compare_ra.sh --old feature/mir --tests performance       # 仅性能测试
+#   ./scripts/compare_ra.sh --old feature/mir --mode asm                # 仅对比汇编
+#   ./scripts/compare_ra.sh --old path/to/old/compiler --no-build       # 使用已构建的旧编译器
+#
+# 输出: 终端表格 + compare_result/ 目录下的详细文件
+
+set -euo pipefail
+
+# ========== Argument parsing ==========
+# Defaults for every command-line option; the parsing loop below overrides them.
+OLD_REF=""                # value of --old: git ref, or compiler path with --no-build
+MODE="all"        # asm | run | all
+TEST_SET="all"    # functional | performance | all
+NO_BUILD=false            # set by --no-build
+OLD_COMPILER_PATH=""
+NEW_COMPILER_PATH="./build/bin/compiler"   # relative to the current working directory
+WORKTREE_DIR=""           # filled in by setup_old_compiler when a worktree is created
+KEEP_WORKTREE=false       # set by --keep-worktree
+
+# Print the help text (with the script name interpolated) and exit with status 1.
+usage() {
+  cat <<EOF
+用法: $0 --old <branch|commit|path> [选项]
+
+必选:
+  --old <ref>        对比基线：git 分支名、commit hash、或旧编译器路径
+
+可选:
+  --mode <mode>      对比模式: asm (仅汇编) | run (仅运行) | all (默认)
+  --tests <set>      测试集: functional | performance | all (默认)
+  --no-build         旧编译器已构建好，--old 指向编译器可执行文件路径
+  --keep-worktree    保留旧的 git worktree（默认会删除）
+
+示例:
+  $0 --old feature/mir                              # 对比 feature/mir 分支
+  $0 --old 70234dd                                   # 对比特定 commit
+  $0 --old feature/mir --tests performance --mode run # 仅对比性能测试的运行时间
+  $0 --old /tmp/old-compiler --no-build               # 使用预构建的旧编译器
+EOF
+  exit 1
+}
+
+# Walk the command line. Options that take a value are checked for a missing
+# argument first, so the user gets the usage text instead of a `set -u`
+# "unbound variable" abort on $2.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --old|--mode|--tests)
+      if [[ $# -lt 2 ]]; then
+        echo "错误: $1 需要一个参数"
+        usage
+      fi
+      case "$1" in
+        --old) OLD_REF="$2" ;;
+        --mode) MODE="$2" ;;
+        --tests) TEST_SET="$2" ;;
+      esac
+      shift 2
+      ;;
+    --no-build) NO_BUILD=true; shift ;;
+    --keep-worktree) KEEP_WORKTREE=true; shift ;;
+    *) echo "未知参数: $1"; usage ;;
+  esac
+done
+
+if [[ -z "$OLD_REF" ]]; then
+  echo "错误: 必须指定 --old"
+  usage
+fi
+
+# Reject unknown enum values up front; otherwise a typo in --mode silently
+# skips both comparisons and a typo in --tests selects no test at all.
+case "$MODE" in
+  asm|run|all) ;;
+  *) echo "错误: 无效的 --mode: $MODE"; usage ;;
+esac
+case "$TEST_SET" in
+  functional|performance|all) ;;
+  *) echo "错误: 无效的 --tests: $TEST_SET"; usage ;;
+esac
+
+# ========== Path setup ==========
+# PROJECT_DIR is the parent of the directory holding this script; all results
+# are written under $PROJECT_DIR/compare_result.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+RESULT_DIR="$PROJECT_DIR/compare_result"
+OLD_BUILD_DIR=""
+OLD_COMPILER=""
+
+# Start from an empty result directory: anything left from a previous run is deleted.
+rm -rf "$RESULT_DIR"
+mkdir -p "$RESULT_DIR"
+
+# ========== 构建旧版编译器 ==========
+# Resolve the baseline compiler and store its path in OLD_COMPILER.
+#   --no-build : $OLD_REF is the path of an already-built compiler executable.
+#   otherwise  : $OLD_REF is a git ref; it is checked out into a dedicated
+#                worktree, the ANTLR parser is generated, and the project is
+#                built there with CMake. Build output goes to
+#                $RESULT_DIR/build_old.log.
+# Exits with status 1 on any failure.
+setup_old_compiler() {
+  if [[ "$NO_BUILD" == true ]]; then
+    # The user supplied the compiler path directly. Require a regular file:
+    # `-x` alone is also true for directories.
+    if [[ ! -f "$OLD_REF" || ! -x "$OLD_REF" ]]; then
+      echo "错误: 旧编译器路径不存在或不可执行: $OLD_REF"
+      exit 1
+    fi
+    OLD_COMPILER="$(realpath "$OLD_REF")"
+    echo "使用预构建旧编译器: $OLD_COMPILER"
+    return
+  fi
+
+  # Make sure the argument names a git ref.
+  if ! git rev-parse --verify "$OLD_REF" >/dev/null 2>&1; then
+    echo "错误: '$OLD_REF' 不是有效的 git 引用（分支/commit），或使用 --no-build 指定编译器路径"
+    exit 1
+  fi
+
+  # Slashes in the ref (e.g. feature/mir) are replaced so it forms one directory name.
+  WORKTREE_DIR="$PROJECT_DIR/.worktree-old-$(echo "$OLD_REF" | tr '/' '-')"
+  OLD_BUILD_DIR="$WORKTREE_DIR/build"
+
+  echo "=== 准备旧版编译器 ==="
+  echo "Git 引用: $OLD_REF"
+  echo "Worktree: $WORKTREE_DIR"
+
+  # Remove a worktree left over from a previous run.
+  if [[ -d "$WORKTREE_DIR" ]]; then
+    echo "移除已有 worktree..."
+    git worktree remove --force "$WORKTREE_DIR" 2>/dev/null || rm -rf "$WORKTREE_DIR"
+  fi
+
+  # --detach: check out the commit without claiming the branch, so this also
+  # works when $OLD_REF is a branch already checked out in another worktree.
+  git worktree add --detach "$WORKTREE_DIR" "$OLD_REF"
+  echo "Worktree 已创建: $WORKTREE_DIR"
+
+  # Generate the ANTLR parser sources.
+  echo "生成 ANTLR 语法分析器..."
+  java -jar "$WORKTREE_DIR/third_party/antlr-4.13.2-complete.jar" \
+    -Dlanguage=Cpp \
+    -visitor -no-listener \
+    -Xexact-output-dir \
+    -o "$OLD_BUILD_DIR/generated/antlr4" \
+    "$WORKTREE_DIR/src/antlr4/SysY.g4" > "$RESULT_DIR/build_old.log" 2>&1 || {
+    echo "错误: ANTLR 生成失败，日志:"
+    tail -20 "$RESULT_DIR/build_old.log"
+    exit 1
+  }
+
+  # Configure and build (Release).
+  echo "构建旧版编译器..."
+  cmake -S "$WORKTREE_DIR" -B "$OLD_BUILD_DIR" -DCMAKE_BUILD_TYPE=Release >> "$RESULT_DIR/build_old.log" 2>&1 || {
+    echo "错误: 旧版 cmake 配置失败，日志:"
+    tail -20 "$RESULT_DIR/build_old.log"
+    exit 1
+  }
+  # Fall back to 4 parallel jobs when nproc is unavailable.
+  cmake --build "$OLD_BUILD_DIR" -j"$(nproc 2>/dev/null || echo 4)" >> "$RESULT_DIR/build_old.log" 2>&1 || {
+    echo "错误: 旧版构建失败，日志:"
+    tail -30 "$RESULT_DIR/build_old.log"
+    exit 1
+  }
+
+  OLD_COMPILER="$OLD_BUILD_DIR/bin/compiler"
+  if [[ ! -x "$OLD_COMPILER" ]]; then
+    echo "错误: 旧编译器未生成: $OLD_COMPILER"
+    exit 1
+  fi
+  echo "旧编译器已构建: $OLD_COMPILER"
+}
+
+# ========== 确保新版编译器存在 ==========
+# Check that the new compiler exists at NEW_COMPILER_PATH and publish its
+# absolute path in NEW_COMPILER; exits with status 1 when it is missing.
+setup_new_compiler() {
+  if [[ -x "$NEW_COMPILER_PATH" ]]; then
+    NEW_COMPILER="$(realpath "$NEW_COMPILER_PATH")"
+    echo "新版编译器: $NEW_COMPILER"
+    return
+  fi
+  echo "错误: 新编译器不存在，请先构建: cmake -B build && cmake --build build -j"
+  exit 1
+}
+
+# ========== 工具检查 ==========
+# Check the external tools required by the selected options:
+#   java                    - only when the old compiler has to be built
+#   aarch64-linux-gnu-gcc   - warning always, fatal in run/all mode
+#   qemu-aarch64            - fatal in run/all mode
+check_tools() {
+  local have_cross_gcc=true
+  command -v aarch64-linux-gnu-gcc >/dev/null 2>&1 || have_cross_gcc=false
+
+  if [[ "$NO_BUILD" == false ]] && ! command -v java >/dev/null 2>&1; then
+    echo "错误: 未找到 java，构建时需要 ANTLR 生成语法分析器"
+    exit 1
+  fi
+
+  if [[ "$have_cross_gcc" == false ]]; then
+    echo "警告: 未找到 aarch64-linux-gnu-gcc，汇编模式可用但运行模式不可用"
+  fi
+
+  # The remaining checks only matter when test programs are executed.
+  case "$MODE" in
+    run|all) ;;
+    *) return 0 ;;
+  esac
+
+  if ! command -v qemu-aarch64 >/dev/null 2>&1; then
+    echo "错误: 未找到 qemu-aarch64，无法运行测试"
+    echo "  apt install qemu-user  (Ubuntu/Debian)"
+    exit 1
+  fi
+  if [[ "$have_cross_gcc" == false ]]; then
+    echo "错误: 未找到 aarch64-linux-gnu-gcc，无法链接可执行文件"
+    exit 1
+  fi
+}
+
+# ========== 获取测试列表 ==========
+# Print the selected test files (*.sy), one path per line, on stdout.
+# TEST_SET picks the suite(s): functional, performance, or all.
+# When nothing is found, the error goes to stderr and the function exits with
+# status 1. Stdout is reserved for paths: callers read it through a process
+# substitution, so an error printed there would be taken for a test file.
+get_tests() {
+  local test_dir="$PROJECT_DIR/test/test_case"
+  local tests=()
+  local suite f
+
+  for suite in functional performance; do
+    if [[ "$TEST_SET" == "$suite" || "$TEST_SET" == "all" ]]; then
+      for f in "$test_dir/$suite"/*.sy; do
+        # An unmatched glob stays literal; the -f test filters it out.
+        [[ -f "$f" ]] && tests+=("$f")
+      done
+    fi
+  done
+
+  if [[ ${#tests[@]} -eq 0 ]]; then
+    echo "错误: 没有找到测试用例" >&2
+    exit 1
+  fi
+  printf '%s\n' "${tests[@]}"
+}
+
+# ========== 汇编对比 ==========
+# Print the number of lines in file $2 matching the extended regex $1.
+# Always prints exactly one integer: `grep -c` prints 0 but exits 1 when
+# nothing matches, so a plain `$(grep -c ... || echo 0)` would yield "0\n0".
+_count_matching_lines() {
+  local n
+  n=$(grep -cE -- "$1" "$2" 2>/dev/null) || n=${n:-0}
+  printf '%s\n' "${n:-0}"
+}
+
+# Compile one test with both compilers and record static assembly statistics.
+#   $1 - path of the .sy test file
+# Writes under $RESULT_DIR/asm:
+#   <stem>.old.s / <stem>.new.s  generated assembly
+#   <stem>.result                one line: stem, old/new instruction counts and
+#                                change %, old/new memory-access counts and
+#                                change %, old/new branch counts; or a single
+#                                *_BUILD_FAIL marker when a compiler fails
+#   <stem>.diff                  unified diff of the two assembly files
+compare_asm() {
+  local test_file="$1"
+  local stem; stem=$(basename "$test_file" .sy)
+
+  local old_asm="$RESULT_DIR/asm/$stem.old.s"
+  local new_asm="$RESULT_DIR/asm/$stem.new.s"
+
+  mkdir -p "$RESULT_DIR/asm"
+
+  "$OLD_COMPILER" --emit-asm "$test_file" > "$old_asm" 2>/dev/null || {
+    echo "OLD_BUILD_FAIL" > "$RESULT_DIR/asm/$stem.result"
+    return
+  }
+  "$NEW_COMPILER" --emit-asm "$test_file" > "$new_asm" 2>/dev/null || {
+    echo "NEW_BUILD_FAIL" > "$RESULT_DIR/asm/$stem.result"
+    return
+  }
+
+  # Instruction lines: leading whitespace, a mnemonic word, then whitespace.
+  # Memory accesses and branches are matched by mnemonic anywhere on the line.
+  local inst_re='^\s+\w+\s'
+  local mem_re='\b(ldr|str|ldur|stur|ldp|stp)\b'
+  local branch_re='\bb(|\.\w+)\s'
+  local old_inst new_inst old_mem new_mem old_branches new_branches
+  old_inst=$(_count_matching_lines "$inst_re" "$old_asm")
+  new_inst=$(_count_matching_lines "$inst_re" "$new_asm")
+  old_mem=$(_count_matching_lines "$mem_re" "$old_asm")
+  new_mem=$(_count_matching_lines "$mem_re" "$new_asm")
+  old_branches=$(_count_matching_lines "$branch_re" "$old_asm")
+  new_branches=$(_count_matching_lines "$branch_re" "$new_asm")
+
+  # Relative change in percent; N/A when the baseline count is zero.
+  local inst_pct mem_pct
+  if [[ "$old_inst" -gt 0 ]]; then
+    inst_pct=$(echo "scale=1; ($new_inst - $old_inst) * 100 / $old_inst" | bc 2>/dev/null || echo "N/A")
+  else
+    inst_pct="N/A"
+  fi
+  if [[ "$old_mem" -gt 0 ]]; then
+    mem_pct=$(echo "scale=1; ($new_mem - $old_mem) * 100 / $old_mem" | bc 2>/dev/null || echo "N/A")
+  else
+    mem_pct="N/A"
+  fi
+
+  # Persist the statistics as one whitespace-separated line.
+  echo "$stem $old_inst $new_inst $inst_pct $old_mem $new_mem $mem_pct $old_branches $new_branches" \
+    > "$RESULT_DIR/asm/$stem.result"
+
+  # diff exits non-zero when the files differ; that is expected here.
+  diff -u "$old_asm" "$new_asm" > "$RESULT_DIR/asm/$stem.diff" 2>/dev/null || true
+}
+
+# ========== 运行对比 ==========
+# Build one test with both compilers, run both executables under qemu-aarch64
+# and record timing and correctness.
+#   $1 - path of the .sy test file
+# Optional siblings of the test file: <stem>.in (fed to stdin) and <stem>.out
+# (expected output). Writes under $RESULT_DIR/run; <stem>.result holds either
+# a *_COMPILE_FAIL / *_LINK_FAIL marker or one line:
+#   stem old_time new_time speedup old_match new_match old_rc new_rc
+compare_run() {
+  local test_file="$1"
+  local stem; stem=$(basename "$test_file" .sy)
+  local test_dir; test_dir=$(dirname "$test_file")
+
+  local old_exe="$RESULT_DIR/run/$stem.old"
+  local new_exe="$RESULT_DIR/run/$stem.new"
+  local old_out="$RESULT_DIR/run/$stem.old.out"
+  local new_out="$RESULT_DIR/run/$stem.new.out"
+  local stdin_file="$test_dir/$stem.in"
+  local expected_file="$test_dir/$stem.out"
+
+  mkdir -p "$RESULT_DIR/run"
+
+  # Build the old executable: compile to assembly, then link statically with sylib.
+  local old_asm="$RESULT_DIR/run/$stem.old.s"
+  "$OLD_COMPILER" --emit-asm "$test_file" > "$old_asm" 2>/dev/null || {
+    echo "OLD_COMPILE_FAIL" > "$RESULT_DIR/run/$stem.result"
+    return
+  }
+  aarch64-linux-gnu-gcc -no-pie "$old_asm" -L"$PROJECT_DIR/sylib" -lsysy -static -o "$old_exe" 2>/dev/null || {
+    echo "OLD_LINK_FAIL" > "$RESULT_DIR/run/$stem.result"
+    return
+  }
+
+  # Build the new executable the same way.
+  local new_asm="$RESULT_DIR/run/$stem.new.s"
+  "$NEW_COMPILER" --emit-asm "$test_file" > "$new_asm" 2>/dev/null || {
+    echo "NEW_COMPILE_FAIL" > "$RESULT_DIR/run/$stem.result"
+    return
+  }
+  aarch64-linux-gnu-gcc -no-pie "$new_asm" -L"$PROJECT_DIR/sylib" -lsysy -static -o "$new_exe" 2>/dev/null || {
+    echo "NEW_LINK_FAIL" > "$RESULT_DIR/run/$stem.result"
+    return
+  }
+
+  # Run the old executable.
+  local old_time="N/A" old_rc="N/A"
+  # errexit and pipefail are switched off while the programs run, so a
+  # non-zero exit of a test program does not abort the script.
+  set +eo pipefail
+  # The program's own stdout/stderr are redirected away; the report that `time`
+  # prints for the brace group is folded into the pipe by 2>&1 and the second
+  # field of its "real" line is kept. The exit code is saved to <out>.rc.
+  if [[ -f "$stdin_file" ]]; then
+    old_time=$( { time qemu-aarch64 -L /usr/aarch64-linux-gnu "$old_exe" < "$stdin_file" > "$old_out" 2>/dev/null; echo $? > "$old_out.rc"; } 2>&1 | grep real | awk '{print $2}' || echo "N/A")
+  else
+    old_time=$( { time qemu-aarch64 -L /usr/aarch64-linux-gnu "$old_exe" > "$old_out" 2>/dev/null; echo $? > "$old_out.rc"; } 2>&1 | grep real | awk '{print $2}' || echo "N/A")
+  fi
+  # A missing .rc file is reported as exit code 1.
+  old_rc=$(cat "$old_out.rc" 2>/dev/null || echo "1")
+
+  # Run the new executable.
+  local new_time="N/A" new_rc="N/A"
+  if [[ -f "$stdin_file" ]]; then
+    new_time=$( { time qemu-aarch64 -L /usr/aarch64-linux-gnu "$new_exe" < "$stdin_file" > "$new_out" 2>/dev/null; echo $? > "$new_out.rc"; } 2>&1 | grep real | awk '{print $2}' || echo "N/A")
+  else
+    new_time=$( { time qemu-aarch64 -L /usr/aarch64-linux-gnu "$new_exe" > "$new_out" 2>/dev/null; echo $? > "$new_out.rc"; } 2>&1 | grep real | awk '{print $2}' || echo "N/A")
+  fi
+  new_rc=$(cat "$new_out.rc" 2>/dev/null || echo "1")
+  set -eo pipefail
+
+  # Assemble the "actual" output (program stdout followed by the exit code on
+  # its own line); per the original note this matches verify_asm.sh's format.
+  local old_actual="$RESULT_DIR/run/$stem.old.actual"
+  local new_actual="$RESULT_DIR/run/$stem.new.actual"
+  {
+    cat "$old_out"
+    # Add a newline when the non-empty output does not already end with one.
+    if [[ -s "$old_out" ]] && (( $(tail -c 1 "$old_out" | wc -l) == 0 )); then
+      printf '\n'
+    fi
+    printf '%s\n' "$old_rc"
+  } > "$old_actual"
+  {
+    cat "$new_out"
+    if [[ -s "$new_out" ]] && (( $(tail -c 1 "$new_out" | wc -l) == 0 )); then
+      printf '\n'
+    fi
+    printf '%s\n' "$new_rc"
+  } > "$new_actual"
+
+  # Compare against the expected file (stdout + exit code), ignoring
+  # whitespace differences. Without an expected file both stay "N".
+  local old_match="N" new_match="N"
+  if [[ -f "$expected_file" ]]; then
+    diff -w -q "$old_actual" "$expected_file" >/dev/null 2>&1 && old_match="Y"
+    diff -w -q "$new_actual" "$expected_file" >/dev/null 2>&1 && new_match="Y"
+  fi
+
+  # Speedup = old seconds / new seconds.
+  local speedup="N/A"
+  if [[ "$old_time" != "N/A" && "$new_time" != "N/A" ]]; then
+    local old_sec new_sec
+    # Split "<min>m<sec>s" at the 'm' and convert to seconds; awk's numeric
+    # conversion drops the trailing 's'.
+    old_sec=$(echo "$old_time" | sed 's/m/ /' | awk '{print $1 * 60 + $2}' 2>/dev/null || echo 0)
+    new_sec=$(echo "$new_time" | sed 's/m/ /' | awk '{print $1 * 60 + $2}' 2>/dev/null || echo 0)
+    # Only divide when the new time is strictly positive.
+    if [[ "$(echo "$new_sec > 0" | bc -l 2>/dev/null)" == "1" ]]; then
+      speedup=$(echo "scale=2; $old_sec / $new_sec" | bc 2>/dev/null || echo "N/A")
+    fi
+  fi
+
+  echo "$stem $old_time $new_time $speedup $old_match $new_match $old_rc $new_rc" \
+    > "$RESULT_DIR/run/$stem.result"
+}
+
+# ========== 主流程 ==========
+# Entry point: print the banner, check tools, discover the tests, prepare both
+# compilers, run the comparisons selected by MODE, print the summary tables
+# and finally remove the temporary worktree (unless --keep-worktree was given).
+main() {
+  echo "============================================="
+  echo "  寄存器分配编译器对比"
+  echo "  旧版: $OLD_REF"
+  echo "  测试集: $TEST_SET"
+  echo "  模式: $MODE"
+  echo "============================================="
+  echo ""
+
+  check_tools
+
+  # Discover the tests before the expensive old-compiler build so an empty
+  # selection fails fast and leaves no worktree behind. get_tests runs in a
+  # subshell here, so its `exit 1` cannot stop this script; validate the list
+  # explicitly (an entry that is not a file means the list is unusable).
+  local tests
+  mapfile -t tests < <(get_tests)
+  local total=${#tests[@]}
+  if [[ "$total" -eq 0 || ! -f "${tests[0]:-}" ]]; then
+    echo "错误: 测试用例列表为空或无效"
+    exit 1
+  fi
+
+  setup_old_compiler
+  setup_new_compiler
+
+  echo "共 $total 个测试用例"
+  echo ""
+
+  # ========== Assembly comparison ==========
+  if [[ "$MODE" == "asm" || "$MODE" == "all" ]]; then
+    echo "=== 汇编质量对比 ==="
+    local count=0
+    for test_file in "${tests[@]}"; do
+      compare_asm "$test_file"
+      count=$((count + 1))
+      printf "\r  进度: %d/%d" "$count" "$total"
+    done
+    echo ""
+
+    # Print the assembly comparison table.
+    echo ""
+    printf "%-30s %8s %8s %8s %8s %8s %8s\n" \
+      "测试用例" "旧指令数" "新指令数" "变化%" "旧访存" "新访存" "变化%"
+    printf "%-30s %8s %8s %8s %8s %8s %8s\n" \
+      "------------------------------" "--------" "--------" "--------" "--------" "--------" "--------"
+
+    local total_old_inst=0 total_new_inst=0 total_old_mem=0 total_new_mem=0 valid_count=0
+    for f in "$RESULT_DIR/asm"/*.result; do
+      [[ -f "$f" ]] || continue
+      local result
+      result=$(cat "$f")
+      # Failure markers written by compare_asm contain "FAIL".
+      if [[ "$result" == *"FAIL"* ]]; then
+        printf "%-30s %8s\n" "$(basename "$f" .result)" "编译失败"
+        continue
+      fi
+      read -r stem old_inst new_inst inst_pct old_mem new_mem mem_pct _ _ <<< "$result"
+      printf "%-30s %8d %8d %7s%% %8d %8d %7s%%\n" \
+        "$stem" "$old_inst" "$new_inst" "$inst_pct" "$old_mem" "$new_mem" "$mem_pct"
+      total_old_inst=$((total_old_inst + old_inst))
+      total_new_inst=$((total_new_inst + new_inst))
+      total_old_mem=$((total_old_mem + old_mem))
+      total_new_mem=$((total_new_mem + new_mem))
+      valid_count=$((valid_count + 1))
+    done
+
+    if [[ "$valid_count" -gt 0 ]]; then
+      # Overall change in percent; N/A when the baseline total is zero
+      # (avoids a division by zero in bc).
+      local avg_inst_pct="N/A" avg_mem_pct="N/A"
+      if [[ "$total_old_inst" -gt 0 ]]; then
+        avg_inst_pct=$(echo "scale=1; ($total_new_inst - $total_old_inst) * 100 / $total_old_inst" | bc 2>/dev/null || echo "N/A")
+      fi
+      if [[ "$total_old_mem" -gt 0 ]]; then
+        avg_mem_pct=$(echo "scale=1; ($total_new_mem - $total_old_mem) * 100 / $total_old_mem" | bc 2>/dev/null || echo "N/A")
+      fi
+      printf "%-30s %8d %8d %7s%% %8d %8d %7s%%\n" \
+        "--- 合计 ---" "$total_old_inst" "$total_new_inst" "$avg_inst_pct" \
+        "$total_old_mem" "$total_new_mem" "$avg_mem_pct"
+    fi
+    echo ""
+    echo "详细 diff 文件: $RESULT_DIR/asm/*.diff"
+  fi
+
+  # ========== Run comparison ==========
+  if [[ "$MODE" == "run" || "$MODE" == "all" ]]; then
+    echo "=== 运行结果对比 ==="
+    local count=0
+    for test_file in "${tests[@]}"; do
+      compare_run "$test_file"
+      count=$((count + 1))
+      printf "\r  进度: %d/%d" "$count" "$total"
+    done
+    echo ""
+
+    echo ""
+    printf "%-30s %10s %10s %8s %6s %6s %8s %8s\n" \
+      "测试用例" "旧耗时" "新耗时" "加速比" "旧匹配" "新匹配" "旧退出码" "新退出码"
+    printf "%-30s %10s %10s %8s %6s %6s %8s %8s\n" \
+      "------------------------------" "----------" "----------" "--------" "------" "------" "--------" "--------"
+
+    local pass_old=0 pass_new=0 total_valid=0
+    for f in "$RESULT_DIR/run"/*.result; do
+      [[ -f "$f" ]] || continue
+      local result
+      result=$(cat "$f")
+      # Compile/link failure markers are printed verbatim.
+      if [[ "$result" == *"FAIL"* ]]; then
+        printf "%-30s %10s\n" "$(basename "$f" .result)" "$result"
+        continue
+      fi
+      read -r stem old_time new_time speedup old_match new_match old_status new_status <<< "$result"
+      printf "%-30s %10s %10s %8s %6s %6s %8s %8s\n" \
+        "$stem" "$old_time" "$new_time" "$speedup" "$old_match" "$new_match" "$old_status" "$new_status"
+      [[ "$old_match" == "Y" ]] && pass_old=$((pass_old + 1))
+      [[ "$new_match" == "Y" ]] && pass_new=$((pass_new + 1))
+      total_valid=$((total_valid + 1))
+    done
+
+    if [[ "$total_valid" -gt 0 ]]; then
+      echo ""
+      printf "输出匹配率: 旧版 %d/%d, 新版 %d/%d\n" "$pass_old" "$total_valid" "$pass_new" "$total_valid"
+    fi
+  fi
+
+  # ========== Cleanup ==========
+  # WORKTREE_DIR is only set when setup_old_compiler created a worktree.
+  if [[ -n "$WORKTREE_DIR" && "$KEEP_WORKTREE" == false ]]; then
+    echo ""
+    echo "清理 worktree..."
+    git worktree remove --force "$WORKTREE_DIR" 2>/dev/null || true
+  fi
+
+  echo ""
+  echo "对比结果保存在: $RESULT_DIR"
+}
+
+main
--- a/src/main.cpp
+++ b/src/main.cpp
@ -46,16 +46,13 @@ int main(int argc, char** argv) {
    }

    if (opts.emit_asm) {
-      //auto machine_func = mir::LowerToMIR(*module);
      auto machine_module = mir::LowerToMIR(*module);
-      //mir::RunRegAlloc(*machine_func);
      mir::RunRegAlloc(*machine_module);
-      //mir::RunFrameLowering(*machine_func);
+      mir::RunMIRPasses(*machine_module);
      mir::RunFrameLowering(*machine_module);
      if (need_blank_line) {
        std::cout << "\n";
      }
-      //mir::PrintAsm(*machine_func, std::cout);
      mir::PrintAsm(*machine_module, std::cout);
    }
 #else
--- a/src/mir/AsmPrinter.cpp
+++ b/src/mir/AsmPrinter.cpp
@ -64,6 +64,10 @@ void PrintOperand(std::ostream& os, const Operand& op) {
    case Operand::Kind::Reg:
      os << PhysRegName(op.GetReg());
      break;
+    case Operand::Kind::VReg:
+      throw std::runtime_error(
+          FormatError("asm", "寄存器分配未完成: 存在虚拟寄存器 #" +
+                              std::to_string(op.GetVReg())));
    case Operand::Kind::Imm:
      os << "#" << op.GetImm();
      break;
@ -88,6 +92,57 @@ static bool IsLegalAddSubImm(int64_t imm) {
  return false;
 }

+// ---- 寄存器宽度规范化 ----
+// True when `reg` lies in the enumerator range [W0, W30].
+static bool IsWReg(PhysReg reg) {
+  const bool not_below = reg >= PhysReg::W0;
+  const bool not_above = reg <= PhysReg::W30;
+  return not_below && not_above;
+}
+// True when `reg` lies in the enumerator range [X0, X30].
+static bool IsXReg(PhysReg reg) {
+  const bool not_below = reg >= PhysReg::X0;
+  const bool not_above = reg <= PhysReg::X30;
+  return not_below && not_above;
+}
+// True when `reg` lies in the enumerator range [S0, S31].
+static bool IsSReg(PhysReg reg) {
+  const bool not_below = reg >= PhysReg::S0;
+  const bool not_above = reg <= PhysReg::S31;
+  return not_below && not_above;
+}
+
+// Xn -> Wn; every other register (W or S included) is returned unchanged.
+// NOTE(review): relies on the X and W enumerators being laid out in the same
+// order with matching offsets - confirm against the PhysReg definition.
+static PhysReg ToW(PhysReg reg) {
+  if (!IsXReg(reg)) return reg;
+  const int index = static_cast<int>(reg) - static_cast<int>(PhysReg::X0);
+  return static_cast<PhysReg>(index + static_cast<int>(PhysReg::W0));
+}
+// Wn -> Xn; every other register (X or S included) is returned unchanged.
+// NOTE(review): relies on the W and X enumerators being laid out in the same
+// order with matching offsets - confirm against the PhysReg definition.
+static PhysReg ToX(PhysReg reg) {
+  if (!IsWReg(reg)) return reg;
+  const int index = static_cast<int>(reg) - static_cast<int>(PhysReg::W0);
+  return static_cast<PhysReg>(index + static_cast<int>(PhysReg::X0));
+}
+
+// Returns true when all physical-register operands in `ops` belong to the
+// same class (W, X or S). Non-register operands and registers outside those
+// three classes are ignored; a list with no classified register yields true.
+static bool AllSameRegWidth(const std::vector<Operand>& ops) {
+  int seen_class = -1;  // -1: nothing classified yet; 0 = W, 1 = X, 2 = S
+  for (const Operand& operand : ops) {
+    if (operand.GetKind() != Operand::Kind::Reg) continue;
+
+    const PhysReg reg = operand.GetReg();
+    int reg_class;
+    if (IsWReg(reg)) {
+      reg_class = 0;
+    } else if (IsXReg(reg)) {
+      reg_class = 1;
+    } else if (IsSReg(reg)) {
+      reg_class = 2;
+    } else {
+      continue;
+    }
+
+    if (seen_class == -1) {
+      seen_class = reg_class;
+    } else if (seen_class != reg_class) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Rewrites every physical-register operand in `ops` to the width of `dst`:
+// through ToW when `dst` is a W register, through ToX when it is an X
+// register. For any other `dst` the operands are left untouched.
+static void NormalizeRegOps(std::vector<Operand>& ops, PhysReg dst) {
+  const bool to_w = IsWReg(dst);
+  const bool to_x = IsXReg(dst);
+  if (!to_w && !to_x) return;
+
+  for (Operand& operand : ops) {
+    if (operand.GetKind() != Operand::Kind::Reg) continue;
+    const PhysReg current = operand.GetReg();
+    operand = Operand::Reg(to_w ? ToW(current) : ToX(current));
+  }
+}
+
 // 在匿名命名空间添加辅助函数
 static void PrintLoadImm64(std::ostream& os, PhysReg reg, uint64_t imm) {
  // 输出 movz + movk 序列
@ -148,37 +203,48 @@ void PrintInstruction(std::ostream& os, const MachineInstr& instr,
      os << "  mov " << PhysRegName(ops.at(0).GetReg()) << ", #"
         << ops.at(1).GetImm() << "\n";
      break;
-    case Opcode::MovReg:
-      os << "  mov " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << "\n";
+    case Opcode::MovReg:{
+      PhysReg dst = ops.at(0).GetReg();
+      PhysReg src = ops.at(1).GetReg();
+      if (IsSReg(dst) || IsSReg(src)) {
+        // 涉及 S 寄存器的 move：使用 fmov
+        if (!IsSReg(dst)) dst = ToW(dst);  // 确保是 W 寄存器
+        if (!IsSReg(src)) src = ToW(src);
+        os << "  fmov " << PhysRegName(dst) << ", " << PhysRegName(src) << "\n";
+      } else {
+        // GPR move：规范化宽度
+        if (IsWReg(dst) && IsXReg(src)) {
+          src = ToW(src);
+        } else if (IsXReg(dst) && IsWReg(src)) {
+          src = ToX(src);
+        }
+        os << "  mov " << PhysRegName(dst) << ", " << PhysRegName(src) << "\n";
+      }
      break;
+    }
    case Opcode::StoreStack: {
-      // 检查第二个操作数的类型
      if (ops.size() >= 2 && ops.at(1).GetKind() == Operand::Kind::FrameIndex) {
-        // 存储到栈槽
        const auto& slot = GetFrameSlot(function, ops.at(1));
        PrintStackAccess(os, "stur", ops.at(0).GetReg(), slot.offset);
      } else if (ops.size() >= 2 && ops.at(1).GetKind() == Operand::Kind::Reg) {
-        // 间接存储：存储到寄存器指向的地址
-        // STR W9, [X8]
+        // 间接存储：基址必须是 X 寄存器
+        PhysReg base = ToX(ops.at(1).GetReg());
        os << "  str " << PhysRegName(ops.at(0).GetReg()) << ", ["
-           << PhysRegName(ops.at(1).GetReg()) << "]\n";
+           << PhysRegName(base) << "]\n";
      } else {
        throw std::runtime_error("StoreStack: 无效的操作数类型");
      }
      break;
    }
    case Opcode::LoadStack: {
-      // 检查第二个操作数的类型
      if (ops.size() >= 2 && ops.at(1).GetKind() == Operand::Kind::FrameIndex) {
-        // 从栈槽加载
        const auto& slot = GetFrameSlot(function, ops.at(1));
        PrintStackAccess(os, "ldur", ops.at(0).GetReg(), slot.offset);
      } else if (ops.size() >= 2 && ops.at(1).GetKind() == Operand::Kind::Reg) {
-        // 间接加载：从寄存器指向的地址加载
-        // LDR W9, [X8]
+        // 间接加载：基址必须是 X 寄存器
+        PhysReg base = ToX(ops.at(1).GetReg());
        os << "  ldr " << PhysRegName(ops.at(0).GetReg()) << ", ["
-           << PhysRegName(ops.at(1).GetReg()) << "]\n";
+           << PhysRegName(base) << "]\n";
      } else {
        throw std::runtime_error("LoadStack: 无效的操作数类型");
      }
@ -204,115 +270,181 @@ void PrintInstruction(std::ostream& os, const MachineInstr& instr,
      }
      os << "\n";
      break;
-    case Opcode::AddRR:
-      os << "  add " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::AddRI:
-      os << "  add " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", #"
-         << ops.at(2).GetImm() << "\n";
-      break;
-    case Opcode::SubRR:
-      os << "  sub " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::SubRI:
-      os << "  sub " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", #"
-         << ops.at(2).GetImm() << "\n";
-      break;
-    case Opcode::MulRR:
-      os << "  mul " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::SDivRR:
-      os << "  sdiv " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;      
-    case Opcode::UDivRR:
-      os << "  udiv " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::FAddRR:
-      os << "  fadd " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::FSubRR:
-      os << "  fsub " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::FMulRR:
-      os << "  fmul " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::FDivRR:
-      os << "  fdiv " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break; 
-    case Opcode::CmpRR:
-      os << "  cmp " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << "\n";
-      break;
-    case Opcode::CmpRI:
-      os << "  cmp " << PhysRegName(ops.at(0).GetReg()) << ", #"
-         << ops.at(1).GetImm() << "\n";
+    case Opcode::AddRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  add " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::AddRI: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  add " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", #"
+         << nops[2].GetImm() << "\n";
      break;
-    case Opcode::FCmpRR:
-      os << "  fcmp " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << "\n";
+    }
+    case Opcode::SubRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  sub " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
      break;
-    case Opcode::SIToFP:
-      os << "  scvtf " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << "\n";
+    }
+    case Opcode::SubRI: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  sub " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", #"
+         << nops[2].GetImm() << "\n";
      break;
-    case Opcode::FPToSI:
-      os << "  fcvtzs " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << "\n";
+    }
+    case Opcode::MulRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  mul " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
      break;
+    }
+    case Opcode::SDivRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  sdiv " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::UDivRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  udiv " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::FAddRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  fadd " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::FSubRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  fsub " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::FMulRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  fmul " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::FDivRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  fdiv " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::CmpRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  cmp " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::CmpRI: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  cmp " << PhysRegName(nops[0].GetReg()) << ", #"
+         << nops[1].GetImm() << "\n";
+      break;
+    }
+    case Opcode::FCmpRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  fcmp " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::SIToFP: {
+      PhysReg dst = ops.at(0).GetReg();
+      PhysReg src = ops.at(1).GetReg();
+      if (!IsWReg(src)) src = ToW(src);
+      os << "  scvtf " << PhysRegName(dst) << ", " << PhysRegName(src) << "\n";
+      break;
+    }
+    case Opcode::FPToSI: {
+      PhysReg dst = ops.at(0).GetReg();
+      PhysReg src = ops.at(1).GetReg();
+      if (!IsWReg(dst)) dst = ToW(dst);
+      os << "  fcvtzs " << PhysRegName(dst) << ", " << PhysRegName(src) << "\n";
+      break;
+    }
    case Opcode::ZExt:
      os << "  and " << PhysRegName(ops.at(0).GetReg()) << ", "
         << PhysRegName(ops.at(1).GetReg()) << ", #1\n";
      break;
-    case Opcode::AndRR:
-      os << "  and " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::OrRR:
-      os << "  orr " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::EorRR:
-      os << "  eor " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::LslRR:
-      os << "  lsl " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::LsrRR:
-      os << "  lsr " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
-      break;
-    case Opcode::AsrRR:
-      os << "  asr " << PhysRegName(ops.at(0).GetReg()) << ", "
-         << PhysRegName(ops.at(1).GetReg()) << ", "
-         << PhysRegName(ops.at(2).GetReg()) << "\n";
+    case Opcode::AndRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  and " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::OrRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  orr " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::EorRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  eor " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::LslRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  lsl " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
+    case Opcode::LsrRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  lsr " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
      break;
+    }
+    case Opcode::AsrRR: {
+      std::vector<Operand> nops = ops;
+      NormalizeRegOps(nops, nops[0].GetReg());
+      os << "  asr " << PhysRegName(nops[0].GetReg()) << ", "
+         << PhysRegName(nops[1].GetReg()) << ", "
+         << PhysRegName(nops[2].GetReg()) << "\n";
+      break;
+    }
    case Opcode::B:
      os << "  b ";
      PrintOperand(os, ops.at(0));
@ -345,8 +477,8 @@ void PrintInstruction(std::ostream& os, const MachineInstr& instr,
      break;
    case Opcode::LoadStackAddr: {
        const FrameSlot& slot = GetFrameSlot(function, ops.at(1));
-        int64_t offset = slot.offset;          // 负值，如 -8
-        PhysReg dst = ops.at(0).GetReg();
+        int64_t offset = slot.offset;
+        PhysReg dst = ToX(ops.at(0).GetReg());  // 地址必须是 X 寄存器

        auto tryEmitSimple = [&]() -> bool {
            if (offset >= 0 && offset <= 4095) {
@ -384,10 +516,15 @@ void PrintInstruction(std::ostream& os, const MachineInstr& instr,
          << ops.at(2).GetLabel() << "\n";
        break;
    }
-    case Opcode::Sxtw:
-      os << "  sxtw " << PhysRegName(ops.at(0).GetReg()) << ", "
-        << PhysRegName(ops.at(1).GetReg()) << "\n";
+    case Opcode::Sxtw: {
+      PhysReg dst = ops.at(0).GetReg();
+      PhysReg src = ops.at(1).GetReg();
+      // sxtw 要求 X 目标，W 源
+      if (!IsXReg(dst)) dst = ToX(dst);
+      if (!IsWReg(src)) src = ToW(src);
+      os << "  sxtw " << PhysRegName(dst) << ", " << PhysRegName(src) << "\n";
      break;
+    }
    default:
        os << "  // unknown instruction\n";
        break;
--- a/src/mir/FrameLowering.cpp
+++ b/src/mir/FrameLowering.cpp
@ -1,19 +1,11 @@
 #include "mir/MIR.h"

+#include <algorithm>
 #include <stdexcept>
 #include <vector>

 #include "utils/Log.h"

-//#define DEBUG_Frame
-
-#ifdef DEBUG_Frame
-#include <iostream>
-#define DEBUG_MSG(msg) std::cerr << "[Frame Debug] " << msg << std::endl
-#else
-#define DEBUG_MSG(msg)
-#endif
-
 namespace mir {
 namespace {

@ -21,10 +13,47 @@ int AlignTo(int value, int align) {
  return ((value + align - 1) / align) * align;
 }

+// One callee-saved register that has to be preserved, paired with the frame
+// slot reserved for it. CollectCalleeSavedSlots produces the ordered list.
+struct CSRegSlot {
+  PhysReg phys_reg;  // register stored after the prologue and reloaded before each return
+  int frame_index;  // index of the frame slot reserved for phys_reg
+  int size;  // slot size in bytes; CollectCalleeSavedSlots records 8 for every entry
+};
+
+// Reserves one frame slot for every callee-saved register the function has
+// marked as used and returns the (register, slot) pairs in a fixed order:
+// X19..X28 first, then S8..S15.
+//
+// A GPR counts as used when either its W view or its X view is in the set; it
+// is always recorded under its X name. Every slot, including the ones for
+// S registers, is created with a size of 8 bytes.
+std::vector<CSRegSlot> CollectCalleeSavedSlots(MachineFunction& function) {
+  const auto& usedRegs = function.GetCalleeSavedRegs();
+  std::vector<CSRegSlot> result;
+
+  // Shifts an enumerator by `offset` positions; relies on the registers of
+  // one bank being numbered consecutively in PhysReg.
+  const auto regAt = [](PhysReg base, int offset) {
+    return static_cast<PhysReg>(static_cast<int>(base) + offset);
+  };
+  // Creates the frame slot and records the pair.
+  const auto reserve = [&](PhysReg saved) {
+    const int frameIndex = function.CreateFrameIndex(8);
+    result.push_back({saved, frameIndex, 8});
+  };
+
+  // Integer bank: X19..X28 / W19..W28.
+  constexpr int kSavedGprCount = 10;
+  for (int n = 0; n < kSavedGprCount; ++n) {
+    const PhysReg xView = regAt(PhysReg::X19, n);
+    const PhysReg wView = regAt(PhysReg::W19, n);
+    if (usedRegs.count(wView) != 0 || usedRegs.count(xView) != 0) {
+      reserve(xView);
+    }
+  }
+
+  // Floating-point bank: S8..S15.
+  constexpr int kSavedFprCount = 8;
+  for (int n = 0; n < kSavedFprCount; ++n) {
+    const PhysReg sView = regAt(PhysReg::S8, n);
+    if (usedRegs.count(sView) != 0) {
+      reserve(sView);
+    }
+  }
+
+  return result;
+}
+
 }  // namespace

 void RunFrameLowering(MachineFunction& function) {
-  DEBUG_MSG("function RunFrameLowering");
+  // 收集 callee-saved 寄存器并分配栈槽
+  auto csSlots = CollectCalleeSavedSlots(function);
+
+  // 计算栈槽偏移
  int cursor = 0;
  for (const auto& slot : function.GetFrameSlots()) {
    cursor += slot.size;
@ -32,26 +61,34 @@ void RunFrameLowering(MachineFunction& function) {
  }
  function.SetFrameSize(AlignTo(cursor, 16));

-  // 基本块
+  // 插入 Prologue / Epilogue
  const auto& blocks = function.GetBasicBlocks();
  bool firstBlock = true;
-  
+
  for (const auto& bb : blocks) {
-    DEBUG_MSG("block");
    auto& insts = bb->GetInstructions();
    std::vector<MachineInstr> lowered;
-    // 输出基本块标签（非第一个基本块）
+
    if (firstBlock) {
-      DEBUG_MSG("empalace Prologue");
-      lowered.emplace_back(Opcode::Prologue); 
+      lowered.emplace_back(Opcode::Prologue);
+
+      // 在 Prologue 后保存 callee-saved 寄存器
+      for (const auto& cs : csSlots) {
+        lowered.emplace_back(Opcode::StoreStack,
+            std::vector<Operand>{Operand::Reg(cs.phys_reg),
+                                 Operand::FrameIndex(cs.frame_index)});
+      }
    }
    firstBlock = false;

-    // 输出基本块中的指令
    for (const auto& inst : insts) {
-      DEBUG_MSG("inst");
      if (inst.GetOpcode() == Opcode::Ret) {
-        DEBUG_MSG("empalace Epilogue");
+        // 在 Epilogue 前恢复 callee-saved 寄存器
+        for (const auto& cs : csSlots) {
+          lowered.emplace_back(Opcode::LoadStack,
+              std::vector<Operand>{Operand::Reg(cs.phys_reg),
+                                   Operand::FrameIndex(cs.frame_index)});
+        }
        lowered.emplace_back(Opcode::Epilogue);
      }
      lowered.push_back(inst);
@ -60,13 +97,10 @@ void RunFrameLowering(MachineFunction& function) {
  }
 }

-// 模块版本的栈帧布局
+// Module-level entry point: runs frame lowering on every function in the module.
 void RunFrameLowering(MachineModule& module) {
-  // 对模块中的每个函数执行栈帧布局
-  DEBUG_MSG("module RunFrameLowering");
   for (auto& func : module.GetFunctions()) {
     RunFrameLowering(*func);
   }
 }

-}  // namespace mir
+}  // namespace mir
--- a/src/mir/Lowering.cpp
+++ b/src/mir/Lowering.cpp
--- a/src/mir/MIRFunction.cpp
+++ b/src/mir/MIRFunction.cpp
@ -30,4 +30,14 @@ const FrameSlot& MachineFunction::GetFrameSlot(int index) const {
  return frame_slots_[index];
 }

+// Creates a frame slot of `size` bytes, remembers it as a spill slot, and
+// returns its frame index.
+int MachineFunction::CreateSpillSlot(int size) {
+  const int slot = CreateFrameIndex(size);
+  spill_slot_indices_.insert(slot);
+  return slot;
+}
+
+// True when `index` was handed out by CreateSpillSlot.
+bool MachineFunction::IsSpillSlot(int index) const {
+  const bool known = spill_slot_indices_.find(index) != spill_slot_indices_.end();
+  return known;
+}
+
 }  // namespace mir
--- a/src/mir/MIRInstr.cpp
+++ b/src/mir/MIRInstr.cpp
@ -9,6 +9,8 @@ Operand::Operand(Kind kind, PhysReg reg, int imm, CondCode cc, const std::string

 Operand Operand::Reg(PhysReg reg) { return Operand(Kind::Reg, reg, 0, CondCode::EQ, ""); }

+// Builds a virtual-register operand. The id is carried in the integer payload
+// (the same field GetVReg() reads back); the register field is only a W0
+// placeholder.
+Operand Operand::VReg(int id) {
+  return Operand(Kind::VReg, PhysReg::W0, id, CondCode::EQ, "");
+}
+
 Operand Operand::Imm(int value) {
  return Operand(Kind::Imm, PhysReg::W0, value, CondCode::EQ, "");
 }
--- a/src/mir/RegAlloc.cpp
+++ b/src/mir/RegAlloc.cpp
@ -1,82 +1,691 @@
 #include "mir/MIR.h"

+#include <algorithm>
+#include <map>
+#include <set>
 #include <stdexcept>
+#include <unordered_map>
+#include <vector>
+#include <queue>
+#include <cmath>

 #include "utils/Log.h"

 namespace mir {
 namespace {

-bool IsAllowedReg(PhysReg reg) {
+// ========== Virtual register classes ==========
+// kInt32 and kInt64 values are both colored from the GPR pool (as the W view
+// and the X view respectively); kFloat32 values are colored from the
+// S-register pool.
+enum class VRegClass { kInt32, kInt64, kFloat32 };
+
+// ========== Live interval ==========
+// A single [start, end] range of instruction indices over which one virtual
+// register is treated as live. Both ends are inclusive in the overlap test
+// used when the interference graph is built.
+struct LiveInterval {
+  int vreg;             // virtual register id
+  int start;            // first instruction index covered
+  int end;              // last instruction index covered
+  VRegClass reg_class;  // register class the vreg must be colored from
+
+  LiveInterval(int v, int s, int e, VRegClass rc)
+      : vreg(v), start(s), end(e), reg_class(rc) {}
+};
+
+// ========== Physical register pools ==========
+// GPR: X19-X28 / W19-W28 (10 physical registers; Xn and Wn are two views of
+// the same register).
+// Note: Int32 and Int64 values share these 10 physical GPRs.
+const PhysReg kGPRPool[] = {
+  PhysReg::X19, PhysReg::X20, PhysReg::X21, PhysReg::X22,
+  PhysReg::X23, PhysReg::X24, PhysReg::X25, PhysReg::X26,
+  PhysReg::X27, PhysReg::X28,
+};
+// Number of entries in kGPRPool; used as the color count of the GPR graph.
+constexpr int kNumGPR = sizeof(kGPRPool) / sizeof(kGPRPool[0]);
+
+// Maps an X register to the W register with the same number.
+// Relies on the X and W enumerators being numbered consecutively from X0 and
+// W0 respectively.
+PhysReg ToWReg(PhysReg xreg) {
+  const int regNumber = static_cast<int>(xreg) - static_cast<int>(PhysReg::X0);
+  const int wValue = static_cast<int>(PhysReg::W0) + regNumber;
+  return static_cast<PhysReg>(wValue);
+}
+
+// Floating-point register pool: S8-S13 are the colors available to kFloat32
+// virtual registers.
+const PhysReg kFPR32Pool[] = {
+  PhysReg::S8, PhysReg::S9, PhysReg::S10, PhysReg::S11,
+  PhysReg::S12, PhysReg::S13,
+};
+// Number of entries in kFPR32Pool; used as the color count of the FPR graph.
+constexpr int kNumFPR32 = sizeof(kFPR32Pool) / sizeof(kFPR32Pool[0]);
+
+// Spill scratch registers: two per register class, selected through
+// GetSpillScratch. None of them appears in the allocatable pools above.
+// NOTE(review): S14/S15 lie in the S8-S15 range that frame lowering treats as
+// callee-saved - confirm they get marked callee-saved wherever they are used
+// as scratch.
+const PhysReg kSpillScratchInt32[] = { PhysReg::W15, PhysReg::W14 };
+const PhysReg kSpillScratchInt64[] = { PhysReg::X15, PhysReg::X14 };
+const PhysReg kSpillScratchFloat[]  = { PhysReg::S15, PhysReg::S14 };
+
+// Returns one of the two spill scratch registers for register class `rc`.
+// Only the parity of `idx` matters: even selects the first entry, odd the
+// second. Falls back to the first Int32 scratch register for an unknown class.
+PhysReg GetSpillScratch(VRegClass rc, int idx) {
+  // `idx % 2` evaluates to -1 for negative odd values and would index before
+  // the start of the table, so reduce the index to 0/1 explicitly.
+  const int which = (idx % 2 != 0) ? 1 : 0;
+  switch (rc) {
+    case VRegClass::kInt32: return kSpillScratchInt32[which];
+    case VRegClass::kInt64: return kSpillScratchInt64[which];
+    case VRegClass::kFloat32: return kSpillScratchFloat[which];
+  }
+  return kSpillScratchInt32[0];
+}
+
+// Reports whether `reg` must be preserved by the callee: the W/X views of
+// registers 19-28 and S8-S15. S16-S31 are deliberately not listed; under
+// AAPCS64 only v8-v15 (low 64 bits) are callee-saved, and frame lowering
+// only ever spills S8-S15.
+bool IsCalleeSaved(PhysReg reg) {
   switch (reg) {
-    case PhysReg::W0:
-    case PhysReg::W8:
-    case PhysReg::W9:
-    case PhysReg::X29:  //FP = X29 帧指针
-    case PhysReg::X30:  //LR = X30 链接寄存器
-    case PhysReg::SP:
+    case PhysReg::W19: case PhysReg::W20: case PhysReg::W21: case PhysReg::W22:
+    case PhysReg::W23: case PhysReg::W24: case PhysReg::W25: case PhysReg::W26:
+    case PhysReg::W27: case PhysReg::W28:
+    case PhysReg::X19: case PhysReg::X20: case PhysReg::X21: case PhysReg::X22:
+    case PhysReg::X23: case PhysReg::X24: case PhysReg::X25: case PhysReg::X26:
+    case PhysReg::X27: case PhysReg::X28:
+    case PhysReg::S8: case PhysReg::S9: case PhysReg::S10: case PhysReg::S11:
+    case PhysReg::S12: case PhysReg::S13: case PhysReg::S14: case PhysReg::S15:
       return true;
+    default: return false;
+  }
+}
+
+// Unified GPR color index: 0-9 for X19/W19 through X28/W28, -1 for any other
+// register. The W and X views of one register map to the same index.
+int GetGPRIndex(PhysReg reg) {
+  const int raw = static_cast<int>(reg);
+
+  const int firstW = static_cast<int>(PhysReg::W19);
+  const int lastW = static_cast<int>(PhysReg::W28);
+  if (firstW <= raw && raw <= lastW) {
+    return raw - firstW;
+  }
+
+  const int firstX = static_cast<int>(PhysReg::X19);
+  const int lastX = static_cast<int>(PhysReg::X28);
+  if (firstX <= raw && raw <= lastX) {
+    return raw - firstX;
+  }
+
+  return -1;
+}
+
+// FPR color index: 0-5 for S8 through S13, -1 for every other register
+// (including S14/S15).
+int GetFPRIndex(PhysReg reg) {
+  const int raw = static_cast<int>(reg);
+  const int first = static_cast<int>(PhysReg::S8);
+  const int last = static_cast<int>(PhysReg::S13);
+  const bool inPool = first <= raw && raw <= last;
+  return inPool ? raw - first : -1;
+}
+
+// ========== Infer the class of a vreg ==========
+// Translates the type recorded on the function for `vreg` into a VRegClass.
+// A vreg with no recorded type is treated as kInt32.
+VRegClass InferVRegClass(int vreg, MachineFunction& function) {
+  if (!function.HasVRegType(vreg)) {
+    return VRegClass::kInt32;
+  }
+
+  const auto recorded = function.GetVRegType(vreg);
+  if (recorded == MachineFunction::VRegType::kFloat32) {
+    return VRegClass::kFloat32;
+  }
+  if (recorded == MachineFunction::VRegType::kInt64) {
+    return VRegClass::kInt64;
+  }
+  return VRegClass::kInt32;
+}
+
+// ========== Instruction numbering ==========
+// Assigns consecutive indices, starting at 0, to every instruction in block
+// order. Fills both directions of the mapping and records, for each block,
+// the index its first instruction receives. An empty block shares that index
+// with the following block, whose entry then overwrites it in blockBoundary.
+void NumberInstructions(MachineFunction& function,
+                        std::unordered_map<MachineInstr*, int>& instrToIdx,
+                        std::vector<MachineInstr*>& idxToInstr,
+                        std::map<int, MachineBasicBlock*>& blockBoundary) {
+  int nextIdx = 0;
+  for (auto& block : function.GetBasicBlocks()) {
+    blockBoundary[nextIdx] = block.get();
+    for (auto& instr : block->GetInstructions()) {
+      MachineInstr* const ptr = &instr;
+      instrToIdx[ptr] = nextIdx;
+      idxToInstr.push_back(ptr);
+      nextIdx += 1;
+    }
+  }
+}
+
+// ========== Live intervals and data-flow information ==========
+// Numbers the instructions, runs a backward liveness analysis over the basic
+// blocks, and returns one LiveInterval per virtual register, ordered by
+// ascending vreg id.
+//
+// Outputs:
+//   liveIn / liveOut       - per-block live-in / live-out vreg sets
+//   instrToIdx / idxToInstr - the instruction numbering (both directions)
+//
+// An interval starts at the smallest and ends at the largest instruction
+// index that mentions the vreg, and is widened to cover every block in which
+// the vreg is live-in or live-out. Returns an empty vector, leaving the
+// outputs untouched, when the function has no blocks.
+std::vector<LiveInterval> ComputeLiveIntervals(
+    MachineFunction& function,
+    std::unordered_map<MachineBasicBlock*, std::set<int>>& liveIn,
+    std::unordered_map<MachineBasicBlock*, std::set<int>>& liveOut,
+    std::unordered_map<MachineInstr*, int>& instrToIdx,
+    std::vector<MachineInstr*>& idxToInstr) {
+
+  const auto& blocks = function.GetBasicBlocks();
+  if (blocks.empty()) return {};
+
+  std::map<int, MachineBasicBlock*> blockBoundary;
+  NumberInstructions(function, instrToIdx, idxToInstr, blockBoundary);
+
+  // Collect every vreg (ordered, so the result is sorted by vreg id).
+  std::set<int> allVRegs;
+  for (auto* inst : idxToInstr) {
+    for (int d : inst->GetDefs()) allVRegs.insert(d);
+    for (int u : inst->GetUses()) allVRegs.insert(u);
+  }
+
+  // Instruction indices at which each vreg is defined or used.
+  std::unordered_map<int, std::set<int>> vregPositions;
+
+  struct BlockInfo {
+    std::set<int> use;  // vregs read before any definition in the block
+    std::set<int> def;  // vregs defined in the block
+    int startIdx;       // index of the first instruction
+    int endIdx;         // index of the last instruction + 1
+  };
+  std::unordered_map<MachineBasicBlock*, BlockInfo> blockInfo;
+
+  for (const auto& bb : blocks) {
+    auto& info = blockInfo[bb.get()];
+    auto& insts = bb->GetInstructions();
+    if (!insts.empty()) {
+      info.startIdx = instrToIdx[&insts.front()];
+      info.endIdx = instrToIdx[&insts.back()] + 1;
+    } else {
+      // (0, 0) marks an empty block; a non-empty block always has endIdx >= 1.
+      info.startIdx = 0;
+      info.endIdx = 0;
+    }
+
+    for (auto& inst : insts) {
+      int pos = instrToIdx[&inst];
+      // Uses first: an instruction reads its operands before it writes its
+      // results, so `v = v op ...` is an upward-exposed use of v unless an
+      // EARLIER instruction of this block already defined it. Recording the
+      // defs first would hide that use and drop v from the block's live-in
+      // set.
+      for (int use : inst.GetUses()) {
+        if (info.def.count(use) == 0) {
+          info.use.insert(use);
+        }
+        vregPositions[use].insert(pos);
+      }
+      for (int def : inst.GetDefs()) {
+        info.def.insert(def);
+        vregPositions[def].insert(pos);
+      }
+    }
+  }
+
+  // Data-flow analysis: iterate to a fixed point, visiting blocks in reverse
+  // order.
+  //   liveOut[b] = union of liveIn[s] over the successors s of b
+  //   liveIn[b]  = use[b] + (liveOut[b] - def[b])
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
+      MachineBasicBlock* bb = it->get();
+      auto& info = blockInfo[bb];
+
+      std::set<int> newLiveOut;
+      for (auto* succ : bb->GetSuccessors()) {
+        for (int v : liveIn[succ]) newLiveOut.insert(v);
+      }
+      if (newLiveOut != liveOut[bb]) {
+        liveOut[bb] = newLiveOut;
+        changed = true;
+      }
+
+      std::set<int> newLiveIn = info.use;
+      for (int v : liveOut[bb]) {
+        if (info.def.count(v) == 0) newLiveIn.insert(v);
+      }
+      if (newLiveIn != liveIn[bb]) {
+        liveIn[bb] = newLiveIn;
+        changed = true;
+      }
+    }
+  }
+
+  // Build the intervals.
+  std::vector<LiveInterval> intervals;
+  for (int vreg : allVRegs) {
+    auto it = vregPositions.find(vreg);
+    if (it == vregPositions.end() || it->second.empty()) continue;
+    int start = *it->second.begin();
+    int end = *it->second.rbegin();
+
+    // Widen to whole blocks wherever the vreg crosses a block boundary.
+    for (const auto& bb : blocks) {
+      auto& info = blockInfo[bb.get()];
+      if (info.startIdx == 0 && info.endIdx == 0) continue;  // empty block
+      bool isLiveIn  = liveIn[bb.get()].count(vreg) != 0;
+      bool isLiveOut = liveOut[bb.get()].count(vreg) != 0;
+      if (isLiveIn || isLiveOut) {
+        if (info.startIdx < start) start = info.startIdx;
+        if (info.endIdx > end) end = info.endIdx;
+      }
+    }
+
+    VRegClass rc = InferVRegClass(vreg, function);
+    intervals.emplace_back(vreg, start, end, rc);
+  }
+
+  return intervals;
+}
+
+// ========== Core data structures for graph coloring ==========
+
+// One node of the interference graph: a virtual register together with its
+// interference edges and bookkeeping for the simplify phase.
+struct IGNode {
+  int vreg;                 // virtual register id
+  VRegClass reg_class;      // decides which register view is picked when coloring
+  std::set<int> neighbors;  // vregs that interfere with this one
+  int degree;               // neighbor count; decremented as neighbors are removed
+  bool removed;             // set once the node has been pushed on the select stack
+  bool is_spill_candidate;
+  double spill_cost;        // lower values are picked first when a spill candidate is needed
+};
+
+// Entry of the select stack produced by Simplify.
+struct StackEntry {
+  int vreg;                 // virtual register removed from the graph
+  bool is_spill_candidate;  // true when it was removed although its degree was still >= k
+};
+
+// Interference graph, built per register-class group:
+//   GPR group: Int32 + Int64 (they share the physical registers)
+//   FPR group: Float32
+struct InterferenceGraph {
+  std::unordered_map<int, IGNode> nodes;  // vreg id -> node
+  std::set<int> remaining;                // vregs not yet removed by Simplify
+  int k;              // number of available colors
+  bool is_gpr;        // true = GPR group (Int32 + Int64), false = FPR group
+};
+
+// ========== Reference counts ==========
+// Counts, per virtual register, how many times it appears in the use lists
+// and def lists of the function's instructions (uses and defs are summed).
+std::unordered_map<int, int> ComputeUseCounts(MachineFunction& function) {
+  std::unordered_map<int, int> refCounts;
+  const auto tally = [&refCounts](const std::vector<int>& vregs) {
+    for (const int v : vregs) {
+      ++refCounts[v];
+    }
+  };
+
+  for (auto& block : function.GetBasicBlocks()) {
+    for (auto& instr : block->GetInstructions()) {
+      tally(instr.GetUses());
+      tally(instr.GetDefs());
+    }
+  }
+  return refCounts;
+}
+
+// ========== Interference graph construction ==========
+// Builds the interference graph for one register-class group.
+//
+//   buildGPR == true  : nodes are the kInt32 and kInt64 vregs. They live in
+//                       one graph because both are colored from the same
+//                       physical GPRs.
+//   buildGPR == false : nodes are the kFloat32 vregs.
+//
+// Two vregs interfere when their live intervals overlap, end points
+// included. Each node's spill cost is its reference count divided by its
+// degree (left undivided for isolated nodes).
+//
+// `liveIn` and `instrToIdx` are kept for interface compatibility; the edges
+// are derived from `intervals` alone.
+InterferenceGraph BuildInterferenceGraph(
+    MachineFunction& function,
+    const std::vector<LiveInterval>& intervals,
+    const std::unordered_map<MachineBasicBlock*, std::set<int>>& liveIn,
+    const std::unordered_map<MachineInstr*, int>& instrToIdx,
+    bool buildGPR) {  // true = GPR graph (Int32 + Int64), false = FPR graph
+  (void)liveIn;
+  (void)instrToIdx;
+
+  InterferenceGraph ig;
+  ig.is_gpr = buildGPR;
+  ig.k = buildGPR ? kNumGPR : kNumFPR32;
+
+  // Intervals of the requested group, keyed by vreg. Looking intervals up
+  // here replaces a linear scan of `intervals` per node and per node pair.
+  // The ordered map keeps the ascending-vreg iteration order.
+  std::map<int, const LiveInterval*> members;
+  for (const auto& iv : intervals) {
+    const bool wanted =
+        buildGPR ? (iv.reg_class == VRegClass::kInt32 ||
+                    iv.reg_class == VRegClass::kInt64)
+                 : (iv.reg_class == VRegClass::kFloat32);
+    if (wanted) {
+      members.emplace(iv.vreg, &iv);
+    }
+  }
+  if (members.empty()) return ig;
+
+  // Create the nodes.
+  const auto useCounts = ComputeUseCounts(function);
+  for (const auto& [vreg, iv] : members) {
+    IGNode node;
+    node.vreg = vreg;
+    node.reg_class = iv->reg_class;
+    node.degree = 0;
+    node.removed = false;
+    node.is_spill_candidate = false;
+    const auto counted = useCounts.find(vreg);
+    node.spill_cost = counted != useCounts.end() ? counted->second : 1.0;
+    ig.nodes[vreg] = std::move(node);
+    ig.remaining.insert(vreg);
+  }
+
+  // Add an edge for every pair of overlapping intervals. [s1,e1] and [s2,e2]
+  // overlap exactly when max(s1,s2) <= min(e1,e2).
+  for (auto first = members.begin(); first != members.end(); ++first) {
+    auto second = first;
+    ++second;
+    for (; second != members.end(); ++second) {
+      const int overlapStart = std::max(first->second->start, second->second->start);
+      const int overlapEnd = std::min(first->second->end, second->second->end);
+      if (overlapStart <= overlapEnd) {
+        ig.nodes[first->first].neighbors.insert(second->first);
+        ig.nodes[second->first].neighbors.insert(first->first);
+      }
+    }
+  }
+
+  // Compute degrees and scale the spill cost by the degree.
+  for (auto& [vreg, node] : ig.nodes) {
+    node.degree = static_cast<int>(node.neighbors.size());
+    if (node.degree > 0) {
+      node.spill_cost = node.spill_cost / node.degree;
+    }
   }
-  return false;
+
+  return ig;
 }

-}  // namespace
-
-//void RunRegAlloc(MachineFunction& function) {
-//  for (const auto& inst : function.GetEntry().GetInstructions()) {
-//    for (const auto& operand : inst.GetOperands()) {
-//      if (operand.GetKind() == Operand::Kind::Reg &&
-//          !IsAllowedReg(operand.GetReg())) {
-//        throw std::runtime_error(FormatError("mir", "寄存器分配失败"));
-//      }
-//    }
-//  }
-//}
-
-// 单函数版本的寄存器分配（原有逻辑）
-void RunRegAlloc(MachineFunction& function) {
-  // 当前仅执行最小一致性检查，不实现真实寄存器分配
-  // Lab3 阶段保持栈槽模型，不需要真实寄存器分配
-  
-  // 检查每个基本块中的指令
+// ========== Simplify phase ==========
+// Empties the graph node by node and returns the removal order (the select
+// stack). Nodes whose current degree is below k are removed first; when none
+// is available, the remaining node with the lowest spill cost is removed and
+// flagged as a spill candidate. Every removal lowers the degree of the
+// neighbors still in the graph. Mutates `ig` (degree, removed, remaining).
+std::vector<StackEntry> Simplify(InterferenceGraph& ig) {
+  std::vector<StackEntry> stack;
+  std::queue<int> lowDegree;
+
+  for (int v : ig.remaining) {
+    if (ig.nodes[v].degree < ig.k) {
+      lowDegree.push(v);
+    }
+  }
+
+  // Takes `v` out of the graph, records it on the stack, and queues every
+  // remaining neighbor whose degree drops below k.
+  auto detach = [&](int v, bool asSpillCandidate) {
+    stack.push_back({v, asSpillCandidate});
+    ig.nodes[v].removed = true;
+    ig.remaining.erase(v);
+
+    for (int u : ig.nodes[v].neighbors) {
+      if (!ig.remaining.count(u) || ig.nodes[u].removed) continue;
+      ig.nodes[u].degree--;
+      if (ig.nodes[u].degree < ig.k) {
+        lowDegree.push(u);
+      }
+    }
+  };
+
+  while (!ig.remaining.empty()) {
+    if (lowDegree.empty()) {
+      // Nothing is trivially colorable: pick the cheapest node to spill.
+      int victim = -1;
+      double lowestCost = 1e300;
+      for (int v : ig.remaining) {
+        if (ig.nodes[v].removed) continue;
+        if (ig.nodes[v].spill_cost < lowestCost) {
+          lowestCost = ig.nodes[v].spill_cost;
+          victim = v;
+        }
+      }
+
+      if (victim < 0) break;
+
+      detach(victim, true);
+      continue;
+    }
+
+    const int v = lowDegree.front();
+    lowDegree.pop();
+    // The queue may hold stale or duplicate entries; skip those.
+    if (!ig.remaining.count(v) || ig.nodes[v].removed) continue;
+    if (ig.nodes[v].degree >= ig.k) continue;
+
+    detach(v, false);
+  }
+
+  return stack;
+}
+
+// ========== Select phase ==========
+// Pops the select stack in reverse order and gives each vreg the lowest color
+// index not used by an already-colored neighbor. Colors are pool indices, so
+// the W and X views of one GPR count as the same color. The index is turned
+// into a register according to the vreg's class:
+//   kInt64 -> Xn, other GPR-group classes -> Wn, FPR group -> Sn.
+// Callee-saved registers that get assigned are reported to `function`.
+//
+// Returns {vreg -> physical register, vregs that received no color}.
+// `origIg` must still contain the complete neighbor sets. `intervals` is
+// kept for interface compatibility; the class comes from the graph nodes.
+std::pair<std::unordered_map<int, PhysReg>, std::set<int>> SelectColors(
+    const InterferenceGraph& origIg,
+    const std::vector<StackEntry>& stack,
+    const std::vector<LiveInterval>& intervals,
+    MachineFunction& function) {
+  (void)intervals;
+
+  std::unordered_map<int, PhysReg> coloring;
+  std::set<int> actualSpills;
+
+  // Pop the stack from the top.
+  for (auto entry = stack.rbegin(); entry != stack.rend(); ++entry) {
+    const int vreg = entry->vreg;
+    const IGNode& node = origIg.nodes.at(vreg);
+
+    // Color indices taken by neighbors that are already colored.
+    std::set<int> usedColors;
+    for (int neighbor : node.neighbors) {
+      const auto colored = coloring.find(neighbor);
+      if (colored == coloring.end()) continue;
+      const int colorIdx = origIg.is_gpr ? GetGPRIndex(colored->second)
+                                         : GetFPRIndex(colored->second);
+      if (colorIdx >= 0) {
+        usedColors.insert(colorIdx);
+      }
+    }
+
+    // First free color, if any.
+    int chosenColor = -1;
+    for (int c = 0; c < origIg.k; ++c) {
+      if (!usedColors.count(c)) {
+        chosenColor = c;
+        break;
+      }
+    }
+
+    if (chosenColor < 0) {
+      actualSpills.insert(vreg);
+      continue;
+    }
+
+    // Pick the register view that matches the vreg's class.
+    const PhysReg physReg =
+        !origIg.is_gpr
+            ? kFPR32Pool[chosenColor]                      // Sn
+            : (node.reg_class == VRegClass::kInt64
+                   ? kGPRPool[chosenColor]                 // Xn
+                   : ToWReg(kGPRPool[chosenColor]));       // Wn
+    coloring[vreg] = physReg;
+    if (IsCalleeSaved(physReg)) {
+      function.MarkCalleeSaved(physReg);
+    }
+  }
+
+  return {std::move(coloring), std::move(actualSpills)};
+}
+
+// ========== Instruction rewriting ==========
+// Replaces every VReg operand with a physical register and inserts spill code.
+//
+// For each instruction of each basic block:
+//   1. every distinct spilled vreg in the instruction's use list is reloaded
+//      from its spill slot (LoadStack) before the instruction;
+//   2. each VReg operand becomes the register found in `vregToPhys`, or a
+//      spill scratch register when the vreg has no assigned register;
+//   3. every distinct spilled vreg in the def list is written back to its
+//      spill slot (StoreStack) after the instruction.
+//
+// The scratch register of a vreg is chosen by one helper for the reload, the
+// operand rewrite and the store, so the store always reads the register the
+// rewritten instruction actually wrote. (Indexing the store by position in the
+// def list disagrees with the operand rewrite whenever a spilled def is also a
+// spilled use at a non-zero position.)
+//
+// Params:
+//   function   - function whose blocks are rewritten in place
+//   vregToPhys - vreg -> assigned physical register
+//   spillSlots - vreg -> frame index of its spill slot
+//   intervals  - live intervals, used only to look up a vreg's register class
+void RewriteInstructions(
+    MachineFunction& function,
+    const std::unordered_map<int, PhysReg>& vregToPhys,
+    const std::unordered_map<int, int>& spillSlots,
+    const std::vector<LiveInterval>& intervals) {
+
+  // Register class of `vreg` as recorded in `intervals`; kInt32 if not found.
+  auto getSpillRC = [&](int vreg) -> VRegClass {
+    for (auto& iv : intervals) {
+      if (iv.vreg == vreg) return iv.reg_class;
+    }
+    return VRegClass::kInt32;
+  };
+
  for (auto& bb : function.GetBasicBlocks()) {
-    for (auto& instr : bb->GetInstructions()) {
-      // 检查指令的操作数是否有效
-      for (const auto& operand : instr.GetOperands()) {
-        switch (operand.GetKind()) {
-          case Operand::Kind::Reg:
-            // 寄存器操作数：检查是否在允许的范围内
-            // 当前使用固定寄存器 w0, w8, w9, s0, s1 等
-            break;
-          case Operand::Kind::FrameIndex:
-            // 栈槽索引：检查是否有效
-            if (operand.GetFrameIndex() < 0 || 
-                operand.GetFrameIndex() >= static_cast<int>(function.GetFrameSlots().size())) {
-              throw std::runtime_error(
-                  FormatError("regalloc", "无效的栈槽索引: " + 
-                              std::to_string(operand.GetFrameIndex())));
+    std::vector<MachineInstr> newInsts;
+    auto& insts = bb->GetInstructions();
+
+    for (auto& inst : insts) {
+      auto& ops = inst.GetOperands();
+      std::vector<int>& defs = inst.GetDefs();
+      std::vector<int>& uses = inst.GetUses();
+
+      // Spilled uses that need a reload, deduplicated, in first-seen order.
+      std::vector<int> spilledUses;
+      {
+        std::set<int> seen;
+        for (int vreg : uses) {
+          if (spillSlots.count(vreg) && seen.insert(vreg).second) {
+            spilledUses.push_back(vreg);
+          }
+        }
+      }
+
+      // Scratch index of `vreg` within this instruction: its position among
+      // the spilled uses, or 0 when it is not a spilled use.
+      // NOTE(review): several def-only spilled vregs in one instruction would
+      // all map to index 0 — confirm no opcode has more than one such def.
+      auto scratchIndexOf = [&](int vreg) -> int {
+        for (size_t si = 0; si < spilledUses.size(); ++si) {
+          if (spilledUses[si] == vreg) return static_cast<int>(si);
+        }
+        return 0;
+      };
+
+      // Insert the reloads in front of the instruction.
+      for (int vreg : spilledUses) {
+        int slot = spillSlots.at(vreg);
+        PhysReg loadReg;
+        auto it = vregToPhys.find(vreg);
+        if (it != vregToPhys.end()) {
+          loadReg = it->second;
+        } else {
+          loadReg = GetSpillScratch(getSpillRC(vreg), scratchIndexOf(vreg));
+        }
+        newInsts.emplace_back(Opcode::LoadStack,
+            std::vector<Operand>{Operand::Reg(loadReg), Operand::FrameIndex(slot)});
+      }
+
+      // Replace VReg operands with physical registers.
+      for (auto& op : ops) {
+        if (op.GetKind() == Operand::Kind::VReg) {
+          int vreg = op.GetVReg();
+          auto it = vregToPhys.find(vreg);
+          if (it != vregToPhys.end()) {
+            op = Operand::Reg(it->second);
+          } else {
+            int idx = 0;
+            if (spillSlots.count(vreg)) {
+              idx = scratchIndexOf(vreg);
            }
-            break;
-          case Operand::Kind::Imm:
-          case Operand::Kind::Cond:
-          case Operand::Kind::Label:
-            // 立即数、条件码、标签不需要检查
-            break;
+            op = Operand::Reg(GetSpillScratch(getSpillRC(vreg), idx));
+          }
+        }
+      }
+
+      newInsts.push_back(inst);
+
+      // Spilled defs that must be stored back, deduplicated.
+      std::vector<int> spilledDefs;
+      {
+        std::set<int> seen;
+        for (int vreg : defs) {
+          if (spillSlots.count(vreg) && seen.insert(vreg).second) {
+            spilledDefs.push_back(vreg);
+          }
+        }
+      }
+      for (int vreg : spilledDefs) {
+        int slot = spillSlots.at(vreg);
+        PhysReg storeReg;
+        auto it = vregToPhys.find(vreg);
+        if (it != vregToPhys.end()) {
+          storeReg = it->second;
+        } else {
+          // Same scratch register the operand rewrite placed this def in.
+          storeReg = GetSpillScratch(getSpillRC(vreg), scratchIndexOf(vreg));
        }
+        newInsts.emplace_back(Opcode::StoreStack,
+            std::vector<Operand>{Operand::Reg(storeReg), Operand::FrameIndex(slot)});
      }
    }
+    insts = std::move(newInsts);
  }
-  
-  // 注意：Lab3 阶段不实现真实寄存器分配
-  // 所有值仍然使用栈槽模型，寄存器仅作为临时计算使用
 }

-// 模块版本的寄存器分配
+// ========== Graph-coloring register allocation driver ==========
+// Allocates registers for one function: computes live intervals, colors the
+// GPR interference graph and then the FPR one, creates a spill slot for every
+// vreg that could not be colored, rewrites the instructions, and finally
+// clears the per-instruction def/use lists.
+void RunGraphColoringRegAlloc(MachineFunction& function) {
+  // 1. Data-flow analysis and live intervals.
+  std::unordered_map<MachineBasicBlock*, std::set<int>> liveIn, liveOut;
+  std::unordered_map<MachineInstr*, int> instrToIdx;
+  std::vector<MachineInstr*> idxToInstr;
+
+  auto intervals = ComputeLiveIntervals(function, liveIn, liveOut, instrToIdx, idxToInstr);
+  if (intervals.empty()) return;
+
+  // 2. Allocation results, shared by both register files.
+  std::unordered_map<int, PhysReg> vregToPhys;
+  std::unordered_map<int, int> spillSlots;
+
+  // Spill-slot size in bytes: 8 for an Int64 vreg of the GPR graph, else 4.
+  auto spillSlotSize = [&](int vreg, bool isGpr) -> int {
+    if (!isGpr) return 4;
+    for (const auto& iv : intervals) {
+      if (iv.vreg == vreg) {
+        return (iv.reg_class == VRegClass::kInt64) ? 8 : 4;
+      }
+    }
+    return 4;
+  };
+
+  // Build, simplify and color one interference graph, then record the
+  // coloring and allocate slots for the vregs that ended up spilled.
+  auto allocateRegisterFile = [&](bool isGpr) {
+    InterferenceGraph ig = BuildInterferenceGraph(
+        function, intervals, liveIn, instrToIdx, isGpr);
+    if (ig.nodes.empty()) return;
+
+    auto stack = Simplify(ig);
+    auto [coloring, spills] = SelectColors(ig, stack, intervals, function);
+    for (auto& [vreg, phys] : coloring) {
+      vregToPhys[vreg] = phys;
+    }
+    for (int vreg : spills) {
+      if (spillSlots.count(vreg)) continue;
+      int slot = function.CreateSpillSlot(spillSlotSize(vreg, isGpr));
+      spillSlots[vreg] = slot;
+    }
+  };
+
+  // 3. GPR interference graph (Int32 + Int64 share the physical registers).
+  allocateRegisterFile(true);
+
+  // 4. FPR interference graph (Float32).
+  allocateRegisterFile(false);
+
+  // 5. Rewrite the instructions.
+  RewriteInstructions(function, vregToPhys, spillSlots, intervals);
+
+  // 6. Drop the def/use annotations.
+  for (auto& bb : function.GetBasicBlocks()) {
+    for (auto& inst : bb->GetInstructions()) {
+      inst.GetDefs().clear();
+      inst.GetUses().clear();
+    }
+  }
+}
+
+} // namespace
+
+// ========== Module entry point: runs graph-coloring register allocation on every function of the module ==========
 void RunRegAlloc(MachineModule& module) {
-  // 对模块中的每个函数执行寄存器分配
  for (auto& func : module.GetFunctions()) {
-    RunRegAlloc(*func);
+    RunGraphColoringRegAlloc(*func);
  }
 }

-}  // namespace mir
+} // namespace mir
--- a/src/mir/Register.cpp
+++ b/src/mir/Register.cpp
@ -83,11 +83,35 @@ const char* PhysRegName(PhysReg reg) {
    case PhysReg::S5: return "s5";
    case PhysReg::S6: return "s6";
    case PhysReg::S7: return "s7";
-    
+    case PhysReg::S8: return "s8";
+    case PhysReg::S9: return "s9";
+    case PhysReg::S10: return "s10";
+    case PhysReg::S11: return "s11";
+    case PhysReg::S12: return "s12";
+    case PhysReg::S13: return "s13";
+    case PhysReg::S14: return "s14";
+    case PhysReg::S15: return "s15";
+    case PhysReg::S16: return "s16";
+    case PhysReg::S17: return "s17";
+    case PhysReg::S18: return "s18";
+    case PhysReg::S19: return "s19";
+    case PhysReg::S20: return "s20";
+    case PhysReg::S21: return "s21";
+    case PhysReg::S22: return "s22";
+    case PhysReg::S23: return "s23";
+    case PhysReg::S24: return "s24";
+    case PhysReg::S25: return "s25";
+    case PhysReg::S26: return "s26";
+    case PhysReg::S27: return "s27";
+    case PhysReg::S28: return "s28";
+    case PhysReg::S29: return "s29";
+    case PhysReg::S30: return "s30";
+    case PhysReg::S31: return "s31";
+
    // 特殊寄存器
    case PhysReg::SP: return "sp";
    case PhysReg::ZR: return "xzr";
-    
+
    default: return "unknown";
  }
  throw std::runtime_error(FormatError("mir", "未知物理寄存器"));
--- a/src/mir/passes/PassManager.cpp
+++ b/src/mir/passes/PassManager.cpp
@ -1,4 +1,16 @@
 // MIR Pass 管理：
-// - 组织后端 pass 的运行顺序（PreRA/PostRA/PEI 等阶段）
-// - 统一运行 pass 与调试输出（按需要扩展）
+// - 组织后端 pass 的运行顺序
+// - 统一运行 pass 与调试输出

+#include "mir/MIR.h"
+
+namespace mir {
+
+void RunPeephole(MachineModule& module);
+
+void RunMIRPasses(MachineModule& module) {  // Runs the MIR passes on the module; only the peephole pass is invoked here.
+  // Peephole: local optimization after register allocation (RA).
+  RunPeephole(module);
+}
+
+}  // namespace mir
--- a/src/mir/passes/Peephole.cpp
+++ b/src/mir/passes/Peephole.cpp
@ -1,4 +1,197 @@
 // 窥孔优化（Peephole）：
 // - 删除冗余 move、合并常见指令模式
-// - 提升最终汇编质量（按实现范围裁剪）
+// - 提升最终汇编质量

+#include "mir/MIR.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "utils/Log.h"
+
+namespace mir {
+namespace {
+
+// Returns true when the instruction has side effects (is not a pure
+// computation): calls, returns, branches, stack stores, prologue/epilogue.
+bool HasSideEffects(const MachineInstr& inst) {
+  const Opcode opcode = inst.GetOpcode();
+  return opcode == Opcode::Call ||
+         opcode == Opcode::Ret ||
+         opcode == Opcode::B ||
+         opcode == Opcode::BCond ||
+         opcode == Opcode::StoreStack ||
+         opcode == Opcode::StoreStackPair ||
+         opcode == Opcode::Prologue ||
+         opcode == Opcode::Epilogue;
+}
+
+// Returns true when the instruction is a plain register move (opcode MovReg).
+bool IsPureMove(const MachineInstr& inst) {
+  return inst.GetOpcode() == Opcode::MovReg;
+}
+
+// Returns true when any operand of the instruction is the physical register
+// `reg` (every operand position is inspected, including the first one).
+bool InstUsesReg(const MachineInstr& inst, PhysReg reg) {
+  const auto& operands = inst.GetOperands();
+  return std::any_of(operands.begin(), operands.end(),
+                     [reg](const Operand& operand) {
+                       return operand.GetKind() == Operand::Kind::Reg &&
+                              operand.GetReg() == reg;
+                     });
+}
+
+// Returns true when the instruction's first operand is the physical register
+// `reg`. The first operand is treated as the destination; the pair formats
+// (StoreStackPair / LoadStackPair) get no special handling here.
+bool InstDefsReg(const MachineInstr& inst, PhysReg reg) {
+  const auto& operands = inst.GetOperands();
+  if (operands.empty()) {
+    return false;
+  }
+  const auto& dest = operands.front();
+  return dest.GetKind() == Operand::Kind::Reg && dest.GetReg() == reg;
+}
+
+// Returns true for an identity operation: an AddRI or SubRI with at least
+// three operands whose third operand is the immediate 0.
+bool IsIdentityOp(const MachineInstr& inst) {
+  const Opcode opcode = inst.GetOpcode();
+  if (opcode != Opcode::AddRI && opcode != Opcode::SubRI) {
+    return false;
+  }
+
+  const auto& operands = inst.GetOperands();
+  if (operands.size() < 3) {
+    return false;
+  }
+
+  const auto& amount = operands[2];
+  return amount.GetKind() == Operand::Kind::Imm && amount.GetImm() == 0;
+}
+
+// Returns true when both instructions carry a FrameIndex as their second
+// operand and the two frame indices are equal.
+bool IsSameStackOffset(const MachineInstr& a, const MachineInstr& b) {
+  const auto& lhsOps = a.GetOperands();
+  const auto& rhsOps = b.GetOperands();
+  if (lhsOps.size() < 2 || rhsOps.size() < 2) {
+    return false;
+  }
+
+  const auto& lhsSlot = lhsOps[1];
+  const auto& rhsSlot = rhsOps[1];
+  const bool bothFrameIndex =
+      lhsSlot.GetKind() == Operand::Kind::FrameIndex &&
+      rhsSlot.GetKind() == Operand::Kind::FrameIndex;
+  return bothFrameIndex && lhsSlot.GetFrameIndex() == rhsSlot.GetFrameIndex();
+}
+
+// Peephole optimization of one basic block, repeated until a full pass makes
+// no rewrite. Rules, tried in this order on each instruction:
+//   1. add/sub rD, rS, #0             -> mov rD, rS
+//   2. mov rA, rA                     -> removed
+//   3. mov rA, rB ; mov rA, rC        -> first mov removed, but only when the
+//                                        second mov does not read rA
+//   4. store rA, [slot] ; load rB, [slot] -> store kept, load becomes mov rB, rA
+// Operand-less Nop instructions are dropped without being counted.
+// Returns the total number of rewrites applied.
+int PeepholeBlock(MachineBasicBlock& bb) {
+  auto& insts = bb.GetInstructions();
+  int changes = 0;
+  bool changed = true;
+
+  // Iterate to a fixed point: one rewrite can expose another.
+  while (changed) {
+    changed = false;
+    const size_t n = insts.size();
+    std::vector<MachineInstr> newInsts;
+    newInsts.reserve(n);
+
+    for (size_t i = 0; i < n; ++i) {
+      MachineInstr& curr = insts[i];
+      const auto& currOps = curr.GetOperands();
+
+      // Operand-less Nop: treated as already deleted.
+      if (curr.GetOpcode() == Opcode::Nop && currOps.empty()) {
+        continue;
+      }
+
+      // --- Rule 1: identity add/sub ..., #0 -> mov ---
+      if (IsIdentityOp(curr)) {
+        const auto& dst = currOps[0];
+        const auto& src = currOps[1];
+        if (dst.GetKind() == Operand::Kind::Reg && src.GetKind() == Operand::Kind::Reg) {
+          MachineInstr mov(Opcode::MovReg,
+              std::vector<Operand>{dst, Operand::Reg(src.GetReg())});
+          newInsts.push_back(mov);
+          changed = true;
+          ++changes;
+          continue;
+        }
+      }
+
+      // --- Rule 2: mov rA, rA -> removed (self move) ---
+      if (IsPureMove(curr) && currOps.size() >= 2) {
+        const auto& dst = currOps[0];
+        const auto& src = currOps[1];
+        if (dst.GetKind() == Operand::Kind::Reg && src.GetKind() == Operand::Kind::Reg &&
+            dst.GetReg() == src.GetReg()) {
+          changed = true;
+          ++changes;
+          continue;  // drop it
+        }
+      }
+
+      // --- Rule 3: dead mov overwritten by the next mov ---
+      // mov rA, rB ; mov rA, rC -> drop the first one. The first mov is NOT
+      // dead when the second one reads rA (e.g. mov rA, rB ; mov rA, rA):
+      // dropping it there would lose the value of rB.
+      // NOTE(review): only an exact PhysReg match is checked; if the Wn and Xn
+      // views of one register are distinct PhysReg values, a read through the
+      // other view is not detected — confirm against the PhysReg definition.
+      if (IsPureMove(curr) && !currOps.empty() && i + 1 < n) {
+        const auto& dst0 = currOps[0];
+        MachineInstr& next = insts[i + 1];
+        const auto& nextOps = next.GetOperands();
+        if (IsPureMove(next) && nextOps.size() >= 2) {
+          const auto& dst1 = nextOps[0];
+          const auto& src1 = nextOps[1];
+          const bool sameDst = dst0.GetKind() == Operand::Kind::Reg &&
+                               dst1.GetKind() == Operand::Kind::Reg &&
+                               dst0.GetReg() == dst1.GetReg();
+          const bool nextReadsDst = sameDst &&
+                                    src1.GetKind() == Operand::Kind::Reg &&
+                                    src1.GetReg() == dst0.GetReg();
+          if (sameDst && !nextReadsDst) {
+            // The two movs are adjacent, so nothing in between can read rA.
+            changed = true;
+            ++changes;
+            continue;  // drop the first mov
+          }
+        }
+      }
+
+      // --- Rule 4: load right after a store to the same slot ---
+      // store rA, [slot] ; load rB, [slot] -> store rA, [slot] ; mov rB, rA
+      if (curr.GetOpcode() == Opcode::StoreStack && i + 1 < n) {
+        MachineInstr& next = insts[i + 1];
+        // IsSameStackOffset also guarantees both have at least two operands.
+        if (next.GetOpcode() == Opcode::LoadStack &&
+            IsSameStackOffset(curr, next)) {
+          const auto& storeVal = currOps[0];
+          const auto& loadDst = next.GetOperands()[0];
+          if (storeVal.GetKind() == Operand::Kind::Reg &&
+              loadDst.GetKind() == Operand::Kind::Reg) {
+            MachineInstr mov(Opcode::MovReg,
+                std::vector<Operand>{loadDst, Operand::Reg(storeVal.GetReg())});
+            newInsts.push_back(curr);  // keep the store
+            newInsts.push_back(mov);   // mov replaces the load
+            ++i;  // skip `next`
+            changed = true;
+            ++changes;
+            continue;
+          }
+        }
+      }
+
+      newInsts.push_back(curr);
+    }
+
+    insts = std::move(newInsts);
+  }
+
+  return changes;
+}
+
+}  // namespace
+
+// ========== RunPeephole (module version): applies PeepholeBlock to every basic block of every function ==========
+void RunPeephole(MachineModule& module) {
+  int totalChanges = 0;  // sum of PeepholeBlock results; accumulated but not read afterwards
+  for (auto& func : module.GetFunctions()) {
+    for (auto& bb : func->GetBasicBlocks()) {
+      totalChanges += PeepholeBlock(*bb);
+    }
+  }
+}
+
+}  // namespace mir
Author	SHA1	Message	Date
ftt	d20639d4ba	线性扫描改图着色	3 days ago
mxr	e73f7cc871	feat(ra)进行不同版本的性能比较	4 days ago
mxr	8ece3ac163	feat(ra)通过测试	4 days ago
mxr	0d170d1af8	feat(ra)初步实现功能	6 days ago