feat(ra)后端优化

2 days ago · 5b8303df6e
parent d20639d4ba
commit 5b8303df6e
3 changed files with 157 additions and 39 deletions
--- a/src/mir/Lowering.cpp
+++ b/src/mir/Lowering.cpp
@ -170,20 +170,17 @@ int EmitValueToVReg(const ir::Value* value, VRegContext& ctx,
    return vreg;
  }

-  // 浮点常量：每次都走栈槽加载（不缓存）
+  // 浮点常量：通过整数寄存器传位模式至浮点寄存器（fmov Sd, Wn）
  if (auto* fconstant = dynamic_cast<const ir::ConstantFloat*>(value)) {
    float fval = fconstant->GetValue();
    uint32_t bits = FloatToBits(fval);
-    int slot = function.CreateFrameIndex(4);
    int tmp = ctx.NewVReg(VRegType::kInt32);
    EmitMovImm(tmp, bits, block);
-    auto& s = block.Append(Opcode::StoreStack,
-        {Operand::VReg(tmp), Operand::FrameIndex(slot)});
-    s.AddUse(tmp);
    int fvreg = ctx.NewVReg(VRegType::kFloat32);
-    auto& l = block.Append(Opcode::LoadStack,
-        {Operand::VReg(fvreg), Operand::FrameIndex(slot)});
-    l.AddDef(fvreg);
+    auto& mv = block.Append(Opcode::MovReg,
+        {Operand::VReg(fvreg), Operand::VReg(tmp)});
+    mv.AddDef(fvreg);
+    mv.AddUse(tmp);
    return fvreg;
  }

--- a/src/mir/RegAlloc.cpp
+++ b/src/mir/RegAlloc.cpp
@ -329,19 +329,7 @@ InterferenceGraph BuildInterferenceGraph(
    ig.remaining.insert(v);
  }

-  // 构建干涉边：反向遍历指令，正确模拟活跃集合
-  for (const auto& bb : function.GetBasicBlocks()) {
-    auto& insts = bb->GetInstructions();
-    if (insts.empty()) continue;
-
-    // 反向遍历：从liveOut开始
-    std::set<int> live = liveIn.at(bb.get());
-
-    // 注意：我们需要正向检查活跃集合，但用正确的数据流
-    // 更简单的方法：直接用活跃区间重叠来构建边
-  }
-
-  // 用活跃区间重叠构建干涉边（更可靠）
+  // 用活跃区间重叠构建干涉边
  for (auto it1 = vregs.begin(); it1 != vregs.end(); ++it1) {
    auto it2 = it1;
    ++it2;
--- a/src/mir/passes/Peephole.cpp
+++ b/src/mir/passes/Peephole.cpp
@ -6,6 +6,7 @@

 #include <algorithm>
 #include <cstdint>
+#include <memory>
 #include <set>
 #include <string>
 #include <vector>
@ -46,14 +47,29 @@ bool InstUsesReg(const MachineInstr& inst, PhysReg reg) {
  return false;
 }

-// 检查指令是否定义了某个物理寄存器
+// Returns true when a and b refer to the same physical register: Wn and Xn are two views of one register, so a write to either one affects the other.
+bool SamePhysReg(PhysReg a, PhysReg b) {
+  if (a == b) return true;
+  int ai = static_cast<int>(a);
+  int bi = static_cast<int>(b);
+  // Wn <-> Xn mapping: Wn is taken to have value n (0..30) and Xn value 31+n. NOTE(review): assumes this PhysReg enum layout -- confirm against the enum definition.
+  if (ai <= 30 && bi >= 31 && bi <= 61 && (bi - 31) == ai) return true;
+  if (bi <= 30 && ai >= 31 && ai <= 61 && (ai - 31) == bi) return true;
+  return false;
+}
+
+// Checks whether the instruction defines the given physical register via its first operand (StoreStack is excluded: its ops[0] is the value being stored, not a definition).
 bool InstDefsReg(const MachineInstr& inst, PhysReg reg) {
-  // 大多数指令的 dest 是第一个操作数
  if (inst.GetOperands().empty()) return false;
+  auto op = inst.GetOpcode();
+  // For these opcodes ops[0] is not a register definition. NOTE(review): only ops[0] is inspected below, so a second destination (e.g. LoadStackPair) would be missed -- confirm operand layout.
+  if (op == Opcode::StoreStack || op == Opcode::StoreStackPair) return false;
+  if (op == Opcode::B || op == Opcode::BCond || op == Opcode::Ret) return false;
+  if (op == Opcode::CmpRR || op == Opcode::CmpRI || op == Opcode::FCmpRR) return false;
+  if (op == Opcode::Call) return false;  // Call's register effects are handled by the scan loops, which break on Call
  const auto& dst = inst.GetOperands()[0];
-  if (dst.GetKind() == Operand::Kind::Reg && dst.GetReg() == reg)
+  if (dst.GetKind() == Operand::Kind::Reg && SamePhysReg(dst.GetReg(), reg))
    return true;
-  // StoreStackPair / LoadStackPair 有特殊格式
  return false;
 }

@ -85,25 +101,144 @@ bool IsSameStackOffset(const MachineInstr& a, const MachineInstr& b) {
  return false;
 }

-// 单基本块窥孔优化（一次扫描）
+// Returns true for a StoreStack/LoadStack whose address operand (ops[1]) is not a FrameIndex, i.e. an indirect memory access rather than a direct stack-slot access.
+bool IsIndirectMemAccess(const MachineInstr& inst) {
+  auto op = inst.GetOpcode();
+  if (op != Opcode::StoreStack && op != Opcode::LoadStack) return false;  // only these two opcodes are considered
+  const auto& ops = inst.GetOperands();
+  if (ops.size() < 2) return false;  // no address operand to inspect
+  return ops[1].GetKind() != Operand::Kind::FrameIndex;
+}
+
+// 单基本块窥孔优化
 int PeepholeBlock(MachineBasicBlock& bb) {
  auto& insts = bb.GetInstructions();
  int changes = 0;
  bool changed = true;

-  // 迭代直到收敛
  while (changed) {
    changed = false;
-    std::vector<MachineInstr> newInsts;
    size_t n = insts.size();

+    // ============================================================
+    // Phase 1: 前向扫描，识别跨指令的优化模式
+    // 记录替换（位置 → 新指令）和死指令（位置集合）
+    // 使用 vector<unique_ptr> 避免 unordered_map 的默认构造需求
+    // ============================================================
+    std::vector<std::unique_ptr<MachineInstr>> replacements(n);
+    std::set<size_t> dead;
+
    for (size_t i = 0; i < n; ++i) {
+      if (dead.count(i)) continue;
+      MachineInstr& curr = insts[i];
+
+      // --- 栈槽 Store 优化扫描 ---
+      // 从 StoreStack(FrameIndex) 出发，向前查找同一栈槽的下一次访存
+      if (curr.GetOpcode() == Opcode::StoreStack &&
+          curr.GetOperands().size() >= 2 &&
+          curr.GetOperands()[1].GetKind() == Operand::Kind::FrameIndex) {
+
+        int slot = curr.GetOperands()[1].GetFrameIndex();
+        PhysReg storeVal = curr.GetOperands()[0].GetReg();
+
+        for (size_t j = i + 1; j < n; ++j) {
+          if (dead.count(j)) continue;
+          MachineInstr& later = insts[j];
+
+          // Call 可能访问任意内存，停止扫描
+          if (later.GetOpcode() == Opcode::Call) break;
+
+          // Label 代表其他控制流路径的入口，跨 label 优化不安全
+          if (later.GetOpcode() == Opcode::Label) break;
+
+          // 间接访存（通过指针）可能别名任意栈槽，停止扫描
+          if (IsIndirectMemAccess(later)) break;
+
+          // 被存储的值若被重定义，停止扫描（转发将使用错误的新值）
+          if (InstDefsReg(later, storeVal)) break;
+
+          // --- 同一栈槽的 Store：当前 store 是死的 ---
+          if (later.GetOpcode() == Opcode::StoreStack &&
+              later.GetOperands().size() >= 2 &&
+              later.GetOperands()[1].GetKind() == Operand::Kind::FrameIndex &&
+              later.GetOperands()[1].GetFrameIndex() == slot) {
+            // 在下次读到 slot n 之前又被写了一次，当前 store 被覆盖
+            dead.insert(i);
+            changed = true;
+            ++changes;
+            break;
+          }
+
+          // --- Load from the same slot: the stored value is read, so the scan always stops here ---
+          // A non-adjacent load is forwarded as a register move; an adjacent one (j == i+1) is left to rule 4 in Phase 2.
+          if (later.GetOpcode() == Opcode::LoadStack &&
+              later.GetOperands().size() >= 2 &&
+              later.GetOperands()[1].GetKind() == Operand::Kind::FrameIndex &&
+              later.GetOperands()[1].GetFrameIndex() == slot) {
+            if (j == i + 1) break;  // scanning on could mark this store dead while the adjacent load still reads the slot
+            PhysReg loadDst = later.GetOperands()[0].GetReg();
+            replacements[j] = std::unique_ptr<MachineInstr>(
+                new MachineInstr(Opcode::MovReg,
+                    std::vector<Operand>{Operand::Reg(loadDst), Operand::Reg(storeVal)}));
+            changed = true;
+            ++changes;
+            break;
+          }
+        }
+      }
+
+      // --- MovReg 转发链扫描 ---
+      // 从 mov A, B 出发，查找后续 mov C, A 并替换为 mov C, B
+      if (IsPureMove(curr) && curr.GetOperands().size() >= 2) {
+        PhysReg dstA = curr.GetOperands()[0].GetReg();
+        PhysReg srcB = curr.GetOperands()[1].GetReg();
+        if (dstA == srcB) continue;  // 自赋值由规则2处理
+
+        for (size_t j = i + 1; j < n; ++j) {
+          if (dead.count(j)) continue;
+          MachineInstr& later = insts[j];
+
+          // Call 可能修改寄存器，停止
+          if (later.GetOpcode() == Opcode::Call) break;
+
+          // Label 代表其他控制流路径的入口，跨 label 优化不安全
+          if (later.GetOpcode() == Opcode::Label) break;
+
+          // A 或 B 被重定义，停止（转发将不再安全）
+          if (InstDefsReg(later, dstA)) break;
+          if (InstDefsReg(later, srcB)) break;
+
+          // 找到 mov C, A ？ 替换为 mov C, B
+          if (IsPureMove(later) && later.GetOperands().size() >= 2) {
+            PhysReg dstC = later.GetOperands()[0].GetReg();
+            PhysReg src = later.GetOperands()[1].GetReg();
+            if (src == dstA && dstC != dstA) {
+              replacements[j] = std::unique_ptr<MachineInstr>(
+                  new MachineInstr(Opcode::MovReg,
+                      std::vector<Operand>{Operand::Reg(dstC), Operand::Reg(srcB)}));
+              changed = true;
+              ++changes;
+              // 不 break — 继续扫描同一 mov 的其他 use
+            }
+          }
+        }
+      }
+    }
+
+    // ============================================================
+    // Phase 2: 构建新指令列表，应用现有规则 + Phase 1 结果
+    // ============================================================
+    std::vector<MachineInstr> newInsts;
+
+    for (size_t i = 0; i < n; ++i) {
+      if (dead.count(i)) continue;
+
      MachineInstr& curr = insts[i];

-      // 跳过已标记删除的指令（通过空操作码）
-      if (curr.GetOpcode() == Opcode::Nop && curr.GetOperands().empty()) {
-        // 跳过（已经是 nop 但被标记删除）
-        if (curr.GetOperands().empty()) continue;
+      // 如果有替代指令，使用替代版本
+      if (replacements[i]) {
+        newInsts.push_back(*replacements[i]);
+        continue;
      }

      // --- 规则1: 恒等操作消除 add/sub ..., #0 → mov ---
@ -133,8 +268,8 @@ int PeepholeBlock(MachineBasicBlock& bb) {
      }

      // --- 规则3: 冗余 mov → 删除第一条 ---
-      // mov wA, wB; mov wA, wC → 删除第一条（如果中间无其他使用 wA）
-      if (IsPureMove(curr) && i + 1 < n) {
+      // mov wA, wB; mov wA, wC → 删除第一条
+      if (IsPureMove(curr) && i + 1 < n && !dead.count(i + 1)) {
        const auto& dst0 = curr.GetOperands()[0];
        MachineInstr& next = insts[i + 1];
        if (IsPureMove(next) && next.GetOperands().size() >= 2) {
@ -142,8 +277,6 @@ int PeepholeBlock(MachineBasicBlock& bb) {
          if (dst0.GetKind() == Operand::Kind::Reg &&
              dst1.GetKind() == Operand::Kind::Reg &&
              dst0.GetReg() == dst1.GetReg()) {
-            // 第一条 mov 的 dest 在第一条之后、第二条之前没有被使用
-            // （两条相邻，中间无其他指令）
            changed = true;
            ++changes;
            continue;  // 删除第一条
@ -151,9 +284,9 @@ int PeepholeBlock(MachineBasicBlock& bb) {
        }
      }

-      // --- 规则4: Load after Store 消除 ---
+      // --- 规则4: Load after Store 消除（相邻） ---
      // stur wA, [x29, #n]; ldur wB, [x29, #n] → mov wB, wA
-      if (curr.GetOpcode() == Opcode::StoreStack && i + 1 < n) {
+      if (curr.GetOpcode() == Opcode::StoreStack && i + 1 < n && !dead.count(i + 1)) {
        MachineInstr& next = insts[i + 1];
        if (next.GetOpcode() == Opcode::LoadStack &&
            IsSameStackOffset(curr, next)) {
@ -194,4 +327,4 @@ void RunPeephole(MachineModule& module) {
  }
 }

-}  // namespace mir
+}  // namespace mir