From 56b37ac060dae246e19523cd3fe83737cade44b9 Mon Sep 17 00:00:00 2001 From: lzkk <956449176@qq.com> Date: Thu, 28 May 2026 10:05:36 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20=E5=88=A0=E9=99=A4=E6=AD=BB=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E2=80=94=E2=80=94LinearScanAlloc/GreedyAlloc/InstLive?= =?UTF-8?q?ness=EF=BC=88=E5=85=B12072=E8=A1=8C=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 三个文件未被 CMakeLists 引用、无头文件、无 MIR.h 声明、无调用方。 当前唯一寄存器分配器为 RegAlloc.cpp。 --- src/mir/GreedyAlloc.cpp | 791 ------------------------------------ src/mir/InstLiveness.cpp | 547 ------------------------- src/mir/LinearScanAlloc.cpp | 734 --------------------------------- 3 files changed, 2072 deletions(-) delete mode 100644 src/mir/GreedyAlloc.cpp delete mode 100644 src/mir/InstLiveness.cpp delete mode 100644 src/mir/LinearScanAlloc.cpp diff --git a/src/mir/GreedyAlloc.cpp b/src/mir/GreedyAlloc.cpp deleted file mode 100644 index 65290aeb..00000000 --- a/src/mir/GreedyAlloc.cpp +++ /dev/null @@ -1,791 +0,0 @@ -#include "mir/GreedyAlloc.h" -#include "mir/MIR.h" - -#include -#include -#include -#include -#include -#include -#include - -namespace mir -{ -namespace -{ - -// ---- 寄存器可分配集 ---- -// GP: 排除 x0-x7(参数传递), x13-x14(lowering 临时使用), x18(平台寄存器), x29-x30(FP/LR) -// x16-x17 同时作为 spill fallback,但在 spill 路径中通过 phys<0 映射 -constexpr int GP_ALLOCATABLE[] = {8,9,10,11,12,15,16,17,19,20,21,22,23,24,25,26,27,28}; -constexpr int GP_COUNT = 18; -// S0-S1 是参数/返回值寄存器,不可分配;S2-S9 + S16-S31 可分配 -constexpr int FP_ALLOCATABLE[] = {2,3,4,5,6,7,8,9,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; -constexpr int FP_COUNT = 24; -constexpr int MAX_ROUNDS = 3; // LLVM: 通常 1-2 轮即可收敛 - -bool IsCallerSavedGP(int phys_reg) { return phys_reg <= 17; } - -const int* GetRegList(RegClass rc, int& count) -{ - if (rc == RegClass::GPR32 || rc == RegClass::GPR64) - { count = GP_COUNT; return GP_ALLOCATABLE; } - else - { count = FP_COUNT; return FP_ALLOCATABLE; } -} - -// ---- 启发式 spill 权重(LLVM 简化版:Normalise(Σ use_freq) / Length)---- -// LLVM 使用完整的 block frequency 分析;我们使用循环深度作为近似。 -// 堆排序:高 cascade(已被驱逐过的)永远排在低 cascade 之后; -// 同等 cascade 按 spill_weight 降序(堆顶权重最大,优先分配)。 - -// heap 存储 vreg 索引 -// Stage 0 (new): 短活范围优先——弦图完美消除序近似 -// Stage 1+ (deferred/evicted): spill_weight 降序 -struct SpillWeightCmp -{ - const std::vector& intervals; - explicit SpillWeightCmp(const std::vector& ivs) : intervals(ivs) {} - bool operator()(int a, int b) const - { - const auto& la = intervals[a]; - const auto& lb = intervals[b]; - if (la.generation != lb.generation) - return la.generation > lb.generation; - if (la.deferred_count == 0 && lb.deferred_count == 0) - return la.Length() > lb.Length(); - return la.spill_weight < lb.spill_weight; - } -}; - -// ---- def/use 提取 ---- -static bool HasVRegDef(Opcode opcode) -{ - switch (opcode) - { - case Opcode::MovImm: case Opcode::LoadStack: case Opcode::LoadGlobal: - case Opcode::LoadGlobalAddr: case Opcode::LoadStackAddr: case Opcode::LoadMem: - case Opcode::AddRR: case Opcode::SubRR: case Opcode::AddImm: - case Opcode::SubImm: case Opcode::MulRR: case Opcode::DivRR: - case Opcode::ModRR: case Opcode::AndRR: case Opcode::OrRR: - case Opcode::XorRR: case Opcode::ShlRR: case Opcode::ShrRR: - case Opcode::AsrRR: case Opcode::Asr64RR: case Opcode::Uxtw: - case Opcode::Sxtw: case Opcode::CSet: case Opcode::Csel: - case Opcode::Smull: case Opcode::Msub: case Opcode::NegRR: - case Opcode::FAddRR: case Opcode::FSubRR: case Opcode::FMulRR: - case Opcode::FDivRR: case Opcode::Scvtf: case Opcode::FCvtzs: - case Opcode::FMovWS: case Opcode::MovReg: - return true; - default: return false; - } -} - -static void ExtractDefUse(const MachineInstr &inst, int &def_vreg, - std::vector &use_vregs) -{ - def_vreg = -1; - use_vregs.clear(); - const auto &ops = inst.GetOperands(); - const auto opcode = inst.GetOpcode(); - if (HasVRegDef(opcode) && !ops.empty() && - ops[0].GetKind() == Operand::Kind::VReg) - def_vreg = ops[0].GetVRegId(); - for (size_t i = 0; i < ops.size(); ++i) - { - if (HasVRegDef(opcode) && i == 0) continue; - if (ops[i].GetKind() == Operand::Kind::VReg) - use_vregs.push_back(ops[i].GetVRegId()); - } -} - -// ---- 循环深度分析 ---- -std::vector AnalyzeLoopDepth(MachineFunction &func) -{ - auto &blocks = func.GetBlocks(); - int n = (int)blocks.size(); - std::vector depth(n, 0); - std::unordered_map label_to_idx; - for (int i = 0; i < n; ++i) - if (blocks[i]) label_to_idx[blocks[i]->GetLabelId()] = i; - - struct Edge { int src; int dst; }; - std::vector back_edges; - for (int i = 0; i < n; ++i) - { - if (!blocks[i]) continue; - for (auto &inst : blocks[i]->GetInstructions()) - { - int target_label = -1; - auto opcode = inst.GetOpcode(); - if (opcode == Opcode::Br && !inst.GetOperands().empty() && - inst.GetOperands()[0].GetKind() == Operand::Kind::Label) - target_label = inst.GetOperands()[0].GetLabel(); - else if (opcode == Opcode::CondBr && inst.GetOperands().size() >= 2 && - inst.GetOperands()[1].GetKind() == Operand::Kind::Label) - target_label = inst.GetOperands()[1].GetLabel(); - if (target_label < 0) continue; - auto it = label_to_idx.find(target_label); - if (it != label_to_idx.end() && (int)it->second <= i) - back_edges.push_back({i, (int)it->second}); - } - } - - for (auto &be : back_edges) - { - int header = be.dst; - std::unordered_set body; - std::queue q; - q.push(be.src); - while (!q.empty()) - { - int cur = q.front(); q.pop(); - if (cur == header || body.count(cur)) continue; - body.insert(cur); - if (cur > 0 && !body.count(cur - 1)) q.push(cur - 1); - for (int p = 0; p < n; ++p) - { - if (!blocks[p]) continue; - for (auto &inst : blocks[p]->GetInstructions()) - { - int tgt = -1; - if (inst.GetOpcode() == Opcode::Br && !inst.GetOperands().empty() && - inst.GetOperands()[0].GetKind() == Operand::Kind::Label) - tgt = inst.GetOperands()[0].GetLabel(); - else if (inst.GetOpcode() == Opcode::CondBr && - inst.GetOperands().size() >= 2 && - inst.GetOperands()[1].GetKind() == Operand::Kind::Label) - tgt = inst.GetOperands()[1].GetLabel(); - auto it2 = label_to_idx.find(tgt); - if (it2 != label_to_idx.end() && (int)it2->second == cur && !body.count(p)) - q.push(p); - } - } - } - body.insert(header); - int max_existing = 0; - for (int b : body) max_existing = std::max(max_existing, depth[b]); - for (int b : body) depth[b] = std::max(depth[b], max_existing + 1); - } - return depth; -} - -// ---- Spill Weight ---- -void ComputeSpillWeights(std::vector &intervals, - const std::vector &block_depth, - const std::vector &pos_to_block) -{ - for (auto &li : intervals) - { - float w = 0.0f; - for (auto &use : li.uses) - { - int block = (use.pos >= 0 && use.pos < (int)pos_to_block.size()) - ? pos_to_block[use.pos] : 0; - int d = (block >= 0 && block < (int)block_depth.size()) - ? block_depth[block] : 0; - float mult = std::pow(10.0f, (float)d); - if (use.is_def) mult *= 0.5f; - w += mult; - } - li.spill_weight = w / li.Length(); - } -} - -// ---- Copy Hints ---- -void PropagateCopyHints(std::vector &intervals, - MachineFunction &func) -{ - for (auto &block : func.GetBlocks()) - { - if (!block) continue; - for (auto &inst : block->GetInstructions()) - { - if (inst.GetOpcode() != Opcode::MovReg) continue; - auto &ops = inst.GetOperands(); - if (ops.size() < 2) continue; - if (ops[0].GetKind() != Operand::Kind::VReg) continue; - if (ops[1].GetKind() != Operand::Kind::VReg) continue; - int dst = ops[0].GetVRegId(); - int src = ops[1].GetVRegId(); - if (dst < 0 || dst >= (int)intervals.size()) continue; - if (src < 0 || src >= (int)intervals.size()) continue; - if (intervals[src].IsAllocated()) - intervals[dst].hint_reg = intervals[src].assigned_reg; - else if (intervals[dst].IsAllocated()) - intervals[src].hint_reg = intervals[dst].assigned_reg; - else if (intervals[src].hint_reg >= 0) - intervals[dst].hint_reg = intervals[src].hint_reg; - } - } -} - -// ---- TryAssign ---- -bool TryAssign(LiveInterval &li, LiveRegMatrix &m, int hint) -{ - if (hint < 0) return false; - if (IsCallerSavedGP(hint) && li.SegmentCrossesCall()) return false; - if (!m.CheckInterference(li, hint) && m.Assign(&li, hint)) - { - li.assigned_reg = hint; - return true; - } - return false; -} - -// ---- TryAnyFreeReg ---- -bool TryAnyFreeReg(LiveInterval &li, LiveRegMatrix &m) -{ - int n = 0; - const int *regs = GetRegList(li.reg_class, n); - for (int i = 0; i < n; ++i) - { - int r = regs[i]; - if (IsCallerSavedGP(r) && li.SegmentCrossesCall()) continue; - if (!m.CheckInterference(li, r) && m.Assign(&li, r)) - { - li.assigned_reg = r; - return true; - } - } - return false; -} - -// ---- TryEvict(LLVM cascade 驱逐策略)---- -// 只能驱逐 generation 严格更低的冲突 vreg。 -// 驱逐后将 victim 设为相同的 cascade,防止 A→B→A 循环。 -bool TryEvict(LiveInterval &li, LiveRegMatrix &m, - std::vector &heap, - const SpillWeightCmp &cmp) -{ - int best_reg = -1; - float best_weight = 1e9f; - LiveInterval *victim = nullptr; - int n = 0; - const int *regs = GetRegList(li.reg_class, n); - - for (int i = 0; i < n; ++i) - { - int r = regs[i]; - if (IsCallerSavedGP(r) && li.SegmentCrossesCall()) continue; - auto *conflict = m.GetConflict(li, r); - if (!conflict && m.Assign(&li, r)) - { - li.assigned_reg = r; - return true; - } - // LLVM 关键收敛规则:只驱逐 generation 严格更低的 vreg - if (conflict->generation >= li.generation) continue; - if (conflict->spill_weight < best_weight) - { - best_weight = conflict->spill_weight; - best_reg = r; - victim = conflict; - } - } - - if (best_reg < 0 || !victim) return false; - - m.Unassign(victim); - victim->assigned_reg = -1; - victim->generation = li.generation; - heap.push_back(victim->vreg); - std::push_heap(heap.begin(), heap.end(), cmp); - - if (m.Assign(&li, best_reg)) - { - li.assigned_reg = best_reg; - return true; - } - return false; -} - -// ---- TrySplit:在最大使用间隙处分裂(LLVM local split 简化版)---- - // 参考: llvm/lib/CodeGen/RegAllocGreedy.cpp tryLocalSplit() - bool TrySplit(int vreg_idx, LiveRegMatrix &m, - std::vector &heap, - std::vector &intervals, - const std::vector &pos_to_block, - std::vector &spilled, - MachineFunction &func, - const SpillWeightCmp &cmp) - { - auto &li = intervals[vreg_idx]; - if (li.uses.size() < 3) return false; - - std::vector sorted_uses; - for (auto &u : li.uses) sorted_uses.push_back(u.pos); - std::sort(sorted_uses.begin(), sorted_uses.end()); - - int best_gap = 0, split_after = -1; - for (size_t i = 1; i < sorted_uses.size(); ++i) { - int gap = sorted_uses[i] - sorted_uses[i - 1]; - if (gap > best_gap && gap >= 2) { best_gap = gap; split_after = sorted_uses[i - 1]; } - } - if (split_after < 0) return false; - - int hot_start = li.FirstUsePos(), hot_end = split_after; - int cold_start = split_after + 1, cold_end = li.LastUsePos(); - if (hot_end < hot_start || cold_end < cold_start) return false; - - LiveInterval cold; - cold.reg_class = li.reg_class; cold.generation = li.generation + 1; - cold.hint_reg = -1; cold.assigned_reg = -1; - cold.vreg = func.CreateVReg(li.vreg_class); - - for (auto &seg : li.segments) { - if (seg.end < cold_start || seg.start > cold_end) continue; - Segment clipped = seg; - clipped.start = std::max(seg.start, cold_start); - clipped.end = std::min(seg.end, cold_end); - cold.segments.push_back(clipped); - } - for (auto &use : li.uses) - if (cold_start <= use.pos && use.pos <= cold_end) cold.uses.push_back(use); - if (cold.uses.empty()) return false; - - float w = 0.0f; - for (auto &use : cold.uses) { - int blk = (use.pos >= 0 && use.pos < (int)pos_to_block.size()) ? pos_to_block[use.pos] : 0; - float mult = use.is_def ? 0.5f : 1.0f; - w += mult; - } - cold.spill_weight = w / cold.Length(); - int cold_vreg = cold.vreg; - intervals.push_back(std::move(cold)); - - auto &li_safe = intervals[vreg_idx]; - std::vector hot_segs; - for (auto &seg : li_safe.segments) { - if (seg.end < hot_start || seg.start > hot_end) continue; - Segment clipped = seg; - clipped.start = std::max(seg.start, hot_start); - clipped.end = std::min(seg.end, hot_end); - if (clipped.start <= clipped.end) hot_segs.push_back(clipped); - } - li_safe.segments = std::move(hot_segs); - li_safe.uses.erase(std::remove_if(li_safe.uses.begin(), li_safe.uses.end(), - [&](const UsePosition &u) { return u.pos < hot_start || u.pos > hot_end; }), li_safe.uses.end()); - - if (!TryAnyFreeReg(li_safe, m)) { li_safe.assigned_reg = -2; spilled.push_back(vreg_idx); } - auto &cold_ref = intervals[cold_vreg]; - if (!TryAnyFreeReg(cold_ref, m)) { heap.push_back(cold_vreg); std::push_heap(heap.begin(), heap.end(), cmp); } - return true; - } -// ---- 主分配函数:对一类寄存器执行贪婪分配 ---- -// 返回 spilled 数量 -int AllocateRegClass(std::vector &intervals, - RegClass rc, - LiveRegMatrix &matrix, - const std::vector &pos_to_block, - MachineFunction &func, - std::vector &spilled) -{ - SpillWeightCmp cmp(intervals); - std::vector heap; - - for (auto &li : intervals) - { - if (li.vreg < 0) continue; - if (li.reg_class == rc && !li.IsAllocated() && !li.IsSpilled()) - heap.push_back(li.vreg); - } - std::make_heap(heap.begin(), heap.end(), cmp); - - int iter_limit = std::max(1000, (int)heap.size() * 3); - int iterations = 0; - - while (!heap.empty()) - { - if (++iterations > iter_limit) - { - // 安全网:剩余未分配 vreg 标记为 spill,而非留下未分配状态 - for (int vreg : heap) - { - if (intervals[vreg].IsAllocated() || intervals[vreg].IsSpilled()) continue; - intervals[vreg].assigned_reg = -2; - spilled.push_back(vreg); - } - break; - } - - std::pop_heap(heap.begin(), heap.end(), cmp); - int vreg = heap.back(); - heap.pop_back(); - - auto &li = intervals[vreg]; - if (li.IsAllocated() || li.IsSpilled()) continue; - - if (TryAssign(li, matrix, li.hint_reg)) continue; - if (TryAnyFreeReg(li, matrix)) continue; - if (rc == RegClass::GPR32 || rc == RegClass::GPR64) - { - if (TryEvict(li, matrix, heap, cmp)) continue; - } - - // LLVM Defer 机制: 首次分配失败时推迟到下一轮,让更小的范围先分配 - // 参考: llvm/lib/CodeGen/RegAllocGreedy.cpp selectOrSplit() RS_New→RS_Deferred - if (li.deferred_count == 0) - { - li.deferred_count = 1; - heap.push_back(vreg); - std::push_heap(heap.begin(), heap.end(), cmp); - continue; - } - - if (TrySplit(vreg, matrix, heap, intervals, - pos_to_block, spilled, func, cmp)) continue; - - li.assigned_reg = -2; - spilled.push_back(vreg); - } - return (int)spilled.size(); -} - -} // anonymous namespace - -// ---- LiveRegMatrix 方法 ---- - -void LiveRegMatrix::Init(int num_regs) -{ reg_assignments_.assign(num_regs, {}); } - -void LiveRegMatrix::ForceAssign(LiveInterval *li, int phys_reg) -{ - if (phys_reg >= 0 && phys_reg < (int)reg_assignments_.size()) - reg_assignments_[phys_reg].push_back(li); -} - -bool LiveRegMatrix::Assign(LiveInterval *li, int phys_reg) -{ - if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return false; - reg_assignments_[phys_reg].push_back(li); - return true; -} - -void LiveRegMatrix::Unassign(LiveInterval *li) -{ - for (auto &vec : reg_assignments_) - { - auto it = std::find(vec.begin(), vec.end(), li); - if (it != vec.end()) { vec.erase(it); return; } - } -} - -bool LiveRegMatrix::CheckInterference(const LiveInterval &li, int phys_reg) const -{ - if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return true; - for (auto *other : reg_assignments_[phys_reg]) - { - if (other->vreg == li.vreg) continue; - // Wn/Xn 别名:GPR32/GPR64 共享同一物理寄存器,总是冲突 - // LLVM 用 Register Unit 来处理:Wn 和 Xn 占据相同的 unit - // 参考: llvm/lib/CodeGen/LiveRegMatrix.cpp foreachUnit() - bool gpr32_64_alias = - (li.reg_class == RegClass::GPR32 && other->reg_class == RegClass::GPR64) || - (li.reg_class == RegClass::GPR64 && other->reg_class == RegClass::GPR32); - if (gpr32_64_alias) - return true; - for (auto &sa : li.segments) - for (auto &sb : other->segments) - if (sa.Overlaps(sb)) return true; - } - return false; -} - -LiveInterval *LiveRegMatrix::GetConflict(const LiveInterval &li, - int phys_reg) const -{ - if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return nullptr; - for (auto *other : reg_assignments_[phys_reg]) - { - if (other->vreg == li.vreg) continue; - bool gpr32_64_alias = - (li.reg_class == RegClass::GPR32 && other->reg_class == RegClass::GPR64) || - (li.reg_class == RegClass::GPR64 && other->reg_class == RegClass::GPR32); - if (gpr32_64_alias) - return other; - for (auto &sa : li.segments) - for (auto &sb : other->segments) - if (sa.Overlaps(sb)) return other; - } - return nullptr; -} - -bool LiveRegMatrix::CheckInterferenceRange(int start, int end, - int phys_reg) const -{ - if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return true; - Segment range; range.start = start; range.end = end; - for (auto *other : reg_assignments_[phys_reg]) - for (auto &sb : other->segments) - if (range.Overlaps(sb)) return true; - return false; -} - -// ---- 对外入口 ---- -void RunGreedyRegAlloc(MachineFunction &function); -void RunGreedyRegAlloc(MachineModule &module); - -static void AllocateRegistersForFunction(MachineFunction &function) -{ - if (function.GetNumVRegs() == 0) return; - - // ---- 阶段 0:活跃分析 + 预处理 ---- - auto raw = ComputeInstLiveness(function); - auto intervals = EnhanceIntervals(raw, function); - intervals.reserve(function.GetNumVRegs() * 16); - - auto &blocks = function.GetBlocks(); - std::vector pos_to_block; - std::vector block_start_pos(blocks.size(), -1); - int global = 0; - for (int bi = 0; bi < (int)blocks.size(); ++bi) - { - if (!blocks[bi]) continue; - block_start_pos[bi] = global; - int cnt = (int)blocks[bi]->GetInstructions().size(); - for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi); - global += cnt; - } - - auto block_depth = AnalyzeLoopDepth(function); - ComputeSpillWeights(intervals, block_depth, pos_to_block); - PropagateCopyHints(intervals, function); - intervals.reserve(function.GetNumVRegs() * 16); - - // LLVM 风格:全局 cascade 计数器 - int global_cascade = 1; - - // ---- 阶段 1:分配循环 ---- - for (int round = 0; round < MAX_ROUNDS; ++round) - { - // GP 分配(GPR32 + GPR64 共享同一 LiveRegMatrix) - LiveRegMatrix gp_matrix; - gp_matrix.Init(32); - std::vector gp_spilled; - - // 预填充上一轮已分配的 vreg - for (auto &li : intervals) - { - if (li.vreg >= 0 && li.IsAllocated() && - (li.reg_class == RegClass::GPR32 || li.reg_class == RegClass::GPR64)) - gp_matrix.ForceAssign(&li, li.assigned_reg); - } - - AllocateRegClass(intervals, RegClass::GPR32, gp_matrix, - pos_to_block, function, gp_spilled); - AllocateRegClass(intervals, RegClass::GPR64, gp_matrix, - pos_to_block, function, gp_spilled); - - // FP 分配 - LiveRegMatrix fp_matrix; - fp_matrix.Init(32); - std::vector fp_spilled; - - for (auto &li : intervals) - { - if (li.vreg >= 0 && li.IsAllocated() && li.reg_class == RegClass::FPR32) - fp_matrix.ForceAssign(&li, li.assigned_reg); - } - - AllocateRegClass(intervals, RegClass::FPR32, fp_matrix, - pos_to_block, function, fp_spilled); - - auto spilled = gp_spilled; - spilled.insert(spilled.end(), fp_spilled.begin(), fp_spilled.end()); - - if (spilled.empty()) break; - - // ---- 溢出重写(LLVM-style spill rewrite)---- - // LLVM 关键设计:每次 reload 创建新 vreg,让分配器在下一轮分配不同物理寄存器, - // 避免多个溢出 vreg 共享同一回退寄存器导致互相覆盖。 - // 参考: llvm/lib/CodeGen/InlineSpiller.cpp spill()/reload() - for (int spilled_vreg : spilled) - { - auto &li = intervals[spilled_vreg]; - if (li.spill_slot < 0) - { - int size = 4; - if (li.vreg_class == VRegClass::Ptr) size = 8; - li.spill_slot = function.CreateFrameIndex(size); - } - for (int u = (int)li.uses.size() - 1; u >= 0; --u) - { - auto &use = li.uses[u]; - int blk = pos_to_block[use.pos]; - int local = use.pos - block_start_pos[blk]; - if (use.is_def) - { - // def: 在定义后插入 StoreStack,保存值到栈 - blocks[blk]->InsertInst(local + 1, - MachineInstr(Opcode::StoreStack, - {Operand::VReg(li.vreg, li.vreg_class), - Operand::FrameIndex(li.spill_slot)})); - } - else - { - // use: 创建新 vreg,LoadStack 加载到新 vreg,替换使用点 - int new_vreg = function.CreateVReg(li.vreg_class); - blocks[blk]->InsertInst(local, - MachineInstr(Opcode::LoadStack, - {Operand::VReg(new_vreg, li.vreg_class), - Operand::FrameIndex(li.spill_slot)})); - // 在插入点之后搜索使用溢出 vreg 的指令并替换 - auto &instructions = blocks[blk]->GetInstructions(); - for (int idx = local + 1; idx < (int)instructions.size(); ++idx) - { - bool found = false; - for (auto &op : instructions[idx].GetOperands()) - { - if (op.GetKind() == Operand::Kind::VReg && - op.GetVRegId() == li.vreg) - { - op = Operand::VReg(new_vreg, li.vreg_class); - found = true; - } - } - if (found) break; - } - } - } - } - - // ---- 保存已分配状态 ---- - std::unordered_map prev_assigned; - for (auto &li : intervals) - { - if (li.vreg >= 0 && li.IsAllocated()) - prev_assigned[li.vreg] = li.assigned_reg; - else if (li.vreg >= 0 && li.IsSpilled()) - prev_assigned[li.vreg] = -2; // 保持 spill 状态 - } - - // ---- 重新分析活跃 ---- - raw = ComputeInstLiveness(function); - intervals = EnhanceIntervals(raw, function); - intervals.reserve(function.GetNumVRegs() * 16); - - // ---- 重建位置映射 ---- - pos_to_block.clear(); - block_start_pos.assign(blocks.size(), -1); - int new_global = 0; - for (int bi = 0; bi < (int)blocks.size(); ++bi) - { - if (!blocks[bi]) continue; - block_start_pos[bi] = new_global; - int cnt = (int)blocks[bi]->GetInstructions().size(); - for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi); - new_global += cnt; - } - - // ---- 恢复已分配状态 + 递增 cascade ---- - int num_new = 0; - for (auto &li : intervals) - { - auto it = prev_assigned.find(li.vreg); - if (it != prev_assigned.end()) - { - li.assigned_reg = it->second; - // 已分配的保持 cascade - } - else - { - // 新 vreg(由 spill 引入的 LoadStack vreg) - li.assigned_reg = -1; - li.generation = 0; - num_new++; - } - } - - if (num_new > 0) - { - // 只对新 vreg 重新计算 spill weight - ComputeSpillWeights(intervals, block_depth, pos_to_block); - } - PropagateCopyHints(intervals, function); - } - - // ---- 最终:vreg → PhysReg 重写 ---- - for (auto &block : blocks) - { - if (!block) continue; - for (auto &inst : block->GetInstructions()) - { - for (auto &op : inst.GetOperands()) - { - if (op.GetKind() != Operand::Kind::VReg) continue; - int vreg = op.GetVRegId(); - int phys = -1; - if (vreg >= 0 && vreg < (int)intervals.size()) - phys = intervals[vreg].assigned_reg; - if (phys < 0) - { - auto vc = function.GetVRegClass(vreg); - if (vc == VRegClass::Ptr) phys = 47; // X16 - else if (vc == VRegClass::Float) phys = 78; // S16 - else phys = 16; // W16 - } - else - { - if (vreg < function.GetNumVRegs()) - { - auto vc = function.GetVRegClass(vreg); - if (vc == VRegClass::Ptr) - phys = phys + 31; // Wn → Xn (PhysReg 31-61) - else if (vc == VRegClass::Float) - phys = phys + 62; // → Sn (PhysReg 62-93) - // VRegClass::Int 保持原值 → Wn (PhysReg 0-30) - } - } - op = Operand::Reg(static_cast(phys)); - } - } - } - - // ---- 收集使用的 callee-saved 寄存器(LLVM PEI 风格:扫描最终 PhysReg)---- - { - int x19 = static_cast(PhysReg::X19); - int x28 = static_cast(PhysReg::X28); - int w19 = static_cast(PhysReg::W19); - int w28 = static_cast(PhysReg::W28); - int s16 = static_cast(PhysReg::S16); - int s31 = static_cast(PhysReg::S31); - - bool used_x[11] = {}; - bool used_s[16] = {}; - for (auto &block : blocks) - { - if (!block) continue; - for (auto &inst : block->GetInstructions()) - { - for (auto &op : inst.GetOperands()) - { - if (op.GetKind() != Operand::Kind::Reg) continue; - int r = static_cast(op.GetReg()); - if (r >= w19 && r <= w28) - used_x[r - w19] = true; - else if (r >= x19 && r <= x28) - used_x[r - x19] = true; - else if (r >= s16 && r <= s31) - used_s[r - s16] = true; - } - } - } - for (int i = 0; i < 11; ++i) - if (used_x[i]) - function.AddCalleeSavedReg(static_cast(x19 + i)); - for (int i = 0; i < 16; ++i) - if (used_s[i]) - function.AddCalleeSavedReg(static_cast(s16 + i)); - } -} - -void RunGreedyRegAlloc(MachineFunction &function) -{ AllocateRegistersForFunction(function); } - -void RunGreedyRegAlloc(MachineModule &module) -{ - for (auto &func : module.GetFunctions()) - if (func) RunGreedyRegAlloc(*func); -} - -} // namespace mir diff --git a/src/mir/InstLiveness.cpp b/src/mir/InstLiveness.cpp deleted file mode 100644 index db71e64b..00000000 --- a/src/mir/InstLiveness.cpp +++ /dev/null @@ -1,547 +0,0 @@ -#include "mir/MIR.h" - -#include -#include -#include -#include -#include - -#include "utils/Log.h" - -namespace mir -{ - namespace - { - - // ---- Phase 1 helpers ------------------------------------------------- - - /// Return true if opcode has a VReg def (always operands[0]). - static bool HasVRegDef(Opcode opcode) - { - switch (opcode) - { - case Opcode::MovImm: - case Opcode::LoadStack: - case Opcode::LoadGlobal: - case Opcode::LoadGlobalAddr: - case Opcode::LoadStackAddr: - case Opcode::LoadMem: - case Opcode::AddRR: - case Opcode::SubRR: - case Opcode::AddImm: - case Opcode::SubImm: - case Opcode::MulRR: - case Opcode::DivRR: - case Opcode::ModRR: - case Opcode::AndRR: - case Opcode::OrRR: - case Opcode::XorRR: - case Opcode::ShlRR: - case Opcode::ShrRR: - case Opcode::AsrRR: - case Opcode::Asr64RR: - case Opcode::Uxtw: - case Opcode::Sxtw: - case Opcode::CSet: - case Opcode::Csel: - case Opcode::Smull: - case Opcode::Msub: - case Opcode::NegRR: - case Opcode::FAddRR: - case Opcode::FSubRR: - case Opcode::FMulRR: - case Opcode::FDivRR: - case Opcode::Scvtf: - case Opcode::FCvtzs: - case Opcode::FMovWS: - case Opcode::MovReg: - return true; - default: - return false; - } - } - - /// Extract def VReg (operands[0] if VReg) and use VRegs from one instruction. - static void ExtractDefUse(const MachineInstr &inst, int &def_vreg, - std::vector &use_vregs) - { - def_vreg = -1; - use_vregs.clear(); - - const auto &ops = inst.GetOperands(); - const auto opcode = inst.GetOpcode(); - - if (HasVRegDef(opcode) && !ops.empty() && - ops[0].GetKind() == Operand::Kind::VReg) - { - def_vreg = ops[0].GetVRegId(); - } - - // All other VReg operands are uses - for (size_t i = 0; i < ops.size(); ++i) - { - // For def-producing instructions, operands[0] is the def (already handled) - if (HasVRegDef(opcode) && i == 0) - continue; - if (ops[i].GetKind() == Operand::Kind::VReg) - use_vregs.push_back(ops[i].GetVRegId()); - } - } - - } // anonymous namespace -} // namespace mir - -namespace mir -{ - - // ---- Block-level dataflow structures -------------------------------- - - struct BlockLiveInfo - { - std::unordered_set def; - std::unordered_set use; - std::unordered_set live_in; - std::unordered_set live_out; - std::vector successors; // block indices - std::vector predecessors; // block indices - }; - - std::vector ComputeInstLiveness(MachineFunction &func) - { - auto &blocks = func.GetBlocks(); - const int num_blocks = static_cast(blocks.size()); - - // ================================================================ - // Phase 1: Block-level backward liveness (fixpoint iteration) - // ================================================================ - - // 1a. Build label → block-index mapping - std::unordered_map label_to_idx; - for (int i = 0; i < num_blocks; ++i) - { - if (!blocks[i]) - continue; - label_to_idx[blocks[i]->GetLabelId()] = i; - } - - // 1b. Compute per-block def/use + successors - std::vector blk_info(num_blocks); - - for (int i = 0; i < num_blocks; ++i) - { - if (!blocks[i]) - continue; - auto &info = blk_info[i]; - auto &insts = blocks[i]->GetInstructions(); - - for (const auto &inst : insts) - { - int def_vreg; - std::vector use_vregs; - ExtractDefUse(inst, def_vreg, use_vregs); - - // All uses are added first, then def is added. This avoids - // counting "def first, then use" in the same block incorrectly. - for (int u : use_vregs) - { - if (info.def.count(u) == 0) - info.use.insert(u); - } - if (def_vreg >= 0) - { - // A vreg used before being defined in this block stays in use set - if (info.use.count(def_vreg) == 0) - info.def.insert(def_vreg); - } - } - - // ---- Determine successors ---- - bool has_br = false; - bool has_condbr = false; - int br_target_label = -1; - int condbr_target_label = -1; - bool has_ret = false; - - for (const auto &inst : insts) - { - const auto opcode = inst.GetOpcode(); - const auto &ops = inst.GetOperands(); - - if (opcode == Opcode::Br && !ops.empty() && - ops[0].GetKind() == Operand::Kind::Label) - { - has_br = true; - br_target_label = ops[0].GetLabel(); - } - else if (opcode == Opcode::CondBr && ops.size() >= 2 && - ops[1].GetKind() == Operand::Kind::Label) - { - has_condbr = true; - condbr_target_label = ops[1].GetLabel(); - } - else if (opcode == Opcode::Ret) - { - has_ret = true; - } - } - - auto add_succ = [&](int label) - { - auto it = label_to_idx.find(label); - if (it != label_to_idx.end()) - info.successors.push_back(it->second); - }; - - if (has_ret) - { - // No successors — function exit - } - else if (has_br) - { - // Unconditional branch: target covers the only outgoing path. - add_succ(br_target_label); - // If there's also a CondBr, its target is taken when condition is - // true — the Br covers the false path. - if (has_condbr) - add_succ(condbr_target_label); - } - else if (has_condbr) - { - // Conditional branch without Br: true path = target, false path = - // falls through to next block in insertion order. - add_succ(condbr_target_label); - if (i + 1 < num_blocks) - info.successors.push_back(i + 1); - } - else - { - // Ordinary block — falls through to next block. - if (i + 1 < num_blocks) - info.successors.push_back(i + 1); - } - } - - // 1c. Build predecessor lists - for (int i = 0; i < num_blocks; ++i) - { - for (int s : blk_info[i].successors) - { - if (s >= 0 && s < num_blocks) - blk_info[s].predecessors.push_back(i); - } - } - - // 1d. Worklist fixpoint - // Initialise live_in with use sets - for (int i = 0; i < num_blocks; ++i) - { - blk_info[i].live_in = blk_info[i].use; - } - - std::queue worklist; - std::vector in_queue(num_blocks, false); - for (int i = 0; i < num_blocks; ++i) - { - if (blocks[i]) - { - worklist.push(i); - in_queue[i] = true; - } - } - - while (!worklist.empty()) - { - int b = worklist.front(); - worklist.pop(); - in_queue[b] = false; - - // Compute new live_out = union of successors' live_in - std::unordered_set new_live_out; - for (int s : blk_info[b].successors) - { - if (s < 0 || s >= num_blocks) - continue; - for (int v : blk_info[s].live_in) - new_live_out.insert(v); - } - - // Compute new live_in = use ∪ (live_out - def) - std::unordered_set new_live_in = blk_info[b].use; - for (int v : new_live_out) - { - if (blk_info[b].def.count(v) == 0) - new_live_in.insert(v); - } - - if (new_live_in != blk_info[b].live_in) - { - blk_info[b].live_out = std::move(new_live_out); - blk_info[b].live_in = std::move(new_live_in); - - // Enqueue all predecessors (their live_out depends on us) - for (int p : blk_info[b].predecessors) - { - if (!in_queue[p]) - { - in_queue[p] = true; - worklist.push(p); - } - } - } - } - - // ================================================================ - // Phase 2: Instruction-level interval computation (reverse scan) - // ================================================================ - std::unordered_map vreg_start; - std::unordered_map vreg_end; - - // Assign global instruction positions - int global_pos = 0; - // Map global_pos → (block_idx, local_instr_idx) for the reverse scan - struct PosInfo - { - int block_idx; - int instr_count; // number of instructions in this block - }; - std::vector pos_to_block; - - for (int i = 0; i < num_blocks; ++i) - { - if (!blocks[i]) - continue; - int count = static_cast(blocks[i]->GetInstructions().size()); - for (int j = 0; j < count; ++j) - pos_to_block.push_back({i, count}); - global_pos += count; - } - - const int total_instrs = global_pos; - - // Reverse scan: process blocks in reverse order - for (int bi = num_blocks - 1; bi >= 0; --bi) - { - if (!blocks[bi]) - continue; - auto &insts = blocks[bi]->GetInstructions(); - const int num_instrs = static_cast(insts.size()); - if (num_instrs == 0) - continue; - - // Compute the starting global position of the first instruction in - // this block - int block_start_pos = 0; - for (int pi = 0; pi < bi; ++pi) - { - if (blocks[pi]) - block_start_pos += static_cast(blocks[pi]->GetInstructions().size()); - } - - // Start with live_out of this block - std::unordered_set live = blk_info[bi].live_out; - - // Process instructions from last to first. - // Correct backward order: uses first (add to live), then record - // (interval extends to this position), then defs (remove from live). - // This ensures that a vreg used at this position IS recorded as - // live here, even if it was not previously in the live set. - for (int j = num_instrs - 1; j >= 0; --j) - { - int pos = block_start_pos + j; - - const auto &inst = insts[j]; - int def_vreg; - std::vector use_vregs; - ExtractDefUse(inst, def_vreg, use_vregs); - - // Uses: going backward, uses make the vreg live before this - // instruction. - for (int u : use_vregs) - live.insert(u); - - // Record: all vregs currently live extend their interval to this - // position. - for (int v : live) - { - auto sit = vreg_start.find(v); - if (sit == vreg_start.end() || pos < sit->second) - vreg_start[v] = pos; - auto eit = vreg_end.find(v); - if (eit == vreg_end.end() || pos > eit->second) - vreg_end[v] = pos; - } - - // Def: going backward, the def is the beginning of the live range - // — remove from live so that earlier positions don't see it - // (unless a later use re-adds it for the prior value). - if (def_vreg >= 0) - { - // 记录 def 位置作为区间起点。即使 def vreg 不在当前活跃集中 - // (跨块数据流边界情况可能导致),区间也必须覆盖 def 位置, - // 确保寄存器分配在定义点能找到对应的范围。 - auto sit = vreg_start.find(def_vreg); - if (sit == vreg_start.end() || pos < sit->second) - vreg_start[def_vreg] = pos; - auto eit = vreg_end.find(def_vreg); - if (eit == vreg_end.end() || pos > eit->second) - vreg_end[def_vreg] = pos; - - live.erase(def_vreg); - } - } - - // After processing all instructions, live should equal live_in. - // Any vreg still in live for the entry block (block 0) is live-in - // at function entry → set start = 0. - if (bi == 0) - { - for (int v : live) - { - vreg_start[v] = 0; - // Also ensure end is at least 0 - auto eit = vreg_end.find(v); - if (eit == vreg_end.end() || 0 > eit->second) - vreg_end[v] = 0; - } - } - } - - // ================================================================ - // Phase 3: Build LiveInterval objects - // ================================================================ - const int num_vregs = func.GetNumVRegs(); - std::vector intervals; - - for (int v = 0; v < num_vregs; ++v) - { - auto sit = vreg_start.find(v); - auto eit = vreg_end.find(v); - - int start = (sit != vreg_start.end()) ? sit->second : 0; - int end = (eit != vreg_end.end()) ? eit->second : 0; - - // Filter out unused vregs - if (start > end) - continue; - - LiveInterval li; - li.vreg = v; - li.start = start; - li.end = end; - li.vreg_class = func.GetVRegClass(v); - li.reg_class = ToRegClass(li.vreg_class); - li.assigned_reg = -1; - li.hint_reg = -1; - li.generation = 0; - intervals.push_back(li); - } - - return intervals; - } - -namespace -{ - -// 全局指令位置 → 块索引 + 局部指令索引 -struct GlobalPosInfo { int block_idx; int local_idx; }; - -} // anonymous namespace - -std::vector EnhanceIntervals( - const std::vector &raw, - MachineFunction &function) -{ - std::vector result = raw; - - auto &blocks = function.GetBlocks(); - - // ---- 构建 pos → block 映射 + block_start_pos ---- - std::vector pos_to_block; - std::vector block_start_pos(blocks.size(), -1); - int global = 0; - for (int bi = 0; bi < (int)blocks.size(); ++bi) - { - if (!blocks[bi]) continue; - block_start_pos[bi] = global; - int cnt = (int)blocks[bi]->GetInstructions().size(); - for (int j = 0; j < cnt; ++j) - pos_to_block.push_back(bi); - global += cnt; - } - - // ---- Pass A:收集 VNInfo + UsePosition(正向扫描)---- - for (int bi = 0; bi < (int)blocks.size(); ++bi) - { - if (!blocks[bi]) continue; - auto &insts = blocks[bi]->GetInstructions(); - int base = block_start_pos[bi]; - for (int j = 0; j < (int)insts.size(); ++j) - { - int pos = base + j; - const auto &inst = insts[j]; - - int def_vreg; - std::vector use_vregs; - ExtractDefUse(inst, def_vreg, use_vregs); - - if (def_vreg >= 0 && def_vreg < (int)result.size()) - { - auto &li = result[def_vreg]; - VNInfo vn; - vn.id = (int)li.valnos.size(); - vn.def_pos = pos; - vn.def_opcode = inst.GetOpcode(); - li.valnos.push_back(vn); - li.uses.push_back({pos, true, vn.id, inst.GetOpcode()}); - } - - for (int u : use_vregs) - { - if (u < 0 || u >= (int)result.size()) continue; - auto &li = result[u]; - int vn_id = li.valnos.empty() ? 0 : (int)li.valnos.size() - 1; - li.uses.push_back({pos, false, vn_id, inst.GetOpcode()}); - } - } - } - - // ---- Pass B:构建初始 segments(单段 [first_use, last_use])---- - for (auto &li : result) - { - if (li.uses.empty()) continue; - Segment seg; - seg.start = li.FirstUsePos(); - seg.end = li.LastUsePos(); - seg.vn_id = 0; - seg.crosses_call = false; - li.segments.push_back(seg); - } - - // ---- Pass C:标记 crosses_call ---- - for (int bi = 0; bi < (int)blocks.size(); ++bi) - { - if (!blocks[bi]) continue; - auto &insts = blocks[bi]->GetInstructions(); - int base = block_start_pos[bi]; - for (int j = 0; j < (int)insts.size(); ++j) - { - if (insts[j].GetOpcode() != Opcode::Call) continue; - int call_pos = base + j; - for (auto &li : result) - { - for (auto &seg : li.segments) - { - if (seg.Contains(call_pos)) - { - seg.crosses_call = true; - break; - } - } - } - } - } - - return result; -} - -} // namespace mir diff --git a/src/mir/LinearScanAlloc.cpp b/src/mir/LinearScanAlloc.cpp deleted file mode 100644 index d05b2aed..00000000 --- a/src/mir/LinearScanAlloc.cpp +++ /dev/null @@ -1,734 +0,0 @@ -#include "mir/MIR.h" - -#include -#include -#include -#include -#include - -#include "utils/Log.h" - -namespace mir -{ - namespace - { - - // ---- AArch64 可分配寄存器 -------------------------------------------- - - // GP 可分配:x19-x28(callee-saved)。当前限定为 callee-saved - // 避免跨函数调用时 caller-saved 寄存器被破坏。TODO:后续可加入 - // caller-saved 寄存器(x8-x12,x15-x17)用于不跨调用活跃的 vreg。 - // x0-x7 参数传递,x13-x14/scratch,x18 平台,x29-31 保留。 - static const int GP_ALLOCATABLE[] = {19, 20, 21, 22, 23, 24, 25, 26, 27, 28}; - static const int K_GP = 10; - - // FP 可分配:s8-s31 - static const int FP_ALLOCATABLE[] = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; - static const int K_FP = 24; - - // 寄存器号 → PhysReg 转换 - static PhysReg NumberToPhysReg(int num, VRegClass vc) - { - if (vc == VRegClass::Float) - return static_cast(static_cast(PhysReg::S0) + num); - if (vc == VRegClass::Ptr) - return static_cast(static_cast(PhysReg::X0) + num); - return static_cast(static_cast(PhysReg::W0) + num); - } - - // 可分配索引 → PhysReg - static PhysReg AllocIdxToPhysReg(int idx, VRegClass vc) - { - if (vc == VRegClass::Float) - return NumberToPhysReg(FP_ALLOCATABLE[idx], VRegClass::Float); - return NumberToPhysReg(GP_ALLOCATABLE[idx], vc); - } - - // ---- 工具函数 -------------------------------------------------------- - - static bool HasVRegDef(Opcode opcode) - { - switch (opcode) - { - case Opcode::MovImm: - case Opcode::LoadStack: - case Opcode::LoadGlobal: - case Opcode::LoadGlobalAddr: - case Opcode::LoadStackAddr: - case Opcode::LoadMem: - case Opcode::AddRR: - case Opcode::SubRR: - case Opcode::AddImm: - case Opcode::SubImm: - case Opcode::MulRR: - case Opcode::DivRR: - case Opcode::ModRR: - case Opcode::AndRR: - case Opcode::OrRR: - case Opcode::XorRR: - case Opcode::ShlRR: - case Opcode::ShrRR: - case Opcode::AsrRR: - case Opcode::Asr64RR: - case Opcode::Uxtw: - case Opcode::Sxtw: - case Opcode::CSet: - case Opcode::Csel: - case Opcode::Smull: - case Opcode::Msub: - case Opcode::NegRR: - case Opcode::FAddRR: - case Opcode::FSubRR: - case Opcode::FMulRR: - case Opcode::FDivRR: - case Opcode::Scvtf: - case Opcode::FCvtzs: - case Opcode::FMovWS: - case Opcode::MovReg: - case Opcode::Call: - return true; - default: - return false; - } - } - - // ---- 核心数据结构 ----------------------------------------------------- - - // 活跃列表中存活的 vreg + 所占用寄存器 - struct ActiveInterval - { - LiveInterval *interval; - int phys_reg; // 可分配数组中的索引 - }; - - // 每个 vreg 的活区段:位置范围 + 寄存器分配 - struct VRegRange - { - int start; // 指令位置(全局索引) - int end; - int reg_idx; // 可分配数组索引,-1 表示已溢出 - }; - - // 保存点:在指定位置需要把 vreg 从寄存器溢出到栈 - struct SavePoint - { - int pos; // 指令位置 - int vreg; // 溢出 vreg - int reg_idx; // 寄存器 - int spill_slot; - }; - - // ---- 分配器 ---------------------------------------------------------- - - // 从活跃列表中淘汰 end < pos 的区间 - static void ExpireOldIntervals(std::vector &active, - std::vector ®_free, - int pos) - { - for (auto &a : active) - { - if (a.interval->end < pos) - reg_free[a.phys_reg] = true; - } - active.erase( - std::remove_if(active.begin(), active.end(), - [pos](const ActiveInterval &a) - { return a.interval->end < pos; }), - active.end()); - } - - static int FindFreeReg(const std::vector ®_free) - { - for (size_t i = 0; i < reg_free.size(); ++i) - if (reg_free[i]) - return static_cast(i); - return -1; - } - - // 返回活跃列表中 end 最大者的索引 - static int SelectSpill(const std::vector &active) - { - int farthest = -1; - int farthest_end = -1; - for (size_t i = 0; i < active.size(); ++i) - { - if (active[i].interval->end > farthest_end) - { - farthest_end = active[i].interval->end; - farthest = static_cast(i); - } - } - return farthest; - } - - static int GetOrCreateSpillSlot(MachineFunction &func, int vreg, - std::unordered_map &vreg_to_slot) - { - auto it = vreg_to_slot.find(vreg); - if (it != vreg_to_slot.end()) - return it->second; - int size = (func.GetVRegClass(vreg) == VRegClass::Ptr) ? 8 : 4; - int slot = func.CreateFrameIndex(size); - vreg_to_slot[vreg] = slot; - return slot; - } - - // ---- 前向声明 -------------------------------------------------------- - - static void RewriteWithAllocation( - MachineFunction &func, - const std::vector> &vreg_ranges, - const std::unordered_map &vreg_to_slot, - std::vector &save_points); - - // ---- 主分配算法:Wimmer & Mössenböck (2005) 优化区间分割 ---------------- - - static void RunLinearScan(MachineFunction &func) - { - auto intervals = ComputeInstLiveness(func); - if (intervals.empty()) - return; - - const int num_vregs = func.GetNumVRegs(); - - // 按 start 排序 - std::sort(intervals.begin(), intervals.end(), - [](const LiveInterval &a, const LiveInterval &b) - { return a.start < b.start; }); - - // 分配结果 - std::vector> vreg_ranges(num_vregs); - std::vector vreg_has_range(num_vregs, false); - std::unordered_map vreg_to_slot; // vreg -> spill slot - std::vector save_points; - - // 寄存器空闲表 - std::vector gp_free(K_GP, true); - std::vector fp_free(K_FP, true); - - // 活跃列表(按 end 不排序,SelectSpill 扫描查找) - std::vector active; - - // 工作队列(start 有序) + 分割产生的新区间(追加到队尾) - std::vector queue = intervals; - - for (size_t qi = 0; qi < queue.size(); ++qi) - { - LiveInterval &cur = queue[qi]; - - // 检查当前区间是否已被覆盖(split 产生的溢出区间已有 vreg_ranges 条目) - if (cur.vreg >= 0 && cur.vreg < num_vregs) - { - bool already_covered = false; - for (const auto &rng : vreg_ranges[cur.vreg]) - { - if (rng.start <= cur.start && cur.end <= rng.end) - { - already_covered = true; - break; - } - } - if (already_covered) - continue; - } - - // 选择对应寄存器池 - const int K = (cur.vreg_class == VRegClass::Float) ? K_FP : K_GP; - std::vector ®_free = (cur.vreg_class == VRegClass::Float) ? fp_free : gp_free; - - // 1. 淘汰已经结束的活跃区间 - ExpireOldIntervals(active, reg_free, cur.start); - - // 2. 尝试找空闲寄存器 - int free_reg = FindFreeReg(reg_free); - - if (free_reg >= 0) - { - // 分配空闲寄存器 - reg_free[free_reg] = false; - active.push_back({&cur, free_reg}); - vreg_ranges[cur.vreg].push_back({cur.start, cur.end, free_reg}); - vreg_has_range[cur.vreg] = true; - } - else - { - // 3. 需要溢出——选择 end 最大的活跃区间 - int spill_idx = SelectSpill(active); - - if (spill_idx < 0) - { - // 没有活跃区间,强制溢出当前 - int slot = GetOrCreateSpillSlot(func, cur.vreg, vreg_to_slot); - vreg_ranges[cur.vreg].push_back({cur.start, cur.end, -1}); - vreg_has_range[cur.vreg] = true; - cur.spilled = true; - cur.spill_slot = slot; - continue; - } - - ActiveInterval &spill_cand = active[spill_idx]; - - if (spill_cand.interval->end > cur.end) - { - // 4a. 最优分割:偷走最远 end 的寄存器给当前,被偷者的后半段溢出 - int stolen_reg = spill_cand.phys_reg; - int evicted_vreg = spill_cand.interval->vreg; - - // 割开被驱逐 vreg 的范围:前半段保留寄存器,后半段溢出 - // 找到当前活跃的范围并截断 - auto &ranges = vreg_ranges[evicted_vreg]; - if (!ranges.empty()) - { - VRegRange &last = ranges.back(); - if (last.reg_idx == stolen_reg) - { - // 截断范围:寄存器在 cur.start 通过 save point 保存后即被 cur 覆写 - int orig_end = last.end; - last.end = cur.start; - vreg_ranges[evicted_vreg].push_back({cur.start + 1, orig_end, -1}); - - // 在此位置需要保存被驱逐的值到栈 - int slot = GetOrCreateSpillSlot(func, evicted_vreg, vreg_to_slot); - save_points.push_back({cur.start, evicted_vreg, stolen_reg, slot}); - - // 把分割后的溢出部分送回队列(它以 evicted 的 vreg 标识,但 vreg_has_range 已为真) - LiveInterval split_li; - split_li.vreg = evicted_vreg; - split_li.start = cur.start + 1; - split_li.end = orig_end; - split_li.vreg_class = spill_cand.interval->vreg_class; - split_li.spilled = true; - split_li.spill_slot = slot; - // vreg_has_range 标记已在上面设置,split_li 的处理会被跳过 - } - } - - // 从活跃列表移除被驱逐项 - active.erase(active.begin() + spill_idx); - - // 当前 vreg 获得偷来的寄存器 - reg_free[stolen_reg] = false; - active.push_back({&cur, stolen_reg}); - vreg_ranges[cur.vreg].push_back({cur.start, cur.end, stolen_reg}); - vreg_has_range[cur.vreg] = true; - } - else - { - // 4b. 没有更远 end 的——直接溢出当前 - int slot = GetOrCreateSpillSlot(func, cur.vreg, vreg_to_slot); - vreg_ranges[cur.vreg].push_back({cur.start, cur.end, -1}); - vreg_has_range[cur.vreg] = true; - cur.spilled = true; - cur.spill_slot = slot; - // 不占用寄存器,不加入活跃列表 - } - } - } - - // ---- 记录 callee-saved 寄存器使用 ---- - // 当前 GP_ALLOCATABLE 全为 callee-saved(x19-x28),遍历已分配的 - // 范围找出实际使用的寄存器,通知 FrameLowering 保存/恢复。 - std::unordered_set used_callee_gp; - std::unordered_set used_callee_fp; - for (int vi = 0; vi < num_vregs; ++vi) - { - for (const auto &rng : vreg_ranges[vi]) - { - if (rng.reg_idx < 0) - continue; - VRegClass vc = func.GetVRegClass(vi); - if (vc == VRegClass::Float) - used_callee_fp.insert(rng.reg_idx); - else - used_callee_gp.insert(rng.reg_idx); - } - } - for (int idx : used_callee_gp) - func.AddCalleeSavedReg(AllocIdxToPhysReg(idx, VRegClass::Int)); - for (int idx : used_callee_fp) - func.AddCalleeSavedReg(AllocIdxToPhysReg(idx, VRegClass::Float)); - - // ---- 重写指令 ---------------------------------------------------------- - RewriteWithAllocation(func, vreg_ranges, vreg_to_slot, save_points); - } - - // ---- 临时寄存器选择器 ------------------------------------------------ - - // 在已分配寄存器中找一个不被当前指令 def/use 占用的作为 scratch - static int PickGPScratchReg(const MachineInstr &inst, - const std::unordered_map &pos_regs) - { - // x14 优先(不在可分配列表中,天然安全) - bool x14_free = true; - for (const auto &op : inst.GetOperands()) - { - if (op.GetKind() == Operand::Kind::Reg) - { - int r = static_cast(op.GetReg()) - static_cast(PhysReg::W0); - if (r == 14) { x14_free = false; break; } - } - } - if (x14_free) - { - // 检查当前在寄存器的 vreg 是否占用 14 - bool other_used = false; - for (const auto &kv : pos_regs) - { - if (kv.second == 14) { other_used = true; break; } - } - if (!other_used) return 14; - } - - // 遍历可分配列表找一个不冲突的 - for (int r : GP_ALLOCATABLE) - { - bool conflict = false; - for (const auto &op : inst.GetOperands()) - { - if (op.GetKind() == Operand::Kind::Reg) - { - int pr = static_cast(op.GetReg()) - static_cast(PhysReg::W0); - if (pr == r) { conflict = true; break; } - } - } - if (!conflict) - { - bool other_used = false; - for (const auto &kv : pos_regs) - { - if (kv.second == r) { other_used = true; break; } - } - if (!other_used) return r; - } - } - return GP_ALLOCATABLE[0]; - } - - static int PickFPScratchReg(const MachineInstr &inst, - const std::unordered_map &pos_regs) - { - for (int r : FP_ALLOCATABLE) - { - bool conflict = false; - for (const auto &op : inst.GetOperands()) - { - if (op.GetKind() == Operand::Kind::Reg) - { - int pr = static_cast(op.GetReg()) - static_cast(PhysReg::S0); - if (pr == r) { conflict = true; break; } - } - } - if (!conflict) - { - bool other_used = false; - for (const auto &kv : pos_regs) - { - if (kv.second == r) { other_used = true; break; } - } - if (!other_used) return r; - } - } - return FP_ALLOCATABLE[0]; - } - - // ---- 保存点排序 -------------------------------------------------------- - - static void SortSavePoints(std::vector &save_points) - { - std::sort(save_points.begin(), save_points.end(), - [](const SavePoint &a, const SavePoint &b) - { return a.pos < b.pos; }); - } - - // ---- RewriteWithAllocation ------------------------------------------- - - static void RewriteWithAllocation( - MachineFunction &func, - const std::vector> &vreg_ranges, - const std::unordered_map &vreg_to_slot, - std::vector &save_points) - { - SortSavePoints(save_points); - size_t next_save = 0; - - // 全局指令位置计数器(基于原始指令顺序) - int global_pos = 0; - - for (auto &block : func.GetBlocks()) - { - std::vector new_insts; - - for (auto &inst : block->GetInstructions()) - { - auto opcode = inst.GetOpcode(); - auto &ops = inst.GetOperands(); - - // ---- 保存点:在此位置前保存被驱逐 vreg 的值 ---- - while (next_save < save_points.size() && - save_points[next_save].pos <= global_pos) - { - const auto &sp = save_points[next_save]; - VRegClass vc = func.GetVRegClass(sp.vreg); - PhysReg pr = AllocIdxToPhysReg(sp.reg_idx, vc); - new_insts.push_back( - MachineInstr(Opcode::StoreStack, - {Operand::Reg(pr), Operand::FrameIndex(sp.spill_slot)})); - ++next_save; - } - - // ---- 确定当前位置 def/use 的 vreg 对应哪个范围 ---- - // 构建 "当前位置已在使用中的寄存器" 集合(用于 scratch 选择) - std::unordered_map pos_regs; // vreg -> reg_idx at this position - std::unordered_map vreg_range_idx; // vreg -> range index - - bool has_def = HasVRegDef(opcode); - int def_vreg = -1; - - for (size_t i = 0; i < ops.size(); ++i) - { - if (ops[i].GetKind() != Operand::Kind::VReg) - continue; - - // 跳过 def 位置上已经被处理过的 - if (has_def && i == 0) - { - def_vreg = ops[i].GetVRegId(); - continue; - } - - int v = ops[i].GetVRegId(); - if (v < 0 || v >= static_cast(vreg_ranges.size())) - continue; - - // 寻找覆盖当前位置的范围 - int reg_idx = -1; - for (size_t ri = 0; ri < vreg_ranges[v].size(); ++ri) - { - const auto &rng = vreg_ranges[v][ri]; - if (rng.start <= global_pos && global_pos <= rng.end) - { - reg_idx = rng.reg_idx; - break; - } - } - - if (reg_idx >= 0) - pos_regs[v] = reg_idx; - } - - // 也处理 def vreg - if (def_vreg >= 0 && def_vreg < static_cast(vreg_ranges.size())) - { - int reg_idx = -1; - for (size_t ri = 0; ri < vreg_ranges[def_vreg].size(); ++ri) - { - const auto &rng = vreg_ranges[def_vreg][ri]; - if (rng.start <= global_pos && global_pos <= rng.end) - { - reg_idx = rng.reg_idx; - break; - } - } - if (reg_idx >= 0) - pos_regs[def_vreg] = reg_idx; - } - - // ---- 处理溢出 uses:插入 LoadStack ---- - // 收集所有溢出 use vreg(在当前范围中 reg_idx == -1) - std::unordered_set spilled_uses; - for (size_t i = 0; i < ops.size(); ++i) - { - if (ops[i].GetKind() != Operand::Kind::VReg) - continue; - if (has_def && i == 0) - continue; - int v = ops[i].GetVRegId(); - if (v < 0 || v >= static_cast(vreg_ranges.size())) - continue; - // 检查范围:如果覆盖当前位置的范围 reg_idx == -1,则需加载 - bool needs_load = false; - for (const auto &rng : vreg_ranges[v]) - { - if (rng.start <= global_pos && global_pos <= rng.end) - { - if (rng.reg_idx == -1) - needs_load = true; - break; - } - } - if (needs_load && !spilled_uses.count(v)) - spilled_uses.insert(v); - } - - for (int v : spilled_uses) - { - auto slot_it = vreg_to_slot.find(v); - if (slot_it == vreg_to_slot.end()) - continue; - - int slot = slot_it->second; - VRegClass vc = func.GetVRegClass(v); - - int scratch = (vc == VRegClass::Float) - ? PickFPScratchReg(inst, pos_regs) - : PickGPScratchReg(inst, pos_regs); - - PhysReg load_reg = NumberToPhysReg(scratch, vc); - new_insts.push_back( - MachineInstr(Opcode::LoadStack, - {Operand::Reg(load_reg), Operand::FrameIndex(slot)})); - - // 将该 vreg 在此处映射到此 scratch 寄存器 - pos_regs[v] = scratch; - - // 替换指令中的该 vreg 操作数 - for (auto &op : ops) - { - if (op.GetKind() == Operand::Kind::VReg && op.GetVRegId() == v) - { - const_cast(op) = Operand::Reg(load_reg); - } - } - } - - // ---- 替换所有 VReg 操作数为 PhysReg ---- - for (auto &op : ops) - { - if (op.GetKind() != Operand::Kind::VReg) - continue; - - int v = op.GetVRegId(); - VRegClass vc = func.GetVRegClass(v); - - if (v < 0 || v >= static_cast(vreg_ranges.size())) - { - // vreg 超出范围(临时 vreg):用 scratch 替换 - int fallback = (vc == VRegClass::Float) - ? PickFPScratchReg(inst, pos_regs) - : PickGPScratchReg(inst, pos_regs); - const_cast(op) = Operand::Reg(NumberToPhysReg(fallback, vc)); - continue; - } - - // 找到当前位置对应的 reg - int reg_idx = -1; - for (const auto &rng : vreg_ranges[v]) - { - if (rng.start <= global_pos && global_pos <= rng.end) - { - reg_idx = rng.reg_idx; - break; - } - } - - if (reg_idx >= 0) - { - // 有寄存器:直接替换 - const_cast(op) = Operand::Reg(AllocIdxToPhysReg(reg_idx, vc)); - } - else - { - // 溢出或无范围覆盖:用 scratch 替换 - auto slot_it = vreg_to_slot.find(v); - int scratch = (vc == VRegClass::Float) - ? PickFPScratchReg(inst, pos_regs) - : PickGPScratchReg(inst, pos_regs); - const_cast(op) = Operand::Reg(NumberToPhysReg(scratch, vc)); - if (slot_it == vreg_to_slot.end()) - { - // 无 slot 也无寄存器,记录 scratch(不 store,因为没有 slot) - } - else - { - pos_regs[v] = scratch; - } - } - } - - // ---- 压入指令 ---- - new_insts.push_back(std::move(const_cast(inst))); - - // ---- 处理溢出 def:插入 StoreStack ---- - if (def_vreg >= 0 && def_vreg < static_cast(vreg_ranges.size())) - { - // 检查 def vreg 在此位置是否溢出 - bool needs_store = false; - for (const auto &rng : vreg_ranges[def_vreg]) - { - if (rng.start <= global_pos && global_pos <= rng.end) - { - if (rng.reg_idx == -1) - needs_store = true; - break; - } - } - - if (needs_store) - { - auto slot_it = vreg_to_slot.find(def_vreg); - if (slot_it != vreg_to_slot.end()) - { - // 从刚压入的指令中找到结果寄存器 - const auto &last_inst = new_insts.back(); - PhysReg result_reg = PhysReg::W0; - VRegClass vc = func.GetVRegClass(def_vreg); - for (const auto &op : last_inst.GetOperands()) - { - if (op.GetKind() == Operand::Kind::Reg) - { - PhysReg r = op.GetReg(); - bool is_gp = (r >= PhysReg::W0 && r <= PhysReg::W30) || - (r >= PhysReg::X0 && r <= PhysReg::X30); - bool is_fp = (r >= PhysReg::S0 && r <= PhysReg::S31); - if ((vc == VRegClass::Float && is_fp) || - (vc != VRegClass::Float && is_gp)) - { - result_reg = r; - break; - } - } - } - - new_insts.push_back( - MachineInstr(Opcode::StoreStack, - {Operand::Reg(result_reg), Operand::FrameIndex(slot_it->second)})); - } - } - } - - ++global_pos; - } - - block->GetInstructions() = std::move(new_insts); - } - } - - } // anonymous namespace -} // namespace mir - -// ---- 公开 API ----------------------------------------------------------- - -namespace mir -{ - -#if 0 - void RunLinearScanRegAlloc(MachineFunction &func) - { - if (func.GetNumVRegs() == 0) - return; - RunLinearScan(func); - } -#endif - -#if 0 - void RunLinearScanRegAlloc(MachineModule &module) - { - for (auto &function : module.GetFunctions()) - { - if (function) - RunLinearScanRegAlloc(*function); - } - } -#endif - -} // namespace mir