chore: 删除死代码——LinearScanAlloc/GreedyAlloc/InstLiveness(共2072行)

三个文件未被 CMakeLists 引用、无头文件、无 MIR.h 声明、无调用方。
当前唯一寄存器分配器为 RegAlloc.cpp。
lzk
lzkk 3 days ago
parent 862c1bfe7b
commit 56b37ac060

@ -1,791 +0,0 @@
#include "mir/GreedyAlloc.h"
#include "mir/MIR.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace mir
{
namespace
{
// ---- Allocatable register sets ----
// GP: excludes x0-x7 (argument passing), x13-x14 (used as lowering
// temporaries), x18 (platform register) and x29-x30 (FP/LR).
// x16-x17 double as the spill fallback; the spill path reaches them through
// the phys < 0 mapping.
constexpr int GP_ALLOCATABLE[] = {
    8,  9,  10, 11, 12, 15, 16, 17, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28};
// Number of entries in GP_ALLOCATABLE (derived so it cannot drift from the list).
constexpr int GP_COUNT =
    static_cast<int>(sizeof(GP_ALLOCATABLE) / sizeof(GP_ALLOCATABLE[0]));
// S0-S1 are argument/return registers and are not allocatable;
// S2-S9 and S16-S31 are.
constexpr int FP_ALLOCATABLE[] = {
    2,  3,  4,  5,  6,  7,  8,  9,  16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
// Number of entries in FP_ALLOCATABLE.
constexpr int FP_COUNT =
    static_cast<int>(sizeof(FP_ALLOCATABLE) / sizeof(FP_ALLOCATABLE[0]));
// Upper bound on allocate/spill-rewrite rounds (LLVM: usually converges in 1-2).
constexpr int MAX_ROUNDS = 3;
// Returns true for every physical register index up to and including 17;
// callers use this to keep call-crossing intervals out of those registers.
bool IsCallerSavedGP(int phys_reg)
{
    constexpr int kLastCallerSaved = 17;
    return !(phys_reg > kLastCallerSaved);
}
// Selects the allocatable register table for a register class.
// GPR32 and GPR64 share the GP table; every other class gets the FP table.
// `count` receives the number of entries in the returned array.
const int* GetRegList(RegClass rc, int& count)
{
    const bool is_gp = (rc == RegClass::GPR32) || (rc == RegClass::GPR64);
    count = is_gp ? GP_COUNT : FP_COUNT;
    return is_gp ? GP_ALLOCATABLE : FP_ALLOCATABLE;
}
// ---- 启发式 spill 权重LLVM 简化版Normalise(Σ use_freq) / Length----
// LLVM 使用完整的 block frequency 分析;我们使用循环深度作为近似。
// 堆排序:高 cascade已被驱逐过的永远排在低 cascade 之后;
// 同等 cascade 按 spill_weight 降序(堆顶权重最大,优先分配)。
// heap 存储 vreg 索引
// Stage 0 (new): 短活范围优先——弦图完美消除序近似
// Stage 1+ (deferred/evicted): spill_weight 降序
struct SpillWeightCmp
{
const std::vector<LiveInterval>& intervals;
explicit SpillWeightCmp(const std::vector<LiveInterval>& ivs) : intervals(ivs) {}
bool operator()(int a, int b) const
{
const auto& la = intervals[a];
const auto& lb = intervals[b];
if (la.generation != lb.generation)
return la.generation > lb.generation;
if (la.deferred_count == 0 && lb.deferred_count == 0)
return la.Length() > lb.Length();
return la.spill_weight < lb.spill_weight;
}
};
// ---- def/use extraction ----
// Reports whether `opcode` belongs to the set of opcodes that ExtractDefUse
// treats as defining operands[0].
static bool HasVRegDef(Opcode opcode)
{
    bool defines_vreg = false;
    switch (opcode)
    {
    case Opcode::MovImm:
    case Opcode::LoadStack:
    case Opcode::LoadGlobal:
    case Opcode::LoadGlobalAddr:
    case Opcode::LoadStackAddr:
    case Opcode::LoadMem:
    case Opcode::AddRR:
    case Opcode::SubRR:
    case Opcode::AddImm:
    case Opcode::SubImm:
    case Opcode::MulRR:
    case Opcode::DivRR:
    case Opcode::ModRR:
    case Opcode::AndRR:
    case Opcode::OrRR:
    case Opcode::XorRR:
    case Opcode::ShlRR:
    case Opcode::ShrRR:
    case Opcode::AsrRR:
    case Opcode::Asr64RR:
    case Opcode::Uxtw:
    case Opcode::Sxtw:
    case Opcode::CSet:
    case Opcode::Csel:
    case Opcode::Smull:
    case Opcode::Msub:
    case Opcode::NegRR:
    case Opcode::FAddRR:
    case Opcode::FSubRR:
    case Opcode::FMulRR:
    case Opcode::FDivRR:
    case Opcode::Scvtf:
    case Opcode::FCvtzs:
    case Opcode::FMovWS:
    case Opcode::MovReg:
        defines_vreg = true;
        break;
    default:
        break;
    }
    return defines_vreg;
}
// Splits the vreg operands of `inst` into one optional def and a list of uses.
//   def_vreg  : set to operands[0]'s vreg id when the opcode defines a vreg
//               and operands[0] is a VReg; otherwise -1.
//   use_vregs : cleared, then filled with every other VReg operand in order.
// For defining opcodes operands[0] is never reported as a use, whatever its kind.
static void ExtractDefUse(const MachineInstr &inst, int &def_vreg,
                          std::vector<int> &use_vregs)
{
    def_vreg = -1;
    use_vregs.clear();

    const auto &operands = inst.GetOperands();
    const bool defines = HasVRegDef(inst.GetOpcode());

    if (defines && !operands.empty() &&
        operands[0].GetKind() == Operand::Kind::VReg)
    {
        def_vreg = operands[0].GetVRegId();
    }

    const size_t first_use = defines ? 1 : 0;
    for (size_t idx = first_use; idx < operands.size(); ++idx)
    {
        const auto &operand = operands[idx];
        if (operand.GetKind() != Operand::Kind::VReg) continue;
        use_vregs.push_back(operand.GetVRegId());
    }
}
// ---- Loop depth analysis ----
// Returns, for every block index, the number of loops found to contain it.
//
// A back edge is a Br/CondBr whose target block index is <= the index of the
// block holding the branch. All back edges into the same header describe one
// loop. The loop body is collected by walking backwards from the latch
// blocks over (a) branch predecessors and (b) the layout predecessor
// (index - 1, assumed to fall through), stopping at the header.
//
// Each loop raises the depth of its blocks by exactly one. Taking
// "max depth in body + 1" instead would give the blocks of an outer loop the
// depth of its inner loop when the inner loop is processed first, and would
// count a header once per back edge.
std::vector<int> AnalyzeLoopDepth(MachineFunction &func)
{
    auto &blocks = func.GetBlocks();
    const int n = static_cast<int>(blocks.size());
    std::vector<int> depth(n, 0);

    std::unordered_map<int, int> label_to_idx;
    for (int i = 0; i < n; ++i)
        if (blocks[i]) label_to_idx[blocks[i]->GetLabelId()] = i;

    // branch_preds[t]: blocks containing a branch to block t.
    // latches[h]:      blocks containing a back edge to header h.
    // Both are built in one pass so the body walk below does not have to
    // rescan every instruction for every visited block.
    std::vector<std::vector<int>> branch_preds(n);
    std::vector<std::vector<int>> latches(n);
    for (int i = 0; i < n; ++i)
    {
        if (!blocks[i]) continue;
        for (auto &inst : blocks[i]->GetInstructions())
        {
            const auto opcode = inst.GetOpcode();
            const auto &ops = inst.GetOperands();
            int target_label = -1;
            if (opcode == Opcode::Br && !ops.empty() &&
                ops[0].GetKind() == Operand::Kind::Label)
                target_label = ops[0].GetLabel();
            else if (opcode == Opcode::CondBr && ops.size() >= 2 &&
                     ops[1].GetKind() == Operand::Kind::Label)
                target_label = ops[1].GetLabel();
            if (target_label < 0) continue;

            const auto it = label_to_idx.find(target_label);
            if (it == label_to_idx.end()) continue;
            const int target = it->second;

            // Blocks are visited in ascending order, so a duplicate of `i`
            // can only be the most recently appended entry.
            if (branch_preds[target].empty() || branch_preds[target].back() != i)
                branch_preds[target].push_back(i);
            if (target <= i &&
                (latches[target].empty() || latches[target].back() != i))
                latches[target].push_back(i);
        }
    }

    for (int header = 0; header < n; ++header)
    {
        if (latches[header].empty()) continue;

        std::unordered_set<int> body;
        std::queue<int> work;
        for (int latch : latches[header]) work.push(latch);

        while (!work.empty())
        {
            const int cur = work.front();
            work.pop();
            if (cur == header || !body.insert(cur).second) continue;
            // Layout predecessor: assumed to fall through into `cur`.
            if (cur > 0 && !body.count(cur - 1)) work.push(cur - 1);
            for (int pred : branch_preds[cur])
                if (!body.count(pred)) work.push(pred);
        }
        body.insert(header);

        for (int b : body) ++depth[b];
    }
    return depth;
}
// ---- Spill weight ----
// Sets li.spill_weight for every interval:
//     weight = sum over uses of (10 ^ loop_depth) * (0.5 if the use is a def) / Length
//   block_depth  : loop depth per block index.
//   pos_to_block : block index per global instruction position.
// Positions or blocks outside the supplied tables fall back to block 0 /
// depth 0.
void ComputeSpillWeights(std::vector<LiveInterval> &intervals,
                         const std::vector<int> &block_depth,
                         const std::vector<int> &pos_to_block)
{
    const int num_positions = static_cast<int>(pos_to_block.size());
    const int num_blocks = static_cast<int>(block_depth.size());

    for (auto &li : intervals)
    {
        float w = 0.0f;
        for (auto &use : li.uses)
        {
            const int block = (use.pos >= 0 && use.pos < num_positions)
                                  ? pos_to_block[use.pos] : 0;
            const int d = (block >= 0 && block < num_blocks)
                              ? block_depth[block] : 0;
            float mult = std::pow(10.0f, (float)d);
            if (use.is_def) mult *= 0.5f;
            w += mult;
        }
        // Length() is defined elsewhere; guard in case it returns 0 (for
        // example an interval with no uses). Dividing would yield NaN or
        // inf, and a NaN weight breaks the ordering used by SpillWeightCmp.
        const float length = static_cast<float>(li.Length());
        li.spill_weight = (length > 0.0f) ? w / length : w;
    }
}
// ---- Copy Hints ----
void PropagateCopyHints(std::vector<LiveInterval> &intervals,
MachineFunction &func)
{
for (auto &block : func.GetBlocks())
{
if (!block) continue;
for (auto &inst : block->GetInstructions())
{
if (inst.GetOpcode() != Opcode::MovReg) continue;
auto &ops = inst.GetOperands();
if (ops.size() < 2) continue;
if (ops[0].GetKind() != Operand::Kind::VReg) continue;
if (ops[1].GetKind() != Operand::Kind::VReg) continue;
int dst = ops[0].GetVRegId();
int src = ops[1].GetVRegId();
if (dst < 0 || dst >= (int)intervals.size()) continue;
if (src < 0 || src >= (int)intervals.size()) continue;
if (intervals[src].IsAllocated())
intervals[dst].hint_reg = intervals[src].assigned_reg;
else if (intervals[dst].IsAllocated())
intervals[src].hint_reg = intervals[dst].assigned_reg;
else if (intervals[src].hint_reg >= 0)
intervals[dst].hint_reg = intervals[src].hint_reg;
}
}
}
// ---- TryAssign ----
// Tries to place `li` in the hinted register `hint`.
// Fails when there is no hint (hint < 0), when the hint is caller-saved and
// the interval crosses a call, when the register interferes, or when the
// matrix rejects the assignment. On success records li.assigned_reg.
bool TryAssign(LiveInterval &li, LiveRegMatrix &m, int hint)
{
    const bool usable =
        hint >= 0 && !(IsCallerSavedGP(hint) && li.SegmentCrossesCall());
    if (!usable) return false;
    if (m.CheckInterference(li, hint)) return false;
    if (!m.Assign(&li, hint)) return false;
    li.assigned_reg = hint;
    return true;
}
// ---- TryAnyFreeReg ----
// Scans the allocatable registers of li's class in table order and takes the
// first one that is legal (not caller-saved while crossing a call) and free
// of interference. Returns true and sets li.assigned_reg on success.
bool TryAnyFreeReg(LiveInterval &li, LiveRegMatrix &m)
{
    int count = 0;
    const int *candidates = GetRegList(li.reg_class, count);
    for (const int *it = candidates; it != candidates + count; ++it)
    {
        const int reg = *it;
        const bool clobbered_by_call =
            IsCallerSavedGP(reg) && li.SegmentCrossesCall();
        if (clobbered_by_call || m.CheckInterference(li, reg)) continue;
        if (m.Assign(&li, reg))
        {
            li.assigned_reg = reg;
            return true;
        }
    }
    return false;
}
// ---- TryEvict (LLVM cascade eviction strategy) ----
// Only a conflicting vreg with a strictly lower generation may be evicted.
// After eviction the victim takes li's generation (cascade), which prevents
// A -> B -> A eviction cycles.
//
//   li   : interval that needs a register.
//   m    : live register matrix for li's register file.
//   heap : allocation work heap; an evicted victim is pushed back onto it.
//   cmp  : comparator the heap is ordered with.
// Returns true when li received a register (li.assigned_reg is set).
bool TryEvict(LiveInterval &li, LiveRegMatrix &m,
              std::vector<int> &heap,
              const SpillWeightCmp &cmp)
{
    int best_reg = -1;
    float best_weight = 1e9f;
    LiveInterval *victim = nullptr;

    int n = 0;
    const int *regs = GetRegList(li.reg_class, n);
    for (int i = 0; i < n; ++i)
    {
        const int r = regs[i];
        if (IsCallerSavedGP(r) && li.SegmentCrossesCall()) continue;

        LiveInterval *conflict = m.GetConflict(li, r);
        if (!conflict)
        {
            if (m.Assign(&li, r))
            {
                li.assigned_reg = r;
                return true;
            }
            // No conflict was reported but the matrix refused the register:
            // skip it instead of dereferencing the null conflict below.
            continue;
        }
        // Key LLVM convergence rule: evict only strictly lower generations.
        if (conflict->generation >= li.generation) continue;
        if (conflict->spill_weight < best_weight)
        {
            best_weight = conflict->spill_weight;
            best_reg = r;
            victim = conflict;
        }
    }
    if (best_reg < 0 || !victim) return false;

    m.Unassign(victim);
    // GetConflict reports a single interfering interval. If others on
    // best_reg still overlap li, evicting the victim does not free the
    // register: restore the victim and give up rather than double-booking.
    if (m.CheckInterference(li, best_reg))
    {
        m.Assign(victim, best_reg);
        return false;
    }

    victim->assigned_reg = -1;
    victim->generation = li.generation;
    heap.push_back(victim->vreg);
    std::push_heap(heap.begin(), heap.end(), cmp);

    if (m.Assign(&li, best_reg))
    {
        li.assigned_reg = best_reg;
        return true;
    }
    return false;
}
// ---- TrySplit在最大使用间隙处分裂LLVM local split 简化版)----
// 参考: llvm/lib/CodeGen/RegAllocGreedy.cpp tryLocalSplit()
bool TrySplit(int vreg_idx, LiveRegMatrix &m,
std::vector<int> &heap,
std::vector<LiveInterval> &intervals,
const std::vector<int> &pos_to_block,
std::vector<int> &spilled,
MachineFunction &func,
const SpillWeightCmp &cmp)
{
auto &li = intervals[vreg_idx];
if (li.uses.size() < 3) return false;
std::vector<int> sorted_uses;
for (auto &u : li.uses) sorted_uses.push_back(u.pos);
std::sort(sorted_uses.begin(), sorted_uses.end());
int best_gap = 0, split_after = -1;
for (size_t i = 1; i < sorted_uses.size(); ++i) {
int gap = sorted_uses[i] - sorted_uses[i - 1];
if (gap > best_gap && gap >= 2) { best_gap = gap; split_after = sorted_uses[i - 1]; }
}
if (split_after < 0) return false;
int hot_start = li.FirstUsePos(), hot_end = split_after;
int cold_start = split_after + 1, cold_end = li.LastUsePos();
if (hot_end < hot_start || cold_end < cold_start) return false;
LiveInterval cold;
cold.reg_class = li.reg_class; cold.generation = li.generation + 1;
cold.hint_reg = -1; cold.assigned_reg = -1;
cold.vreg = func.CreateVReg(li.vreg_class);
for (auto &seg : li.segments) {
if (seg.end < cold_start || seg.start > cold_end) continue;
Segment clipped = seg;
clipped.start = std::max(seg.start, cold_start);
clipped.end = std::min(seg.end, cold_end);
cold.segments.push_back(clipped);
}
for (auto &use : li.uses)
if (cold_start <= use.pos && use.pos <= cold_end) cold.uses.push_back(use);
if (cold.uses.empty()) return false;
float w = 0.0f;
for (auto &use : cold.uses) {
int blk = (use.pos >= 0 && use.pos < (int)pos_to_block.size()) ? pos_to_block[use.pos] : 0;
float mult = use.is_def ? 0.5f : 1.0f;
w += mult;
}
cold.spill_weight = w / cold.Length();
int cold_vreg = cold.vreg;
intervals.push_back(std::move(cold));
auto &li_safe = intervals[vreg_idx];
std::vector<Segment> hot_segs;
for (auto &seg : li_safe.segments) {
if (seg.end < hot_start || seg.start > hot_end) continue;
Segment clipped = seg;
clipped.start = std::max(seg.start, hot_start);
clipped.end = std::min(seg.end, hot_end);
if (clipped.start <= clipped.end) hot_segs.push_back(clipped);
}
li_safe.segments = std::move(hot_segs);
li_safe.uses.erase(std::remove_if(li_safe.uses.begin(), li_safe.uses.end(),
[&](const UsePosition &u) { return u.pos < hot_start || u.pos > hot_end; }), li_safe.uses.end());
if (!TryAnyFreeReg(li_safe, m)) { li_safe.assigned_reg = -2; spilled.push_back(vreg_idx); }
auto &cold_ref = intervals[cold_vreg];
if (!TryAnyFreeReg(cold_ref, m)) { heap.push_back(cold_vreg); std::push_heap(heap.begin(), heap.end(), cmp); }
return true;
}
// ---- 主分配函数:对一类寄存器执行贪婪分配 ----
// 返回 spilled 数量
int AllocateRegClass(std::vector<LiveInterval> &intervals,
RegClass rc,
LiveRegMatrix &matrix,
const std::vector<int> &pos_to_block,
MachineFunction &func,
std::vector<int> &spilled)
{
SpillWeightCmp cmp(intervals);
std::vector<int> heap;
for (auto &li : intervals)
{
if (li.vreg < 0) continue;
if (li.reg_class == rc && !li.IsAllocated() && !li.IsSpilled())
heap.push_back(li.vreg);
}
std::make_heap(heap.begin(), heap.end(), cmp);
int iter_limit = std::max(1000, (int)heap.size() * 3);
int iterations = 0;
while (!heap.empty())
{
if (++iterations > iter_limit)
{
// 安全网:剩余未分配 vreg 标记为 spill而非留下未分配状态
for (int vreg : heap)
{
if (intervals[vreg].IsAllocated() || intervals[vreg].IsSpilled()) continue;
intervals[vreg].assigned_reg = -2;
spilled.push_back(vreg);
}
break;
}
std::pop_heap(heap.begin(), heap.end(), cmp);
int vreg = heap.back();
heap.pop_back();
auto &li = intervals[vreg];
if (li.IsAllocated() || li.IsSpilled()) continue;
if (TryAssign(li, matrix, li.hint_reg)) continue;
if (TryAnyFreeReg(li, matrix)) continue;
if (rc == RegClass::GPR32 || rc == RegClass::GPR64)
{
if (TryEvict(li, matrix, heap, cmp)) continue;
}
// LLVM Defer 机制: 首次分配失败时推迟到下一轮,让更小的范围先分配
// 参考: llvm/lib/CodeGen/RegAllocGreedy.cpp selectOrSplit() RS_New→RS_Deferred
if (li.deferred_count == 0)
{
li.deferred_count = 1;
heap.push_back(vreg);
std::push_heap(heap.begin(), heap.end(), cmp);
continue;
}
if (TrySplit(vreg, matrix, heap, intervals,
pos_to_block, spilled, func, cmp)) continue;
li.assigned_reg = -2;
spilled.push_back(vreg);
}
return (int)spilled.size();
}
} // anonymous namespace
// ---- LiveRegMatrix methods ----
// Resets the matrix to `num_regs` physical registers, each with an empty
// assignment list.
void LiveRegMatrix::Init(int num_regs)
{
    reg_assignments_.assign(num_regs, {});
}
// Records `li` on `phys_reg` without any interference check.
// Out-of-range register indices are silently ignored.
void LiveRegMatrix::ForceAssign(LiveInterval *li, int phys_reg)
{
    const int reg_count = (int)reg_assignments_.size();
    if (phys_reg < 0 || phys_reg >= reg_count) return;
    reg_assignments_[phys_reg].push_back(li);
}
// Records `li` on `phys_reg`. Returns false (and records nothing) when the
// register index is out of range; no interference check is performed here.
bool LiveRegMatrix::Assign(LiveInterval *li, int phys_reg)
{
    const bool in_range =
        phys_reg >= 0 && phys_reg < (int)reg_assignments_.size();
    if (in_range) reg_assignments_[phys_reg].push_back(li);
    return in_range;
}
// Removes the first recorded occurrence of `li`, searching the registers in
// index order. Does nothing when `li` is not recorded anywhere.
void LiveRegMatrix::Unassign(LiveInterval *li)
{
    for (auto &assigned : reg_assignments_)
    {
        const auto pos = std::find(assigned.begin(), assigned.end(), li);
        if (pos == assigned.end()) continue;
        assigned.erase(pos);
        return;
    }
}
bool LiveRegMatrix::CheckInterference(const LiveInterval &li, int phys_reg) const
{
if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return true;
for (auto *other : reg_assignments_[phys_reg])
{
if (other->vreg == li.vreg) continue;
// Wn/Xn 别名GPR32/GPR64 共享同一物理寄存器,总是冲突
// LLVM 用 Register Unit 来处理Wn 和 Xn 占据相同的 unit
// 参考: llvm/lib/CodeGen/LiveRegMatrix.cpp foreachUnit()
bool gpr32_64_alias =
(li.reg_class == RegClass::GPR32 && other->reg_class == RegClass::GPR64) ||
(li.reg_class == RegClass::GPR64 && other->reg_class == RegClass::GPR32);
if (gpr32_64_alias)
return true;
for (auto &sa : li.segments)
for (auto &sb : other->segments)
if (sa.Overlaps(sb)) return true;
}
return false;
}
// Returns the first interval recorded on `phys_reg` that conflicts with
// `li`, or nullptr when there is none or the register index is out of
// range. The conflict rule is: a GPR32/GPR64 mixed-width pair always
// conflicts; otherwise any overlapping pair of segments does. An entry with
// the same vreg id as `li` is ignored.
LiveInterval *LiveRegMatrix::GetConflict(const LiveInterval &li,
                                         int phys_reg) const
{
    if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return nullptr;

    const auto &occupants = reg_assignments_[phys_reg];
    const auto hit = std::find_if(
        occupants.begin(), occupants.end(),
        [&li](LiveInterval *other)
        {
            if (other->vreg == li.vreg) return false;

            const bool li_is_32 = li.reg_class == RegClass::GPR32;
            const bool li_is_64 = li.reg_class == RegClass::GPR64;
            const bool other_is_32 = other->reg_class == RegClass::GPR32;
            const bool other_is_64 = other->reg_class == RegClass::GPR64;
            if ((li_is_32 && other_is_64) || (li_is_64 && other_is_32))
                return true;

            for (auto &mine : li.segments)
                for (auto &theirs : other->segments)
                    if (mine.Overlaps(theirs)) return true;
            return false;
        });
    return hit != occupants.end() ? *hit : nullptr;
}
bool LiveRegMatrix::CheckInterferenceRange(int start, int end,
int phys_reg) const
{
if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return true;
Segment range; range.start = start; range.end = end;
for (auto *other : reg_assignments_[phys_reg])
for (auto &sb : other->segments)
if (range.Overlaps(sb)) return true;
return false;
}
// ---- Public entry points ----
// Forward declarations of the two overloads (one function / a whole module).
void RunGreedyRegAlloc(MachineFunction &function);
void RunGreedyRegAlloc(MachineModule &module);
static void AllocateRegistersForFunction(MachineFunction &function)
{
if (function.GetNumVRegs() == 0) return;
// ---- 阶段 0活跃分析 + 预处理 ----
auto raw = ComputeInstLiveness(function);
auto intervals = EnhanceIntervals(raw, function);
intervals.reserve(function.GetNumVRegs() * 16);
auto &blocks = function.GetBlocks();
std::vector<int> pos_to_block;
std::vector<int> block_start_pos(blocks.size(), -1);
int global = 0;
for (int bi = 0; bi < (int)blocks.size(); ++bi)
{
if (!blocks[bi]) continue;
block_start_pos[bi] = global;
int cnt = (int)blocks[bi]->GetInstructions().size();
for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi);
global += cnt;
}
auto block_depth = AnalyzeLoopDepth(function);
ComputeSpillWeights(intervals, block_depth, pos_to_block);
PropagateCopyHints(intervals, function);
intervals.reserve(function.GetNumVRegs() * 16);
// LLVM 风格:全局 cascade 计数器
int global_cascade = 1;
// ---- 阶段 1分配循环 ----
for (int round = 0; round < MAX_ROUNDS; ++round)
{
// GP 分配GPR32 + GPR64 共享同一 LiveRegMatrix
LiveRegMatrix gp_matrix;
gp_matrix.Init(32);
std::vector<int> gp_spilled;
// 预填充上一轮已分配的 vreg
for (auto &li : intervals)
{
if (li.vreg >= 0 && li.IsAllocated() &&
(li.reg_class == RegClass::GPR32 || li.reg_class == RegClass::GPR64))
gp_matrix.ForceAssign(&li, li.assigned_reg);
}
AllocateRegClass(intervals, RegClass::GPR32, gp_matrix,
pos_to_block, function, gp_spilled);
AllocateRegClass(intervals, RegClass::GPR64, gp_matrix,
pos_to_block, function, gp_spilled);
// FP 分配
LiveRegMatrix fp_matrix;
fp_matrix.Init(32);
std::vector<int> fp_spilled;
for (auto &li : intervals)
{
if (li.vreg >= 0 && li.IsAllocated() && li.reg_class == RegClass::FPR32)
fp_matrix.ForceAssign(&li, li.assigned_reg);
}
AllocateRegClass(intervals, RegClass::FPR32, fp_matrix,
pos_to_block, function, fp_spilled);
auto spilled = gp_spilled;
spilled.insert(spilled.end(), fp_spilled.begin(), fp_spilled.end());
if (spilled.empty()) break;
// ---- 溢出重写LLVM-style spill rewrite----
// LLVM 关键设计:每次 reload 创建新 vreg让分配器在下一轮分配不同物理寄存器
// 避免多个溢出 vreg 共享同一回退寄存器导致互相覆盖。
// 参考: llvm/lib/CodeGen/InlineSpiller.cpp spill()/reload()
for (int spilled_vreg : spilled)
{
auto &li = intervals[spilled_vreg];
if (li.spill_slot < 0)
{
int size = 4;
if (li.vreg_class == VRegClass::Ptr) size = 8;
li.spill_slot = function.CreateFrameIndex(size);
}
for (int u = (int)li.uses.size() - 1; u >= 0; --u)
{
auto &use = li.uses[u];
int blk = pos_to_block[use.pos];
int local = use.pos - block_start_pos[blk];
if (use.is_def)
{
// def: 在定义后插入 StoreStack保存值到栈
blocks[blk]->InsertInst(local + 1,
MachineInstr(Opcode::StoreStack,
{Operand::VReg(li.vreg, li.vreg_class),
Operand::FrameIndex(li.spill_slot)}));
}
else
{
// use: 创建新 vregLoadStack 加载到新 vreg替换使用点
int new_vreg = function.CreateVReg(li.vreg_class);
blocks[blk]->InsertInst(local,
MachineInstr(Opcode::LoadStack,
{Operand::VReg(new_vreg, li.vreg_class),
Operand::FrameIndex(li.spill_slot)}));
// 在插入点之后搜索使用溢出 vreg 的指令并替换
auto &instructions = blocks[blk]->GetInstructions();
for (int idx = local + 1; idx < (int)instructions.size(); ++idx)
{
bool found = false;
for (auto &op : instructions[idx].GetOperands())
{
if (op.GetKind() == Operand::Kind::VReg &&
op.GetVRegId() == li.vreg)
{
op = Operand::VReg(new_vreg, li.vreg_class);
found = true;
}
}
if (found) break;
}
}
}
}
// ---- 保存已分配状态 ----
std::unordered_map<int, int> prev_assigned;
for (auto &li : intervals)
{
if (li.vreg >= 0 && li.IsAllocated())
prev_assigned[li.vreg] = li.assigned_reg;
else if (li.vreg >= 0 && li.IsSpilled())
prev_assigned[li.vreg] = -2; // 保持 spill 状态
}
// ---- 重新分析活跃 ----
raw = ComputeInstLiveness(function);
intervals = EnhanceIntervals(raw, function);
intervals.reserve(function.GetNumVRegs() * 16);
// ---- 重建位置映射 ----
pos_to_block.clear();
block_start_pos.assign(blocks.size(), -1);
int new_global = 0;
for (int bi = 0; bi < (int)blocks.size(); ++bi)
{
if (!blocks[bi]) continue;
block_start_pos[bi] = new_global;
int cnt = (int)blocks[bi]->GetInstructions().size();
for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi);
new_global += cnt;
}
// ---- 恢复已分配状态 + 递增 cascade ----
int num_new = 0;
for (auto &li : intervals)
{
auto it = prev_assigned.find(li.vreg);
if (it != prev_assigned.end())
{
li.assigned_reg = it->second;
// 已分配的保持 cascade
}
else
{
// 新 vreg由 spill 引入的 LoadStack vreg
li.assigned_reg = -1;
li.generation = 0;
num_new++;
}
}
if (num_new > 0)
{
// 只对新 vreg 重新计算 spill weight
ComputeSpillWeights(intervals, block_depth, pos_to_block);
}
PropagateCopyHints(intervals, function);
}
// ---- 最终vreg → PhysReg 重写 ----
for (auto &block : blocks)
{
if (!block) continue;
for (auto &inst : block->GetInstructions())
{
for (auto &op : inst.GetOperands())
{
if (op.GetKind() != Operand::Kind::VReg) continue;
int vreg = op.GetVRegId();
int phys = -1;
if (vreg >= 0 && vreg < (int)intervals.size())
phys = intervals[vreg].assigned_reg;
if (phys < 0)
{
auto vc = function.GetVRegClass(vreg);
if (vc == VRegClass::Ptr) phys = 47; // X16
else if (vc == VRegClass::Float) phys = 78; // S16
else phys = 16; // W16
}
else
{
if (vreg < function.GetNumVRegs())
{
auto vc = function.GetVRegClass(vreg);
if (vc == VRegClass::Ptr)
phys = phys + 31; // Wn → Xn (PhysReg 31-61)
else if (vc == VRegClass::Float)
phys = phys + 62; // → Sn (PhysReg 62-93)
// VRegClass::Int 保持原值 → Wn (PhysReg 0-30)
}
}
op = Operand::Reg(static_cast<PhysReg>(phys));
}
}
}
// ---- 收集使用的 callee-saved 寄存器LLVM PEI 风格:扫描最终 PhysReg----
{
int x19 = static_cast<int>(PhysReg::X19);
int x28 = static_cast<int>(PhysReg::X28);
int w19 = static_cast<int>(PhysReg::W19);
int w28 = static_cast<int>(PhysReg::W28);
int s16 = static_cast<int>(PhysReg::S16);
int s31 = static_cast<int>(PhysReg::S31);
bool used_x[11] = {};
bool used_s[16] = {};
for (auto &block : blocks)
{
if (!block) continue;
for (auto &inst : block->GetInstructions())
{
for (auto &op : inst.GetOperands())
{
if (op.GetKind() != Operand::Kind::Reg) continue;
int r = static_cast<int>(op.GetReg());
if (r >= w19 && r <= w28)
used_x[r - w19] = true;
else if (r >= x19 && r <= x28)
used_x[r - x19] = true;
else if (r >= s16 && r <= s31)
used_s[r - s16] = true;
}
}
}
for (int i = 0; i < 11; ++i)
if (used_x[i])
function.AddCalleeSavedReg(static_cast<PhysReg>(x19 + i));
for (int i = 0; i < 16; ++i)
if (used_s[i])
function.AddCalleeSavedReg(static_cast<PhysReg>(s16 + i));
}
}
// Runs the greedy register allocator on a single machine function.
void RunGreedyRegAlloc(MachineFunction &function)
{
    AllocateRegistersForFunction(function);
}
// Runs the greedy register allocator on every non-null function of `module`.
void RunGreedyRegAlloc(MachineModule &module)
{
    for (auto &func : module.GetFunctions())
    {
        if (!func) continue;
        RunGreedyRegAlloc(*func);
    }
}
} // namespace mir

@ -1,547 +0,0 @@
#include "mir/MIR.h"
#include <algorithm>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "utils/Log.h"
namespace mir
{
namespace
{
// ---- Phase 1 helpers -------------------------------------------------
/// Tells whether `opcode` is one of the opcodes whose operands[0] is treated
/// as a VReg definition by ExtractDefUse.
static bool HasVRegDef(Opcode opcode)
{
    switch (opcode)
    {
    case Opcode::MovImm: case Opcode::LoadStack: case Opcode::LoadGlobal:
    case Opcode::LoadGlobalAddr: case Opcode::LoadStackAddr: case Opcode::LoadMem:
    case Opcode::AddRR: case Opcode::SubRR: case Opcode::AddImm: case Opcode::SubImm:
    case Opcode::MulRR: case Opcode::DivRR: case Opcode::ModRR:
    case Opcode::AndRR: case Opcode::OrRR: case Opcode::XorRR:
    case Opcode::ShlRR: case Opcode::ShrRR: case Opcode::AsrRR: case Opcode::Asr64RR:
    case Opcode::Uxtw: case Opcode::Sxtw:
    case Opcode::CSet: case Opcode::Csel:
    case Opcode::Smull: case Opcode::Msub: case Opcode::NegRR:
    case Opcode::FAddRR: case Opcode::FSubRR: case Opcode::FMulRR: case Opcode::FDivRR:
    case Opcode::Scvtf: case Opcode::FCvtzs: case Opcode::FMovWS:
    case Opcode::MovReg:
        return true;
    default:
        break;
    }
    return false;
}
/// Splits the VReg operands of one instruction into a def and a use list.
/// `def_vreg` becomes operands[0]'s vreg id when the opcode defines a vreg
/// and operands[0] is a VReg, otherwise -1. `use_vregs` is cleared and then
/// receives every remaining VReg operand in operand order.
static void ExtractDefUse(const MachineInstr &inst, int &def_vreg,
                          std::vector<int> &use_vregs)
{
    def_vreg = -1;
    use_vregs.clear();

    const auto &operands = inst.GetOperands();
    const bool has_def = HasVRegDef(inst.GetOpcode());

    size_t next = 0;
    if (has_def)
    {
        // operands[0] is the def slot of a defining opcode: it is never a use.
        if (!operands.empty() && operands[0].GetKind() == Operand::Kind::VReg)
            def_vreg = operands[0].GetVRegId();
        next = 1;
    }

    for (; next < operands.size(); ++next)
    {
        if (operands[next].GetKind() == Operand::Kind::VReg)
            use_vregs.push_back(operands[next].GetVRegId());
    }
}
} // anonymous namespace
} // namespace mir
namespace mir
{
// ---- Block-level dataflow structures --------------------------------
// Per-block state of the backward liveness analysis. All sets hold vreg ids.
struct BlockLiveInfo
{
    std::unordered_set<int> def{};      // vregs defined before any use in the block
    std::unordered_set<int> use{};      // vregs used before any def in the block
    std::unordered_set<int> live_in{};  // vregs live on entry to the block
    std::unordered_set<int> live_out{}; // vregs live on exit from the block
    std::vector<int> successors{};      // successor block indices
    std::vector<int> predecessors{};    // predecessor block indices
};
// Computes one live interval [start, end] (global instruction positions) per
// vreg of `func`.
//
//   Phase 1: block-level backward liveness (def/use sets, successor edges,
//            worklist fixpoint for live_in / live_out).
//   Phase 2: reverse scan of every block, starting from its live_out, that
//            widens each vreg's [start, end] to every position where it is live.
//   Phase 3: one LiveInterval per vreg id, in vreg order.
//
// Global positions number the instructions of all non-null blocks in block
// order, starting at 0.
std::vector<LiveInterval> ComputeInstLiveness(MachineFunction &func)
{
    auto &blocks = func.GetBlocks();
    const int num_blocks = static_cast<int>(blocks.size());

    // ================================================================
    // Phase 1: Block-level backward liveness (fixpoint iteration)
    // ================================================================
    // 1a. Build label -> block-index mapping
    std::unordered_map<int, int> label_to_idx;
    for (int i = 0; i < num_blocks; ++i)
    {
        if (!blocks[i])
            continue;
        label_to_idx[blocks[i]->GetLabelId()] = i;
    }

    // next_present[i] = first non-null block index >= i, or -1 if none.
    // A fall-through out of block i lands in next_present[i + 1], so a null
    // slot in the block list is skipped rather than treated as a successor.
    std::vector<int> next_present(num_blocks + 1, -1);
    for (int i = num_blocks - 1; i >= 0; --i)
        next_present[i] = blocks[i] ? i : next_present[i + 1];

    // 1b. Compute per-block def/use + successors
    std::vector<BlockLiveInfo> blk_info(num_blocks);
    for (int i = 0; i < num_blocks; ++i)
    {
        if (!blocks[i])
            continue;
        auto &info = blk_info[i];
        auto &insts = blocks[i]->GetInstructions();

        for (const auto &inst : insts)
        {
            int def_vreg;
            std::vector<int> use_vregs;
            ExtractDefUse(inst, def_vreg, use_vregs);
            // Uses are handled before the def of the same instruction, so
            // "v = op v" counts as a use of the incoming value.
            for (int u : use_vregs)
            {
                if (info.def.count(u) == 0)
                    info.use.insert(u);
            }
            if (def_vreg >= 0)
            {
                // A vreg used before being defined in this block stays in
                // the use set only.
                if (info.use.count(def_vreg) == 0)
                    info.def.insert(def_vreg);
            }
        }

        // ---- Determine successors ----
        bool has_br = false;
        bool has_condbr = false;
        bool has_ret = false;
        int br_target_label = -1;
        int condbr_target_label = -1;
        for (const auto &inst : insts)
        {
            const auto opcode = inst.GetOpcode();
            const auto &ops = inst.GetOperands();
            if (opcode == Opcode::Br && !ops.empty() &&
                ops[0].GetKind() == Operand::Kind::Label)
            {
                has_br = true;
                br_target_label = ops[0].GetLabel();
            }
            else if (opcode == Opcode::CondBr && ops.size() >= 2 &&
                     ops[1].GetKind() == Operand::Kind::Label)
            {
                has_condbr = true;
                condbr_target_label = ops[1].GetLabel();
            }
            else if (opcode == Opcode::Ret)
            {
                has_ret = true;
            }
        }

        const auto add_succ = [&](int label)
        {
            auto it = label_to_idx.find(label);
            if (it != label_to_idx.end())
                info.successors.push_back(it->second);
        };
        const auto add_fallthrough = [&]()
        {
            const int next = next_present[i + 1];
            if (next >= 0)
                info.successors.push_back(next);
        };

        if (has_ret)
        {
            // No successors - function exit.
        }
        else if (has_br)
        {
            // Unconditional branch: its target covers the false path; a
            // CondBr in the same block contributes the true path.
            add_succ(br_target_label);
            if (has_condbr)
                add_succ(condbr_target_label);
        }
        else if (has_condbr)
        {
            // Conditional branch without Br: true path = target, false
            // path = next block in layout order.
            add_succ(condbr_target_label);
            add_fallthrough();
        }
        else
        {
            // Ordinary block - falls through to the next block.
            add_fallthrough();
        }
    }

    // 1c. Build predecessor lists
    for (int i = 0; i < num_blocks; ++i)
    {
        for (int s : blk_info[i].successors)
        {
            if (s >= 0 && s < num_blocks)
                blk_info[s].predecessors.push_back(i);
        }
    }

    // 1d. Worklist fixpoint; live_in starts as the use set.
    for (int i = 0; i < num_blocks; ++i)
        blk_info[i].live_in = blk_info[i].use;

    std::queue<int> worklist;
    std::vector<bool> in_queue(num_blocks, false);
    for (int i = 0; i < num_blocks; ++i)
    {
        if (blocks[i])
        {
            worklist.push(i);
            in_queue[i] = true;
        }
    }
    while (!worklist.empty())
    {
        const int b = worklist.front();
        worklist.pop();
        in_queue[b] = false;

        // live_out = union of the successors' live_in
        std::unordered_set<int> new_live_out;
        for (int s : blk_info[b].successors)
        {
            if (s < 0 || s >= num_blocks)
                continue;
            for (int v : blk_info[s].live_in)
                new_live_out.insert(v);
        }
        // live_in = use U (live_out - def)
        std::unordered_set<int> new_live_in = blk_info[b].use;
        for (int v : new_live_out)
        {
            if (blk_info[b].def.count(v) == 0)
                new_live_in.insert(v);
        }

        const bool live_in_changed = new_live_in != blk_info[b].live_in;
        // Always store live_out. When every live-out vreg is already in this
        // block's def or use set, live_in does not change, yet Phase 2 still
        // needs live_out to extend those vregs to the end of the block.
        blk_info[b].live_out = std::move(new_live_out);
        if (!live_in_changed)
            continue;

        blk_info[b].live_in = std::move(new_live_in);
        // Predecessors' live_out depends on this block's live_in.
        for (int p : blk_info[b].predecessors)
        {
            if (!in_queue[p])
            {
                in_queue[p] = true;
                worklist.push(p);
            }
        }
    }

    // ================================================================
    // Phase 2: Instruction-level interval computation (reverse scan)
    // ================================================================
    std::unordered_map<int, int> vreg_start;
    std::unordered_map<int, int> vreg_end;

    // Widens v's recorded [start, end] so that it contains `pos`.
    const auto extend = [&](int v, int pos)
    {
        auto sit = vreg_start.find(v);
        if (sit == vreg_start.end() || pos < sit->second)
            vreg_start[v] = pos;
        auto eit = vreg_end.find(v);
        if (eit == vreg_end.end() || pos > eit->second)
            vreg_end[v] = pos;
    };

    // Global position of each block's first instruction, computed once.
    std::vector<int> block_start(num_blocks, 0);
    {
        int running = 0;
        for (int i = 0; i < num_blocks; ++i)
        {
            block_start[i] = running;
            if (blocks[i])
                running += static_cast<int>(blocks[i]->GetInstructions().size());
        }
    }

    for (int bi = num_blocks - 1; bi >= 0; --bi)
    {
        if (!blocks[bi])
            continue;
        auto &insts = blocks[bi]->GetInstructions();
        const int num_instrs = static_cast<int>(insts.size());
        if (num_instrs == 0)
            continue;

        // Start with the vregs live on exit from this block.
        std::unordered_set<int> live = blk_info[bi].live_out;

        // Backward order per instruction: add uses, record every live vreg
        // at this position, then remove the def. A vreg used here is
        // therefore recorded even if it was not live below this instruction.
        for (int j = num_instrs - 1; j >= 0; --j)
        {
            const int pos = block_start[bi] + j;
            int def_vreg;
            std::vector<int> use_vregs;
            ExtractDefUse(insts[j], def_vreg, use_vregs);

            for (int u : use_vregs)
                live.insert(u);

            for (int v : live)
                extend(v, pos);

            if (def_vreg >= 0)
            {
                // Record the def position even when the def vreg is not in
                // the live set (possible at cross-block dataflow
                // boundaries): the interval must cover the definition point
                // so the allocator finds a range there.
                extend(def_vreg, pos);
                live.erase(def_vreg);
            }
        }

        // Whatever is still live at the top of block 0 is live on function
        // entry: its interval starts at position 0.
        if (bi == 0)
        {
            for (int v : live)
                extend(v, 0);
        }
    }

    // ================================================================
    // Phase 3: Build LiveInterval objects
    // ================================================================
    const int num_vregs = func.GetNumVRegs();
    std::vector<LiveInterval> intervals;
    for (int v = 0; v < num_vregs; ++v)
    {
        const auto sit = vreg_start.find(v);
        const auto eit = vreg_end.find(v);
        const int start = (sit != vreg_start.end()) ? sit->second : 0;
        const int end = (eit != vreg_end.end()) ? eit->second : 0;
        // Guard against an inverted range. A vreg that never appeared gets
        // the defaults [0, 0] above and is therefore still emitted.
        if (start > end)
            continue;

        LiveInterval li;
        li.vreg = v;
        li.start = start;
        li.end = end;
        li.vreg_class = func.GetVRegClass(v);
        li.reg_class = ToRegClass(li.vreg_class);
        li.assigned_reg = -1;
        li.hint_reg = -1;
        li.generation = 0;
        intervals.push_back(li);
    }
    return intervals;
}
namespace
{
// Location of one instruction: the index of its block plus the
// instruction's index inside that block (looked up by global position).
struct GlobalPosInfo
{
    int block_idx;
    int local_idx;
};
} // anonymous namespace
// Enriches raw live intervals with value numbers, use positions and one
// covering segment per interval, then flags segments that span a call.
//
// Instruction positions are global: blocks are visited in container order,
// null blocks are skipped, and every instruction of a block receives the next
// consecutive position.
//
// @param raw       Intervals to enrich; copied, never modified.
// @param function  Function whose blocks and instructions are scanned.
// @return          A copy of `raw` with valnos, uses and segments appended.
//
// NOTE(review): intervals are looked up as result[vreg], i.e. this relies on
// raw[i] describing vreg i — confirm against the producer of `raw`.
std::vector<LiveInterval> EnhanceIntervals(
    const std::vector<LiveInterval> &raw,
    MachineFunction &function)
{
    std::vector<LiveInterval> result = raw;
    auto &blocks = function.GetBlocks();
    const int num_blocks = static_cast<int>(blocks.size());
    const int num_intervals = static_cast<int>(result.size());

    // ---- Global start position of each block (-1 for null blocks) ----
    std::vector<int> block_start_pos(blocks.size(), -1);
    int next_pos = 0;
    for (int bi = 0; bi < num_blocks; ++bi)
    {
        if (!blocks[bi]) continue;
        block_start_pos[bi] = next_pos;
        next_pos += static_cast<int>(blocks[bi]->GetInstructions().size());
    }

    // ---- Pass A: forward scan collecting VNInfo + UsePosition ----
    // Call positions are remembered here so Pass C does not have to walk
    // every instruction a second time.
    std::vector<int> call_positions;
    for (int bi = 0; bi < num_blocks; ++bi)
    {
        if (!blocks[bi]) continue;
        auto &insts = blocks[bi]->GetInstructions();
        const int base = block_start_pos[bi];
        for (int j = 0; j < static_cast<int>(insts.size()); ++j)
        {
            const int pos = base + j;
            const auto &inst = insts[j];
            if (inst.GetOpcode() == Opcode::Call)
                call_positions.push_back(pos);

            int def_vreg = -1;
            std::vector<int> use_vregs;
            ExtractDefUse(inst, def_vreg, use_vregs);

            // A def opens a new value number and is also recorded as a
            // (defining) use position.
            if (def_vreg >= 0 && def_vreg < num_intervals)
            {
                auto &li = result[def_vreg];
                VNInfo vn;
                vn.id = static_cast<int>(li.valnos.size());
                vn.def_pos = pos;
                vn.def_opcode = inst.GetOpcode();
                li.valnos.push_back(vn);
                li.uses.push_back({pos, true, vn.id, inst.GetOpcode()});
            }

            // A use is attributed to the most recently created value number
            // (0 when no def has been seen yet).
            for (int u : use_vregs)
            {
                if (u < 0 || u >= num_intervals) continue;
                auto &li = result[u];
                const int vn_id =
                    li.valnos.empty() ? 0 : static_cast<int>(li.valnos.size()) - 1;
                li.uses.push_back({pos, false, vn_id, inst.GetOpcode()});
            }
        }
    }

    // ---- Pass B: initial segments, one span [first_use, last_use] ----
    for (auto &li : result)
    {
        if (li.uses.empty()) continue;
        Segment seg;
        seg.start = li.FirstUsePos();
        seg.end = li.LastUsePos();
        seg.vn_id = 0;
        seg.crosses_call = false;
        li.segments.push_back(seg);
    }

    // ---- Pass C: mark crosses_call ----
    // For each call, the first segment of every interval that contains the
    // call position is flagged.
    for (int call_pos : call_positions)
    {
        for (auto &li : result)
        {
            for (auto &seg : li.segments)
            {
                if (seg.Contains(call_pos))
                {
                    seg.crosses_call = true;
                    break;
                }
            }
        }
    }
    return result;
}
} // namespace mir

@ -1,734 +0,0 @@
#include "mir/MIR.h"
#include <algorithm>
#include <limits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "utils/Log.h"
namespace mir
{
namespace
{
// ---- AArch64 allocatable registers ------------------------------------
// GP allocatable: x19-x28 (callee-saved). The set is limited to callee-saved
// registers so that values are not destroyed by caller-saved clobbering
// across function calls. TODO: caller-saved registers (x8-x12, x15-x17)
// could later be added for vregs that are not live across a call.
// x0-x7 pass arguments, x13-x14 are scratch, x18 is the platform register,
// x29-x31 are reserved.
static const int GP_ALLOCATABLE[] = {19, 20, 21, 22, 23, 24, 25, 26, 27, 28};
// Count is derived from the table so the two can never drift apart.
static const int K_GP =
    static_cast<int>(sizeof(GP_ALLOCATABLE) / sizeof(GP_ALLOCATABLE[0]));
// FP allocatable: s8-s15 only. Under AAPCS64 just v8-v15 (low 64 bits) are
// callee-saved; s16-s31 are caller-saved temporaries, so a value kept there
// would be clobbered by any call made while it is live. This allocator has
// no call-crossing handling and reports every used register as callee-saved,
// therefore the pool must contain callee-saved registers only.
static const int FP_ALLOCATABLE[] = {8, 9, 10, 11, 12, 13, 14, 15};
static const int K_FP =
    static_cast<int>(sizeof(FP_ALLOCATABLE) / sizeof(FP_ALLOCATABLE[0]));
// Register number -> PhysReg.
// The number is added to a base enumerator chosen by class: PhysReg::S0 for
// Float, PhysReg::X0 for Ptr, and PhysReg::W0 for everything else.
static PhysReg NumberToPhysReg(int num, VRegClass vc)
{
    PhysReg bank_base = PhysReg::W0;
    if (vc == VRegClass::Float)
        bank_base = PhysReg::S0;
    else if (vc == VRegClass::Ptr)
        bank_base = PhysReg::X0;
    return static_cast<PhysReg>(static_cast<int>(bank_base) + num);
}
// Allocatable index -> PhysReg.
// `idx` selects an entry of FP_ALLOCATABLE (Float) or GP_ALLOCATABLE (any
// other class); the resulting register number is converted for class `vc`.
// `idx` is not range-checked.
static PhysReg AllocIdxToPhysReg(int idx, VRegClass vc)
{
    const bool is_float = (vc == VRegClass::Float);
    const int reg_number = is_float ? FP_ALLOCATABLE[idx] : GP_ALLOCATABLE[idx];
    return NumberToPhysReg(reg_number, vc);
}
// ---- Utilities ---------------------------------------------------------
// Returns true for the opcodes whose operand 0 is treated as a definition
// (written register) by the rewriting code; false for every other opcode.
static bool HasVRegDef(Opcode opcode)
{
    switch (opcode)
    {
    // Immediates, loads and address materialisation
    case Opcode::MovImm: case Opcode::LoadStack: case Opcode::LoadGlobal:
    case Opcode::LoadGlobalAddr: case Opcode::LoadStackAddr: case Opcode::LoadMem:
    // Integer arithmetic
    case Opcode::AddRR: case Opcode::SubRR: case Opcode::AddImm:
    case Opcode::SubImm: case Opcode::MulRR: case Opcode::DivRR:
    case Opcode::ModRR:
    // Bitwise operations and shifts
    case Opcode::AndRR: case Opcode::OrRR: case Opcode::XorRR:
    case Opcode::ShlRR: case Opcode::ShrRR: case Opcode::AsrRR:
    case Opcode::Asr64RR:
    // Extensions, conditionals and multiply variants
    case Opcode::Uxtw: case Opcode::Sxtw: case Opcode::CSet:
    case Opcode::Csel: case Opcode::Smull: case Opcode::Msub:
    case Opcode::NegRR:
    // Floating point and conversions
    case Opcode::FAddRR: case Opcode::FSubRR: case Opcode::FMulRR:
    case Opcode::FDivRR: case Opcode::Scvtf: case Opcode::FCvtzs:
    case Opcode::FMovWS:
    // Moves and calls
    case Opcode::MovReg: case Opcode::Call:
        return true;
    default:
        break;
    }
    return false;
}
// ---- Core data structures ----------------------------------------------
// One entry of the active list: a vreg interval that is currently live
// together with the register it occupies.
struct ActiveInterval
{
LiveInterval *interval;
int phys_reg; // index into the allocatable array (used to index reg_free), not a register number
};
// A live section of one vreg: a position range plus the register assigned
// to the vreg over that range.
struct VRegRange
{
int start; // instruction position (global index)
int end; // last covered position; lookups test start <= pos && pos <= end
int reg_idx; // index into the allocatable array, -1 means spilled
};
// Save point: at the given position a vreg has to be spilled from its
// register to the stack.
struct SavePoint
{
int pos; // instruction position
int vreg; // vreg being spilled
int reg_idx; // register holding it (allocatable-array index, converted via AllocIdxToPhysReg)
int spill_slot; // frame index the value is stored to
};
// ---- 分配器 ----------------------------------------------------------
// 从活跃列表中淘汰 end < pos 的区间
static void ExpireOldIntervals(std::vector<ActiveInterval> &active,
std::vector<bool> &reg_free,
int pos)
{
for (auto &a : active)
{
if (a.interval->end < pos)
reg_free[a.phys_reg] = true;
}
active.erase(
std::remove_if(active.begin(), active.end(),
[pos](const ActiveInterval &a)
{ return a.interval->end < pos; }),
active.end());
}
// Returns the index of the first `true` (free) slot in `reg_free`, or -1 when
// no slot is free (including when the table is empty).
static int FindFreeReg(const std::vector<bool> &reg_free)
{
    const auto first_free = std::find(reg_free.begin(), reg_free.end(), true);
    if (first_free == reg_free.end())
        return -1;
    return static_cast<int>(first_free - reg_free.begin());
}
// 返回活跃列表中 end 最大者的索引
static int SelectSpill(const std::vector<ActiveInterval> &active)
{
int farthest = -1;
int farthest_end = -1;
for (size_t i = 0; i < active.size(); ++i)
{
if (active[i].interval->end > farthest_end)
{
farthest_end = active[i].interval->end;
farthest = static_cast<int>(i);
}
}
return farthest;
}
// Returns the spill slot already recorded for `vreg` in `vreg_to_slot`; when
// none exists, creates a frame index (8 bytes for Ptr-class vregs, 4 bytes
// otherwise), records it in the map and returns it.
static int GetOrCreateSpillSlot(MachineFunction &func, int vreg,
                                std::unordered_map<int, int> &vreg_to_slot)
{
    const auto known = vreg_to_slot.find(vreg);
    if (known == vreg_to_slot.end())
    {
        const bool is_ptr = (func.GetVRegClass(vreg) == VRegClass::Ptr);
        const int slot = func.CreateFrameIndex(is_ptr ? 8 : 4);
        vreg_to_slot[vreg] = slot;
        return slot;
    }
    return known->second;
}
// ---- Forward declaration -------------------------------------------------
// Declared here because RunLinearScan calls it before its definition.
// Takes the per-vreg range assignments, the vreg -> spill slot map and the
// save points (non-const: they are sorted by the callee).
static void RewriteWithAllocation(
MachineFunction &func,
const std::vector<std::vector<VRegRange>> &vreg_ranges,
const std::unordered_map<int, int> &vreg_to_slot,
std::vector<SavePoint> &save_points);
// ---- Main allocation: linear scan with interval splitting (after Wimmer &
// Mössenböck, 2005) ---------------------------------------------------------
// Computes live intervals for `func`, walks them in order of increasing start
// position and gives each one either a register of its pool or a spill slot:
//   * a free register is taken when one exists;
//   * otherwise, if the active interval with the farthest end outlives the
//     current one, its register is stolen: the victim keeps the register up
//     to cur.start, a save point stores it to the stack there, and the rest
//     of its range is marked spilled;
//   * otherwise the current interval itself is spilled.
// Finally every register that was handed out is reported through
// AddCalleeSavedReg and the instructions are rewritten.
//
// GP and FP intervals are tracked in separate active lists. Each list is
// paired with its own free table, so expiry never releases a slot in the
// other pool and eviction never steals a register of the wrong class.
static void RunLinearScan(MachineFunction &func)
{
    auto intervals = ComputeInstLiveness(func);
    if (intervals.empty())
        return;
    const int num_vregs = func.GetNumVRegs();

    // Process intervals by increasing start position.
    std::sort(intervals.begin(), intervals.end(),
              [](const LiveInterval &a, const LiveInterval &b)
              { return a.start < b.start; });

    // Allocation results.
    std::vector<std::vector<VRegRange>> vreg_ranges(num_vregs);
    std::unordered_map<int, int> vreg_to_slot; // vreg -> spill slot
    std::vector<SavePoint> save_points;

    // Free tables, one flag per allocatable-array index.
    std::vector<bool> gp_free(K_GP, true);
    std::vector<bool> fp_free(K_FP, true);

    // Active lists, one per pool (unsorted; SelectSpill scans them).
    // Entries point into `intervals`, which is not resized below.
    std::vector<ActiveInterval> gp_active;
    std::vector<ActiveInterval> fp_active;

    for (LiveInterval &cur : intervals)
    {
        // vreg ids index vreg_ranges; anything outside cannot be recorded.
        if (cur.vreg < 0 || cur.vreg >= num_vregs)
            continue;

        // Skip intervals already fully covered by a recorded range.
        bool already_covered = false;
        for (const auto &rng : vreg_ranges[cur.vreg])
        {
            if (rng.start <= cur.start && cur.end <= rng.end)
            {
                already_covered = true;
                break;
            }
        }
        if (already_covered)
            continue;

        // Select the pool (free table + active list) for this class.
        const bool is_float = (cur.vreg_class == VRegClass::Float);
        std::vector<bool> &reg_free = is_float ? fp_free : gp_free;
        std::vector<ActiveInterval> &active = is_float ? fp_active : gp_active;

        // 1. Retire intervals of this pool that ended before cur.start.
        ExpireOldIntervals(active, reg_free, cur.start);

        // 2. Take a free register when there is one.
        const int free_reg = FindFreeReg(reg_free);
        if (free_reg >= 0)
        {
            reg_free[free_reg] = false;
            active.push_back({&cur, free_reg});
            vreg_ranges[cur.vreg].push_back({cur.start, cur.end, free_reg});
            continue;
        }

        // 3. No free register: look at the active interval ending farthest.
        const int spill_idx = SelectSpill(active);
        const bool can_steal =
            spill_idx >= 0 && active[spill_idx].interval->end > cur.end;
        if (!can_steal)
        {
            // Nothing active outlives the current interval (or nothing is
            // active at all): spill the current interval. It occupies no
            // register and does not join the active list.
            const int slot = GetOrCreateSpillSlot(func, cur.vreg, vreg_to_slot);
            vreg_ranges[cur.vreg].push_back({cur.start, cur.end, -1});
            cur.spilled = true;
            cur.spill_slot = slot;
            continue;
        }

        // 4. Steal the register of the farthest-ending interval.
        const int stolen_reg = active[spill_idx].phys_reg;
        const int evicted_vreg = active[spill_idx].interval->vreg;
        auto &evicted_ranges = vreg_ranges[evicted_vreg];
        if (!evicted_ranges.empty() && evicted_ranges.back().reg_idx == stolen_reg)
        {
            // Split the victim: it keeps the register up to cur.start, where
            // a save point stores it to the stack before `cur` overwrites
            // the register; the remainder of its range is spilled.
            const int orig_end = evicted_ranges.back().end;
            evicted_ranges.back().end = cur.start;
            evicted_ranges.push_back({cur.start + 1, orig_end, -1});
            const int slot = GetOrCreateSpillSlot(func, evicted_vreg, vreg_to_slot);
            save_points.push_back({cur.start, evicted_vreg, stolen_reg, slot});
        }
        active.erase(active.begin() + spill_idx);

        // The current interval takes over the stolen register.
        reg_free[stolen_reg] = false;
        active.push_back({&cur, stolen_reg});
        vreg_ranges[cur.vreg].push_back({cur.start, cur.end, stolen_reg});
    }

    // ---- Record callee-saved register usage ----
    // Collect the allocatable indices actually handed out, per pool, and
    // report each once so the frame code can save/restore them.
    std::unordered_set<int> used_callee_gp;
    std::unordered_set<int> used_callee_fp;
    for (int vi = 0; vi < num_vregs; ++vi)
    {
        for (const auto &rng : vreg_ranges[vi])
        {
            if (rng.reg_idx < 0)
                continue;
            VRegClass vc = func.GetVRegClass(vi);
            if (vc == VRegClass::Float)
                used_callee_fp.insert(rng.reg_idx);
            else
                used_callee_gp.insert(rng.reg_idx);
        }
    }
    for (int idx : used_callee_gp)
        func.AddCalleeSavedReg(AllocIdxToPhysReg(idx, VRegClass::Int));
    for (int idx : used_callee_fp)
        func.AddCalleeSavedReg(AllocIdxToPhysReg(idx, VRegClass::Float));

    // ---- Rewrite instructions ----
    RewriteWithAllocation(func, vreg_ranges, vreg_to_slot, save_points);
}
// ---- Scratch register selection ------------------------------------------
// Picks a GP register number to use as scratch for `inst`.
// Preference order: x14 (not in the allocatable list), then each entry of
// GP_ALLOCATABLE. A candidate is rejected when it appears as a register
// operand of `inst` — in either its W or its X form — or when it equals a
// value stored in `pos_regs`. Falls back to GP_ALLOCATABLE[0] when every
// candidate is rejected.
//
// @param inst      Instruction the scratch register is needed for.
// @param pos_regs  vreg -> register values already in use at this position.
// @return          A GP register number (not an allocatable-array index).
//
// NOTE(review): candidates are register numbers, so `pos_regs` values are
// compared as register numbers here — confirm that callers do not store
// allocatable-array indices in that map.
static int PickGPScratchReg(const MachineInstr &inst,
                            const std::unordered_map<int, int> &pos_regs)
{
    // GP register number of a physical register, or -1 when the register is
    // neither a W nor an X register. Both forms must be decoded: pointer
    // operands use X registers, and subtracting W0 from them would yield a
    // number that never matches a candidate.
    const auto gp_number = [](PhysReg reg) -> int
    {
        if (reg >= PhysReg::W0 && reg <= PhysReg::W30)
            return static_cast<int>(reg) - static_cast<int>(PhysReg::W0);
        if (reg >= PhysReg::X0 && reg <= PhysReg::X30)
            return static_cast<int>(reg) - static_cast<int>(PhysReg::X0);
        return -1;
    };

    const auto is_available = [&](int candidate) -> bool
    {
        for (const auto &op : inst.GetOperands())
        {
            if (op.GetKind() == Operand::Kind::Reg &&
                gp_number(op.GetReg()) == candidate)
                return false;
        }
        for (const auto &kv : pos_regs)
        {
            if (kv.second == candidate)
                return false;
        }
        return true;
    };

    // x14 first: it is outside the allocatable list.
    if (is_available(14))
        return 14;

    // Otherwise the first allocatable register that does not conflict.
    for (int candidate : GP_ALLOCATABLE)
    {
        if (is_available(candidate))
            return candidate;
    }
    return GP_ALLOCATABLE[0];
}
// Picks an FP register number to use as scratch for `inst`: the first entry
// of FP_ALLOCATABLE that neither matches a register operand of `inst`
// (operand register minus PhysReg::S0) nor equals a value stored in
// `pos_regs`. Falls back to FP_ALLOCATABLE[0] when every entry conflicts.
static int PickFPScratchReg(const MachineInstr &inst,
                            const std::unordered_map<int, int> &pos_regs)
{
    const int s0 = static_cast<int>(PhysReg::S0);
    for (int candidate : FP_ALLOCATABLE)
    {
        bool taken = false;

        // Conflict with a register operand of the instruction?
        for (const auto &op : inst.GetOperands())
        {
            if (op.GetKind() != Operand::Kind::Reg)
                continue;
            if (static_cast<int>(op.GetReg()) - s0 == candidate)
            {
                taken = true;
                break;
            }
        }
        if (taken)
            continue;

        // Conflict with a register recorded for this position?
        for (const auto &kv : pos_regs)
        {
            if (kv.second == candidate)
            {
                taken = true;
                break;
            }
        }
        if (!taken)
            return candidate;
    }
    return FP_ALLOCATABLE[0];
}
// ---- Save point ordering ---------------------------------------------------
// Sorts the save points in place by ascending instruction position. The
// relative order of save points sharing a position is unspecified
// (std::sort is not stable).
static void SortSavePoints(std::vector<SavePoint> &save_points)
{
    const auto earlier = [](const SavePoint &lhs, const SavePoint &rhs)
    { return lhs.pos < rhs.pos; };
    std::sort(save_points.begin(), save_points.end(), earlier);
}
// ---- RewriteWithAllocation -------------------------------------------
// Rewrites every instruction of `func` so that VReg operands become physical
// registers according to `vreg_ranges`, and inserts spill code:
//   * a StoreStack for each save point, emitted before the instruction at
//     the save point's position;
//   * a LoadStack into a scratch register before an instruction that uses a
//     vreg whose covering range is spilled (reg_idx == -1);
//   * a StoreStack after an instruction that defines a vreg whose covering
//     range is spilled.
// Positions are counted globally, one per original instruction, in block
// order. `save_points` is sorted in place by position.
static void RewriteWithAllocation(
MachineFunction &func,
const std::vector<std::vector<VRegRange>> &vreg_ranges,
const std::unordered_map<int, int> &vreg_to_slot,
std::vector<SavePoint> &save_points)
{
SortSavePoints(save_points);
size_t next_save = 0;
// Global instruction position counter (based on the original instruction order)
int global_pos = 0;
// NOTE(review): blocks are dereferenced without a null check — confirm that
// GetBlocks() never holds null entries here.
for (auto &block : func.GetBlocks())
{
std::vector<MachineInstr> new_insts;
for (auto &inst : block->GetInstructions())
{
auto opcode = inst.GetOpcode();
auto &ops = inst.GetOperands();
// ---- Save points: store the evicted vreg's value before this position ----
while (next_save < save_points.size() &&
save_points[next_save].pos <= global_pos)
{
const auto &sp = save_points[next_save];
VRegClass vc = func.GetVRegClass(sp.vreg);
PhysReg pr = AllocIdxToPhysReg(sp.reg_idx, vc);
new_insts.push_back(
MachineInstr(Opcode::StoreStack,
{Operand::Reg(pr), Operand::FrameIndex(sp.spill_slot)}));
++next_save;
}
// ---- Determine which range each def/use vreg falls into at this position ----
// Build the set of registers already in use at this position (for scratch selection).
// NOTE(review): the values stored below are allocatable-array indices, while
// the scratch pickers compare pos_regs values against register numbers —
// confirm this is intended.
std::unordered_map<int, int> pos_regs; // vreg -> reg_idx at this position
std::unordered_map<int, int> vreg_range_idx; // vreg -> range index
bool has_def = HasVRegDef(opcode);
int def_vreg = -1;
for (size_t i = 0; i < ops.size(); ++i)
{
if (ops[i].GetKind() != Operand::Kind::VReg)
continue;
// Operand 0 of a defining opcode is the def; remember it and handle it separately below
if (has_def && i == 0)
{
def_vreg = ops[i].GetVRegId();
continue;
}
int v = ops[i].GetVRegId();
if (v < 0 || v >= static_cast<int>(vreg_ranges.size()))
continue;
// Find the range covering the current position
int reg_idx = -1;
for (size_t ri = 0; ri < vreg_ranges[v].size(); ++ri)
{
const auto &rng = vreg_ranges[v][ri];
if (rng.start <= global_pos && global_pos <= rng.end)
{
reg_idx = rng.reg_idx;
break;
}
}
if (reg_idx >= 0)
pos_regs[v] = reg_idx;
}
// Handle the def vreg as well
if (def_vreg >= 0 && def_vreg < static_cast<int>(vreg_ranges.size()))
{
int reg_idx = -1;
for (size_t ri = 0; ri < vreg_ranges[def_vreg].size(); ++ri)
{
const auto &rng = vreg_ranges[def_vreg][ri];
if (rng.start <= global_pos && global_pos <= rng.end)
{
reg_idx = rng.reg_idx;
break;
}
}
if (reg_idx >= 0)
pos_regs[def_vreg] = reg_idx;
}
// ---- Spilled uses: insert LoadStack ----
// Collect every spilled use vreg (reg_idx == -1 in the range covering this position)
std::unordered_set<int> spilled_uses;
for (size_t i = 0; i < ops.size(); ++i)
{
if (ops[i].GetKind() != Operand::Kind::VReg)
continue;
if (has_def && i == 0)
continue;
int v = ops[i].GetVRegId();
if (v < 0 || v >= static_cast<int>(vreg_ranges.size()))
continue;
// Check the ranges: a load is needed when the covering range has reg_idx == -1
bool needs_load = false;
for (const auto &rng : vreg_ranges[v])
{
if (rng.start <= global_pos && global_pos <= rng.end)
{
if (rng.reg_idx == -1)
needs_load = true;
break;
}
}
if (needs_load && !spilled_uses.count(v))
spilled_uses.insert(v);
}
for (int v : spilled_uses)
{
auto slot_it = vreg_to_slot.find(v);
if (slot_it == vreg_to_slot.end())
continue;
int slot = slot_it->second;
VRegClass vc = func.GetVRegClass(v);
int scratch = (vc == VRegClass::Float)
? PickFPScratchReg(inst, pos_regs)
: PickGPScratchReg(inst, pos_regs);
PhysReg load_reg = NumberToPhysReg(scratch, vc);
new_insts.push_back(
MachineInstr(Opcode::LoadStack,
{Operand::Reg(load_reg), Operand::FrameIndex(slot)}));
// Map this vreg to the scratch register at this position (a register number)
pos_regs[v] = scratch;
// Replace every operand of the instruction that refers to this vreg
// (this includes operand 0 when the same vreg is also the def).
// NOTE(review): the const_cast suggests `ops` is const-qualified here — TODO confirm.
for (auto &op : ops)
{
if (op.GetKind() == Operand::Kind::VReg && op.GetVRegId() == v)
{
const_cast<Operand &>(op) = Operand::Reg(load_reg);
}
}
}
// ---- Replace all remaining VReg operands with PhysReg ----
for (auto &op : ops)
{
if (op.GetKind() != Operand::Kind::VReg)
continue;
int v = op.GetVRegId();
// NOTE(review): GetVRegClass(v) runs before v is range-checked below.
VRegClass vc = func.GetVRegClass(v);
if (v < 0 || v >= static_cast<int>(vreg_ranges.size()))
{
// vreg out of range (temporary vreg): replace with a scratch register
int fallback = (vc == VRegClass::Float)
? PickFPScratchReg(inst, pos_regs)
: PickGPScratchReg(inst, pos_regs);
const_cast<Operand &>(op) = Operand::Reg(NumberToPhysReg(fallback, vc));
continue;
}
// Find the register assigned at the current position
int reg_idx = -1;
for (const auto &rng : vreg_ranges[v])
{
if (rng.start <= global_pos && global_pos <= rng.end)
{
reg_idx = rng.reg_idx;
break;
}
}
if (reg_idx >= 0)
{
// In a register: replace directly
const_cast<Operand &>(op) = Operand::Reg(AllocIdxToPhysReg(reg_idx, vc));
}
else
{
// Spilled, or no range covers this position: replace with a scratch register
auto slot_it = vreg_to_slot.find(v);
int scratch = (vc == VRegClass::Float)
? PickFPScratchReg(inst, pos_regs)
: PickGPScratchReg(inst, pos_regs);
const_cast<Operand &>(op) = Operand::Reg(NumberToPhysReg(scratch, vc));
if (slot_it == vreg_to_slot.end())
{
// Neither a slot nor a register: the scratch is used as-is and nothing is stored (there is no slot)
}
else
{
pos_regs[v] = scratch;
}
}
}
// ---- Emit the rewritten instruction ----
// The instruction is moved out of the old list, which is replaced wholesale after the loop.
new_insts.push_back(std::move(const_cast<MachineInstr &>(inst)));
// ---- Spilled def: insert StoreStack ----
if (def_vreg >= 0 && def_vreg < static_cast<int>(vreg_ranges.size()))
{
// Check whether the def vreg is spilled at this position
bool needs_store = false;
for (const auto &rng : vreg_ranges[def_vreg])
{
if (rng.start <= global_pos && global_pos <= rng.end)
{
if (rng.reg_idx == -1)
needs_store = true;
break;
}
}
if (needs_store)
{
auto slot_it = vreg_to_slot.find(def_vreg);
if (slot_it != vreg_to_slot.end())
{
// Find the result register in the instruction just emitted: the first
// Reg operand whose bank (GP vs FP) matches the def vreg's class.
// Defaults to W0 when no operand matches.
const auto &last_inst = new_insts.back();
PhysReg result_reg = PhysReg::W0;
VRegClass vc = func.GetVRegClass(def_vreg);
for (const auto &op : last_inst.GetOperands())
{
if (op.GetKind() == Operand::Kind::Reg)
{
PhysReg r = op.GetReg();
bool is_gp = (r >= PhysReg::W0 && r <= PhysReg::W30) ||
(r >= PhysReg::X0 && r <= PhysReg::X30);
bool is_fp = (r >= PhysReg::S0 && r <= PhysReg::S31);
if ((vc == VRegClass::Float && is_fp) ||
(vc != VRegClass::Float && is_gp))
{
result_reg = r;
break;
}
}
}
new_insts.push_back(
MachineInstr(Opcode::StoreStack,
{Operand::Reg(result_reg), Operand::FrameIndex(slot_it->second)}));
}
}
}
++global_pos;
}
// Replace the block's instruction list with the rewritten sequence
block->GetInstructions() = std::move(new_insts);
}
}
} // anonymous namespace
} // namespace mir
// ---- Public API -----------------------------------------------------------
// Both entry points below are compiled out with `#if 0`, so this namespace
// block currently contributes no definitions.
namespace mir
{
#if 0
// Per-function entry point: does nothing for a function without vregs,
// otherwise runs the linear-scan allocator on it.
void RunLinearScanRegAlloc(MachineFunction &func)
{
if (func.GetNumVRegs() == 0)
return;
RunLinearScan(func);
}
#endif
#if 0
// Module entry point: runs the per-function overload on every non-null
// function of the module.
void RunLinearScanRegAlloc(MachineModule &module)
{
for (auto &function : module.GetFunctions())
{
if (function)
RunLinearScanRegAlloc(*function);
}
}
#endif
} // namespace mir
Loading…
Cancel
Save