|
|
|
|
@ -1,791 +0,0 @@
|
|
|
|
|
#include "mir/GreedyAlloc.h"
|
|
|
|
|
#include "mir/MIR.h"
|
|
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
#include <cmath>
|
|
|
|
|
#include <limits>
|
|
|
|
|
#include <queue>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include <unordered_set>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
namespace mir
|
|
|
|
|
{
|
|
|
|
|
namespace
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
// ---- Allocatable register sets ----
// GP: excludes x0-x7 (argument passing), x13-x14 (temporaries used by
// lowering), x16 (spill fallback, see below), x18 (platform register) and
// x29-x30 (FP/LR).
//
// Register number 16 is deliberately NOT allocatable in either class: the final
// vreg -> PhysReg rewrite in this file maps every virtual register that has no
// physical register (assigned_reg < 0, i.e. spilled or left unallocated) to
// W16 / X16 / S16.  Such a vreg is not recorded in the LiveRegMatrix, so if 16
// were also handed out to ordinary live intervals, a spilled definition could
// silently overwrite a live value.
// NOTE(review): the original comment also named x17 as a spill fallback, but no
// code in this file uses it that way, so it stays allocatable - confirm against
// the lowering/emission stage.
constexpr int GP_ALLOCATABLE[] = {8, 9, 10, 11, 12, 15, 17,
                                  19, 20, 21, 22, 23, 24, 25, 26, 27, 28};
// Number of entries in GP_ALLOCATABLE (derived so the two cannot drift apart).
constexpr int GP_COUNT =
    static_cast<int>(sizeof(GP_ALLOCATABLE) / sizeof(GP_ALLOCATABLE[0]));

// FP: S0-S1 are argument/return registers and are not allocatable; S16 is the
// FP spill fallback (see above).  S2-S9 and S17-S31 are allocatable.
constexpr int FP_ALLOCATABLE[] = {2, 3, 4, 5, 6, 7, 8, 9,
                                  17, 18, 19, 20, 21, 22, 23, 24,
                                  25, 26, 27, 28, 29, 30, 31};
// Number of entries in FP_ALLOCATABLE.
constexpr int FP_COUNT =
    static_cast<int>(sizeof(FP_ALLOCATABLE) / sizeof(FP_ALLOCATABLE[0]));

// Maximum number of allocate -> spill-rewrite rounds per function.
// (LLVM usually converges within 1-2 rounds.)
constexpr int MAX_ROUNDS = 3;
|
|
|
|
|
|
|
|
|
|
// Returns true when register number `phys_reg` is treated as caller-saved,
// i.e. when it is not greater than 17.  Callers use this to keep intervals that
// cross a call out of such registers.
// NOTE(review): the allocation helpers in this file apply this predicate to FP
// register numbers as well.  AAPCS64 lists v8-v15 as the callee-saved FP
// registers, which does not match a "<= 17" split - confirm this is intended.
bool IsCallerSavedGP(int phys_reg)
{
    constexpr int kLastCallerSaved = 17;
    return !(phys_reg > kLastCallerSaved);
}
|
|
|
|
|
|
|
|
|
|
// Returns the allocatable register list for `rc` and stores its length in
// `count`.  GPR32 and GPR64 share the GP list; every other class gets the FP
// list.
const int* GetRegList(RegClass rc, int& count)
{
    const bool is_gp = (rc == RegClass::GPR32) || (rc == RegClass::GPR64);
    count = is_gp ? GP_COUNT : FP_COUNT;
    return is_gp ? GP_ALLOCATABLE : FP_ALLOCATABLE;
}
|
|
|
|
|
|
|
|
|
|
// ---- 启发式 spill 权重(LLVM 简化版:Normalise(Σ use_freq) / Length)----
|
|
|
|
|
// LLVM 使用完整的 block frequency 分析;我们使用循环深度作为近似。
|
|
|
|
|
// 堆排序:高 cascade(已被驱逐过的)永远排在低 cascade 之后;
|
|
|
|
|
// 同等 cascade 按 spill_weight 降序(堆顶权重最大,优先分配)。
|
|
|
|
|
|
|
|
|
|
// heap 存储 vreg 索引
|
|
|
|
|
// Stage 0 (new): 短活范围优先——弦图完美消除序近似
|
|
|
|
|
// Stage 1+ (deferred/evicted): spill_weight 降序
|
|
|
|
|
struct SpillWeightCmp
|
|
|
|
|
{
|
|
|
|
|
const std::vector<LiveInterval>& intervals;
|
|
|
|
|
explicit SpillWeightCmp(const std::vector<LiveInterval>& ivs) : intervals(ivs) {}
|
|
|
|
|
bool operator()(int a, int b) const
|
|
|
|
|
{
|
|
|
|
|
const auto& la = intervals[a];
|
|
|
|
|
const auto& lb = intervals[b];
|
|
|
|
|
if (la.generation != lb.generation)
|
|
|
|
|
return la.generation > lb.generation;
|
|
|
|
|
if (la.deferred_count == 0 && lb.deferred_count == 0)
|
|
|
|
|
return la.Length() > lb.Length();
|
|
|
|
|
return la.spill_weight < lb.spill_weight;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// ---- def/use 提取 ----
|
|
|
|
|
static bool HasVRegDef(Opcode opcode)
|
|
|
|
|
{
|
|
|
|
|
switch (opcode)
|
|
|
|
|
{
|
|
|
|
|
case Opcode::MovImm: case Opcode::LoadStack: case Opcode::LoadGlobal:
|
|
|
|
|
case Opcode::LoadGlobalAddr: case Opcode::LoadStackAddr: case Opcode::LoadMem:
|
|
|
|
|
case Opcode::AddRR: case Opcode::SubRR: case Opcode::AddImm:
|
|
|
|
|
case Opcode::SubImm: case Opcode::MulRR: case Opcode::DivRR:
|
|
|
|
|
case Opcode::ModRR: case Opcode::AndRR: case Opcode::OrRR:
|
|
|
|
|
case Opcode::XorRR: case Opcode::ShlRR: case Opcode::ShrRR:
|
|
|
|
|
case Opcode::AsrRR: case Opcode::Asr64RR: case Opcode::Uxtw:
|
|
|
|
|
case Opcode::Sxtw: case Opcode::CSet: case Opcode::Csel:
|
|
|
|
|
case Opcode::Smull: case Opcode::Msub: case Opcode::NegRR:
|
|
|
|
|
case Opcode::FAddRR: case Opcode::FSubRR: case Opcode::FMulRR:
|
|
|
|
|
case Opcode::FDivRR: case Opcode::Scvtf: case Opcode::FCvtzs:
|
|
|
|
|
case Opcode::FMovWS: case Opcode::MovReg:
|
|
|
|
|
return true;
|
|
|
|
|
default: return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void ExtractDefUse(const MachineInstr &inst, int &def_vreg,
|
|
|
|
|
std::vector<int> &use_vregs)
|
|
|
|
|
{
|
|
|
|
|
def_vreg = -1;
|
|
|
|
|
use_vregs.clear();
|
|
|
|
|
const auto &ops = inst.GetOperands();
|
|
|
|
|
const auto opcode = inst.GetOpcode();
|
|
|
|
|
if (HasVRegDef(opcode) && !ops.empty() &&
|
|
|
|
|
ops[0].GetKind() == Operand::Kind::VReg)
|
|
|
|
|
def_vreg = ops[0].GetVRegId();
|
|
|
|
|
for (size_t i = 0; i < ops.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
if (HasVRegDef(opcode) && i == 0) continue;
|
|
|
|
|
if (ops[i].GetKind() == Operand::Kind::VReg)
|
|
|
|
|
use_vregs.push_back(ops[i].GetVRegId());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 循环深度分析 ----
|
|
|
|
|
std::vector<int> AnalyzeLoopDepth(MachineFunction &func)
|
|
|
|
|
{
|
|
|
|
|
auto &blocks = func.GetBlocks();
|
|
|
|
|
int n = (int)blocks.size();
|
|
|
|
|
std::vector<int> depth(n, 0);
|
|
|
|
|
std::unordered_map<int, int> label_to_idx;
|
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
|
if (blocks[i]) label_to_idx[blocks[i]->GetLabelId()] = i;
|
|
|
|
|
|
|
|
|
|
struct Edge { int src; int dst; };
|
|
|
|
|
std::vector<Edge> back_edges;
|
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (!blocks[i]) continue;
|
|
|
|
|
for (auto &inst : blocks[i]->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
int target_label = -1;
|
|
|
|
|
auto opcode = inst.GetOpcode();
|
|
|
|
|
if (opcode == Opcode::Br && !inst.GetOperands().empty() &&
|
|
|
|
|
inst.GetOperands()[0].GetKind() == Operand::Kind::Label)
|
|
|
|
|
target_label = inst.GetOperands()[0].GetLabel();
|
|
|
|
|
else if (opcode == Opcode::CondBr && inst.GetOperands().size() >= 2 &&
|
|
|
|
|
inst.GetOperands()[1].GetKind() == Operand::Kind::Label)
|
|
|
|
|
target_label = inst.GetOperands()[1].GetLabel();
|
|
|
|
|
if (target_label < 0) continue;
|
|
|
|
|
auto it = label_to_idx.find(target_label);
|
|
|
|
|
if (it != label_to_idx.end() && (int)it->second <= i)
|
|
|
|
|
back_edges.push_back({i, (int)it->second});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto &be : back_edges)
|
|
|
|
|
{
|
|
|
|
|
int header = be.dst;
|
|
|
|
|
std::unordered_set<int> body;
|
|
|
|
|
std::queue<int> q;
|
|
|
|
|
q.push(be.src);
|
|
|
|
|
while (!q.empty())
|
|
|
|
|
{
|
|
|
|
|
int cur = q.front(); q.pop();
|
|
|
|
|
if (cur == header || body.count(cur)) continue;
|
|
|
|
|
body.insert(cur);
|
|
|
|
|
if (cur > 0 && !body.count(cur - 1)) q.push(cur - 1);
|
|
|
|
|
for (int p = 0; p < n; ++p)
|
|
|
|
|
{
|
|
|
|
|
if (!blocks[p]) continue;
|
|
|
|
|
for (auto &inst : blocks[p]->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
int tgt = -1;
|
|
|
|
|
if (inst.GetOpcode() == Opcode::Br && !inst.GetOperands().empty() &&
|
|
|
|
|
inst.GetOperands()[0].GetKind() == Operand::Kind::Label)
|
|
|
|
|
tgt = inst.GetOperands()[0].GetLabel();
|
|
|
|
|
else if (inst.GetOpcode() == Opcode::CondBr &&
|
|
|
|
|
inst.GetOperands().size() >= 2 &&
|
|
|
|
|
inst.GetOperands()[1].GetKind() == Operand::Kind::Label)
|
|
|
|
|
tgt = inst.GetOperands()[1].GetLabel();
|
|
|
|
|
auto it2 = label_to_idx.find(tgt);
|
|
|
|
|
if (it2 != label_to_idx.end() && (int)it2->second == cur && !body.count(p))
|
|
|
|
|
q.push(p);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
body.insert(header);
|
|
|
|
|
int max_existing = 0;
|
|
|
|
|
for (int b : body) max_existing = std::max(max_existing, depth[b]);
|
|
|
|
|
for (int b : body) depth[b] = std::max(depth[b], max_existing + 1);
|
|
|
|
|
}
|
|
|
|
|
return depth;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- Spill Weight ----
|
|
|
|
|
void ComputeSpillWeights(std::vector<LiveInterval> &intervals,
|
|
|
|
|
const std::vector<int> &block_depth,
|
|
|
|
|
const std::vector<int> &pos_to_block)
|
|
|
|
|
{
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
float w = 0.0f;
|
|
|
|
|
for (auto &use : li.uses)
|
|
|
|
|
{
|
|
|
|
|
int block = (use.pos >= 0 && use.pos < (int)pos_to_block.size())
|
|
|
|
|
? pos_to_block[use.pos] : 0;
|
|
|
|
|
int d = (block >= 0 && block < (int)block_depth.size())
|
|
|
|
|
? block_depth[block] : 0;
|
|
|
|
|
float mult = std::pow(10.0f, (float)d);
|
|
|
|
|
if (use.is_def) mult *= 0.5f;
|
|
|
|
|
w += mult;
|
|
|
|
|
}
|
|
|
|
|
li.spill_weight = w / li.Length();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- Copy Hints ----
|
|
|
|
|
void PropagateCopyHints(std::vector<LiveInterval> &intervals,
|
|
|
|
|
MachineFunction &func)
|
|
|
|
|
{
|
|
|
|
|
for (auto &block : func.GetBlocks())
|
|
|
|
|
{
|
|
|
|
|
if (!block) continue;
|
|
|
|
|
for (auto &inst : block->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
if (inst.GetOpcode() != Opcode::MovReg) continue;
|
|
|
|
|
auto &ops = inst.GetOperands();
|
|
|
|
|
if (ops.size() < 2) continue;
|
|
|
|
|
if (ops[0].GetKind() != Operand::Kind::VReg) continue;
|
|
|
|
|
if (ops[1].GetKind() != Operand::Kind::VReg) continue;
|
|
|
|
|
int dst = ops[0].GetVRegId();
|
|
|
|
|
int src = ops[1].GetVRegId();
|
|
|
|
|
if (dst < 0 || dst >= (int)intervals.size()) continue;
|
|
|
|
|
if (src < 0 || src >= (int)intervals.size()) continue;
|
|
|
|
|
if (intervals[src].IsAllocated())
|
|
|
|
|
intervals[dst].hint_reg = intervals[src].assigned_reg;
|
|
|
|
|
else if (intervals[dst].IsAllocated())
|
|
|
|
|
intervals[src].hint_reg = intervals[dst].assigned_reg;
|
|
|
|
|
else if (intervals[src].hint_reg >= 0)
|
|
|
|
|
intervals[dst].hint_reg = intervals[src].hint_reg;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TryAssign ----
|
|
|
|
|
bool TryAssign(LiveInterval &li, LiveRegMatrix &m, int hint)
|
|
|
|
|
{
|
|
|
|
|
if (hint < 0) return false;
|
|
|
|
|
if (IsCallerSavedGP(hint) && li.SegmentCrossesCall()) return false;
|
|
|
|
|
if (!m.CheckInterference(li, hint) && m.Assign(&li, hint))
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = hint;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TryAnyFreeReg ----
|
|
|
|
|
bool TryAnyFreeReg(LiveInterval &li, LiveRegMatrix &m)
|
|
|
|
|
{
|
|
|
|
|
int n = 0;
|
|
|
|
|
const int *regs = GetRegList(li.reg_class, n);
|
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
|
{
|
|
|
|
|
int r = regs[i];
|
|
|
|
|
if (IsCallerSavedGP(r) && li.SegmentCrossesCall()) continue;
|
|
|
|
|
if (!m.CheckInterference(li, r) && m.Assign(&li, r))
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = r;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TryEvict(LLVM cascade 驱逐策略)----
|
|
|
|
|
// 只能驱逐 generation 严格更低的冲突 vreg。
|
|
|
|
|
// 驱逐后将 victim 设为相同的 cascade,防止 A→B→A 循环。
|
|
|
|
|
bool TryEvict(LiveInterval &li, LiveRegMatrix &m,
|
|
|
|
|
std::vector<int> &heap,
|
|
|
|
|
const SpillWeightCmp &cmp)
|
|
|
|
|
{
|
|
|
|
|
int best_reg = -1;
|
|
|
|
|
float best_weight = 1e9f;
|
|
|
|
|
LiveInterval *victim = nullptr;
|
|
|
|
|
int n = 0;
|
|
|
|
|
const int *regs = GetRegList(li.reg_class, n);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
|
{
|
|
|
|
|
int r = regs[i];
|
|
|
|
|
if (IsCallerSavedGP(r) && li.SegmentCrossesCall()) continue;
|
|
|
|
|
auto *conflict = m.GetConflict(li, r);
|
|
|
|
|
if (!conflict && m.Assign(&li, r))
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = r;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// LLVM 关键收敛规则:只驱逐 generation 严格更低的 vreg
|
|
|
|
|
if (conflict->generation >= li.generation) continue;
|
|
|
|
|
if (conflict->spill_weight < best_weight)
|
|
|
|
|
{
|
|
|
|
|
best_weight = conflict->spill_weight;
|
|
|
|
|
best_reg = r;
|
|
|
|
|
victim = conflict;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (best_reg < 0 || !victim) return false;
|
|
|
|
|
|
|
|
|
|
m.Unassign(victim);
|
|
|
|
|
victim->assigned_reg = -1;
|
|
|
|
|
victim->generation = li.generation;
|
|
|
|
|
heap.push_back(victim->vreg);
|
|
|
|
|
std::push_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
|
|
|
|
|
if (m.Assign(&li, best_reg))
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = best_reg;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TrySplit:在最大使用间隙处分裂(LLVM local split 简化版)----
|
|
|
|
|
// 参考: llvm/lib/CodeGen/RegAllocGreedy.cpp tryLocalSplit()
|
|
|
|
|
bool TrySplit(int vreg_idx, LiveRegMatrix &m,
|
|
|
|
|
std::vector<int> &heap,
|
|
|
|
|
std::vector<LiveInterval> &intervals,
|
|
|
|
|
const std::vector<int> &pos_to_block,
|
|
|
|
|
std::vector<int> &spilled,
|
|
|
|
|
MachineFunction &func,
|
|
|
|
|
const SpillWeightCmp &cmp)
|
|
|
|
|
{
|
|
|
|
|
auto &li = intervals[vreg_idx];
|
|
|
|
|
if (li.uses.size() < 3) return false;
|
|
|
|
|
|
|
|
|
|
std::vector<int> sorted_uses;
|
|
|
|
|
for (auto &u : li.uses) sorted_uses.push_back(u.pos);
|
|
|
|
|
std::sort(sorted_uses.begin(), sorted_uses.end());
|
|
|
|
|
|
|
|
|
|
int best_gap = 0, split_after = -1;
|
|
|
|
|
for (size_t i = 1; i < sorted_uses.size(); ++i) {
|
|
|
|
|
int gap = sorted_uses[i] - sorted_uses[i - 1];
|
|
|
|
|
if (gap > best_gap && gap >= 2) { best_gap = gap; split_after = sorted_uses[i - 1]; }
|
|
|
|
|
}
|
|
|
|
|
if (split_after < 0) return false;
|
|
|
|
|
|
|
|
|
|
int hot_start = li.FirstUsePos(), hot_end = split_after;
|
|
|
|
|
int cold_start = split_after + 1, cold_end = li.LastUsePos();
|
|
|
|
|
if (hot_end < hot_start || cold_end < cold_start) return false;
|
|
|
|
|
|
|
|
|
|
LiveInterval cold;
|
|
|
|
|
cold.reg_class = li.reg_class; cold.generation = li.generation + 1;
|
|
|
|
|
cold.hint_reg = -1; cold.assigned_reg = -1;
|
|
|
|
|
cold.vreg = func.CreateVReg(li.vreg_class);
|
|
|
|
|
|
|
|
|
|
for (auto &seg : li.segments) {
|
|
|
|
|
if (seg.end < cold_start || seg.start > cold_end) continue;
|
|
|
|
|
Segment clipped = seg;
|
|
|
|
|
clipped.start = std::max(seg.start, cold_start);
|
|
|
|
|
clipped.end = std::min(seg.end, cold_end);
|
|
|
|
|
cold.segments.push_back(clipped);
|
|
|
|
|
}
|
|
|
|
|
for (auto &use : li.uses)
|
|
|
|
|
if (cold_start <= use.pos && use.pos <= cold_end) cold.uses.push_back(use);
|
|
|
|
|
if (cold.uses.empty()) return false;
|
|
|
|
|
|
|
|
|
|
float w = 0.0f;
|
|
|
|
|
for (auto &use : cold.uses) {
|
|
|
|
|
int blk = (use.pos >= 0 && use.pos < (int)pos_to_block.size()) ? pos_to_block[use.pos] : 0;
|
|
|
|
|
float mult = use.is_def ? 0.5f : 1.0f;
|
|
|
|
|
w += mult;
|
|
|
|
|
}
|
|
|
|
|
cold.spill_weight = w / cold.Length();
|
|
|
|
|
int cold_vreg = cold.vreg;
|
|
|
|
|
intervals.push_back(std::move(cold));
|
|
|
|
|
|
|
|
|
|
auto &li_safe = intervals[vreg_idx];
|
|
|
|
|
std::vector<Segment> hot_segs;
|
|
|
|
|
for (auto &seg : li_safe.segments) {
|
|
|
|
|
if (seg.end < hot_start || seg.start > hot_end) continue;
|
|
|
|
|
Segment clipped = seg;
|
|
|
|
|
clipped.start = std::max(seg.start, hot_start);
|
|
|
|
|
clipped.end = std::min(seg.end, hot_end);
|
|
|
|
|
if (clipped.start <= clipped.end) hot_segs.push_back(clipped);
|
|
|
|
|
}
|
|
|
|
|
li_safe.segments = std::move(hot_segs);
|
|
|
|
|
li_safe.uses.erase(std::remove_if(li_safe.uses.begin(), li_safe.uses.end(),
|
|
|
|
|
[&](const UsePosition &u) { return u.pos < hot_start || u.pos > hot_end; }), li_safe.uses.end());
|
|
|
|
|
|
|
|
|
|
if (!TryAnyFreeReg(li_safe, m)) { li_safe.assigned_reg = -2; spilled.push_back(vreg_idx); }
|
|
|
|
|
auto &cold_ref = intervals[cold_vreg];
|
|
|
|
|
if (!TryAnyFreeReg(cold_ref, m)) { heap.push_back(cold_vreg); std::push_heap(heap.begin(), heap.end(), cmp); }
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// ---- 主分配函数:对一类寄存器执行贪婪分配 ----
|
|
|
|
|
// 返回 spilled 数量
|
|
|
|
|
int AllocateRegClass(std::vector<LiveInterval> &intervals,
|
|
|
|
|
RegClass rc,
|
|
|
|
|
LiveRegMatrix &matrix,
|
|
|
|
|
const std::vector<int> &pos_to_block,
|
|
|
|
|
MachineFunction &func,
|
|
|
|
|
std::vector<int> &spilled)
|
|
|
|
|
{
|
|
|
|
|
SpillWeightCmp cmp(intervals);
|
|
|
|
|
std::vector<int> heap;
|
|
|
|
|
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg < 0) continue;
|
|
|
|
|
if (li.reg_class == rc && !li.IsAllocated() && !li.IsSpilled())
|
|
|
|
|
heap.push_back(li.vreg);
|
|
|
|
|
}
|
|
|
|
|
std::make_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
|
|
|
|
|
int iter_limit = std::max(1000, (int)heap.size() * 3);
|
|
|
|
|
int iterations = 0;
|
|
|
|
|
|
|
|
|
|
while (!heap.empty())
|
|
|
|
|
{
|
|
|
|
|
if (++iterations > iter_limit)
|
|
|
|
|
{
|
|
|
|
|
// 安全网:剩余未分配 vreg 标记为 spill,而非留下未分配状态
|
|
|
|
|
for (int vreg : heap)
|
|
|
|
|
{
|
|
|
|
|
if (intervals[vreg].IsAllocated() || intervals[vreg].IsSpilled()) continue;
|
|
|
|
|
intervals[vreg].assigned_reg = -2;
|
|
|
|
|
spilled.push_back(vreg);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::pop_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
int vreg = heap.back();
|
|
|
|
|
heap.pop_back();
|
|
|
|
|
|
|
|
|
|
auto &li = intervals[vreg];
|
|
|
|
|
if (li.IsAllocated() || li.IsSpilled()) continue;
|
|
|
|
|
|
|
|
|
|
if (TryAssign(li, matrix, li.hint_reg)) continue;
|
|
|
|
|
if (TryAnyFreeReg(li, matrix)) continue;
|
|
|
|
|
if (rc == RegClass::GPR32 || rc == RegClass::GPR64)
|
|
|
|
|
{
|
|
|
|
|
if (TryEvict(li, matrix, heap, cmp)) continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// LLVM Defer 机制: 首次分配失败时推迟到下一轮,让更小的范围先分配
|
|
|
|
|
// 参考: llvm/lib/CodeGen/RegAllocGreedy.cpp selectOrSplit() RS_New→RS_Deferred
|
|
|
|
|
if (li.deferred_count == 0)
|
|
|
|
|
{
|
|
|
|
|
li.deferred_count = 1;
|
|
|
|
|
heap.push_back(vreg);
|
|
|
|
|
std::push_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (TrySplit(vreg, matrix, heap, intervals,
|
|
|
|
|
pos_to_block, spilled, func, cmp)) continue;
|
|
|
|
|
|
|
|
|
|
li.assigned_reg = -2;
|
|
|
|
|
spilled.push_back(vreg);
|
|
|
|
|
}
|
|
|
|
|
return (int)spilled.size();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
|
|
// ---- LiveRegMatrix 方法 ----
|
|
|
|
|
|
|
|
|
|
void LiveRegMatrix::Init(int num_regs)
|
|
|
|
|
{ reg_assignments_.assign(num_regs, {}); }
|
|
|
|
|
|
|
|
|
|
// Records `li` on `phys_reg` without any interference check.  Register numbers
// outside the matrix are silently ignored.
void LiveRegMatrix::ForceAssign(LiveInterval *li, int phys_reg)
{
    const bool in_range =
        phys_reg >= 0 && phys_reg < (int)reg_assignments_.size();
    if (!in_range) return;
    reg_assignments_[phys_reg].push_back(li);
}
|
|
|
|
|
|
|
|
|
|
// Records `li` on `phys_reg`.  Returns false (and records nothing) when the
// register number lies outside the matrix, true otherwise.  No interference
// check is performed here; callers are expected to check beforehand.
bool LiveRegMatrix::Assign(LiveInterval *li, int phys_reg)
{
    const bool in_range =
        phys_reg >= 0 && phys_reg < (int)reg_assignments_.size();
    if (in_range)
        reg_assignments_[phys_reg].push_back(li);
    return in_range;
}
|
|
|
|
|
|
|
|
|
|
// Removes the first occurrence of `li`, searching the registers in ascending
// order.  Does nothing when the interval is not recorded anywhere.
void LiveRegMatrix::Unassign(LiveInterval *li)
{
    for (size_t reg = 0; reg < reg_assignments_.size(); ++reg)
    {
        auto &assigned = reg_assignments_[reg];
        auto pos = std::find(assigned.begin(), assigned.end(), li);
        if (pos == assigned.end()) continue;
        assigned.erase(pos);
        return;
    }
}
|
|
|
|
|
|
|
|
|
|
bool LiveRegMatrix::CheckInterference(const LiveInterval &li, int phys_reg) const
|
|
|
|
|
{
|
|
|
|
|
if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return true;
|
|
|
|
|
for (auto *other : reg_assignments_[phys_reg])
|
|
|
|
|
{
|
|
|
|
|
if (other->vreg == li.vreg) continue;
|
|
|
|
|
// Wn/Xn 别名:GPR32/GPR64 共享同一物理寄存器,总是冲突
|
|
|
|
|
// LLVM 用 Register Unit 来处理:Wn 和 Xn 占据相同的 unit
|
|
|
|
|
// 参考: llvm/lib/CodeGen/LiveRegMatrix.cpp foreachUnit()
|
|
|
|
|
bool gpr32_64_alias =
|
|
|
|
|
(li.reg_class == RegClass::GPR32 && other->reg_class == RegClass::GPR64) ||
|
|
|
|
|
(li.reg_class == RegClass::GPR64 && other->reg_class == RegClass::GPR32);
|
|
|
|
|
if (gpr32_64_alias)
|
|
|
|
|
return true;
|
|
|
|
|
for (auto &sa : li.segments)
|
|
|
|
|
for (auto &sb : other->segments)
|
|
|
|
|
if (sa.Overlaps(sb)) return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the first interval recorded on `phys_reg` that conflicts with `li`,
// or nullptr when there is none or the register number is outside the matrix.
// Entries carrying the same vreg as `li` are skipped.  A GPR32/GPR64 pair always
// conflicts regardless of overlap; otherwise two intervals conflict when any
// pair of their segments overlaps.
LiveInterval *LiveRegMatrix::GetConflict(const LiveInterval &li,
                                         int phys_reg) const
{
    if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size())
        return nullptr;

    const bool li_is_32 = li.reg_class == RegClass::GPR32;
    const bool li_is_64 = li.reg_class == RegClass::GPR64;

    for (auto *candidate : reg_assignments_[phys_reg])
    {
        if (candidate->vreg == li.vreg) continue;

        // Wn/Xn aliasing: mixed-width GP intervals never share a register.
        if (li_is_32 && candidate->reg_class == RegClass::GPR64) return candidate;
        if (li_is_64 && candidate->reg_class == RegClass::GPR32) return candidate;

        for (auto &mine : li.segments)
        {
            for (auto &theirs : candidate->segments)
            {
                if (mine.Overlaps(theirs)) return candidate;
            }
        }
    }
    return nullptr;
}
|
|
|
|
|
|
|
|
|
|
bool LiveRegMatrix::CheckInterferenceRange(int start, int end,
|
|
|
|
|
int phys_reg) const
|
|
|
|
|
{
|
|
|
|
|
if (phys_reg < 0 || phys_reg >= (int)reg_assignments_.size()) return true;
|
|
|
|
|
Segment range; range.start = start; range.end = end;
|
|
|
|
|
for (auto *other : reg_assignments_[phys_reg])
|
|
|
|
|
for (auto &sb : other->segments)
|
|
|
|
|
if (range.Overlaps(sb)) return true;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 对外入口 ----
|
|
|
|
|
void RunGreedyRegAlloc(MachineFunction &function);
|
|
|
|
|
void RunGreedyRegAlloc(MachineModule &module);
|
|
|
|
|
|
|
|
|
|
static void AllocateRegistersForFunction(MachineFunction &function)
|
|
|
|
|
{
|
|
|
|
|
if (function.GetNumVRegs() == 0) return;
|
|
|
|
|
|
|
|
|
|
// ---- 阶段 0:活跃分析 + 预处理 ----
|
|
|
|
|
auto raw = ComputeInstLiveness(function);
|
|
|
|
|
auto intervals = EnhanceIntervals(raw, function);
|
|
|
|
|
intervals.reserve(function.GetNumVRegs() * 16);
|
|
|
|
|
|
|
|
|
|
auto &blocks = function.GetBlocks();
|
|
|
|
|
std::vector<int> pos_to_block;
|
|
|
|
|
std::vector<int> block_start_pos(blocks.size(), -1);
|
|
|
|
|
int global = 0;
|
|
|
|
|
for (int bi = 0; bi < (int)blocks.size(); ++bi)
|
|
|
|
|
{
|
|
|
|
|
if (!blocks[bi]) continue;
|
|
|
|
|
block_start_pos[bi] = global;
|
|
|
|
|
int cnt = (int)blocks[bi]->GetInstructions().size();
|
|
|
|
|
for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi);
|
|
|
|
|
global += cnt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto block_depth = AnalyzeLoopDepth(function);
|
|
|
|
|
ComputeSpillWeights(intervals, block_depth, pos_to_block);
|
|
|
|
|
PropagateCopyHints(intervals, function);
|
|
|
|
|
intervals.reserve(function.GetNumVRegs() * 16);
|
|
|
|
|
|
|
|
|
|
// LLVM 风格:全局 cascade 计数器
|
|
|
|
|
int global_cascade = 1;
|
|
|
|
|
|
|
|
|
|
// ---- 阶段 1:分配循环 ----
|
|
|
|
|
for (int round = 0; round < MAX_ROUNDS; ++round)
|
|
|
|
|
{
|
|
|
|
|
// GP 分配(GPR32 + GPR64 共享同一 LiveRegMatrix)
|
|
|
|
|
LiveRegMatrix gp_matrix;
|
|
|
|
|
gp_matrix.Init(32);
|
|
|
|
|
std::vector<int> gp_spilled;
|
|
|
|
|
|
|
|
|
|
// 预填充上一轮已分配的 vreg
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg >= 0 && li.IsAllocated() &&
|
|
|
|
|
(li.reg_class == RegClass::GPR32 || li.reg_class == RegClass::GPR64))
|
|
|
|
|
gp_matrix.ForceAssign(&li, li.assigned_reg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
AllocateRegClass(intervals, RegClass::GPR32, gp_matrix,
|
|
|
|
|
pos_to_block, function, gp_spilled);
|
|
|
|
|
AllocateRegClass(intervals, RegClass::GPR64, gp_matrix,
|
|
|
|
|
pos_to_block, function, gp_spilled);
|
|
|
|
|
|
|
|
|
|
// FP 分配
|
|
|
|
|
LiveRegMatrix fp_matrix;
|
|
|
|
|
fp_matrix.Init(32);
|
|
|
|
|
std::vector<int> fp_spilled;
|
|
|
|
|
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg >= 0 && li.IsAllocated() && li.reg_class == RegClass::FPR32)
|
|
|
|
|
fp_matrix.ForceAssign(&li, li.assigned_reg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
AllocateRegClass(intervals, RegClass::FPR32, fp_matrix,
|
|
|
|
|
pos_to_block, function, fp_spilled);
|
|
|
|
|
|
|
|
|
|
auto spilled = gp_spilled;
|
|
|
|
|
spilled.insert(spilled.end(), fp_spilled.begin(), fp_spilled.end());
|
|
|
|
|
|
|
|
|
|
if (spilled.empty()) break;
|
|
|
|
|
|
|
|
|
|
// ---- 溢出重写(LLVM-style spill rewrite)----
|
|
|
|
|
// LLVM 关键设计:每次 reload 创建新 vreg,让分配器在下一轮分配不同物理寄存器,
|
|
|
|
|
// 避免多个溢出 vreg 共享同一回退寄存器导致互相覆盖。
|
|
|
|
|
// 参考: llvm/lib/CodeGen/InlineSpiller.cpp spill()/reload()
|
|
|
|
|
for (int spilled_vreg : spilled)
|
|
|
|
|
{
|
|
|
|
|
auto &li = intervals[spilled_vreg];
|
|
|
|
|
if (li.spill_slot < 0)
|
|
|
|
|
{
|
|
|
|
|
int size = 4;
|
|
|
|
|
if (li.vreg_class == VRegClass::Ptr) size = 8;
|
|
|
|
|
li.spill_slot = function.CreateFrameIndex(size);
|
|
|
|
|
}
|
|
|
|
|
for (int u = (int)li.uses.size() - 1; u >= 0; --u)
|
|
|
|
|
{
|
|
|
|
|
auto &use = li.uses[u];
|
|
|
|
|
int blk = pos_to_block[use.pos];
|
|
|
|
|
int local = use.pos - block_start_pos[blk];
|
|
|
|
|
if (use.is_def)
|
|
|
|
|
{
|
|
|
|
|
// def: 在定义后插入 StoreStack,保存值到栈
|
|
|
|
|
blocks[blk]->InsertInst(local + 1,
|
|
|
|
|
MachineInstr(Opcode::StoreStack,
|
|
|
|
|
{Operand::VReg(li.vreg, li.vreg_class),
|
|
|
|
|
Operand::FrameIndex(li.spill_slot)}));
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// use: 创建新 vreg,LoadStack 加载到新 vreg,替换使用点
|
|
|
|
|
int new_vreg = function.CreateVReg(li.vreg_class);
|
|
|
|
|
blocks[blk]->InsertInst(local,
|
|
|
|
|
MachineInstr(Opcode::LoadStack,
|
|
|
|
|
{Operand::VReg(new_vreg, li.vreg_class),
|
|
|
|
|
Operand::FrameIndex(li.spill_slot)}));
|
|
|
|
|
// 在插入点之后搜索使用溢出 vreg 的指令并替换
|
|
|
|
|
auto &instructions = blocks[blk]->GetInstructions();
|
|
|
|
|
for (int idx = local + 1; idx < (int)instructions.size(); ++idx)
|
|
|
|
|
{
|
|
|
|
|
bool found = false;
|
|
|
|
|
for (auto &op : instructions[idx].GetOperands())
|
|
|
|
|
{
|
|
|
|
|
if (op.GetKind() == Operand::Kind::VReg &&
|
|
|
|
|
op.GetVRegId() == li.vreg)
|
|
|
|
|
{
|
|
|
|
|
op = Operand::VReg(new_vreg, li.vreg_class);
|
|
|
|
|
found = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (found) break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 保存已分配状态 ----
|
|
|
|
|
std::unordered_map<int, int> prev_assigned;
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg >= 0 && li.IsAllocated())
|
|
|
|
|
prev_assigned[li.vreg] = li.assigned_reg;
|
|
|
|
|
else if (li.vreg >= 0 && li.IsSpilled())
|
|
|
|
|
prev_assigned[li.vreg] = -2; // 保持 spill 状态
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 重新分析活跃 ----
|
|
|
|
|
raw = ComputeInstLiveness(function);
|
|
|
|
|
intervals = EnhanceIntervals(raw, function);
|
|
|
|
|
intervals.reserve(function.GetNumVRegs() * 16);
|
|
|
|
|
|
|
|
|
|
// ---- 重建位置映射 ----
|
|
|
|
|
pos_to_block.clear();
|
|
|
|
|
block_start_pos.assign(blocks.size(), -1);
|
|
|
|
|
int new_global = 0;
|
|
|
|
|
for (int bi = 0; bi < (int)blocks.size(); ++bi)
|
|
|
|
|
{
|
|
|
|
|
if (!blocks[bi]) continue;
|
|
|
|
|
block_start_pos[bi] = new_global;
|
|
|
|
|
int cnt = (int)blocks[bi]->GetInstructions().size();
|
|
|
|
|
for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi);
|
|
|
|
|
new_global += cnt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 恢复已分配状态 + 递增 cascade ----
|
|
|
|
|
int num_new = 0;
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
auto it = prev_assigned.find(li.vreg);
|
|
|
|
|
if (it != prev_assigned.end())
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = it->second;
|
|
|
|
|
// 已分配的保持 cascade
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// 新 vreg(由 spill 引入的 LoadStack vreg)
|
|
|
|
|
li.assigned_reg = -1;
|
|
|
|
|
li.generation = 0;
|
|
|
|
|
num_new++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (num_new > 0)
|
|
|
|
|
{
|
|
|
|
|
// 只对新 vreg 重新计算 spill weight
|
|
|
|
|
ComputeSpillWeights(intervals, block_depth, pos_to_block);
|
|
|
|
|
}
|
|
|
|
|
PropagateCopyHints(intervals, function);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 最终:vreg → PhysReg 重写 ----
|
|
|
|
|
for (auto &block : blocks)
|
|
|
|
|
{
|
|
|
|
|
if (!block) continue;
|
|
|
|
|
for (auto &inst : block->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
for (auto &op : inst.GetOperands())
|
|
|
|
|
{
|
|
|
|
|
if (op.GetKind() != Operand::Kind::VReg) continue;
|
|
|
|
|
int vreg = op.GetVRegId();
|
|
|
|
|
int phys = -1;
|
|
|
|
|
if (vreg >= 0 && vreg < (int)intervals.size())
|
|
|
|
|
phys = intervals[vreg].assigned_reg;
|
|
|
|
|
if (phys < 0)
|
|
|
|
|
{
|
|
|
|
|
auto vc = function.GetVRegClass(vreg);
|
|
|
|
|
if (vc == VRegClass::Ptr) phys = 47; // X16
|
|
|
|
|
else if (vc == VRegClass::Float) phys = 78; // S16
|
|
|
|
|
else phys = 16; // W16
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (vreg < function.GetNumVRegs())
|
|
|
|
|
{
|
|
|
|
|
auto vc = function.GetVRegClass(vreg);
|
|
|
|
|
if (vc == VRegClass::Ptr)
|
|
|
|
|
phys = phys + 31; // Wn → Xn (PhysReg 31-61)
|
|
|
|
|
else if (vc == VRegClass::Float)
|
|
|
|
|
phys = phys + 62; // → Sn (PhysReg 62-93)
|
|
|
|
|
// VRegClass::Int 保持原值 → Wn (PhysReg 0-30)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
op = Operand::Reg(static_cast<PhysReg>(phys));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 收集使用的 callee-saved 寄存器(LLVM PEI 风格:扫描最终 PhysReg)----
|
|
|
|
|
{
|
|
|
|
|
int x19 = static_cast<int>(PhysReg::X19);
|
|
|
|
|
int x28 = static_cast<int>(PhysReg::X28);
|
|
|
|
|
int w19 = static_cast<int>(PhysReg::W19);
|
|
|
|
|
int w28 = static_cast<int>(PhysReg::W28);
|
|
|
|
|
int s16 = static_cast<int>(PhysReg::S16);
|
|
|
|
|
int s31 = static_cast<int>(PhysReg::S31);
|
|
|
|
|
|
|
|
|
|
bool used_x[11] = {};
|
|
|
|
|
bool used_s[16] = {};
|
|
|
|
|
for (auto &block : blocks)
|
|
|
|
|
{
|
|
|
|
|
if (!block) continue;
|
|
|
|
|
for (auto &inst : block->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
for (auto &op : inst.GetOperands())
|
|
|
|
|
{
|
|
|
|
|
if (op.GetKind() != Operand::Kind::Reg) continue;
|
|
|
|
|
int r = static_cast<int>(op.GetReg());
|
|
|
|
|
if (r >= w19 && r <= w28)
|
|
|
|
|
used_x[r - w19] = true;
|
|
|
|
|
else if (r >= x19 && r <= x28)
|
|
|
|
|
used_x[r - x19] = true;
|
|
|
|
|
else if (r >= s16 && r <= s31)
|
|
|
|
|
used_s[r - s16] = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < 11; ++i)
|
|
|
|
|
if (used_x[i])
|
|
|
|
|
function.AddCalleeSavedReg(static_cast<PhysReg>(x19 + i));
|
|
|
|
|
for (int i = 0; i < 16; ++i)
|
|
|
|
|
if (used_s[i])
|
|
|
|
|
function.AddCalleeSavedReg(static_cast<PhysReg>(s16 + i));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Public entry point: runs greedy register allocation on a single function.
void RunGreedyRegAlloc(MachineFunction &function)
{
    AllocateRegistersForFunction(function);
}
|
|
|
|
|
|
|
|
|
|
// Public entry point: runs greedy register allocation on every non-null
// function of `module`, in module order.
void RunGreedyRegAlloc(MachineModule &module)
{
    for (auto &func : module.GetFunctions())
    {
        if (!func) continue;
        RunGreedyRegAlloc(*func);
    }
}
|
|
|
|
|
|
|
|
|
|
} // namespace mir
|