|
|
|
|
@ -3,6 +3,7 @@
|
|
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
#include <cmath>
|
|
|
|
|
#include <limits>
|
|
|
|
|
#include <queue>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include <unordered_set>
|
|
|
|
|
@ -14,11 +15,14 @@ namespace
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
// ---- Allocatable register sets ----
// NOTE(review): FP_ALLOCATABLE and MAX_ROUNDS are each defined twice in this run
// (it looks like the before/after lines of a diff); only one definition of each can survive.
|
|
|
|
|
// GP: excludes x0-x7 (argument passing), x13-x14 (used as temporaries by lowering), x18 (platform register), x29-x30 (FP/LR)
|
|
|
|
|
// x16-x17 also serve as the spill fallback, but the spill path reaches them through the phys<0 mapping
|
|
|
|
|
constexpr int GP_ALLOCATABLE[] = {8,9,10,11,12,15,16,17,19,20,21,22,23,24,25,26,27,28};
|
|
|
|
|
// Must equal the number of entries in GP_ALLOCATABLE (18 entries listed above).
constexpr int GP_COUNT = 18;
|
|
|
|
|
// First FP list: registers 0-7 and 16-31 (24 entries).
constexpr int FP_ALLOCATABLE[] = {0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
|
|
|
|
// S0-S1 are argument/return-value registers and are not allocatable; S2-S9 + S16-S31 are allocatable
|
|
|
|
|
constexpr int FP_ALLOCATABLE[] = {2,3,4,5,6,7,8,9,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
|
|
|
|
// Must equal the number of entries in FP_ALLOCATABLE (both lists above have 24 entries).
constexpr int FP_COUNT = 24;
|
|
|
|
|
// Upper bound on allocate -> spill-rewrite -> re-analyse rounds.
constexpr int MAX_ROUNDS = 5;
|
|
|
|
|
constexpr int MAX_ROUNDS = 3; // LLVM: usually converges within 1-2 rounds
|
|
|
|
|
|
|
|
|
|
// Reports whether a general-purpose register number falls in the low range
// [.., 17]; every number above 17 yields false.
// NOTE(review): presumably this models x0-x17 as the caller-saved GP registers —
// confirm against the target ABI. Negative numbers also yield true.
bool IsCallerSavedGP(int phys_reg)
{
    constexpr int kHighestCallerSaved = 17;
    return !(phys_reg > kHighestCallerSaved);
}
|
|
|
|
|
|
|
|
|
|
@ -30,13 +34,27 @@ const int* GetRegList(RegClass rc, int& count)
|
|
|
|
|
{ count = FP_COUNT; return FP_ALLOCATABLE; }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- Heuristic spill weight (simplified LLVM form: Normalise(Σ use_freq) / Length) ----
|
|
|
|
|
// LLVM uses a full block-frequency analysis; we approximate it with loop depth.
|
|
|
|
|
// Heap ordering: a higher cascade (an interval that was already evicted) always ranks after a lower cascade;
|
|
|
|
|
// within the same cascade, order by descending spill_weight (the heaviest sits on top of the heap and is allocated first).
|
|
|
|
|
|
|
|
|
|
|
// The heap stores vreg indices so that intervals.push_back inside TrySplit cannot invalidate heap entries.
|
|
|
|
|
// Comparator for the std::*_heap algorithms over live intervals.
// NOTE(review): two operator() overloads appear here (pointer-based and index-based);
// this looks like the before/after of a diff — confirm which one is meant to remain.
struct SpillWeightCmp
|
|
|
|
|
{
|
|
|
|
|
// Pointer-based overload: compares spill_weight only.
bool operator()(LiveInterval* a, LiveInterval* b) const
|
|
|
|
|
{ return a->spill_weight < b->spill_weight; }
|
|
|
|
|
// Interval table the index-based overload looks into; held by reference, not owned.
const std::vector<LiveInterval>& intervals;
|
|
|
|
|
explicit SpillWeightCmp(const std::vector<LiveInterval>& ivs) : intervals(ivs) {}
|
|
|
|
|
// Index-based overload: a and b are positions in `intervals` (no bounds check is performed).
bool operator()(int a, int b) const
|
|
|
|
|
{
|
|
|
|
|
const auto& la = intervals[a];
|
|
|
|
|
const auto& lb = intervals[b];
|
|
|
|
|
// Different generation: the larger generation compares "less", so it sinks in a max-heap.
if (la.generation != lb.generation)
|
|
|
|
|
return la.generation > lb.generation;
|
|
|
|
|
// Same generation: the smaller spill_weight compares "less".
return la.spill_weight < lb.spill_weight;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// ---- def/use 提取(与 InstLiveness.cpp 保持一致)----
|
|
|
|
|
// ---- def/use 提取 ----
|
|
|
|
|
static bool HasVRegDef(Opcode opcode)
|
|
|
|
|
{
|
|
|
|
|
switch (opcode)
|
|
|
|
|
@ -93,8 +111,8 @@ std::vector<int> AnalyzeLoopDepth(MachineFunction &func)
|
|
|
|
|
if (!blocks[i]) continue;
|
|
|
|
|
for (auto &inst : blocks[i]->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
auto opcode = inst.GetOpcode();
|
|
|
|
|
int target_label = -1;
|
|
|
|
|
auto opcode = inst.GetOpcode();
|
|
|
|
|
if (opcode == Opcode::Br && !inst.GetOperands().empty() &&
|
|
|
|
|
inst.GetOperands()[0].GetKind() == Operand::Kind::Label)
|
|
|
|
|
target_label = inst.GetOperands()[0].GetLabel();
|
|
|
|
|
@ -197,7 +215,7 @@ void PropagateCopyHints(std::vector<LiveInterval> &intervals,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TryAssign / TryAnyFreeReg ----
|
|
|
|
|
// ---- TryAssign ----
|
|
|
|
|
bool TryAssign(LiveInterval &li, LiveRegMatrix &m, int hint)
|
|
|
|
|
{
|
|
|
|
|
if (hint < 0) return false;
|
|
|
|
|
@ -211,6 +229,7 @@ bool TryAssign(LiveInterval &li, LiveRegMatrix &m, int hint)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TryAnyFreeReg ----
|
|
|
|
|
bool TryAnyFreeReg(LiveInterval &li, LiveRegMatrix &m)
|
|
|
|
|
{
|
|
|
|
|
int n = 0;
|
|
|
|
|
@ -229,16 +248,19 @@ bool TryAnyFreeReg(LiveInterval &li, LiveRegMatrix &m)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TryEvict ----
|
|
|
|
|
// ---- TryEvict(LLVM cascade 驱逐策略)----
|
|
|
|
|
// 只能驱逐 generation 严格更低的冲突 vreg。
|
|
|
|
|
// 驱逐后将 victim 设为相同的 cascade,防止 A→B→A 循环。
|
|
|
|
|
bool TryEvict(LiveInterval &li, LiveRegMatrix &m,
|
|
|
|
|
std::vector<LiveInterval *> &heap,
|
|
|
|
|
std::vector<int> &heap,
|
|
|
|
|
const SpillWeightCmp &cmp)
|
|
|
|
|
{
|
|
|
|
|
int best_reg = -1;
|
|
|
|
|
float min_weight = 1e9f;
|
|
|
|
|
float best_weight = 1e9f;
|
|
|
|
|
LiveInterval *victim = nullptr;
|
|
|
|
|
int n = 0;
|
|
|
|
|
const int *regs = GetRegList(li.reg_class, n);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
|
{
|
|
|
|
|
int r = regs[i];
|
|
|
|
|
@ -250,98 +272,78 @@ bool TryEvict(LiveInterval &li, LiveRegMatrix &m,
|
|
|
|
|
li.assigned_reg = r;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
if (conflict->spill_weight < min_weight)
|
|
|
|
|
// LLVM 关键收敛规则:只驱逐 generation 严格更低的 vreg
|
|
|
|
|
if (conflict->generation >= li.generation) continue;
|
|
|
|
|
if (conflict->spill_weight < best_weight)
|
|
|
|
|
{
|
|
|
|
|
min_weight = conflict->spill_weight;
|
|
|
|
|
best_weight = conflict->spill_weight;
|
|
|
|
|
best_reg = r;
|
|
|
|
|
victim = conflict;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (best_reg < 0 || !victim) return false;
|
|
|
|
|
|
|
|
|
|
m.Unassign(victim);
|
|
|
|
|
victim->assigned_reg = -1;
|
|
|
|
|
victim->generation++;
|
|
|
|
|
heap.push_back(victim);
|
|
|
|
|
victim->generation = li.generation;
|
|
|
|
|
heap.push_back(victim->vreg);
|
|
|
|
|
std::push_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
|
|
|
|
|
m.Assign(&li, best_reg);
|
|
|
|
|
li.assigned_reg = best_reg;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- CreateChild ----
|
|
|
|
|
bool CreateChild(const LiveInterval &parent, int start_pos, int end_pos,
|
|
|
|
|
LiveInterval &child)
|
|
|
|
|
{
|
|
|
|
|
child = LiveInterval();
|
|
|
|
|
child.reg_class = parent.reg_class;
|
|
|
|
|
child.generation = parent.generation + 1;
|
|
|
|
|
child.hint_reg = -1;
|
|
|
|
|
child.assigned_reg = -1;
|
|
|
|
|
child.valnos = parent.valnos;
|
|
|
|
|
for (auto &seg : parent.segments)
|
|
|
|
|
{
|
|
|
|
|
if (seg.end < start_pos || seg.start > end_pos) continue;
|
|
|
|
|
Segment clipped = seg;
|
|
|
|
|
clipped.start = std::max(seg.start, start_pos);
|
|
|
|
|
clipped.end = std::min(seg.end, end_pos);
|
|
|
|
|
child.segments.push_back(clipped);
|
|
|
|
|
}
|
|
|
|
|
for (auto &use : parent.uses)
|
|
|
|
|
if (start_pos <= use.pos && use.pos <= end_pos)
|
|
|
|
|
child.uses.push_back(use);
|
|
|
|
|
return !child.uses.empty();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- FindBestSplitPos ----
|
|
|
|
|
int FindBestSplitPos(const LiveInterval &li, LiveRegMatrix &m)
|
|
|
|
|
{
|
|
|
|
|
for (int i = (int)li.uses.size() - 2; i >= 0; --i)
|
|
|
|
|
{
|
|
|
|
|
int end_pos = li.uses[i].pos;
|
|
|
|
|
int hot_start = li.FirstUsePos();
|
|
|
|
|
int n = 0;
|
|
|
|
|
const int *regs = GetRegList(li.reg_class, n);
|
|
|
|
|
for (int r_idx = 0; r_idx < n; ++r_idx)
|
|
|
|
|
{
|
|
|
|
|
int r = regs[r_idx];
|
|
|
|
|
if (IsCallerSavedGP(r) && li.SegmentCrossesCall()) continue;
|
|
|
|
|
if (!m.CheckInterferenceRange(hot_start, end_pos, r))
|
|
|
|
|
return end_pos;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- TrySplit ----
|
|
|
|
|
// ---- TrySplit(简化版——只用于最复杂的 vreg)----
|
|
|
|
|
bool TrySplit(LiveInterval &li, LiveRegMatrix &m,
|
|
|
|
|
std::vector<LiveInterval *> &heap,
|
|
|
|
|
std::vector<int> &heap,
|
|
|
|
|
std::vector<LiveInterval> &intervals,
|
|
|
|
|
const std::vector<int> &block_depth,
|
|
|
|
|
const std::vector<int> &pos_to_block,
|
|
|
|
|
std::vector<LiveInterval *> &spilled,
|
|
|
|
|
std::vector<int> &spilled,
|
|
|
|
|
MachineFunction &func,
|
|
|
|
|
const SpillWeightCmp &cmp)
|
|
|
|
|
{
|
|
|
|
|
int split_pos = FindBestSplitPos(li, m);
|
|
|
|
|
if (split_pos < 0) return false;
|
|
|
|
|
if (li.uses.size() < 3) return false;
|
|
|
|
|
|
|
|
|
|
// 在中间位置分裂:hot 段尝试分配,cold 段入堆
|
|
|
|
|
int mid = (int)li.uses.size() / 2;
|
|
|
|
|
int split_pos = li.uses[mid].pos;
|
|
|
|
|
int hot_start = li.FirstUsePos();
|
|
|
|
|
int hot_end = split_pos;
|
|
|
|
|
int cold_start = split_pos + 1;
|
|
|
|
|
int cold_end = li.LastUsePos();
|
|
|
|
|
|
|
|
|
|
LiveInterval hot;
|
|
|
|
|
if (!CreateChild(li, li.FirstUsePos(), split_pos, hot))
|
|
|
|
|
return false;
|
|
|
|
|
hot.vreg = li.vreg;
|
|
|
|
|
if (hot_end < hot_start || cold_end < cold_start) return false;
|
|
|
|
|
|
|
|
|
|
// 构建 cold 子区间
|
|
|
|
|
LiveInterval cold;
|
|
|
|
|
CreateChild(li, split_pos + 1, li.LastUsePos(), cold);
|
|
|
|
|
cold.vreg = func.CreateVReg(li.vreg_class);
|
|
|
|
|
cold.reg_class = li.reg_class;
|
|
|
|
|
cold.generation = li.generation + 1;
|
|
|
|
|
cold.hint_reg = -1;
|
|
|
|
|
cold.assigned_reg = -1;
|
|
|
|
|
cold.vreg = func.CreateVReg(li.vreg_class);
|
|
|
|
|
|
|
|
|
|
for (auto &seg : li.segments)
|
|
|
|
|
{
|
|
|
|
|
if (seg.end < cold_start || seg.start > cold_end) continue;
|
|
|
|
|
Segment clipped = seg;
|
|
|
|
|
clipped.start = std::max(seg.start, cold_start);
|
|
|
|
|
clipped.end = std::min(seg.end, cold_end);
|
|
|
|
|
cold.segments.push_back(clipped);
|
|
|
|
|
}
|
|
|
|
|
for (auto &use : li.uses)
|
|
|
|
|
if (cold_start <= use.pos && use.pos <= cold_end)
|
|
|
|
|
cold.uses.push_back(use);
|
|
|
|
|
|
|
|
|
|
if (cold.uses.empty()) return false;
|
|
|
|
|
|
|
|
|
|
float w = 0.0f;
|
|
|
|
|
for (auto &use : cold.uses)
|
|
|
|
|
{
|
|
|
|
|
int blk = (use.pos >= 0 && use.pos < (int)pos_to_block.size())
|
|
|
|
|
? pos_to_block[use.pos] : 0;
|
|
|
|
|
int d = (blk >= 0 && blk < (int)block_depth.size())
|
|
|
|
|
? block_depth[blk] : 0;
|
|
|
|
|
float mult = std::pow(10.0f, (float)d);
|
|
|
|
|
float mult = 1.0f;
|
|
|
|
|
if (use.is_def) mult *= 0.5f;
|
|
|
|
|
w += mult;
|
|
|
|
|
}
|
|
|
|
|
@ -350,29 +352,102 @@ bool TrySplit(LiveInterval &li, LiveRegMatrix &m,
|
|
|
|
|
intervals.push_back(std::move(cold));
|
|
|
|
|
LiveInterval &cold_ref = intervals.back();
|
|
|
|
|
|
|
|
|
|
if (TryAnyFreeReg(hot, m))
|
|
|
|
|
// 修剪 li 为 hot 段
|
|
|
|
|
li.segments.clear();
|
|
|
|
|
for (auto &seg : intervals[li.vreg].segments)
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = hot.assigned_reg;
|
|
|
|
|
li.segments = std::move(hot.segments);
|
|
|
|
|
li.uses = std::move(hot.uses);
|
|
|
|
|
if (seg.end < hot_start || seg.start > hot_end) continue;
|
|
|
|
|
Segment clipped = seg;
|
|
|
|
|
clipped.start = std::max(seg.start, hot_start);
|
|
|
|
|
clipped.end = std::min(seg.end, hot_end);
|
|
|
|
|
if (clipped.start <= clipped.end)
|
|
|
|
|
li.segments.push_back(clipped);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
li.uses.erase(
|
|
|
|
|
std::remove_if(li.uses.begin(), li.uses.end(),
|
|
|
|
|
[&](const UsePosition &u) {
|
|
|
|
|
return u.pos < hot_start || u.pos > hot_end;
|
|
|
|
|
}),
|
|
|
|
|
li.uses.end());
|
|
|
|
|
|
|
|
|
|
// 尝试给 hot 分配
|
|
|
|
|
if (!TryAnyFreeReg(li, m))
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = -2;
|
|
|
|
|
spilled.push_back(&li);
|
|
|
|
|
spilled.push_back(li.vreg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// cold 入堆
|
|
|
|
|
if (!TryAnyFreeReg(cold_ref, m))
|
|
|
|
|
{
|
|
|
|
|
heap.push_back(&cold_ref);
|
|
|
|
|
heap.push_back(cold_ref.vreg);
|
|
|
|
|
std::push_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 主分配函数:对一类寄存器执行贪婪分配 ----
|
|
|
|
|
// 返回 spilled 数量
|
|
|
|
|
int AllocateRegClass(std::vector<LiveInterval> &intervals,
|
|
|
|
|
RegClass rc,
|
|
|
|
|
LiveRegMatrix &matrix,
|
|
|
|
|
const std::vector<int> &pos_to_block,
|
|
|
|
|
MachineFunction &func,
|
|
|
|
|
std::vector<int> &spilled)
|
|
|
|
|
{
|
|
|
|
|
SpillWeightCmp cmp(intervals);
|
|
|
|
|
std::vector<int> heap;
|
|
|
|
|
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg < 0) continue;
|
|
|
|
|
if (li.reg_class == rc && !li.IsAllocated() && !li.IsSpilled())
|
|
|
|
|
heap.push_back(li.vreg);
|
|
|
|
|
}
|
|
|
|
|
std::make_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
|
|
|
|
|
int iter_limit = std::max(1000, (int)heap.size() * 3);
|
|
|
|
|
int iterations = 0;
|
|
|
|
|
|
|
|
|
|
while (!heap.empty())
|
|
|
|
|
{
|
|
|
|
|
if (++iterations > iter_limit)
|
|
|
|
|
{
|
|
|
|
|
// 安全网:剩余未分配 vreg 标记为 spill,而非留下未分配状态
|
|
|
|
|
for (int vreg : heap)
|
|
|
|
|
{
|
|
|
|
|
if (intervals[vreg].IsAllocated() || intervals[vreg].IsSpilled()) continue;
|
|
|
|
|
intervals[vreg].assigned_reg = -2;
|
|
|
|
|
spilled.push_back(vreg);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::pop_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
int vreg = heap.back();
|
|
|
|
|
heap.pop_back();
|
|
|
|
|
|
|
|
|
|
auto &li = intervals[vreg];
|
|
|
|
|
if (li.IsAllocated() || li.IsSpilled()) continue;
|
|
|
|
|
|
|
|
|
|
if (TryAssign(li, matrix, li.hint_reg)) continue;
|
|
|
|
|
if (TryAnyFreeReg(li, matrix)) continue;
|
|
|
|
|
if (rc == RegClass::GPR32 || rc == RegClass::GPR64)
|
|
|
|
|
{
|
|
|
|
|
if (TryEvict(li, matrix, heap, cmp)) continue;
|
|
|
|
|
}
|
|
|
|
|
if (TrySplit(li, matrix, heap, intervals,
|
|
|
|
|
pos_to_block, spilled, func, cmp)) continue;
|
|
|
|
|
|
|
|
|
|
li.assigned_reg = -2;
|
|
|
|
|
spilled.push_back(vreg);
|
|
|
|
|
}
|
|
|
|
|
return (int)spilled.size();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
|
|
|
|
// ---- LiveRegMatrix 方法(namespace mir 内,不在匿名命名空间中)----
|
|
|
|
|
// ---- LiveRegMatrix 方法 ----
|
|
|
|
|
|
|
|
|
|
void LiveRegMatrix::Init(int num_regs)
|
|
|
|
|
{ reg_assignments_.assign(num_regs, {}); }
|
|
|
|
|
@ -398,6 +473,14 @@ bool LiveRegMatrix::CheckInterference(const LiveInterval &li, int phys_reg) cons
|
|
|
|
|
for (auto *other : reg_assignments_[phys_reg])
|
|
|
|
|
{
|
|
|
|
|
if (other->vreg == li.vreg) continue;
|
|
|
|
|
// Wn/Xn 别名:GPR32/GPR64 共享同一物理寄存器,总是冲突
|
|
|
|
|
// LLVM 用 Register Unit 来处理:Wn 和 Xn 占据相同的 unit
|
|
|
|
|
// 参考: llvm/lib/CodeGen/LiveRegMatrix.cpp foreachUnit()
|
|
|
|
|
bool gpr32_64_alias =
|
|
|
|
|
(li.reg_class == RegClass::GPR32 && other->reg_class == RegClass::GPR64) ||
|
|
|
|
|
(li.reg_class == RegClass::GPR64 && other->reg_class == RegClass::GPR32);
|
|
|
|
|
if (gpr32_64_alias && !li.segments.empty() && !other->segments.empty())
|
|
|
|
|
return true;
|
|
|
|
|
for (auto &sa : li.segments)
|
|
|
|
|
for (auto &sb : other->segments)
|
|
|
|
|
if (sa.Overlaps(sb)) return true;
|
|
|
|
|
@ -412,6 +495,11 @@ LiveInterval *LiveRegMatrix::GetConflict(const LiveInterval &li,
|
|
|
|
|
for (auto *other : reg_assignments_[phys_reg])
|
|
|
|
|
{
|
|
|
|
|
if (other->vreg == li.vreg) continue;
|
|
|
|
|
bool gpr32_64_alias =
|
|
|
|
|
(li.reg_class == RegClass::GPR32 && other->reg_class == RegClass::GPR64) ||
|
|
|
|
|
(li.reg_class == RegClass::GPR64 && other->reg_class == RegClass::GPR32);
|
|
|
|
|
if (gpr32_64_alias && !li.segments.empty() && !other->segments.empty())
|
|
|
|
|
return other;
|
|
|
|
|
for (auto &sa : li.segments)
|
|
|
|
|
for (auto &sb : other->segments)
|
|
|
|
|
if (sa.Overlaps(sb)) return other;
|
|
|
|
|
@ -460,104 +548,154 @@ static void AllocateRegistersForFunction(MachineFunction &function)
|
|
|
|
|
PropagateCopyHints(intervals, function);
|
|
|
|
|
intervals.reserve(function.GetNumVRegs() * 4);
|
|
|
|
|
|
|
|
|
|
SpillWeightCmp cmp;
|
|
|
|
|
std::vector<LiveInterval *> spilled;
|
|
|
|
|
// LLVM 风格:全局 cascade 计数器
|
|
|
|
|
int global_cascade = 1;
|
|
|
|
|
|
|
|
|
|
// ---- 阶段 1:分配循环 ----
|
|
|
|
|
for (int round = 0; round < MAX_ROUNDS; ++round)
|
|
|
|
|
{
|
|
|
|
|
spilled.clear();
|
|
|
|
|
// GP 分配(GPR32 + GPR64 共享同一 LiveRegMatrix)
|
|
|
|
|
LiveRegMatrix gp_matrix;
|
|
|
|
|
gp_matrix.Init(32);
|
|
|
|
|
std::vector<int> gp_spilled;
|
|
|
|
|
|
|
|
|
|
for (auto rc : {RegClass::GPR32, RegClass::FPR32})
|
|
|
|
|
// 预填充上一轮已分配的 vreg
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
// 构建堆:所有有效且未 split 的 vreg
|
|
|
|
|
std::vector<LiveInterval *> heap;
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg < 0) continue;
|
|
|
|
|
if (li.reg_class == rc && !li.IsSplit())
|
|
|
|
|
heap.push_back(&li);
|
|
|
|
|
}
|
|
|
|
|
// 新轮次:重置所有 vreg 的分配状态
|
|
|
|
|
for (auto *p : heap) p->assigned_reg = -1;
|
|
|
|
|
if (li.vreg >= 0 && li.IsAllocated() &&
|
|
|
|
|
(li.reg_class == RegClass::GPR32 || li.reg_class == RegClass::GPR64))
|
|
|
|
|
gp_matrix.Assign(&li, li.assigned_reg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::make_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
AllocateRegClass(intervals, RegClass::GPR32, gp_matrix,
|
|
|
|
|
pos_to_block, function, gp_spilled);
|
|
|
|
|
AllocateRegClass(intervals, RegClass::GPR64, gp_matrix,
|
|
|
|
|
pos_to_block, function, gp_spilled);
|
|
|
|
|
|
|
|
|
|
LiveRegMatrix matrix;
|
|
|
|
|
matrix.Init(32);
|
|
|
|
|
// FP 分配
|
|
|
|
|
LiveRegMatrix fp_matrix;
|
|
|
|
|
fp_matrix.Init(32);
|
|
|
|
|
std::vector<int> fp_spilled;
|
|
|
|
|
|
|
|
|
|
while (!heap.empty())
|
|
|
|
|
{
|
|
|
|
|
std::pop_heap(heap.begin(), heap.end(), cmp);
|
|
|
|
|
LiveInterval *li = heap.back();
|
|
|
|
|
heap.pop_back();
|
|
|
|
|
|
|
|
|
|
if (li->IsAllocated() || li->IsSplit()) continue;
|
|
|
|
|
|
|
|
|
|
// 尝试分配(按优先级)
|
|
|
|
|
if (TryAssign(*li, matrix, li->hint_reg)) continue;
|
|
|
|
|
if (TryAnyFreeReg(*li, matrix)) continue;
|
|
|
|
|
if (rc == RegClass::GPR32 && TryEvict(*li, matrix, heap, cmp)) continue;
|
|
|
|
|
if (TrySplit(*li, matrix, heap, intervals,
|
|
|
|
|
block_depth, pos_to_block, spilled, function, cmp)) continue;
|
|
|
|
|
li->assigned_reg = -2;
|
|
|
|
|
spilled.push_back(li);
|
|
|
|
|
}
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg >= 0 && li.IsAllocated() && li.reg_class == RegClass::FPR32)
|
|
|
|
|
fp_matrix.Assign(&li, li.assigned_reg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
AllocateRegClass(intervals, RegClass::FPR32, fp_matrix,
|
|
|
|
|
pos_to_block, function, fp_spilled);
|
|
|
|
|
|
|
|
|
|
auto spilled = gp_spilled;
|
|
|
|
|
spilled.insert(spilled.end(), fp_spilled.begin(), fp_spilled.end());
|
|
|
|
|
|
|
|
|
|
if (spilled.empty()) break;
|
|
|
|
|
|
|
|
|
|
// ---- 溢出重写 ----
|
|
|
|
|
for (auto *li : spilled)
|
|
|
|
|
// ---- 溢出重写(LLVM-style spill rewrite)----
|
|
|
|
|
// LLVM 关键设计:每次 reload 创建新 vreg,让分配器在下一轮分配不同物理寄存器,
|
|
|
|
|
// 避免多个溢出 vreg 共享同一回退寄存器导致互相覆盖。
|
|
|
|
|
// 参考: llvm/lib/CodeGen/InlineSpiller.cpp spill()/reload()
|
|
|
|
|
for (int spilled_vreg : spilled)
|
|
|
|
|
{
|
|
|
|
|
if (li->spill_slot < 0) li->spill_slot = li->vreg;
|
|
|
|
|
// 反向遍历 uses
|
|
|
|
|
for (int u = (int)li->uses.size() - 1; u >= 0; --u)
|
|
|
|
|
auto &li = intervals[spilled_vreg];
|
|
|
|
|
if (li.spill_slot < 0)
|
|
|
|
|
{
|
|
|
|
|
auto &use = li->uses[u];
|
|
|
|
|
int size = 4;
|
|
|
|
|
if (li.vreg_class == VRegClass::Ptr) size = 8;
|
|
|
|
|
li.spill_slot = function.CreateFrameIndex(size);
|
|
|
|
|
}
|
|
|
|
|
for (int u = (int)li.uses.size() - 1; u >= 0; --u)
|
|
|
|
|
{
|
|
|
|
|
auto &use = li.uses[u];
|
|
|
|
|
int blk = pos_to_block[use.pos];
|
|
|
|
|
int local = use.pos - block_start_pos[blk];
|
|
|
|
|
if (use.is_def)
|
|
|
|
|
{
|
|
|
|
|
// 定义点后插入 StoreStack
|
|
|
|
|
// def: 在定义后插入 StoreStack,保存值到栈
|
|
|
|
|
blocks[blk]->InsertInst(local + 1,
|
|
|
|
|
MachineInstr(Opcode::StoreStack,
|
|
|
|
|
{Operand::VReg(li->vreg, li->vreg_class),
|
|
|
|
|
Operand::FrameIndex(li->spill_slot)}));
|
|
|
|
|
{Operand::VReg(li.vreg, li.vreg_class),
|
|
|
|
|
Operand::FrameIndex(li.spill_slot)}));
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// 使用点前插入 LoadStack
|
|
|
|
|
int new_vreg = function.CreateVReg(li->vreg_class);
|
|
|
|
|
// use: 创建新 vreg,LoadStack 加载到新 vreg,替换使用点
|
|
|
|
|
int new_vreg = function.CreateVReg(li.vreg_class);
|
|
|
|
|
blocks[blk]->InsertInst(local,
|
|
|
|
|
MachineInstr(Opcode::LoadStack,
|
|
|
|
|
{Operand::VReg(new_vreg, li->vreg_class),
|
|
|
|
|
Operand::FrameIndex(li->spill_slot)}));
|
|
|
|
|
blocks[blk]->ReplaceVReg(local + 1, li->vreg, new_vreg);
|
|
|
|
|
{Operand::VReg(new_vreg, li.vreg_class),
|
|
|
|
|
Operand::FrameIndex(li.spill_slot)}));
|
|
|
|
|
// 在插入点之后搜索使用溢出 vreg 的指令并替换
|
|
|
|
|
auto &instructions = blocks[blk]->GetInstructions();
|
|
|
|
|
for (int idx = local + 1; idx < (int)instructions.size(); ++idx)
|
|
|
|
|
{
|
|
|
|
|
bool found = false;
|
|
|
|
|
for (auto &op : instructions[idx].GetOperands())
|
|
|
|
|
{
|
|
|
|
|
if (op.GetKind() == Operand::Kind::VReg &&
|
|
|
|
|
op.GetVRegId() == li.vreg)
|
|
|
|
|
{
|
|
|
|
|
op = Operand::VReg(new_vreg, li.vreg_class);
|
|
|
|
|
found = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (found) break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 重新分析(每轮全新分配,不保留 prev_assigned)----
|
|
|
|
|
// ---- 保存已分配状态 ----
|
|
|
|
|
std::unordered_map<int, int> prev_assigned;
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
if (li.vreg >= 0 && li.IsAllocated())
|
|
|
|
|
prev_assigned[li.vreg] = li.assigned_reg;
|
|
|
|
|
else if (li.vreg >= 0 && li.IsSpilled())
|
|
|
|
|
prev_assigned[li.vreg] = -2; // 保持 spill 状态
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 重新分析活跃 ----
|
|
|
|
|
raw = ComputeInstLiveness(function);
|
|
|
|
|
intervals = EnhanceIntervals(raw, function);
|
|
|
|
|
if (function.GetNumVRegs() > (int)intervals.size())
|
|
|
|
|
intervals.resize(function.GetNumVRegs());
|
|
|
|
|
|
|
|
|
|
// 重建位置映射(指令数已变)
|
|
|
|
|
// ---- 重建位置映射 ----
|
|
|
|
|
pos_to_block.clear();
|
|
|
|
|
block_start_pos.assign(blocks.size(), -1);
|
|
|
|
|
global = 0;
|
|
|
|
|
int new_global = 0;
|
|
|
|
|
for (int bi = 0; bi < (int)blocks.size(); ++bi)
|
|
|
|
|
{
|
|
|
|
|
if (!blocks[bi]) continue;
|
|
|
|
|
block_start_pos[bi] = global;
|
|
|
|
|
block_start_pos[bi] = new_global;
|
|
|
|
|
int cnt = (int)blocks[bi]->GetInstructions().size();
|
|
|
|
|
for (int j = 0; j < cnt; ++j) pos_to_block.push_back(bi);
|
|
|
|
|
global += cnt;
|
|
|
|
|
new_global += cnt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ComputeSpillWeights(intervals, block_depth, pos_to_block);
|
|
|
|
|
// ---- 恢复已分配状态 + 递增 cascade ----
|
|
|
|
|
int num_new = 0;
|
|
|
|
|
for (auto &li : intervals)
|
|
|
|
|
{
|
|
|
|
|
auto it = prev_assigned.find(li.vreg);
|
|
|
|
|
if (it != prev_assigned.end())
|
|
|
|
|
{
|
|
|
|
|
li.assigned_reg = it->second;
|
|
|
|
|
// 已分配的保持 cascade
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// 新 vreg(由 spill 引入的 LoadStack vreg)
|
|
|
|
|
li.assigned_reg = -1;
|
|
|
|
|
li.generation = 0;
|
|
|
|
|
num_new++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (num_new > 0)
|
|
|
|
|
{
|
|
|
|
|
// 只对新 vreg 重新计算 spill weight
|
|
|
|
|
ComputeSpillWeights(intervals, block_depth, pos_to_block);
|
|
|
|
|
}
|
|
|
|
|
PropagateCopyHints(intervals, function);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@ -574,11 +712,66 @@ static void AllocateRegistersForFunction(MachineFunction &function)
|
|
|
|
|
int phys = -1;
|
|
|
|
|
if (vreg >= 0 && vreg < (int)intervals.size())
|
|
|
|
|
phys = intervals[vreg].assigned_reg;
|
|
|
|
|
if (phys < 0) phys = 48; // 兜底 X16(应对未分配 vreg)
|
|
|
|
|
if (phys < 0)
|
|
|
|
|
{
|
|
|
|
|
auto vc = function.GetVRegClass(vreg);
|
|
|
|
|
if (vc == VRegClass::Ptr) phys = 47; // X16
|
|
|
|
|
else if (vc == VRegClass::Float) phys = 78; // S16
|
|
|
|
|
else phys = 16; // W16
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (vreg < function.GetNumVRegs())
|
|
|
|
|
{
|
|
|
|
|
auto vc = function.GetVRegClass(vreg);
|
|
|
|
|
if (vc == VRegClass::Ptr)
|
|
|
|
|
phys = phys + 31; // Wn → Xn (PhysReg 31-61)
|
|
|
|
|
else if (vc == VRegClass::Float)
|
|
|
|
|
phys = phys + 62; // → Sn (PhysReg 62-93)
|
|
|
|
|
// VRegClass::Int 保持原值 → Wn (PhysReg 0-30)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
op = Operand::Reg(static_cast<PhysReg>(phys));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ---- 收集使用的 callee-saved 寄存器(LLVM PEI 风格:扫描最终 PhysReg)----
|
|
|
|
|
{
|
|
|
|
|
int x19 = static_cast<int>(PhysReg::X19);
|
|
|
|
|
int x28 = static_cast<int>(PhysReg::X28);
|
|
|
|
|
int w19 = static_cast<int>(PhysReg::W19);
|
|
|
|
|
int w28 = static_cast<int>(PhysReg::W28);
|
|
|
|
|
int s16 = static_cast<int>(PhysReg::S16);
|
|
|
|
|
int s31 = static_cast<int>(PhysReg::S31);
|
|
|
|
|
|
|
|
|
|
bool used_x[11] = {};
|
|
|
|
|
bool used_s[16] = {};
|
|
|
|
|
for (auto &block : blocks)
|
|
|
|
|
{
|
|
|
|
|
if (!block) continue;
|
|
|
|
|
for (auto &inst : block->GetInstructions())
|
|
|
|
|
{
|
|
|
|
|
for (auto &op : inst.GetOperands())
|
|
|
|
|
{
|
|
|
|
|
if (op.GetKind() != Operand::Kind::Reg) continue;
|
|
|
|
|
int r = static_cast<int>(op.GetReg());
|
|
|
|
|
if (r >= w19 && r <= w28)
|
|
|
|
|
used_x[r - w19] = true;
|
|
|
|
|
else if (r >= x19 && r <= x28)
|
|
|
|
|
used_x[r - x19] = true;
|
|
|
|
|
else if (r >= s16 && r <= s31)
|
|
|
|
|
used_s[r - s16] = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < 11; ++i)
|
|
|
|
|
if (used_x[i])
|
|
|
|
|
function.AddCalleeSavedReg(static_cast<PhysReg>(x19 + i));
|
|
|
|
|
for (int i = 0; i < 16; ++i)
|
|
|
|
|
if (used_s[i])
|
|
|
|
|
function.AddCalleeSavedReg(static_cast<PhysReg>(s16 + i));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void RunGreedyRegAlloc(MachineFunction &function)
|
|
|
|
|
|