perf(mir): 帧槽按大小重排序——小槽优先获得 ldur 范围内偏移

按槽大小升序分配:4 字节标量等小槽优先,大数组靠后,使热点访问的偏移落在 ldur 可直接编码的 [-256, 0] 范围内。
crypto -176(-10.3%),消除了大量 `mov x13, x29` + `sub` 的地址计算链。
lzk
lzkk 3 days ago
parent f65fe9fc20
commit c277aa2226

@ -39,15 +39,35 @@ namespace mir
}
callee_saved_bytes = AlignTo(callee_saved_bytes, 16);
// 将小槽位(频繁访问的标量)排在前面,使偏移在 ldur 范围 [-256,0] 内
std::vector<size_t> slot_order(slots.size());
for (size_t i = 0; i < slots.size(); ++i)
slot_order[i] = i;
std::stable_sort(slot_order.begin(), slot_order.end(),
[&](size_t a, size_t b) {
const auto &sa = slots[a];
const auto &sb = slots[b];
if (sa.is_stack_arg != sb.is_stack_arg)
return !sa.is_stack_arg;
if (sa.is_callee_stack_arg != sb.is_callee_stack_arg)
return !sa.is_callee_stack_arg;
return sa.size < sb.size;
});
std::vector<int> new_offsets(slots.size(), 0);
int offset = 0;
for (auto &slot : slots)
for (size_t si : slot_order)
{
auto &slot = slots[si];
if (slot.is_stack_arg || slot.is_callee_stack_arg)
continue;
offset -= slot.size;
offset = AlignTo(offset, slot.size >= 8 ? 8 : 4);
slot.offset = offset;
new_offsets[si] = offset;
}
for (size_t i = 0; i < slots.size(); ++i)
if (!slots[i].is_stack_arg && !slots[i].is_callee_stack_arg)
slots[i].offset = new_offsets[i];
offset -= callee_saved_bytes;
offset -= 16;

Loading…
Cancel
Save