forked from NUDT-compiler/nudt-compiler-cpp
parent
a14a9cde0d
commit
ad4591607f
@ -0,0 +1,623 @@
|
||||
#include "mir/MIR.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace mir {
|
||||
namespace {
|
||||
|
||||
bool IsControlTransfer(const MachineInstr& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case Opcode::B:
|
||||
case Opcode::Bcond:
|
||||
case Opcode::FBcond:
|
||||
case Opcode::Cbnz:
|
||||
case Opcode::Cbz:
|
||||
case Opcode::Ret:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<int> GetLoadSlot(const MachineInstr& inst) {
|
||||
const auto& ops = inst.GetOperands();
|
||||
if (inst.GetOpcode() != Opcode::LoadStack || ops.size() < 2 ||
|
||||
!ops[1].IsFrameIndex()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return ops[1].GetFrameIndex();
|
||||
}
|
||||
|
||||
std::optional<int> GetStoreSlot(const MachineInstr& inst) {
|
||||
const auto& ops = inst.GetOperands();
|
||||
if (inst.GetOpcode() != Opcode::StoreStack || ops.size() < 2 ||
|
||||
!ops[1].IsFrameIndex()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return ops[1].GetFrameIndex();
|
||||
}
|
||||
|
||||
bool IsOpaqueSlotUse(const MachineInstr& inst, int* slot) {
|
||||
const auto& ops = inst.GetOperands();
|
||||
switch (inst.GetOpcode()) {
|
||||
case Opcode::LoadStackOffset:
|
||||
case Opcode::StoreStackOffset:
|
||||
case Opcode::LoadStackAddr:
|
||||
if (ops.size() >= 2 && ops[1].IsFrameIndex()) {
|
||||
*slot = ops[1].GetFrameIndex();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Equality on physical registers; true only for the exact same PhysReg value.
bool SameReg(PhysReg lhs, PhysReg rhs) {
  const bool identical = (lhs == rhs);
  return identical;
}
|
||||
|
||||
// True for the 32-bit registers a promotable slot may be accessed through:
// the range W0..W11 plus W19..W24.
bool IsPromotableWReg(PhysReg reg) {
  if (reg >= PhysReg::W0 && reg <= PhysReg::W11) return true;

  const PhysReg high_regs[] = {PhysReg::W19, PhysReg::W20, PhysReg::W21,
                               PhysReg::W22, PhysReg::W23, PhysReg::W24};
  for (const PhysReg candidate : high_regs) {
    if (reg == candidate) return true;
  }
  return false;
}
|
||||
|
||||
// True for the 64-bit registers a promotable slot may be accessed through:
// the range X0..X11 plus X19..X24.
bool IsPromotableXReg(PhysReg reg) {
  if (reg >= PhysReg::X0 && reg <= PhysReg::X11) return true;

  const PhysReg high_regs[] = {PhysReg::X19, PhysReg::X20, PhysReg::X21,
                               PhysReg::X22, PhysReg::X23, PhysReg::X24};
  for (const PhysReg candidate : high_regs) {
    if (reg == candidate) return true;
  }
  return false;
}
|
||||
|
||||
// True for the single-precision registers a promotable slot may be accessed
// through: the range S0..S10.
bool IsPromotableSReg(PhysReg reg) {
  if (reg < PhysReg::S0) return false;
  return reg <= PhysReg::S10;
}
|
||||
|
||||
size_t FirstTerminatorIndex(const std::vector<MachineInstr>& insts) {
|
||||
for (size_t i = 0; i < insts.size(); ++i) {
|
||||
if (IsControlTransfer(insts[i])) return i;
|
||||
}
|
||||
return insts.size();
|
||||
}
|
||||
|
||||
void InsertBeforeTerminators(std::vector<MachineInstr>& insts,
|
||||
const std::vector<MachineInstr>& inserted) {
|
||||
const size_t pos = FirstTerminatorIndex(insts);
|
||||
insts.insert(insts.begin() + static_cast<long>(pos), inserted.begin(),
|
||||
inserted.end());
|
||||
}
|
||||
|
||||
// Per-slot usage statistics gathered over one candidate loop.
struct SlotUseInfo {
  // Register class every access to the slot goes through. Unknown until the
  // first access is seen; Invalid once the slot is ruled out.
  enum class RegKind { Unknown, W, X, S, Invalid };

  int slot{-1};             // Frame index, -1 until filled in.
  int loads{0};             // LoadStack accesses anywhere in the loop.
  int stores{0};            // StoreStack accesses anywhere in the loop.
  int body_loads{0};        // Loads in loop blocks other than the header.
  int body_stores{0};       // Stores in loop blocks other than the header.
  int after_call_uses{0};   // Accesses following a Bl within the same block.
  RegKind reg_kind{RegKind::Unknown};
  std::unordered_set<size_t> use_blocks;  // Loop blocks touching the slot.
};
|
||||
|
||||
struct SlotPick {
|
||||
int slot = -1;
|
||||
SlotUseInfo::RegKind reg_kind = SlotUseInfo::RegKind::Unknown;
|
||||
bool write_back = true;
|
||||
};
|
||||
|
||||
struct LoopCandidate {
|
||||
size_t header = 0;
|
||||
size_t latch = 0;
|
||||
int score = 0;
|
||||
std::vector<SlotPick> slots;
|
||||
std::unordered_set<size_t> blocks;
|
||||
};
|
||||
|
||||
struct Promotion {
|
||||
int slot = -1;
|
||||
PhysReg reg = PhysReg::W19;
|
||||
SlotUseInfo::RegKind reg_kind = SlotUseInfo::RegKind::Unknown;
|
||||
bool write_back = true;
|
||||
};
|
||||
|
||||
// Maps a physical register to the register kind it belongs to, checking the
// W, X and S predicates in that order. Registers accepted by none of them
// classify as Invalid.
SlotUseInfo::RegKind ClassifyPromotableReg(PhysReg reg) {
  using Kind = SlotUseInfo::RegKind;
  Kind kind = Kind::Invalid;
  if (IsPromotableWReg(reg)) {
    kind = Kind::W;
  } else if (IsPromotableXReg(reg)) {
    kind = Kind::X;
  } else if (IsPromotableSReg(reg)) {
    kind = Kind::S;
  }
  return kind;
}
|
||||
|
||||
// Folds one register access into `info.reg_kind`. The slot keeps a single
// kind as long as every access agrees; a non-promotable register or a kind
// that differs from the one already recorded makes the slot Invalid, and
// Invalid is sticky.
void NoteSlotRegUse(SlotUseInfo& info, PhysReg reg) {
  using Kind = SlotUseInfo::RegKind;
  const Kind observed = ClassifyPromotableReg(reg);
  const bool unusable = observed == Kind::Invalid;
  const bool conflicting =
      info.reg_kind != Kind::Unknown && info.reg_kind != observed;
  info.reg_kind = (unusable || conflicting) ? Kind::Invalid : observed;
}
|
||||
|
||||
int SlotScore(const SlotUseInfo& info) {
|
||||
int score = (info.body_loads + info.body_stores) * 4 + info.loads +
|
||||
info.stores;
|
||||
if (info.stores == 0) {
|
||||
score += 80 + info.body_loads * 6;
|
||||
}
|
||||
if (info.body_loads > 0 && info.body_stores > 0) {
|
||||
score += info.use_blocks.size() > 1 ? 140 : 20;
|
||||
}
|
||||
if (info.use_blocks.size() > 1) {
|
||||
score += static_cast<int>(info.use_blocks.size() - 1) * 24;
|
||||
}
|
||||
if (info.reg_kind == SlotUseInfo::RegKind::S && info.after_call_uses > 0) {
|
||||
score += 180 + info.after_call_uses * 8;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
// Returns the `index`-th general-purpose register handed out for promotion:
// X19..X24 for X-kind slots, W19..W24 for every other kind. `index` must be
// below 6; it is not range-checked here.
PhysReg GprForIndex(SlotUseInfo::RegKind kind, size_t index) {
  static const std::vector<PhysReg> kWideRegs = {
      PhysReg::X19, PhysReg::X20, PhysReg::X21,
      PhysReg::X22, PhysReg::X23, PhysReg::X24};
  static const std::vector<PhysReg> kNarrowRegs = {
      PhysReg::W19, PhysReg::W20, PhysReg::W21,
      PhysReg::W22, PhysReg::W23, PhysReg::W24};

  const std::vector<PhysReg>& table =
      kind == SlotUseInfo::RegKind::X ? kWideRegs : kNarrowRegs;
  return table[index];
}
|
||||
|
||||
std::vector<size_t> GetSuccessors(
|
||||
const MachineFunction& function, size_t block_index,
|
||||
const std::unordered_map<std::string, size_t>& block_index_by_name) {
|
||||
const auto& blocks = function.GetBlocks();
|
||||
const auto& insts = blocks[block_index]->GetInstructions();
|
||||
std::vector<size_t> succs;
|
||||
for (const auto& inst : insts) {
|
||||
const auto& ops = inst.GetOperands();
|
||||
switch (inst.GetOpcode()) {
|
||||
case Opcode::B:
|
||||
case Opcode::Bcond:
|
||||
case Opcode::FBcond:
|
||||
if (!ops.empty() && ops[0].IsSymbol()) {
|
||||
auto it = block_index_by_name.find(ops[0].GetSymbol());
|
||||
if (it != block_index_by_name.end()) succs.push_back(it->second);
|
||||
}
|
||||
break;
|
||||
case Opcode::Cbnz:
|
||||
case Opcode::Cbz:
|
||||
if (ops.size() > 1 && ops[1].IsSymbol()) {
|
||||
auto it = block_index_by_name.find(ops[1].GetSymbol());
|
||||
if (it != block_index_by_name.end()) succs.push_back(it->second);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!insts.empty()) {
|
||||
Opcode last = insts.back().GetOpcode();
|
||||
if (last != Opcode::B && last != Opcode::Ret &&
|
||||
block_index + 1 < blocks.size()) {
|
||||
succs.push_back(block_index + 1);
|
||||
}
|
||||
}
|
||||
std::sort(succs.begin(), succs.end());
|
||||
succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
|
||||
return succs;
|
||||
}
|
||||
|
||||
bool InLoop(const LoopCandidate& loop, size_t index) {
|
||||
return loop.blocks.count(index) != 0;
|
||||
}
|
||||
|
||||
std::vector<size_t> SortedLoopBlocks(const LoopCandidate& loop) {
|
||||
std::vector<size_t> blocks(loop.blocks.begin(), loop.blocks.end());
|
||||
std::sort(blocks.begin(), blocks.end());
|
||||
return blocks;
|
||||
}
|
||||
|
||||
std::vector<std::vector<size_t>> BuildSuccessors(
|
||||
const MachineFunction& function,
|
||||
const std::unordered_map<std::string, size_t>& block_index_by_name) {
|
||||
std::vector<std::vector<size_t>> succs(function.GetBlocks().size());
|
||||
for (size_t i = 0; i < succs.size(); ++i) {
|
||||
succs[i] = GetSuccessors(function, i, block_index_by_name);
|
||||
}
|
||||
return succs;
|
||||
}
|
||||
|
||||
// Inverts a successor table: entry b of the result lists every block that has
// b among its successors, sorted ascending and without duplicates.
// Every index stored in `succs` must be smaller than succs.size().
std::vector<std::vector<size_t>> BuildPredecessors(
    const std::vector<std::vector<size_t>>& succs) {
  const size_t block_count = succs.size();
  std::vector<std::vector<size_t>> preds(block_count);

  for (size_t from = 0; from < block_count; ++from) {
    for (const size_t to : succs[from]) {
      preds[to].push_back(from);
    }
  }

  for (auto& incoming : preds) {
    std::sort(incoming.begin(), incoming.end());
    const auto unique_end = std::unique(incoming.begin(), incoming.end());
    incoming.erase(unique_end, incoming.end());
  }
  return preds;
}
|
||||
|
||||
// Iterative dominator-set computation with block 0 as the root.
//
// doms[b] ends up holding every block that dominates b, including b itself.
// Block 0 is dominated only by itself. Every other block starts with the full
// block set and is repeatedly narrowed to the intersection of its
// predecessors' sets plus itself until nothing changes. A block without
// predecessors therefore ends up with just itself.
std::vector<std::set<size_t>> ComputeDominators(
    size_t block_count, const std::vector<std::vector<size_t>>& preds) {
  std::vector<std::set<size_t>> doms(block_count);
  if (block_count == 0) return doms;

  std::set<size_t> every_block;
  for (size_t block = 0; block < block_count; ++block) {
    every_block.insert(block);
  }
  doms[0].insert(0);
  for (size_t block = 1; block < block_count; ++block) {
    doms[block] = every_block;
  }

  for (bool changed = true; changed;) {
    changed = false;
    for (size_t block = 1; block < block_count; ++block) {
      const auto& incoming = preds[block];
      std::set<size_t> narrowed;
      if (!incoming.empty()) {
        narrowed = doms[incoming.front()];
        for (size_t k = 1; k < incoming.size(); ++k) {
          const std::set<size_t>& other = doms[incoming[k]];
          // In-place intersection: drop everything `other` lacks.
          for (auto it = narrowed.begin(); it != narrowed.end();) {
            if (other.count(*it) == 0) {
              it = narrowed.erase(it);
            } else {
              ++it;
            }
          }
        }
      }
      narrowed.insert(block);

      if (narrowed != doms[block]) {
        doms[block] = std::move(narrowed);
        changed = true;
      }
    }
  }
  return doms;
}
|
||||
|
||||
// Collects the blocks of the natural loop for the back edge latch -> header:
// the header, the latch, and every block that can reach the latch by walking
// predecessor edges without passing through the header.
std::unordered_set<size_t> BuildNaturalLoop(
    size_t header, size_t latch,
    const std::vector<std::vector<size_t>>& preds) {
  std::unordered_set<size_t> members;
  members.insert(header);
  members.insert(latch);

  std::vector<size_t> pending(1, latch);
  while (!pending.empty()) {
    const size_t current = pending.back();
    pending.pop_back();
    for (const size_t pred : preds[current]) {
      const bool newly_added = members.insert(pred).second;
      // The walk stops at the header; its predecessors are outside the loop.
      if (newly_added && pred != header) {
        pending.push_back(pred);
      }
    }
  }
  return members;
}
|
||||
|
||||
// True when the header is the only way into the loop: every block in
// `loop_blocks` other than `header` has all of its predecessors inside
// `loop_blocks`.
bool HasSingleEntry(size_t header, const std::unordered_set<size_t>& loop_blocks,
                    const std::vector<std::vector<size_t>>& preds) {
  const auto inside_loop = [&loop_blocks](size_t block) {
    return loop_blocks.count(block) != 0;
  };
  for (const size_t member : loop_blocks) {
    if (member == header) continue;
    const auto& incoming = preds[member];
    if (!std::all_of(incoming.begin(), incoming.end(), inside_loop)) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Finds natural loops whose stack slots are worth keeping in registers.
//
// For every back edge latch -> header (header dominates latch) the natural
// loop is built and kept only if it has at most 24 blocks and a single entry.
// Inside the loop, LoadStack/StoreStack accesses are tallied per slot; slots
// that are also touched by an opaque access anywhere in the function are
// ignored. A slot qualifies when all its accesses use one promotable register
// kind, the frame slot size matches that kind (8 bytes for X, 4 otherwise),
// it is loaded at least once, and it has either a store or two or more loads.
// Qualifying slots are ranked by SlotScore and at most 6 W/X slots and 3 S
// slots are picked per loop.
//
// Returns the candidates ordered by descending score, then descending block
// count, then ascending header index.
std::vector<LoopCandidate> FindLoopCandidates(MachineFunction& function) {
  using Kind = SlotUseInfo::RegKind;
  constexpr size_t kMaxLoopBlocks = 24;
  constexpr int kMaxGprSlots = 6;
  constexpr int kMaxSSlots = 3;

  const auto& blocks = function.GetBlocks();
  std::unordered_map<std::string, size_t> block_index_by_name;
  for (size_t i = 0; i < blocks.size(); ++i) {
    block_index_by_name[blocks[i]->GetName()] = i;
  }

  // Slots with an opaque access anywhere in the function are off limits.
  std::unordered_set<int> opaque_slots;
  for (const auto& bb : blocks) {
    for (const auto& inst : bb->GetInstructions()) {
      int slot = -1;
      if (IsOpaqueSlotUse(inst, &slot)) opaque_slots.insert(slot);
    }
  }

  const auto succs = BuildSuccessors(function, block_index_by_name);
  const auto preds = BuildPredecessors(succs);
  const auto doms = ComputeDominators(blocks.size(), preds);

  std::vector<LoopCandidate> candidates;
  for (size_t latch = 0; latch < blocks.size(); ++latch) {
    for (size_t header : succs[latch]) {
      if (header == latch) continue;
      // Block 0 is the dominator root, i.e. the function entry. Control
      // reaches such a header without passing through any predecessor block,
      // so there is nowhere to preload the promoted registers.
      if (header == 0) continue;
      if (header >= doms.size() || doms[latch].count(header) == 0) continue;

      auto loop_blocks = BuildNaturalLoop(header, latch, preds);
      if (loop_blocks.size() > kMaxLoopBlocks) continue;
      if (!HasSingleEntry(header, loop_blocks, preds)) continue;

      std::unordered_map<int, SlotUseInfo> slot_info;
      // Shared bookkeeping for one load or store of `slot` in block `bi`.
      const auto record_access = [&](const MachineInstr& inst, int slot,
                                     size_t bi, bool is_store,
                                     bool seen_call) {
        SlotUseInfo& info = slot_info[slot];
        info.slot = slot;
        const auto& ops = inst.GetOperands();
        if (ops.empty() || !ops[0].IsReg()) {
          info.reg_kind = Kind::Invalid;
        } else {
          NoteSlotRegUse(info, ops[0].GetReg());
        }
        const bool in_body = bi != header;
        if (is_store) {
          ++info.stores;
          if (in_body) ++info.body_stores;
        } else {
          ++info.loads;
          if (in_body) ++info.body_loads;
        }
        info.use_blocks.insert(bi);
        if (seen_call) ++info.after_call_uses;
      };

      for (size_t bi : loop_blocks) {
        bool seen_call = false;  // Reset per block: only local calls count.
        for (const auto& cur : blocks[bi]->GetInstructions()) {
          if (cur.GetOpcode() == Opcode::Bl) seen_call = true;
          if (auto slot = GetLoadSlot(cur);
              slot.has_value() && opaque_slots.count(*slot) == 0) {
            record_access(cur, *slot, bi, /*is_store=*/false, seen_call);
          }
          if (auto slot = GetStoreSlot(cur);
              slot.has_value() && opaque_slots.count(*slot) == 0) {
            record_access(cur, *slot, bi, /*is_store=*/true, seen_call);
          }
        }
      }

      std::vector<SlotUseInfo> ranked;
      for (const auto& [slot, info] : slot_info) {
        if (info.reg_kind == Kind::Invalid || info.reg_kind == Kind::Unknown) {
          continue;
        }
        const int slot_size = function.GetFrameSlot(slot).size;
        const int required_size = info.reg_kind == Kind::X ? 8 : 4;
        if (slot_size != required_size) continue;
        if (info.loads == 0) continue;
        // With at least one load, a slot that is also stored always has two
        // or more accesses, so only the read-only case needs a minimum.
        if (info.stores == 0 && info.loads < 2) continue;
        ranked.push_back(info);
      }
      if (ranked.empty()) continue;
      std::sort(ranked.begin(), ranked.end(),
                [](const SlotUseInfo& lhs, const SlotUseInfo& rhs) {
                  const int lhs_score = SlotScore(lhs);
                  const int rhs_score = SlotScore(rhs);
                  if (lhs_score != rhs_score) return lhs_score > rhs_score;
                  return lhs.slot < rhs.slot;
                });

      LoopCandidate cand;
      cand.header = header;
      cand.latch = latch;
      cand.blocks = std::move(loop_blocks);
      int gpr_slots = 0;
      int s_slots = 0;
      for (const auto& info : ranked) {
        if (info.reg_kind == Kind::W || info.reg_kind == Kind::X) {
          if (gpr_slots >= kMaxGprSlots) continue;
          ++gpr_slots;
        } else if (info.reg_kind == Kind::S) {
          if (s_slots >= kMaxSSlots) continue;
          ++s_slots;
        } else {
          continue;
        }
        // Write-back is only needed when the loop actually stores the slot.
        cand.slots.push_back(
            SlotPick{info.slot, info.reg_kind, info.stores > 0});
        cand.score += SlotScore(info);
      }
      if (cand.slots.empty()) continue;
      candidates.push_back(std::move(cand));
    }
  }

  std::sort(candidates.begin(), candidates.end(),
            [](const LoopCandidate& lhs, const LoopCandidate& rhs) {
              if (lhs.score != rhs.score) return lhs.score > rhs.score;
              if (lhs.blocks.size() != rhs.blocks.size()) {
                return lhs.blocks.size() > rhs.blocks.size();
              }
              return lhs.header < rhs.header;
            });
  return candidates;
}
|
||||
|
||||
// Rewrites `loop` so that each picked slot lives in a register inside it.
//
// Steps:
//   1. Assign registers: W/X slots take GprForIndex(kind, 0..5), S slots take
//      S8..S10; each register is reported via AddUsedCalleeSaved. Picks beyond
//      those limits are dropped.
//   2. Inside the loop, a LoadStack/StoreStack of a promoted slot becomes a
//      register copy (MovReg, or FMovReg for S slots), or disappears when the
//      instruction already uses the assigned register.
//   3. Every block outside the loop that branches or falls into the header
//      gets a LoadStack of each promoted slot before its terminators.
//   4. Slots flagged write_back are stored back on loop exit: at the top of an
//      exit block whose predecessors are all inside the loop, otherwise before
//      the terminators of the exiting loop block itself.
void PromoteLoopSlots(MachineFunction& function, const LoopCandidate& loop) {
  using Kind = SlotUseInfo::RegKind;

  const std::vector<PhysReg> s_regs = {PhysReg::S8, PhysReg::S9,
                                       PhysReg::S10};
  std::unordered_map<int, Promotion> slot_to_promotion;
  std::vector<Promotion> promotions;
  size_t gpr_cursor = 0;
  size_t s_cursor = 0;
  for (const auto& pick : loop.slots) {
    const bool wants_gpr =
        pick.reg_kind == Kind::W || pick.reg_kind == Kind::X;
    const bool wants_fp = pick.reg_kind == Kind::S;
    if (!wants_gpr && !wants_fp) continue;

    PhysReg reg = PhysReg::W19;
    if (wants_gpr) {
      if (gpr_cursor >= 6) continue;
      reg = GprForIndex(pick.reg_kind, gpr_cursor++);
    } else {
      if (s_cursor >= s_regs.size()) continue;
      reg = s_regs[s_cursor++];
    }

    const Promotion promotion{pick.slot, reg, pick.reg_kind, pick.write_back};
    slot_to_promotion[pick.slot] = promotion;
    promotions.push_back(promotion);
    function.AddUsedCalleeSaved(reg);
  }

  const auto& blocks = function.GetBlocks();
  std::unordered_map<std::string, size_t> block_index_by_name;
  for (size_t i = 0; i < blocks.size(); ++i) {
    block_index_by_name[blocks[i]->GetName()] = i;
  }
  auto succs = BuildSuccessors(function, block_index_by_name);
  auto preds = BuildPredecessors(succs);

  // Promotion bound to an optional slot, or nullptr when there is none.
  const auto find_promotion =
      [&slot_to_promotion](const std::optional<int>& slot) -> const Promotion* {
    if (!slot.has_value()) return nullptr;
    const auto it = slot_to_promotion.find(*slot);
    return it == slot_to_promotion.end() ? nullptr : &it->second;
  };
  // Appends `dst <- src` unless both are already the same register.
  const auto append_copy = [](std::vector<MachineInstr>& out, Kind kind,
                              PhysReg dst, PhysReg src) {
    if (SameReg(dst, src)) return;
    const Opcode opcode = kind == Kind::S ? Opcode::FMovReg : Opcode::MovReg;
    out.emplace_back(opcode, std::vector<Operand>{Operand::Reg(dst),
                                                  Operand::Reg(src)});
  };

  // Step 2: replace slot traffic inside the loop with register copies.
  for (size_t bi : SortedLoopBlocks(loop)) {
    auto& insts = blocks[bi]->GetInstructions();
    std::vector<MachineInstr> rewritten;
    rewritten.reserve(insts.size());
    for (const auto& inst : insts) {
      if (const Promotion* loaded = find_promotion(GetLoadSlot(inst))) {
        const PhysReg dst = inst.GetOperands()[0].GetReg();
        append_copy(rewritten, loaded->reg_kind, dst, loaded->reg);
        continue;
      }
      if (const Promotion* stored = find_promotion(GetStoreSlot(inst))) {
        const PhysReg src = inst.GetOperands()[0].GetReg();
        append_copy(rewritten, stored->reg_kind, stored->reg, src);
        continue;
      }
      rewritten.push_back(inst);
    }
    insts = std::move(rewritten);
  }

  // Step 3: preload the registers on every edge entering the header.
  std::vector<MachineInstr> preloads;
  preloads.reserve(promotions.size());
  for (const auto& promotion : promotions) {
    preloads.emplace_back(
        Opcode::LoadStack,
        std::vector<Operand>{Operand::Reg(promotion.reg),
                             Operand::FrameIndex(promotion.slot)});
  }
  for (size_t pred = 0; pred < blocks.size(); ++pred) {
    if (InLoop(loop, pred)) continue;
    const auto& targets = succs[pred];
    if (std::find(targets.begin(), targets.end(), loop.header) ==
        targets.end()) {
      continue;
    }
    InsertBeforeTerminators(blocks[pred]->GetInstructions(), preloads);
  }

  // Step 4: flush modified registers back to their slots on the way out.
  std::vector<MachineInstr> write_backs;
  for (const auto& promotion : promotions) {
    if (!promotion.write_back) continue;
    write_backs.emplace_back(
        Opcode::StoreStack,
        std::vector<Operand>{Operand::Reg(promotion.reg),
                             Operand::FrameIndex(promotion.slot)});
  }
  const auto inside = [&loop](size_t block) { return InLoop(loop, block); };

  std::unordered_set<size_t> exit_blocks_with_stores;
  for (size_t bi : SortedLoopBlocks(loop)) {
    bool needs_local_exit_store = false;
    for (size_t succ : succs[bi]) {
      if (inside(succ)) continue;

      const auto& exit_preds = preds[succ];
      const bool dedicated_exit =
          std::all_of(exit_preds.begin(), exit_preds.end(), inside);
      if (!dedicated_exit) {
        // The exit block is also reachable from outside the loop, so the
        // stores have to stay in the exiting block.
        needs_local_exit_store = true;
        continue;
      }
      if (!exit_blocks_with_stores.insert(succ).second) continue;
      auto& exit_insts = blocks[succ]->GetInstructions();
      exit_insts.insert(exit_insts.begin(), write_backs.begin(),
                        write_backs.end());
    }
    if (needs_local_exit_store) {
      InsertBeforeTerminators(blocks[bi]->GetInstructions(), write_backs);
    }
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Entry point of the pass: promotes stack slots to registers in the best
// loops of `function`.
//
// Candidates arrive sorted by descending score. Promotion stops at the first
// candidate scoring below kMinLoopScore or after kMaxPromotedLoops loops.
//
// Every promoted loop reuses the same registers, and PromoteLoopSlots
// preloads them in the blocks outside the loop that lead into its header.
// A candidate is therefore skipped when
//   * it shares a block with an already promoted loop,
//   * it contains a block that feeds the header of an already promoted loop
//     (that block already holds the other loop's preloads), or
//   * a block that feeds its own header lies inside an already promoted loop
//     (its preloads would land in that loop).
// In the last two cases the preloads would overwrite registers that are live
// across the other loop.
void RunLoopSlotPromotion(MachineFunction& function) {
  constexpr int kMaxPromotedLoops = 4;
  constexpr int kMinLoopScore = 32;

  auto candidates = FindLoopCandidates(function);
  if (candidates.empty()) return;

  // Promotion only adds or removes slot loads, stores and register copies, so
  // branch targets computed up front stay valid for the whole run.
  const auto& blocks = function.GetBlocks();
  std::unordered_map<std::string, size_t> block_index_by_name;
  for (size_t i = 0; i < blocks.size(); ++i) {
    block_index_by_name[blocks[i]->GetName()] = i;
  }
  const auto succs = BuildSuccessors(function, block_index_by_name);

  // Blocks outside `loop` that have its header as a successor.
  const auto entry_preds_of = [&succs](const LoopCandidate& loop) {
    std::vector<size_t> entry_preds;
    for (size_t block = 0; block < succs.size(); ++block) {
      if (loop.blocks.count(block) != 0) continue;
      const auto& targets = succs[block];
      if (std::find(targets.begin(), targets.end(), loop.header) !=
          targets.end()) {
        entry_preds.push_back(block);
      }
    }
    return entry_preds;
  };

  std::unordered_set<size_t> promoted_blocks;
  std::unordered_set<size_t> promoted_entry_preds;
  const auto in_promoted_loop = [&promoted_blocks](size_t block) {
    return promoted_blocks.count(block) != 0;
  };
  const auto holds_foreign_preload = [&promoted_entry_preds](size_t block) {
    return promoted_entry_preds.count(block) != 0;
  };

  int promoted_loop_count = 0;
  for (const auto& loop : candidates) {
    if (loop.score < kMinLoopScore) break;

    if (std::any_of(loop.blocks.begin(), loop.blocks.end(),
                    in_promoted_loop)) {
      continue;
    }
    if (std::any_of(loop.blocks.begin(), loop.blocks.end(),
                    holds_foreign_preload)) {
      continue;
    }
    const std::vector<size_t> entry_preds = entry_preds_of(loop);
    if (std::any_of(entry_preds.begin(), entry_preds.end(),
                    in_promoted_loop)) {
      continue;
    }

    PromoteLoopSlots(function, loop);
    promoted_blocks.insert(loop.blocks.begin(), loop.blocks.end());
    promoted_entry_preds.insert(entry_preds.begin(), entry_preds.end());
    ++promoted_loop_count;
    if (promoted_loop_count >= kMaxPromotedLoops) break;
  }
}
|
||||
|
||||
} // namespace mir
|
||||
Binary file not shown.
Loading…
Reference in new issue