feat(opt): 切换至队友代码基线——100%功能正确

Chaitin-Briggs 图着色寄存器分配,K=16无需spill。
IRGen starttime/stoptime 修复(去掉 _sysy_ 前缀和 lineno 参数)。
此提交为后续优化工作的安全起点。
lzk
lzkk 3 days ago
parent bfe105c2cd
commit ee3b42ac40

@ -147,8 +147,6 @@ namespace mir
StoreMem,
AddRR,
SubRR,
AddImm,
SubImm,
MulRR,
DivRR,
ModRR,
@ -166,6 +164,7 @@ namespace mir
FCmpRR,
CSet,
Csel,
Csneg,
Smull,
Msub,
NegRR,
@ -184,18 +183,6 @@ namespace mir
MovReg,
};
// ---- 寄存器类别 ----
enum class RegClass { GPR32, GPR64, FPR32, FPR64, Unknown };
/// Translates a virtual-register class into the allocator's register class:
/// Int -> GPR32, Float -> FPR32, Ptr -> GPR64; anything else -> Unknown.
inline RegClass ToRegClass(VRegClass vc) {
    if (vc == VRegClass::Int) {
        return RegClass::GPR32;
    }
    if (vc == VRegClass::Float) {
        return RegClass::FPR32;
    }
    if (vc == VRegClass::Ptr) {
        return RegClass::GPR64;
    }
    return RegClass::Unknown;
}
enum class CondCode
{
EQ,
@ -257,23 +244,16 @@ namespace mir
bool IsRematerializable() const { return is_rematerializable_; }
MachineInstr &SetRematerializable(bool val)
{
is_rematerializable_ = val;
return *this;
}
{ is_rematerializable_ = val; return *this; }
int GetRematImm() const { return remat_imm_; }
MachineInstr &SetRematImm(int val)
{
remat_imm_ = val;
return *this;
}
{ remat_imm_ = val; return *this; }
private:
Opcode opcode_;
std::vector<Operand> operands_;
bool is_rematerializable_ = false;
int remat_imm_ = 0;
Opcode opcode_;
std::vector<Operand> operands_;
};
struct FrameSlot
@ -300,9 +280,6 @@ namespace mir
MachineInstr &Append(Opcode opcode,
std::initializer_list<Operand> operands = {});
void InsertInst(int local_idx, MachineInstr inst);
void ReplaceVReg(int local_idx, int old_vreg, int new_vreg);
private:
std::string name_;
int label_id_ = -1;
@ -348,9 +325,6 @@ namespace mir
int GetFrameSize() const { return frame_size_; }
void SetFrameSize(int size) { frame_size_ = size; }
bool HasCall() const { return has_call_; }
void SetHasCall(bool v = true) { has_call_ = v; }
int CreateVReg(VRegClass vreg_class);
VRegClass GetVRegClass(int vreg_id) const;
int GetNumVRegs() const { return static_cast<int>(vreg_classes_.size()); }
@ -365,7 +339,6 @@ namespace mir
std::vector<FrameSlot> frame_slots_;
int frame_size_ = 0;
bool has_call_ = false;
int next_label_id_ = 0;
std::vector<VRegClass> vreg_classes_;
@ -436,9 +409,8 @@ namespace mir
std::unique_ptr<MachineModule> LowerModuleToMIR(const ir::Module &module);
std::unique_ptr<MachineFunction> LowerToMIR(const ir::Module &module);
// ---- 贪婪寄存器分配器入口 ----
void RunGreedyRegAlloc(MachineFunction &function);
void RunGreedyRegAlloc(MachineModule &module);
void RunRegAlloc(MachineFunction &function);
void RunRegAlloc(MachineModule &module);
void RunFrameLowering(MachineFunction &function);
void RunFrameLowering(MachineModule &module);
@ -446,120 +418,10 @@ namespace mir
void RunPeephole(MachineFunction &function);
void RunPeephole(MachineModule &module);
void VerifyMIR(MachineFunction &func);
void VerifyMIR(MachineModule &module);
void VerifyRegAlloc(MachineFunction &func);
void VerifyRegAlloc(MachineModule &module);
void RunBlockLayout(MachineFunction &function);
void RunBlockLayout(MachineModule &module);
void PrintAsm(const MachineFunction &function, std::ostream &os);
void PrintAsm(const MachineModule &module, std::ostream &os);
/// One value number of a live interval: its id, the position of its
/// definition and the opcode of the defining instruction.
struct VNInfo
{
    int id{-1};
    int def_pos{-1};
    Opcode def_opcode{Opcode::Ret};

    /// True when the defining opcode is MovImm or LoadStackAddr.
    bool IsRematable() const
    {
        switch (def_opcode)
        {
        case Opcode::MovImm:
        case Opcode::LoadStackAddr:
            return true;
        default:
            return false;
        }
    }
};
/// A single reference to a virtual register: where it happens, whether it is
/// a definition, which value number it belongs to and the opcode involved.
struct UsePosition
{
    int pos{-1};
    bool is_def{false};
    int vn_id{-1};
    Opcode opcode{Opcode::Ret};
};
/// A closed range [start, end] of positions, tagged with a value-number id
/// and a flag recording whether the range crosses a call.
struct Segment
{
    int start{-1};
    int end{-1};
    int vn_id{-1};
    bool crosses_call{false};

    /// True when `pos` lies inside the closed range [start, end].
    bool Contains(int pos) const
    {
        return pos >= start && pos <= end;
    }

    /// True when the two closed ranges share at least one position.
    bool Overlaps(const Segment &o) const
    {
        return start <= o.end && o.start <= end;
    }
};
struct LiveInterval
{
int vreg = -1;
RegClass reg_class = RegClass::Unknown;
std::vector<VNInfo> valnos;
std::vector<Segment> segments;
std::vector<UsePosition> uses;
int assigned_reg = -1;
float spill_weight = 0.0f;
int hint_reg = -1;
int generation = 0;
int deferred_count = 0; // LLVM: RS_New→RS_Deferred→RS_Split stage tracking
// 保留旧字段以兼容 ComputeInstLiveness
int start = -1;
int end = -1;
VRegClass vreg_class = VRegClass::Int;
bool spilled = false;
int spill_slot = -1;
bool IsSpilled() const { return assigned_reg == -2; }
bool IsSplit() const { return assigned_reg == -3; }
bool IsAllocated() const { return assigned_reg >= 0; }
int FirstUsePos() const
{
if (!uses.empty()) return uses.front().pos;
return start;
}
int LastUsePos() const
{
if (!segments.empty()) return segments.back().end;
return end;
}
bool SegmentCrossesCall() const
{
for (auto &seg : segments)
if (seg.crosses_call) return true;
return false;
}
float Length() const
{
int total = 0;
for (auto &seg : segments)
total += seg.end - seg.start + 1;
return total > 0 ? (float)total : 1.0f;
}
};
// Bookkeeping of which live intervals are attached to which physical
// register.
// NOTE(review): only the declarations are visible here; the per-method notes
// below are inferred from names and signatures — confirm against the
// implementation before relying on them.
class LiveRegMatrix
{
// One list of intervals per slot; presumably indexed by physical register
// number — TODO confirm.
std::vector<std::vector<LiveInterval *>> reg_assignments_;
public:
// Takes the number of registers to track.
void Init(int num_regs);
// Returns bool; presumably false when the assignment is rejected — TODO confirm.
bool Assign(LiveInterval *li, int phys_reg);
// Same parameters as Assign but returns nothing; presumably skips the
// interference check — TODO confirm.
void ForceAssign(LiveInterval *li, int phys_reg);
void Unassign(LiveInterval *li);
// Read-only queries about `li` (or an explicit [start, end] range) against
// `phys_reg`.
bool CheckInterference(const LiveInterval &li, int phys_reg) const;
LiveInterval *GetConflict(const LiveInterval &li, int phys_reg) const;
bool CheckInterferenceRange(int start, int end, int phys_reg) const;
};
// ---- 增强活跃分析 ----
std::vector<LiveInterval> EnhanceIntervals(
const std::vector<LiveInterval> &raw,
MachineFunction &function);
std::vector<LiveInterval> ComputeInstLiveness(MachineFunction &func);
} // namespace mir

@ -0,0 +1,29 @@
#pragma once
#include <map>
#include <vector>
namespace mir
{
    class MachineBasicBlock;
    class MachineFunction;

    /// A directed control-flow edge between two machine basic blocks,
    /// carrying a weight.
    struct CFGEdge
    {
        MachineBasicBlock *src{nullptr};
        MachineBasicBlock *dst{nullptr};
        double weight{0.0};
    };

    /// Everything AnalyzeCFG reports about one function: per-block successor
    /// and predecessor lists, a per-block frequency value, and the edge list.
    struct CFGAnalysisResult
    {
        std::map<MachineBasicBlock *, std::vector<MachineBasicBlock *>> successors;
        std::map<MachineBasicBlock *, std::vector<MachineBasicBlock *>> predecessors;
        std::map<MachineBasicBlock *, double> block_freq;
        std::vector<CFGEdge> edges;
    };

    /// Runs the CFG analysis on `function` and returns the result by value.
    CFGAnalysisResult AnalyzeCFG(MachineFunction &function);
} // namespace mir

@ -0,0 +1,621 @@
#include "ir/IR.h"
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace ir {
namespace {
constexpr bool kDebugInline = false;
constexpr int kMaxInlineSize = 200;
constexpr int kMaxMultiBlockInlineSize = 50;
// Reports whether `func` must be treated as recursive by the inliner.
// A null or external function is conservatively reported as recursive; for
// everything else only *direct* self-calls are detected (a call instruction
// whose callee is `func` itself).
bool IsRecursive(Function* func) {
  if (func == nullptr || func->IsExternal()) {
    return true;
  }
  for (const auto& block : func->GetBlocks()) {
    for (const auto& instruction : block->GetInstructions()) {
      auto* as_call = dynamic_cast<CallInst*>(instruction.get());
      if (as_call != nullptr && as_call->GetCallee() == func) {
        return true;
      }
    }
  }
  return false;
}
// Total number of instructions over all basic blocks of `func`.
int CountInstructions(Function* func) {
  size_t total = 0;
  for (const auto& block : func->GetBlocks()) {
    total += block->GetInstructions().size();
  }
  return static_cast<int>(total);
}
// Looks `v` up in `value_map`; values without an entry map to themselves.
Value* MapValue(Value* v, const std::unordered_map<Value*, Value*>& value_map) {
  const auto found = value_map.find(v);
  return found == value_map.end() ? v : found->second;
}
// Appends a copy of `inst` to `out`, with every operand translated through
// `value_map` and the result name suffixed with ".inl".
//
// Handled opcodes: binary arithmetic/comparison, SIToFP/FPToSI/ZExt casts,
// Load, Store, GEP, Call and Alloca. Any other opcode appends nothing, so
// callers must check whether `out` grew.
// The clone's parent block is not set here.
void CloneInstruction(Instruction* inst,
                      const std::unordered_map<Value*, Value*>& value_map,
                      std::vector<std::unique_ptr<Instruction>>& out) {
  const auto remap = [&value_map](Value* original) {
    return MapValue(original, value_map);
  };
  const auto inlined_name = [inst]() { return inst->GetName() + ".inl"; };

  switch (inst->GetOpcode()) {
    case Opcode::Add:
    case Opcode::Sub:
    case Opcode::Mul:
    case Opcode::Div:
    case Opcode::Mod:
    case Opcode::Eq:
    case Opcode::Ne:
    case Opcode::Lt:
    case Opcode::Le:
    case Opcode::Gt:
    case Opcode::Ge: {
      auto* binary = static_cast<BinaryInst*>(inst);
      Value* left = remap(binary->GetLhs());
      Value* right = remap(binary->GetRhs());
      out.push_back(std::make_unique<BinaryInst>(
          inst->GetOpcode(), inst->GetType(), left, right, inlined_name()));
      return;
    }
    case Opcode::SIToFP:
    case Opcode::FPToSI:
    case Opcode::ZExt: {
      auto* conversion = static_cast<CastInst*>(inst);
      Value* source = remap(conversion->GetOperandValue());
      out.push_back(std::make_unique<CastInst>(
          inst->GetOpcode(), inst->GetType(), source, inlined_name()));
      return;
    }
    case Opcode::Load: {
      auto* load = static_cast<LoadInst*>(inst);
      Value* address = remap(load->GetPtr());
      out.push_back(std::make_unique<LoadInst>(
          load->GetType(), address, inlined_name()));
      return;
    }
    case Opcode::Store: {
      auto* store = static_cast<StoreInst*>(inst);
      Value* stored = remap(store->GetValue());
      Value* address = remap(store->GetPtr());
      out.push_back(
          std::make_unique<StoreInst>(Type::GetVoidType(), stored, address));
      return;
    }
    case Opcode::GEP: {
      auto* gep = static_cast<GetElementPtrInst*>(inst);
      Value* base = remap(gep->GetBasePtr());
      Value* index = remap(gep->GetIndex());
      out.push_back(std::make_unique<GetElementPtrInst>(
          gep->GetType(), base, index, inlined_name()));
      return;
    }
    case Opcode::Call: {
      auto* source_call = static_cast<CallInst*>(inst);
      Function* target = source_call->GetCallee();
      std::vector<Value*> mapped_args;
      for (size_t arg_no = 0; arg_no < source_call->GetNumArgs(); ++arg_no) {
        mapped_args.push_back(remap(source_call->GetArg(arg_no)));
      }
      out.push_back(std::make_unique<CallInst>(
          source_call->GetType(), target, mapped_args, inlined_name()));
      return;
    }
    case Opcode::Alloca: {
      auto* slot = static_cast<AllocaInst*>(inst);
      if (!slot->IsArrayAlloca()) {
        out.push_back(std::make_unique<AllocaInst>(
            slot->GetElementType(), slot->GetName() + ".inl"));
        return;
      }
      Value* element_count = remap(slot->GetCount());
      out.push_back(std::make_unique<AllocaInst>(
          slot->GetElementType(), slot->GetName() + ".inl", element_count));
      return;
    }
    default:
      // Unsupported opcode: nothing is appended.
      return;
  }
}
bool InlineCall(CallInst* call, Function* callee, Function* caller,
BasicBlock* call_bb, Module* module) {
if (kDebugInline) {
std::cerr << "[Inline] Inlining " << callee->GetName()
<< " (" << callee->GetBlocks().size() << " blocks)"
<< " into " << caller->GetName() << std::endl;
}
bool is_single_block = (callee->GetBlocks().size() == 1);
std::unordered_map<Value*, Value*> value_map;
for (auto& gv : module->GetGlobals()) {
value_map[gv.get()] = gv.get();
}
for (auto& other_func : module->GetFunctions()) {
value_map[other_func.get()] = other_func.get();
}
for (auto& arg : caller->GetParams()) {
value_map[arg.get()] = arg.get();
}
{
auto& blocks = caller->GetBlocks();
for (size_t bi = 0; bi < blocks.size(); ++bi) {
auto& insts = blocks[bi]->GetInstructions();
for (size_t ii = 0; ii < insts.size(); ++ii) {
value_map[insts[ii].get()] = insts[ii].get();
}
}
}
for (size_t i = 0; i < callee->GetParams().size(); ++i) {
auto* formal_arg = callee->GetParams()[i].get();
auto* actual_arg = call->GetArg(i);
value_map[formal_arg] = actual_arg;
}
auto& call_bb_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
call_bb->GetInstructions());
size_t call_idx = 0;
for (size_t i = 0; i < call_bb_insts.size(); ++i) {
if (call_bb_insts[i].get() == call) {
call_idx = i;
break;
}
}
if (is_single_block) {
auto* callee_entry = callee->GetEntry();
Value* return_value = nullptr;
std::vector<std::unique_ptr<Instruction>> cloned_insts;
std::vector<std::unique_ptr<Instruction>> alloca_insts;
for (auto& inst : callee_entry->GetInstructions()) {
if (inst->GetOpcode() == Opcode::Alloca) {
std::vector<std::unique_ptr<Instruction>> tmp;
CloneInstruction(inst.get(), value_map, tmp);
if (!tmp.empty()) {
value_map[inst.get()] = tmp.back().get();
alloca_insts.push_back(std::move(tmp.back()));
}
continue;
}
if (inst->GetOpcode() == Opcode::Ret) {
auto* ret_inst = static_cast<ReturnInst*>(inst.get());
if (ret_inst->HasValue()) {
return_value = MapValue(ret_inst->GetValue(), value_map);
}
continue;
}
std::vector<std::unique_ptr<Instruction>> tmp;
CloneInstruction(inst.get(), value_map, tmp);
if (!tmp.empty()) {
value_map[inst.get()] = tmp.back().get();
cloned_insts.push_back(std::move(tmp.back()));
}
}
if (return_value) {
call->ReplaceAllUsesWith(return_value);
} else if (!call->GetType()->IsVoid()) {
call->ReplaceAllUsesWith(module->GetContext().GetConstInt(0));
}
auto* entry_bb = caller->GetEntry();
auto& entry_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
entry_bb->GetInstructions());
size_t alloca_insert_pos = 0;
for (size_t i = 0; i < entry_insts.size(); ++i) {
if (entry_insts[i]->GetOpcode() == Opcode::Alloca) {
alloca_insert_pos = i + 1;
} else {
break;
}
}
for (auto& alloca : alloca_insts) {
alloca->SetParent(entry_bb);
entry_insts.insert(entry_insts.begin() + alloca_insert_pos, std::move(alloca));
alloca_insert_pos++;
}
size_t insert_pos = call_idx;
for (auto& cloned : cloned_insts) {
cloned->SetParent(call_bb);
call_bb_insts.insert(call_bb_insts.begin() + insert_pos, std::move(cloned));
insert_pos++;
}
for (size_t i = 0; i < call_bb_insts.size(); ++i) {
if (call_bb_insts[i].get() == call) {
for (size_t oi = 0; oi < call->GetNumOperands(); ++oi) {
auto* op = call->GetOperand(oi);
if (auto* op_inst = dynamic_cast<Instruction*>(op)) {
op_inst->RemoveUse(call, oi);
}
}
call_bb_insts.erase(call_bb_insts.begin() + i);
break;
}
}
return true;
}
// === Multi-block inlining ===
// 1. Create after_bb: move instructions after call from call_bb to after_bb
BasicBlock* after_bb = caller->CreateBlock(call_bb->GetName() + ".after");
std::vector<std::unique_ptr<Instruction>> after_insts;
for (size_t i = call_idx + 1; i < call_bb_insts.size(); ++i) {
after_insts.push_back(std::move(call_bb_insts[i]));
}
call_bb_insts.resize(call_idx + 1);
for (auto& inst : after_insts) {
inst->SetParent(after_bb);
after_bb->GetMutablePredecessors();
}
auto& after_bb_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
after_bb->GetInstructions());
for (auto& inst : after_insts) {
after_bb_insts.push_back(std::move(inst));
}
// 1b. Fix phi nodes: any phi that had call_bb as predecessor should now use after_bb
for (auto& bb : caller->GetBlocks()) {
for (auto& inst : bb->GetInstructions()) {
if (inst->GetOpcode() != Opcode::Phi) break;
auto* phi = static_cast<PhiInst*>(inst.get());
size_t num_ops = phi->GetNumOperands();
for (size_t i = 0; i + 1 < num_ops; i += 2) {
auto* bb_ptr = dynamic_cast<BasicBlock*>(phi->GetOperand(i + 1));
if (bb_ptr == call_bb) {
phi->SetOperand(i + 1, after_bb);
}
}
}
}
// 2. Create cloned blocks for callee
std::unordered_map<BasicBlock*, BasicBlock*> bb_map;
std::vector<BasicBlock*> cloned_bbs;
for (auto& bb : callee->GetBlocks()) {
BasicBlock* cloned_bb = caller->CreateBlock(bb->GetName() + ".inl");
bb_map[bb.get()] = cloned_bb;
cloned_bbs.push_back(cloned_bb);
}
BasicBlock* cloned_entry = bb_map[callee->GetEntry()];
// 2b. Reorder blocks: move cloned blocks and after_bb right after call_bb
// IMPORTANT: after_bb must come AFTER all cloned blocks, because
// after_bb may use values defined in the cloned blocks (e.g., call results
// from nested inlines). The lowering processes blocks in order, so values
// must be defined before they are used.
{
auto& blocks = const_cast<std::vector<std::unique_ptr<BasicBlock>>&>(caller->GetBlocks());
std::vector<size_t> move_indices;
for (auto* cb : cloned_bbs) {
for (size_t i = 0; i < blocks.size(); ++i) {
if (blocks[i].get() == cb) { move_indices.push_back(i); break; }
}
}
for (size_t i = 0; i < blocks.size(); ++i) {
if (blocks[i].get() == after_bb) { move_indices.push_back(i); break; }
}
size_t call_bb_idx = 0;
for (size_t i = 0; i < blocks.size(); ++i) {
if (blocks[i].get() == call_bb) { call_bb_idx = i; break; }
}
std::vector<std::unique_ptr<BasicBlock>> extracted;
for (auto idx : move_indices) {
extracted.push_back(std::move(blocks[idx]));
}
size_t insert_pos = call_bb_idx + 1;
for (auto& b : extracted) {
blocks.insert(blocks.begin() + insert_pos, std::move(b));
insert_pos++;
}
blocks.erase(std::remove_if(blocks.begin(), blocks.end(),
[](const std::unique_ptr<BasicBlock>& b) { return b == nullptr; }),
blocks.end());
}
// 4. Create alloca for return value (if non-void)
AllocaInst* ret_alloca = nullptr;
bool has_return = !call->GetType()->IsVoid();
if (has_return) {
auto* entry_bb = caller->GetEntry();
auto& entry_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
entry_bb->GetInstructions());
auto alloca = std::make_unique<AllocaInst>(call->GetType(), "__ret.inl");
alloca->SetParent(entry_bb);
ret_alloca = static_cast<AllocaInst*>(alloca.get());
size_t alloca_insert_pos = 0;
for (size_t i = 0; i < entry_insts.size(); ++i) {
if (entry_insts[i]->GetOpcode() == Opcode::Alloca) {
alloca_insert_pos = i + 1;
} else {
break;
}
}
entry_insts.insert(entry_insts.begin() + alloca_insert_pos, std::move(alloca));
}
// 5. Clone all instructions from callee blocks into cloned blocks
// Pass 1: Create cloned instructions with original operands, build value_map
std::vector<std::unique_ptr<Instruction>> alloca_insts;
std::vector<std::pair<Instruction*, Instruction*>> remap_list;
for (auto& bb : callee->GetBlocks()) {
BasicBlock* cloned_bb = bb_map[bb.get()];
auto& cloned_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
cloned_bb->GetInstructions());
for (auto& inst : bb->GetInstructions()) {
if (inst->GetOpcode() == Opcode::Alloca) {
std::vector<std::unique_ptr<Instruction>> tmp;
CloneInstruction(inst.get(), value_map, tmp);
if (!tmp.empty()) {
value_map[inst.get()] = tmp.back().get();
alloca_insts.push_back(std::move(tmp.back()));
}
continue;
}
if (inst->GetOpcode() == Opcode::Phi) {
auto* phi = static_cast<PhiInst*>(inst.get());
auto new_phi = std::make_unique<PhiInst>(phi->GetType(), phi->GetName() + ".inl");
new_phi->SetParent(cloned_bb);
value_map[inst.get()] = new_phi.get();
cloned_insts.push_back(std::move(new_phi));
continue;
}
if (inst->IsTerminator()) continue;
std::vector<std::unique_ptr<Instruction>> tmp;
CloneInstruction(inst.get(), value_map, tmp);
if (!tmp.empty()) {
tmp.back()->SetParent(cloned_bb);
value_map[inst.get()] = tmp.back().get();
remap_list.push_back({inst.get(), tmp.back().get()});
cloned_insts.push_back(std::move(tmp.back()));
}
}
}
// Pass 1b: Remap operands of cloned instructions now that value_map is complete
for (auto& [orig, cloned] : remap_list) {
for (size_t i = 0; i < orig->GetNumOperands(); ++i) {
Value* orig_op = orig->GetOperand(i);
Value* mapped = MapValue(orig_op, value_map);
if (mapped != orig_op) {
cloned->SetOperand(i, mapped);
}
}
}
// Pass 2: fill phi operands and handle terminators
for (auto& bb : callee->GetBlocks()) {
BasicBlock* cloned_bb = bb_map[bb.get()];
auto& cloned_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
cloned_bb->GetInstructions());
for (auto& inst : bb->GetInstructions()) {
if (inst->GetOpcode() == Opcode::Phi) {
auto* orig_phi = static_cast<PhiInst*>(inst.get());
auto* cloned_phi = static_cast<PhiInst*>(value_map[orig_phi]);
if (!cloned_phi) continue;
for (size_t i = 0; i < orig_phi->GetNumOperands(); i += 2) {
Value* val = MapValue(orig_phi->GetOperand(i), value_map);
auto* orig_pred = static_cast<BasicBlock*>(orig_phi->GetOperand(i + 1));
auto pred_it = bb_map.find(orig_pred);
BasicBlock* pred = (pred_it != bb_map.end()) ? pred_it->second : orig_pred;
cloned_phi->AddOperand(val);
cloned_phi->AddOperand(pred);
}
continue;
}
if (inst->GetOpcode() == Opcode::Ret) {
auto* ret_inst = static_cast<ReturnInst*>(inst.get());
if (ret_inst->HasValue() && has_return) {
Value* ret_val = MapValue(ret_inst->GetValue(), value_map);
auto store = std::make_unique<StoreInst>(
Type::GetVoidType(), ret_val, ret_alloca);
store->SetParent(cloned_bb);
cloned_insts.push_back(std::move(store));
}
auto br = std::make_unique<BranchInst>(Type::GetVoidType(), after_bb);
br->SetParent(cloned_bb);
cloned_insts.push_back(std::move(br));
continue;
}
if (inst->GetOpcode() == Opcode::Br) {
auto* br = static_cast<BranchInst*>(inst.get());
auto it = bb_map.find(br->GetTarget());
BasicBlock* target = (it != bb_map.end()) ? it->second : br->GetTarget();
auto new_br = std::make_unique<BranchInst>(Type::GetVoidType(), target);
new_br->SetParent(cloned_bb);
cloned_insts.push_back(std::move(new_br));
continue;
}
if (inst->GetOpcode() == Opcode::CondBr) {
auto* cbr = static_cast<CondBranchInst*>(inst.get());
Value* cond = MapValue(cbr->GetCond(), value_map);
auto true_it = bb_map.find(cbr->GetTrueTarget());
BasicBlock* true_target = (true_it != bb_map.end()) ? true_it->second : cbr->GetTrueTarget();
auto false_it = bb_map.find(cbr->GetFalseTarget());
BasicBlock* false_target = (false_it != bb_map.end()) ? false_it->second : cbr->GetFalseTarget();
auto new_cbr = std::make_unique<CondBranchInst>(
Type::GetVoidType(), cond, true_target, false_target);
new_cbr->SetParent(cloned_bb);
cloned_insts.push_back(std::move(new_cbr));
continue;
}
}
}
// 7. Insert alloca_insts into caller entry
{
auto* entry_bb = caller->GetEntry();
auto& entry_insts = const_cast<std::vector<std::unique_ptr<Instruction>>&>(
entry_bb->GetInstructions());
size_t alloca_insert_pos = 0;
for (size_t i = 0; i < entry_insts.size(); ++i) {
if (entry_insts[i]->GetOpcode() == Opcode::Alloca) {
alloca_insert_pos = i + 1;
} else {
break;
}
}
for (auto& alloca : alloca_insts) {
alloca->SetParent(entry_bb);
entry_insts.insert(entry_insts.begin() + alloca_insert_pos, std::move(alloca));
alloca_insert_pos++;
}
}
// 8-9. Handle return value and remove call
auto call_type = call->GetType();
if (has_return) {
auto load_ret = std::make_unique<LoadInst>(
call_type, ret_alloca, "__ret.load.inl");
load_ret->SetParent(after_bb);
Value* ret_val = load_ret.get();
after_bb_insts.insert(after_bb_insts.begin(), std::move(load_ret));
call->ReplaceAllUsesWith(ret_val);
} else {
call->ReplaceAllUsesWith(module->GetContext().GetConstInt(0));
}
// Remove the call and add branch to cloned_entry
for (size_t i = 0; i < call_bb_insts.size(); ++i) {
if (call_bb_insts[i].get() == call) {
for (size_t oi = 0; oi < call->GetNumOperands(); ++oi) {
auto* op = call->GetOperand(oi);
if (auto* op_inst = dynamic_cast<Instruction*>(op)) {
op_inst->RemoveUse(call, oi);
}
}
call_bb_insts.erase(call_bb_insts.begin() + i);
break;
}
}
auto br_to_entry = std::make_unique<BranchInst>(Type::GetVoidType(), cloned_entry);
br_to_entry->SetParent(call_bb);
call_bb_insts.push_back(std::move(br_to_entry));
if (kDebugInline) {
std::cerr << "[Inline] Done inlining " << callee->GetName() << std::endl;
}
return true;
}
} // namespace
// Function-inlining pass over `module`.
//
// Phase 1 snapshots, per non-external function, its instruction count and
// whether it calls itself directly. Phase 2 collects every call site whose
// callee passes the size/recursion filters. Phase 3 inlines the collected
// sites one by one, re-locating each call first because earlier inlining may
// have moved it to a different block of its caller.
void RunInline(Module* module) {
  if (module == nullptr) return;

  // Phase 1: per-function facts, keyed by function name.
  std::unordered_map<std::string, int> func_sizes;
  std::unordered_set<std::string> recursive_funcs;
  for (auto& func : module->GetFunctions()) {
    if (func->IsExternal()) continue;
    func_sizes[func->GetName()] = CountInstructions(func.get());
    if (IsRecursive(func.get())) {
      recursive_funcs.insert(func->GetName());
    }
  }

  struct InlineSite {
    CallInst* call;
    Function* caller;
    BasicBlock* call_bb;
  };

  // Decides whether `call`, found inside `caller`, is worth inlining.
  const auto is_candidate = [&](CallInst* call, Function* caller) -> bool {
    Function* target = call->GetCallee();
    if (target == nullptr || target->IsExternal()) return false;
    if (recursive_funcs.count(target->GetName()) != 0) return false;

    const auto known = func_sizes.find(target->GetName());
    const int target_size = (known == func_sizes.end()) ? 9999 : known->second;
    if (target_size > kMaxInlineSize) return false;
    if (target == caller) return false;

    const bool multi_block = target->GetBlocks().size() > 1;
    return !(multi_block && target_size > kMaxMultiBlockInlineSize);
  };

  // Phase 2: collect the call sites up front so that inlining does not
  // disturb the traversal.
  std::vector<InlineSite> inline_sites;
  for (auto& caller : module->GetFunctions()) {
    if (caller->IsExternal()) continue;
    for (auto& bb : caller->GetBlocks()) {
      for (auto& inst : bb->GetInstructions()) {
        auto* call = dynamic_cast<CallInst*>(inst.get());
        if (call == nullptr) continue;
        if (!is_candidate(call, caller.get())) continue;
        inline_sites.push_back({call, caller.get(), bb.get()});
      }
    }
  }

  // Finds the block of `owner` that currently holds `wanted`, or nullptr.
  const auto block_holding = [](Function* owner,
                                CallInst* wanted) -> BasicBlock* {
    for (auto& bb : owner->GetBlocks()) {
      for (auto& inst : bb->GetInstructions()) {
        if (inst.get() == wanted) return bb.get();
      }
    }
    return nullptr;
  };

  // Phase 3: inline each recorded site that is still present.
  for (auto& site : inline_sites) {
    auto* callee = site.call->GetCallee();
    if (callee == nullptr) continue;
    BasicBlock* actual_bb = block_holding(site.caller, site.call);
    if (actual_bb == nullptr) continue;
    InlineCall(site.call, callee, site.caller, actual_bb, module);
  }
}
} // namespace ir

@ -656,16 +656,9 @@ std::any IRGenImpl::visitUnaryExp(SysYParser::UnaryExpContext* ctx) {
}
}
if (callee_name == "starttime" || callee_name == "stoptime") {
int lineno = ctx->getStart()->getLine();
args.push_back(static_cast<ir::Value*>(builder_.CreateConstInt(lineno)));
}
if (args.size() != func_it->second->GetParams().size()) {
if (callee_name != "starttime" && callee_name != "stoptime") {
throw std::runtime_error(
FormatError("irgen", "函数参数个数不匹配: " + callee_name));
}
throw std::runtime_error(
FormatError("irgen", "函数参数个数不匹配: " + callee_name));
}
for (size_t i = 0; i < args.size(); ++i) {
args[i] = CastValueTo(args[i], func_it->second->GetParams()[i]->GetType());

@ -119,11 +119,9 @@ std::any IRGenImpl::visitCompUnit(SysYParser::CompUnitContext* ctx) {
auto* putch = module_.CreateFunction("putch", ir::Type::GetVoidType(), true);
putch->AddParam("%arg.x", ir::Type::GetInt32Type());
function_map_["putch"] = putch;
auto* sysy_starttime = module_.CreateFunction("_sysy_starttime", ir::Type::GetVoidType(), true);
sysy_starttime->AddParam("%arg.lineno", ir::Type::GetInt32Type());
auto* sysy_starttime = module_.CreateFunction("starttime", ir::Type::GetVoidType(), true);
function_map_["starttime"] = sysy_starttime;
auto* sysy_stoptime = module_.CreateFunction("_sysy_stoptime", ir::Type::GetVoidType(), true);
sysy_stoptime->AddParam("%arg.lineno", ir::Type::GetInt32Type());
auto* sysy_stoptime = module_.CreateFunction("stoptime", ir::Type::GetVoidType(), true);
function_map_["stoptime"] = sysy_stoptime;
SysYParser::FuncDefContext* main_func = nullptr;

@ -44,42 +44,17 @@ int main(int argc, char** argv) {
// 执行优化(如果启用)
if (opts.optimize) {
ir::PassManagerModule pass_manager(module.get());
pass_manager.Run();
ir::PassManager pass_manager;
pass_manager.RunScalarOptimizationPasses(module.get());
}
// Debug 模式:验证 IR 合法性
#ifndef NDEBUG
ir::VerifyIR(*module);
#endif
// 汇编输出到文件或标准输出
if (opts.emit_asm) {
auto machine_module = mir::LowerModuleToMIR(*module);
#ifndef NDEBUG
mir::VerifyMIR(*machine_module);
#endif
mir::RunGreedyRegAlloc(*machine_module);
#ifndef NDEBUG
mir::VerifyRegAlloc(*machine_module);
mir::VerifyMIR(*machine_module);
#endif
mir::RunRegAlloc(*machine_module);
mir::RunFrameLowering(*machine_module);
#ifndef NDEBUG
mir::VerifyMIR(*machine_module);
#endif
mir::RunPeephole(*machine_module);
#ifndef NDEBUG
mir::VerifyMIR(*machine_module);
#endif
std::ostringstream asm_ss;
mir::PrintAsm(*machine_module, asm_ss);

@ -42,10 +42,8 @@ namespace mir
case Opcode::StoreStack:
return "stur";
case Opcode::AddRR:
case Opcode::AddImm:
return "add";
case Opcode::SubRR:
case Opcode::SubImm:
return "sub";
case Opcode::MulRR:
return "mul";
@ -79,6 +77,8 @@ namespace mir
return "fcmp";
case Opcode::CSet:
return "cset";
case Opcode::Csneg:
return "csneg";
case Opcode::Scvtf:
return "scvtf";
case Opcode::FCvtzs:
@ -223,11 +223,6 @@ namespace mir
{
continue;
}
// 跳过前导零——直接用移位后的 movz，避免浪费 movz #0
if (!emitted && part == 0)
{
continue;
}
if (!emitted)
{
@ -256,36 +251,8 @@ namespace mir
}
}
// ADRP 缓存——避免连续访问同一全局变量时重复发射 ADRP
std::string g_cached_adrp_symbol;
bool g_adrp_cache_valid = false;
// 帧基址缓存——x13 持有 x29 + g_frame_base_offset，避免重复计算地址
int g_frame_base_offset = 0;
bool g_frame_base_valid = false;
void InvalidateFrameBase()
{
g_frame_base_valid = false;
}
void InvalidateAdrpCache()
{
g_adrp_cache_valid = false;
}
void EmitStackAdjust(const char *op, int amount, std::ostream &os)
{
if (amount > 12285)
{
InvalidateAdrpCache();
InvalidateFrameBase();
os << " movz x13, #" << (amount & 0xFFFF) << "\n";
if ((amount >> 16) != 0)
os << " movk x13, #" << ((amount >> 16) & 0xFFFF) << ", lsl #16\n";
os << " " << op << " sp, sp, x13\n";
return;
}
while (amount > 0)
{
const int chunk = amount > 4095 ? 4095 : amount;
@ -305,38 +272,6 @@ namespace mir
void EmitAddressFromBase(PhysReg target_xreg, PhysReg base_reg, int offset,
std::ostream &os)
{
// 使用 x13 时，ADRP 和帧基址缓存同时失效
if (target_xreg == PrinterScratchXReg())
{
InvalidateAdrpCache();
InvalidateFrameBase();
}
if (offset > 12285)
{
// 使用 x13 作为立即数暂存,必须失效帧基址和 ADRP 缓存
InvalidateAdrpCache();
InvalidateFrameBase();
os << " movz x13, #" << (offset & 0xFFFF) << "\n";
if ((offset >> 16) != 0)
os << " movk x13, #" << ((offset >> 16) & 0xFFFF) << ", lsl #16\n";
os << " add " << PhysRegName(target_xreg) << ", "
<< PhysRegName(base_reg) << ", x13\n";
return;
}
if (offset < -12285)
{
int abs_off = -offset;
// 使用 x13 作为立即数暂存,必须失效帧基址和 ADRP 缓存
InvalidateAdrpCache();
InvalidateFrameBase();
os << " movz x13, #" << (abs_off & 0xFFFF) << "\n";
if ((abs_off >> 16) != 0)
os << " movk x13, #" << ((abs_off >> 16) & 0xFFFF) << ", lsl #16\n";
os << " sub " << PhysRegName(target_xreg) << ", "
<< PhysRegName(base_reg) << ", x13\n";
return;
}
os << " mov " << PhysRegName(target_xreg) << ", "
<< PhysRegName(base_reg) << "\n";
@ -362,7 +297,6 @@ namespace mir
const char *narrow_op = (opcode == Opcode::LoadStack) ? "ldur" : "stur";
const char *wide_op = (opcode == Opcode::LoadStack) ? "ldr" : "str";
// x29 可达的窄范围直接用 ldur/stur
if (offset >= -256 && offset <= 255)
{
os << " " << narrow_op << " ";
@ -372,41 +306,7 @@ namespace mir
}
const PhysReg scratch_xreg = PrinterScratchXReg();
bool is_32bit = IsWReg(reg.GetReg()) || IsSReg(reg.GetReg());
// 尝试帧基址缓存——x13 已持有之前的地址
if (g_frame_base_valid)
{
int diff = offset - g_frame_base_offset;
// ldur/stur范围 ±256
if (diff >= -256 && diff <= 255)
{
os << " " << narrow_op << " ";
PrintOperand(reg, os);
os << ", [" << PhysRegName(scratch_xreg) << ", #" << diff << "]\n";
return;
}
// ldr/str 无符号立即数(正偏移)
if (diff >= 0)
{
int max_imm = is_32bit ? 16380 : 32760;
int align = is_32bit ? 4 : 8;
if (diff <= max_imm && diff % align == 0)
{
os << " " << wide_op << " ";
PrintOperand(reg, os);
os << ", [" << PhysRegName(scratch_xreg) << ", #" << diff << "]\n";
return;
}
}
}
// 缓存未命中——完整计算地址到 x13
EmitAddressFromBase(scratch_xreg, PhysReg::X29, offset, os);
g_frame_base_offset = offset;
g_frame_base_valid = true;
os << " " << wide_op << " ";
PrintOperand(reg, os);
@ -433,18 +333,7 @@ namespace mir
const std::string asm_symbol = NormalizeAsmSymbol(symbol);
const PhysReg scratch_xreg = PrinterScratchXReg();
if (g_adrp_cache_valid && g_cached_adrp_symbol == asm_symbol)
{
// x13 已持有该全局变量的页面地址,跳过 ADRP
}
else
{
os << " adrp " << PhysRegName(scratch_xreg) << ", " << asm_symbol << "\n";
g_cached_adrp_symbol = asm_symbol;
g_adrp_cache_valid = true;
InvalidateFrameBase();
}
os << " adrp " << PhysRegName(scratch_xreg) << ", " << asm_symbol << "\n";
os << " " << (opcode == Opcode::LoadGlobal ? "ldr " : "str ");
PrintOperand(reg, os);
os << ", [" << PhysRegName(scratch_xreg) << ", #:lo12:" << asm_symbol << "]\n";
@ -513,67 +402,25 @@ namespace mir
case Opcode::Prologue:
{
const auto &cs_regs = function.GetCalleeSavedRegs();
const bool is_leaf = !function.HasCall();
const bool no_frame = (function.GetFrameSize() == 0 && cs_regs.empty());
// 叶函数无帧且无 callee-saved 寄存器:完全跳过帧设置
if (is_leaf && no_frame)
{
return;
}
// 叶函数仅保存 x29（LR 不会被修改），非叶函数保存 x29+x30
if (is_leaf)
{
os << " str x29, [sp, #-8]!\n";
}
else
{
os << " stp x29, x30, [sp, #-16]!\n";
}
os << " stp x29, x30, [sp, #-16]!\n";
os << " mov x29, sp\n";
if (function.GetFrameSize() > 0)
{
EmitStackAdjust("sub", function.GetFrameSize(), os);
}
// X(64-bit) 和 S(32-bit) 分两组配对 stp
std::vector<PhysReg> x_regs, s_regs;
int cs_offset = 0;
for (auto r : cs_regs)
{
if (r >= PhysReg::X0 && r <= PhysReg::X30)
x_regs.push_back(r);
{
os << " str " << PhysRegName(r) << ", [sp, #" << cs_offset << "]\n";
cs_offset += 8;
}
else if (r >= PhysReg::S0 && r <= PhysReg::S31)
s_regs.push_back(r);
else
x_regs.push_back(r); // 兜底:非 X 非 S 按 X 处理
}
int cs_offset = 0;
for (size_t i = 0; i + 1 < x_regs.size(); i += 2)
{
os << " stp " << PhysRegName(x_regs[i]) << ", "
<< PhysRegName(x_regs[i + 1])
<< ", [sp, #" << cs_offset << "]\n";
cs_offset += 16;
}
if (x_regs.size() % 2 == 1)
{
os << " str " << PhysRegName(x_regs.back())
<< ", [sp, #" << cs_offset << "]\n";
cs_offset += 8;
}
for (size_t i = 0; i + 1 < s_regs.size(); i += 2)
{
os << " stp " << PhysRegName(s_regs[i]) << ", "
<< PhysRegName(s_regs[i + 1])
<< ", [sp, #" << cs_offset << "]\n";
cs_offset += 8;
}
if (s_regs.size() % 2 == 1)
{
os << " str " << PhysRegName(s_regs.back())
<< ", [sp, #" << cs_offset << "]\n";
{
os << " str " << PhysRegName(r) << ", [sp, #" << cs_offset << "]\n";
cs_offset += 4;
}
}
return;
}
@ -581,67 +428,25 @@ namespace mir
case Opcode::Epilogue:
{
const auto &cs_regs = function.GetCalleeSavedRegs();
const bool is_leaf = !function.HasCall();
const bool no_frame = (function.GetFrameSize() == 0 && cs_regs.empty());
// 叶函数无帧且无 callee-saved 寄存器——直接返回
if (is_leaf && no_frame)
{
os << " ret\n";
return;
}
// 恢复 callee-saved 寄存器(叶函数也需要——它们属于调用者)
std::vector<PhysReg> x_regs, s_regs;
int cs_offset = 0;
for (auto r : cs_regs)
{
if (r >= PhysReg::X0 && r <= PhysReg::X30)
x_regs.push_back(r);
{
os << " ldr " << PhysRegName(r) << ", [sp, #" << cs_offset << "]\n";
cs_offset += 8;
}
else if (r >= PhysReg::S0 && r <= PhysReg::S31)
s_regs.push_back(r);
else
x_regs.push_back(r);
}
int cs_offset = 0;
for (size_t i = 0; i + 1 < x_regs.size(); i += 2)
{
os << " ldp " << PhysRegName(x_regs[i]) << ", "
<< PhysRegName(x_regs[i + 1])
<< ", [sp, #" << cs_offset << "]\n";
cs_offset += 16;
}
if (x_regs.size() % 2 == 1)
{
os << " ldr " << PhysRegName(x_regs.back())
<< ", [sp, #" << cs_offset << "]\n";
cs_offset += 8;
}
for (size_t i = 0; i + 1 < s_regs.size(); i += 2)
{
os << " ldp " << PhysRegName(s_regs[i]) << ", "
<< PhysRegName(s_regs[i + 1])
<< ", [sp, #" << cs_offset << "]\n";
cs_offset += 8;
}
if (s_regs.size() % 2 == 1)
{
os << " ldr " << PhysRegName(s_regs.back())
<< ", [sp, #" << cs_offset << "]\n";
{
os << " ldr " << PhysRegName(r) << ", [sp, #" << cs_offset << "]\n";
cs_offset += 4;
}
}
if (function.GetFrameSize() > 0)
{
EmitStackAdjust("add", function.GetFrameSize(), os);
}
if (is_leaf)
{
os << " ldr x29, [sp], #8\n";
}
else
{
os << " ldp x29, x30, [sp], #16\n";
}
os << " ldp x29, x30, [sp], #16\n";
os << " ret\n";
return;
}
@ -854,6 +659,19 @@ namespace mir
}
return;
case Opcode::Csneg:
if (operands.size() >= 4)
{
os << " csneg ";
PrintOperand(operands[0], os);
os << ", ";
PrintOperand(operands[1], os);
os << ", ";
PrintOperand(operands[2], os);
os << ", " << CondCodeToAsm(static_cast<CondCode>(operands[3].GetImm())) << "\n";
}
return;
case Opcode::Smull:
if (operands.size() >= 3)
{
@ -941,11 +759,6 @@ namespace mir
}
return;
case Opcode::Call:
InvalidateAdrpCache();
InvalidateFrameBase(); // x13 是 caller-saved被调用破坏
// 不 break落到 default 让泛型打印机输出 bl 指令
default:
break;
}
@ -973,6 +786,47 @@ namespace mir
for (const auto &global : module.GetGlobals())
{
const std::string asm_name = NormalizeAsmSymbol(global.name);
bool is_zero_init = false;
if (global.kind == MachineGlobal::Kind::I32Scalar && global.init_value == 0)
{
is_zero_init = true;
}
if (global.kind == MachineGlobal::Kind::I32Array)
{
bool all_zero = true;
for (auto v : global.init_values)
{
if (v != 0)
{
all_zero = false;
break;
}
}
if (all_zero)
{
is_zero_init = true;
}
}
if (is_zero_init)
{
os << " .bss\n";
os << " .globl " << asm_name << "\n";
os << " .p2align 2\n";
os << asm_name << ":\n";
if (global.kind == MachineGlobal::Kind::I32Scalar)
{
os << " .space 4\n";
}
else
{
os << " .space " << (global.array_size * 4) << "\n";
}
os << " .data\n";
continue;
}
os << " .globl " << asm_name << "\n";
os << " .p2align 2\n";
os << asm_name << ":\n";
@ -1001,8 +855,6 @@ namespace mir
void PrintAsm(const MachineFunction &function, std::ostream &os)
{
g_adrp_cache_valid = false;
g_frame_base_valid = false;
const std::string asm_name = NormalizeAsmSymbol(function.GetName());
os << " .text\n";
@ -1018,9 +870,6 @@ namespace mir
}
const auto &block = *block_ptr;
// 每个基本块重置缓存——跨块时 x13 可能已被 call/clobber 破坏
g_adrp_cache_valid = false;
g_frame_base_valid = false;
PrintBlockLabelRef(function, block.GetLabelId(), os);
os << ":\n";

@ -8,10 +8,7 @@ add_library(mir_core STATIC
RegAlloc.cpp
FrameLowering.cpp
AsmPrinter.cpp
MIRVerifier.cpp
RegAllocVerifier.cpp
InstLiveness.cpp
GreedyAlloc.cpp
analysis/CFGAnalysis.cpp
)
target_link_libraries(mir_core PUBLIC

@ -59,9 +59,7 @@ namespace mir
{
if (slot.is_callee_stack_arg)
{
// 叶函数仅保存 x298字节非叶函数保存 x29+x3016字节
// 栈参数偏移需根据实际情况调整
slot.offset = (function.HasCall() ? 16 : 8) + slot.offset;
slot.offset = 16 + slot.offset;
}
}
}

@ -115,24 +115,6 @@ namespace mir
}
}
// 交换比较操作数时反转条件码a<b 变成 b>a
static CondCode SwapCondCode(CondCode cond)
{
switch (cond)
{
case CondCode::LT:
return CondCode::GT;
case CondCode::LE:
return CondCode::GE;
case CondCode::GT:
return CondCode::LT;
case CondCode::GE:
return CondCode::LE;
default:
return cond; // EQ/NE 对称
}
}
static PhysReg GetArgWReg(size_t index)
{
static const PhysReg regs[] = {
@ -356,43 +338,16 @@ namespace mir
{
if (IsIntegerCompareOpcode(bin->GetOpcode()))
{
// 常量折叠到 CmpImm消除冗余 MovImm
int lhs_imm, rhs_imm;
bool lhs_const = TryGetConstantInt(bin->GetLhs(), lhs_imm);
bool rhs_const = TryGetConstantInt(bin->GetRhs(), rhs_imm);
auto imm_fits = [](int imm) { return imm >= 0 && imm <= 4095; };
CondCode cond = GetCondCodeForCompareOpcode(bin->GetOpcode());
if (rhs_const && imm_fits(rhs_imm))
{
int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
}
else if (lhs_const && imm_fits(lhs_imm))
{
int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpImm,
{Operand::VReg(rhs, VRegClass::Int), Operand::Imm(lhs_imm)});
cond = SwapCondCode(cond);
}
else
{
int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpRR,
{Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
}
int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpRR,
{Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
int dst = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::CSet,
{Operand::VReg(dst, VRegClass::Int),
Operand::Imm(static_cast<int>(cond))});
Operand::Imm(static_cast<int>(GetCondCodeForCompareOpcode(bin->GetOpcode())))});
value_vregs[value] = dst;
return dst;
}
@ -473,7 +428,101 @@ namespace mir
value_vregs[value] = dst;
return dst;
}
// 2的幂次除法含正负改用 sdiv比移位序列更短
if (val > 0 && (val & (val - 1)) == 0)
{
int shift = 0;
int tmp = val;
while (tmp > 1)
{
tmp >>= 1;
++shift;
}
int bias = (1 << shift) - 1;
int biased = function.CreateVReg(VRegClass::Int);
if (bias <= 4095)
{
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(bias)});
}
else
{
int bias_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(bias_reg, VRegClass::Int),
Operand::Imm(bias)}).SetRematerializable(true).SetRematImm(bias);
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(bias_reg, VRegClass::Int)});
}
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(0)});
int selected = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::Csel,
{Operand::VReg(selected, VRegClass::Int),
Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::LT))});
block.Append(Opcode::AsrRR,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(selected, VRegClass::Int),
Operand::Imm(shift)});
value_vregs[value] = dst;
return dst;
}
if (val < 0 && (-val & (-val - 1)) == 0 && val != -1)
{
int abs_val = -val;
int shift = 0;
int tmp = abs_val;
while (tmp > 1)
{
tmp >>= 1;
++shift;
}
int bias = (1 << shift) - 1;
int biased = function.CreateVReg(VRegClass::Int);
if (bias <= 4095)
{
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(bias)});
}
else
{
int bias_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(bias_reg, VRegClass::Int),
Operand::Imm(bias)}).SetRematerializable(true).SetRematImm(bias);
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(bias_reg, VRegClass::Int)});
}
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(0)});
int selected = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::Csel,
{Operand::VReg(selected, VRegClass::Int),
Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::LT))});
int pos_q = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::AsrRR,
{Operand::VReg(pos_q, VRegClass::Int),
Operand::VReg(selected, VRegClass::Int),
Operand::Imm(shift)});
block.Append(Opcode::NegRR,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(pos_q, VRegClass::Int)});
value_vregs[value] = dst;
return dst;
}
}
}
@ -483,43 +532,128 @@ namespace mir
if (rhs_const)
{
int val = rhs_const->GetValue();
// x % 1 == 0, x % -1 == 0
if (val == 1 || val == -1)
if (val > 0 && (val & (val - 1)) == 0)
{
int bias = val - 1;
int biased = function.CreateVReg(VRegClass::Int);
if (bias <= 4095)
{
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(bias)});
}
else
{
int bias_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(bias_reg, VRegClass::Int),
Operand::Imm(bias)}).SetRematerializable(true).SetRematImm(bias);
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(bias_reg, VRegClass::Int)});
}
int shift = 0;
int tmp = val;
while (tmp > 1)
{
tmp >>= 1;
++shift;
}
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(0)});
int selected = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::Csel,
{Operand::VReg(selected, VRegClass::Int),
Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::LT))});
int q_dst = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::AsrRR,
{Operand::VReg(q_dst, VRegClass::Int),
Operand::VReg(selected, VRegClass::Int),
Operand::Imm(shift)});
int d_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(d_reg, VRegClass::Int),
Operand::Imm(val)}).SetRematerializable(true).SetRematImm(val);
block.Append(Opcode::Msub,
{Operand::VReg(dst, VRegClass::Int),
Operand::Imm(0)}).SetRematerializable(true).SetRematImm(0);
Operand::VReg(q_dst, VRegClass::Int),
Operand::VReg(d_reg, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int)});
value_vregs[value] = dst;
return dst;
}
if (val < 0 && (-val & (-val - 1)) == 0 && val != -1)
{
int abs_val = -val;
int bias = abs_val - 1;
int biased = function.CreateVReg(VRegClass::Int);
if (bias <= 4095)
{
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(bias)});
}
else
{
int bias_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(bias_reg, VRegClass::Int),
Operand::Imm(bias)}).SetRematerializable(true).SetRematImm(bias);
block.Append(Opcode::AddRR,
{Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(bias_reg, VRegClass::Int)});
}
int shift = 0;
int tmp = abs_val;
while (tmp > 1)
{
tmp >>= 1;
++shift;
}
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(0)});
int selected = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::Csel,
{Operand::VReg(selected, VRegClass::Int),
Operand::VReg(biased, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(static_cast<int>(CondCode::LT))});
int asr_result = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::AsrRR,
{Operand::VReg(asr_result, VRegClass::Int),
Operand::VReg(selected, VRegClass::Int),
Operand::Imm(shift)});
int q_dst = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::NegRR,
{Operand::VReg(q_dst, VRegClass::Int),
Operand::VReg(asr_result, VRegClass::Int)});
int d_reg = function.CreateVReg(VRegClass::Int);
block.Append(Opcode::MovImm,
{Operand::VReg(d_reg, VRegClass::Int),
Operand::Imm(val)}).SetRematerializable(true).SetRematImm(val);
block.Append(Opcode::Msub,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(q_dst, VRegClass::Int),
Operand::VReg(d_reg, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int)});
value_vregs[value] = dst;
return dst;
}
// 2的幂次取模含正负改用 ModRRsdiv+msub比移位序列更短
}
}
// Add/Sub 常量折叠到立即数操作码
int rhs_imm_val;
bool rhs_is_imm = false;
if ((opcode == Opcode::AddRR || opcode == Opcode::SubRR) &&
bin->GetRhs() && TryGetConstantInt(bin->GetRhs(), rhs_imm_val) &&
rhs_imm_val >= 0 && rhs_imm_val <= 4095)
{
rhs_is_imm = true;
if (opcode == Opcode::AddRR)
opcode = Opcode::AddImm;
else
opcode = Opcode::SubImm;
block.Append(opcode,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::Imm(rhs_imm_val)});
}
else
{
block.Append(opcode,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(rhs, VRegClass::Int)});
}
block.Append(opcode,
{Operand::VReg(dst, VRegClass::Int),
Operand::VReg(lhs, VRegClass::Int),
Operand::VReg(rhs, VRegClass::Int)});
value_vregs[value] = dst;
return dst;
}
@ -824,35 +958,12 @@ namespace mir
return;
}
// 常量折叠到 CmpImm
int lhs_imm, rhs_imm;
bool lhs_const = TryGetConstantInt(bin.GetLhs(), lhs_imm);
bool rhs_const = TryGetConstantInt(bin.GetRhs(), rhs_imm);
auto imm_fits = [](int imm) { return imm >= 0 && imm <= 4095; };
if (rhs_const && imm_fits(rhs_imm))
{
int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpImm,
{Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
}
else if (lhs_const && imm_fits(lhs_imm))
{
int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpImm,
{Operand::VReg(rhs, VRegClass::Int), Operand::Imm(lhs_imm)});
}
else
{
int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpRR,
{Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
}
int lhs = EmitIntValue(bin.GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
int rhs = EmitIntValue(bin.GetRhs(), function, value_vregs,
scalar_slots, array_slots, block);
block.Append(Opcode::CmpRR,
{Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
}
static bool TryEmitCondValueToFlags(const ir::Value *value,
@ -1565,7 +1676,6 @@ namespace mir
}
block.Append(Opcode::Call, {Operand::Symbol(callee->GetName())});
function.SetHasCall();
if (aligned_stack_arg_bytes > 0)
{

File diff suppressed because it is too large Load Diff

@ -0,0 +1,26 @@
--- src/mir/Lowering.cpp
+++ src/mir/Lowering.cpp
@@ -339,10 +339,19 @@
{
if (IsIntegerCompareOpcode(bin->GetOpcode()))
{
int lhs = EmitIntValue(bin->GetLhs(), function, value_vregs,
scalar_slots, array_slots, block);
- int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
- scalar_slots, array_slots, block);
- block.Append(Opcode::CmpRR,
- {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+ int rhs_imm;
+ if (TryGetConstantInt(bin->GetRhs(), rhs_imm))
+ {
+ block.Append(Opcode::CmpImm,
+ {Operand::VReg(lhs, VRegClass::Int), Operand::Imm(rhs_imm)});
+ }
+ else
+ {
+ int rhs = EmitIntValue(bin->GetRhs(), function, value_vregs,
+ scalar_slots, array_slots, block);
+ block.Append(Opcode::CmpRR,
+ {Operand::VReg(lhs, VRegClass::Int), Operand::VReg(rhs, VRegClass::Int)});
+ }

@ -15,22 +15,4 @@ namespace mir
return instructions_.back();
}
void MachineBasicBlock::InsertInst(int local_idx, MachineInstr inst)
{
if (local_idx < 0 || local_idx > (int)instructions_.size()) return;
instructions_.insert(instructions_.begin() + local_idx, std::move(inst));
}
void MachineBasicBlock::ReplaceVReg(int local_idx, int old_vreg,
int new_vreg)
{
if (local_idx < 0 || local_idx >= (int)instructions_.size()) return;
for (auto &op : instructions_[local_idx].GetOperands())
{
if (op.GetKind() == Operand::Kind::VReg &&
op.GetVRegId() == old_vreg)
op = Operand::VReg(new_vreg, op.GetVRegClass());
}
}
} // namespace mir

File diff suppressed because it is too large Load Diff

@ -0,0 +1,177 @@
#include "mir/analysis/CFGAnalysis.h"
#include "mir/MIR.h"
namespace mir
{
namespace
{
// Looks up the basic block of `function` whose label id equals `label_id`.
//
// Returns the first matching block in GetBlocks() order, or nullptr when
// `label_id` is negative or no block carries that label. Null entries in the
// block list are skipped.
MachineBasicBlock *FindBlockByLabel(MachineFunction &function,
                                    int label_id)
{
    MachineBasicBlock *match = nullptr;
    if (label_id >= 0)
    {
        for (auto &candidate : function.GetBlocks())
        {
            if (!candidate || candidate->GetLabelId() != label_id)
                continue;
            match = candidate.get();
            break;
        }
    }
    return match;
}
// Fills result.successors with the explicit branch targets of every block.
//
// For each non-null block that has at least one instruction, an entry is
// created in result.successors (even if it ends up empty). Every Br contributes
// the label in operand 0 and every CondBr the label in operand 1; labels that
// do not resolve to a block are ignored and each target is recorded once, in
// first-seen order.
//
// NOTE(review): only explicit Br/CondBr label operands are turned into edges
// here; fall-through to the next block is not added. Confirm that lowering
// always terminates blocks with an explicit branch for every outgoing edge.
void BuildSuccessors(MachineFunction &function,
                     CFGAnalysisResult &result)
{
    for (auto &block : function.GetBlocks())
    {
        if (!block || block->GetInstructions().empty())
            continue;

        auto &targets = result.successors[block.get()];

        // Resolves a label operand to its block and appends it to `targets`
        // unless it is unresolved or already present.
        const auto record_target = [&](const Operand &label_op)
        {
            if (label_op.GetKind() != Operand::Kind::Label)
                return;
            auto *dest = FindBlockByLabel(function, label_op.GetLabel());
            if (!dest)
                return;
            for (auto *known : targets)
            {
                if (known == dest)
                    return;
            }
            targets.push_back(dest);
        };

        for (const auto &inst : block->GetInstructions())
        {
            switch (inst.GetOpcode())
            {
            case Opcode::Br:
            {
                // Unconditional branch: the target label is operand 0.
                const auto &ops = inst.GetOperands();
                if (!ops.empty())
                    record_target(ops[0]);
                break;
            }
            case Opcode::CondBr:
            {
                // Conditional branch: the target label is operand 1.
                const auto &ops = inst.GetOperands();
                if (ops.size() >= 2)
                    record_target(ops[1]);
                break;
            }
            default:
                break;
            }
        }
    }
}
// Derives result.predecessors by inverting result.successors.
//
// For every (block -> successor) pair the block is appended to the successor's
// predecessor list. Blocks that nobody branches to get no entry. Predecessors
// appear in the iteration order of result.successors.
void BuildPredecessors(CFGAnalysisResult &result)
{
    for (auto &entry : result.successors)
    {
        auto *from = entry.first;
        auto &to_list = entry.second;
        for (auto *to : to_list)
            result.predecessors[to].push_back(from);
    }
}
// Appends one CFGEdge to result.edges for every (block -> successor) pair in
// result.successors. Only src and dst are assigned here; the weight field is
// filled in later by ComputeEdgeWeights.
void BuildEdges(CFGAnalysisResult &result)
{
    for (auto &entry : result.successors)
    {
        for (auto *to : entry.second)
        {
            CFGEdge link;
            link.src = entry.first;
            link.dst = to;
            result.edges.push_back(link);
        }
    }
}
// Computes a heuristic execution weight for every block into result.block_freq.
//
// The entry block is seeded with 1.0 and every other non-null block with 0.0.
// Then, for a fixed number of rounds, each block with a positive weight and at
// least one successor adds an equal share of its current weight to each of its
// successors. Updates are applied in place, so a block visited later in the
// same round already sees contributions made earlier in that round.
//
// Does nothing when the function has no blocks or no entry block.
//
// NOTE(review): weights are accumulated across rounds and never reset or
// normalised, so the results are relative magnitudes rather than
// probabilities - confirm that consumers only compare them against each other.
void EstimateBlockFrequencies(MachineFunction &function,
                              CFGAnalysisResult &result)
{
    if (function.GetBlocks().empty())
        return;
    auto *entry = function.GetEntryPtr();
    if (!entry)
        return;

    // Seed: all weight starts at the entry block.
    result.block_freq[entry] = 1.0;
    for (auto &block : function.GetBlocks())
    {
        if (!block || block.get() == entry)
            continue;
        result.block_freq[block.get()] = 0.0;
    }

    // Fixed number of propagation sweeps over the block list.
    const int kRounds = 20;
    for (int round = 0; round != kRounds; ++round)
    {
        for (auto &block : function.GetBlocks())
        {
            if (!block)
                continue;

            auto succ_it = result.successors.find(block.get());
            if (succ_it == result.successors.end())
                continue;
            auto &targets = succ_it->second;
            if (targets.empty())
                continue;

            // Snapshot the weight before distributing it, so a self-edge does
            // not change the share handed out in this step.
            double current = result.block_freq[block.get()];
            if (current <= 0.0)
                continue;

            const double share = current / static_cast<double>(targets.size());
            for (auto *target : targets)
                result.block_freq[target] += share;
        }
    }
}
// Assigns each edge a weight equal to its source block's weight divided evenly
// among that block's successors.
//
// Edges whose source has no (or an empty) successor list are left untouched.
// A source block missing from result.block_freq is treated as weight 0.0.
void ComputeEdgeWeights(CFGAnalysisResult &result)
{
    for (auto &edge : result.edges)
    {
        const auto succ_it = result.successors.find(edge.src);
        if (succ_it == result.successors.end())
            continue;
        const auto fanout = succ_it->second.size();
        if (fanout == 0)
            continue;

        const auto freq_it = result.block_freq.find(edge.src);
        const double source_freq =
            (freq_it == result.block_freq.end()) ? 0.0 : freq_it->second;
        edge.weight = source_freq / static_cast<double>(fanout);
    }
}
} // namespace
// Runs the full CFG analysis for `function` and returns the populated result.
//
// The passes are order-dependent: predecessors and edges are derived from the
// successor map, block weights need the successor map, and edge weights need
// both the edges and the block weights.
CFGAnalysisResult AnalyzeCFG(MachineFunction &function)
{
    CFGAnalysisResult analysis;

    // Graph structure.
    BuildSuccessors(function, analysis);
    BuildPredecessors(analysis);
    BuildEdges(analysis);

    // Heuristic weights on top of the structure.
    EstimateBlockFrequencies(function, analysis);
    ComputeEdgeWeights(analysis);

    return analysis;
}
} // namespace mir
Loading…
Cancel
Save