// nudt-compiler-cpp/include/mir/MIR.h
//
// Machine IR (MIR) declarations: physical registers, condition codes,
// opcodes, operands, instructions, basic blocks, functions and modules,
// plus the entry points of the backend pipeline.


#pragma once
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace ir {
class Module;
}
namespace mir {
// Context object for the MIR layer. As declared here it carries no state and
// exposes only a defaulted default constructor.
class MIRContext {
public:
MIRContext() = default;
};
// Returns a reference to a MIRContext. The definition is not in this header;
// presumably it is a shared default instance (TODO confirm lifetime and
// thread-safety against the definition).
MIRContext& DefaultContext();
/// Physical registers known to the backend.
///
/// Enumerators receive consecutive values starting at 0 in declaration order
/// (W0 == 0, X0 == 31, S0 == 62, SP == 94, ZR == 95), so the order below must
/// not be changed if any other code depends on the numeric values.
///
/// NOTE(review): an older, commented-out list named only W0, W8, W9, X29, X30
/// and SP -- presumably the original minimal register set.
enum class PhysReg {
  // ---- 32-bit general-purpose registers ----
  // W0-W7: argument passing / temporaries.
  W0, W1, W2, W3,
  W4, W5, W6, W7,
  // W8-W9: temporaries (the ones mainly used at present).
  W8, W9,
  // W10-W15: temporaries (extended set).
  W10, W11, W12,
  W13, W14, W15,
  // W16-W17: intra-procedure-call temporaries.
  W16, W17,
  // W18: reserved by the platform.
  W18,
  // W19-W24: callee-saved (kept for extension).
  W19, W20, W21,
  W22, W23, W24,
  // W25-W28: callee-saved.
  W25, W26, W27, W28,
  // W29: frame pointer (FP).
  W29,
  // W30: link register (LR).
  W30,

  // ---- 64-bit versions ----
  X0, X1, X2, X3,
  X4, X5, X6, X7,
  X8, X9, X10, X11,
  X12, X13, X14, X15,
  X16, X17, X18,
  X19, X20, X21, X22, X23,
  X24, X25, X26, X27, X28,
  // X29: FP.
  X29,
  // X30: LR.
  X30,

  // ---- Floating-point registers (32-bit) ----
  S0, S1, S2, S3,
  S4, S5, S6, S7,
  S8, S9, S10, S11,
  S12, S13, S14, S15,
  S16, S17, S18, S19,
  S20, S21, S22, S23,
  S24, S25, S26, S27,
  S28, S29, S30, S31,

  // ---- Special registers ----
  // Stack pointer.
  SP,
  // Zero register.
  ZR,
};

/// Returns a C-string name for `reg`. Defined outside this header; the exact
/// spelling of the returned names is not visible here.
const char* PhysRegName(PhysReg reg);
// ========== Condition codes (used by the BCond instruction) ==========
/// Branch condition codes, listed as complementary pairs.
///
/// Enumerators take consecutive values from 0 in declaration order
/// (EQ == 0 ... AL == 14); keep the order stable.
enum class CondCode {
  EQ, NE,  // equal / not equal
  CS, CC,  // carry set (unsigned >=) / carry clear (unsigned <)
  MI, PL,  // minus (negative) / plus (non-negative)
  VS, VC,  // overflow set / overflow clear
  HI, LS,  // unsigned higher (>) / unsigned lower or same (<=)
  GE, LT,  // signed greater or equal / signed less than
  GT, LE,  // signed greater than / signed less or equal
  AL,      // always
};

/// Returns a C-string name for `cc`. Defined outside this header; the exact
/// spelling of the returned names is not visible here.
const char* CondCodeName(CondCode cc);
// ========== MIR instruction opcodes ==========
/// Opcodes of MIR instructions. The comment next to each enumerator shows the
/// assembly form it stands for.
///
/// Enumerators take consecutive values from 0 in declaration order
/// (Prologue == 0 ... Sxtw == 41); append new opcodes at the end in case
/// other code depends on the numeric values.
enum class Opcode {
  // ---------- Stack frame ----------
  Prologue,        // function prologue (pseudo-instruction)
  Epilogue,        // function epilogue (pseudo-instruction)

  // ---------- Data movement ----------
  MovImm,          // move immediate into register:  MOV w8, #imm
  MovReg,          // register-to-register move:     MOV w8, w9
  LoadStack,       // load from a stack slot:        LDR w8, [sp, #offset]
  StoreStack,      // store to a stack slot:         STR w8, [sp, #offset]
  LoadStackPair,   // paired load:                   LDP x29, x30, [sp], #16
  StoreStackPair,  // paired store:                  STP x29, x30, [sp, #-16]!

  // ---------- Integer arithmetic ----------
  AddRR,           // add:                           ADD w8, w8, w9
  AddRI,           // add (immediate):               ADD w8, w8, #imm
  SubRR,           // subtract:                      SUB w8, w8, w9
  SubRI,           // subtract (immediate):          SUB w8, w8, #imm
  MulRR,           // multiply:                      MUL w8, w8, w9
  SDivRR,          // signed divide:                 SDIV w8, w8, w9
  UDivRR,          // unsigned divide:               UDIV w8, w8, w9

  // ---------- Floating-point arithmetic ----------
  FAddRR,          // floating-point add:            FADD s0, s0, s1
  FSubRR,          // floating-point subtract:       FSUB s0, s0, s1
  FMulRR,          // floating-point multiply:       FMUL s0, s0, s1
  FDivRR,          // floating-point divide:         FDIV s0, s0, s1

  // ---------- Comparison ----------
  CmpRR,           // compare (register):            CMP w8, w9
  CmpRI,           // compare (immediate):           CMP w8, #imm
  FCmpRR,          // floating-point compare:        FCMP s0, s1

  // ---------- Type conversion ----------
  SIToFP,          // signed int to float:           SCVTF s0, w0
  FPToSI,          // float to signed int:           FCVTZS w0, s0
  ZExt,            // zero-extend i1 -> i32:         AND w8, w8, #1

  // ---------- Control flow ----------
  B,               // unconditional branch:          B label
  BCond,           // conditional branch:            B.EQ label, B.NE label, B.GT label, ...
  Call,            // function call:                 BL target
  Ret,             // function return:               RET

  // ---------- Bitwise / shift ----------
  AndRR,           // bitwise and:                   AND w8, w8, w9
  OrRR,            // bitwise or:                    ORR w8, w8, w9
  EorRR,           // bitwise exclusive or:          EOR w8, w8, w9
  LslRR,           // logical shift left:            LSL w8, w8, w9
  LsrRR,           // logical shift right:           LSR w8, w8, w9
  AsrRR,           // arithmetic shift right:        ASR w8, w8, w9

  // ---------- Special ----------
  Nop,             // no operation:                  NOP
  Label,           // inline label; emits no real instruction, only the label name

  // ---------- Later additions ----------
  Movk,            // movk Rd, #imm16, lsl #shift
  LoadStackAddr,   // load a stack-frame address into a register (add xd, sp, #offset)

  // Used to compute the address of a global variable.
  Adrp,            // ADRP Xd, label
  AddLabel,        // ADD Xd, Xn, :lo12:label

  Sxtw,            // sign-extend word to doubleword: sxtw Xd, Wn
};
// ========== Operand ==========
/// One operand of a machine instruction.
///
/// An operand is tagged with a Kind and stores a physical register, an
/// integer payload, a condition code and a label string side by side. Objects
/// can only be created through the static factory functions because the
/// constructor is private.
class Operand {
 public:
  /// Discriminator telling which kind of operand this is.
  enum class Kind { Reg, VReg, Imm, FrameIndex, Cond, Label };

  // Factory functions, one per Kind. They are defined outside this header;
  // presumably each one sets the tag and the matching payload field.
  static Operand Reg(PhysReg reg);
  static Operand VReg(int id);
  static Operand Imm(int value);
  static Operand FrameIndex(int index);
  static Operand Cond(CondCode cc);
  static Operand Label(const std::string& label);

  /// Returns the operand's kind tag.
  Kind GetKind() const { return kind_; }

  // Kind predicates.
  bool IsPhysReg() const { return Kind::Reg == kind_; }
  bool IsVReg() const { return Kind::VReg == kind_; }

  // Payload accessors. None of them checks the kind tag: the virtual-register
  // id, the immediate and the frame index all read the same integer field, so
  // the caller is responsible for asking for the payload that matches
  // GetKind().
  PhysReg GetReg() const { return reg_; }
  int GetVReg() const { return imm_; }
  int GetImm() const { return imm_; }
  int GetFrameIndex() const { return imm_; }
  CondCode GetCondCode() const { return cc_; }
  const std::string& GetLabel() const { return label_; }

 private:
  // Sole constructor; reachable only from the factories above.
  Operand(Kind kind, PhysReg reg, int imm, CondCode cc, const std::string& label);

  Kind kind_;          // which kind of operand this is
  PhysReg reg_;        // returned by GetReg()
  int imm_;            // shared by GetVReg(), GetImm() and GetFrameIndex()
  CondCode cc_;        // returned by GetCondCode()
  std::string label_;  // returned by GetLabel()
};
// ========== MIR instruction ==========
/// A single machine instruction: an opcode, its operand list, and separate
/// lists of defined / used virtual-register ids.
class MachineInstr {
 public:
  /// Declared here, defined elsewhere. `operands` defaults to an empty list.
  MachineInstr(Opcode opcode, std::vector<Operand> operands = {});

  /// Returns the instruction's opcode.
  Opcode GetOpcode() const { return opcode_; }

  // Operand list, read-only and mutable views.
  std::vector<Operand>& GetOperands() { return operands_; }
  const std::vector<Operand>& GetOperands() const { return operands_; }

  // Def/use information (used by liveness analysis). The lists are only
  // modified through these accessors and AddDef()/AddUse(); nothing in this
  // class derives them from the operands.
  std::vector<int>& GetDefs() { return defs_; }
  const std::vector<int>& GetDefs() const { return defs_; }
  std::vector<int>& GetUses() { return uses_; }
  const std::vector<int>& GetUses() const { return uses_; }

  /// Appends `vreg` to the def list (duplicates are not filtered).
  void AddDef(int vreg) { defs_.emplace_back(vreg); }
  /// Appends `vreg` to the use list (duplicates are not filtered).
  void AddUse(int vreg) { uses_.emplace_back(vreg); }

  // ---- Instruction classification ----

  /// True for Opcode::Call.
  bool IsCall() const { return Opcode::Call == opcode_; }

  /// True for the opcodes that end a block: B, BCond and Ret.
  bool IsTerminator() const {
    switch (opcode_) {
      case Opcode::B:
      case Opcode::BCond:
      case Opcode::Ret:
        return true;
      default:
        return false;
    }
  }

  /// True for Opcode::MovReg (register-to-register move).
  bool IsMove() const { return Opcode::MovReg == opcode_; }

 private:
  Opcode opcode_;
  std::vector<Operand> operands_;
  std::vector<int> defs_;  // virtual-register ids recorded via AddDef()
  std::vector<int> uses_;  // virtual-register ids recorded via AddUse()
};
// ========== Stack slot ==========
/// Plain record describing one slot in a function's stack frame.
struct FrameSlot {
  int index{0};   // slot number; defaults to 0
  int size{4};    // slot size, defaults to 4 (presumably bytes -- TODO confirm)
  int offset{0};  // slot offset, defaults to 0 (presumably assigned during
                  // frame layout -- TODO confirm)
};
// ========== MIR basic block ==========
/// A named sequence of machine instructions together with its control-flow
/// neighbours.
///
/// Successor and predecessor entries are raw, non-owning pointers; adding an
/// edge in one direction does not add the reverse edge on the other block.
class MachineBasicBlock {
 public:
  /// Declared here, defined elsewhere.
  explicit MachineBasicBlock(std::string name);

  /// Returns the block's name.
  const std::string& GetName() const { return name_; }

  // Instruction list, read-only and mutable views.
  const std::vector<MachineInstr>& GetInstructions() const { return instructions_; }
  std::vector<MachineInstr>& GetInstructions() { return instructions_; }

  // Both overloads are defined elsewhere. Presumably they append a new
  // instruction and return a reference to it; if that reference points into
  // the instruction vector it is invalidated by later appends -- TODO confirm
  // against the definition.
  MachineInstr& Append(Opcode opcode,
                       std::initializer_list<Operand> operands = {});
  MachineInstr& Append(Opcode opcode, std::vector<Operand> operands);

  // ---- Control-flow information ----

  const std::vector<MachineBasicBlock*>& GetSuccessors() const { return successors_; }
  std::vector<MachineBasicBlock*>& GetSuccessors() { return successors_; }
  /// Records `succ` as a successor (no de-duplication, no reverse edge).
  void AddSuccessor(MachineBasicBlock* succ) { successors_.emplace_back(succ); }

  const std::vector<MachineBasicBlock*>& GetPredecessors() const { return predecessors_; }
  std::vector<MachineBasicBlock*>& GetPredecessors() { return predecessors_; }
  /// Records `pred` as a predecessor (no de-duplication, no reverse edge).
  void AddPredecessor(MachineBasicBlock* pred) { predecessors_.emplace_back(pred); }

 private:
  std::string name_;
  std::vector<MachineInstr> instructions_;
  std::vector<MachineBasicBlock*> successors_;    // non-owning
  std::vector<MachineBasicBlock*> predecessors_;  // non-owning
};
// ========== MIR function ==========
/// Machine-level representation of one function: its name, an entry block,
/// a list of owned basic blocks, stack-frame slots, and per-function register
/// bookkeeping (callee-saved registers in use, virtual-register types).
class MachineFunction {
 public:
  /// Declared here, defined elsewhere.
  explicit MachineFunction(std::string name);

  /// Returns the function's name.
  const std::string& GetName() const { return name_; }

  // ---- Basic-block management ----

  // The entry block is stored by value, separately from the owned block list.
  MachineBasicBlock& GetEntry() { return entry_; }
  const MachineBasicBlock& GetEntry() const { return entry_; }

  std::vector<std::unique_ptr<MachineBasicBlock>>& GetBasicBlocks() {
    return basic_blocks_;
  }
  const std::vector<std::unique_ptr<MachineBasicBlock>>& GetBasicBlocks() const {
    return basic_blocks_;
  }

  /// Takes ownership of `bb` and appends it to the block list.
  void AddBasicBlock(std::unique_ptr<MachineBasicBlock> bb) {
    basic_blocks_.push_back(std::move(bb));
  }

  /// Returns the first block in the owned block list whose name equals
  /// `name`, or nullptr when there is none. Only blocks added through
  /// AddBasicBlock() are searched; the separately stored entry block is not
  /// consulted.
  MachineBasicBlock* GetBlockByName(const std::string& name) {
    for (auto& bb : basic_blocks_) {
      if (bb->GetName() == name) return bb.get();
    }
    return nullptr;
  }

  /// Const counterpart of GetBlockByName(), so that a function reached
  /// through a const reference can still be searched. Same lookup rules as
  /// the non-const overload.
  const MachineBasicBlock* GetBlockByName(const std::string& name) const {
    for (const auto& bb : basic_blocks_) {
      if (bb->GetName() == name) return bb.get();
    }
    return nullptr;
  }

  // ---- Stack-slot management ----

  // Defined elsewhere. Presumably creates a slot of `size` and returns its
  // index -- TODO confirm against the definition.
  int CreateFrameIndex(int size = 4);
  // Defined elsewhere; behaviour for an out-of-range `index` is not visible
  // here.
  FrameSlot& GetFrameSlot(int index);
  const FrameSlot& GetFrameSlot(int index) const;
  std::vector<FrameSlot>& GetFrameSlots() { return frame_slots_; }
  const std::vector<FrameSlot>& GetFrameSlots() const { return frame_slots_; }

  // ---- Stack-frame size ----
  // Plain getter/setter; the value starts at 0 and nothing in this class
  // recomputes it.
  int GetFrameSize() const { return frame_size_; }
  void SetFrameSize(int size) { frame_size_ = size; }

  // ---- Callee-saved register management ----

  /// Records that `reg` is used (set semantics: repeated marks are no-ops).
  void MarkCalleeSaved(PhysReg reg) { used_callee_saved_regs_.insert(reg); }
  /// Returns the recorded registers, ordered by enumerator value.
  const std::set<PhysReg>& GetCalleeSavedRegs() const { return used_callee_saved_regs_; }
  /// True when `reg` has been recorded through MarkCalleeSaved().
  bool IsCalleeSavedUsed(PhysReg reg) const {
    return used_callee_saved_regs_.count(reg) > 0;
  }

  // ---- Spill-slot management ----
  // Both are defined elsewhere; presumably they create a frame slot that is
  // also tracked in spill_slot_indices_ and query that set -- TODO confirm.
  int CreateSpillSlot(int size = 4);
  bool IsSpillSlot(int index) const;

  // ---- Virtual-register type management ----
  // (filled in by lowering, consumed by register allocation)
  enum class VRegType : uint8_t { kInt32 = 0, kInt64 = 1, kFloat32 = 2 };

  /// Records (or overwrites) the type of `vreg`.
  void SetVRegType(int vreg, VRegType type) { vreg_types_[vreg] = type; }

  /// Returns the recorded type of `vreg`, or VRegType::kInt32 when no type
  /// has been recorded; use HasVRegType() to tell the two cases apart.
  VRegType GetVRegType(int vreg) const {
    auto it = vreg_types_.find(vreg);
    return it != vreg_types_.end() ? it->second : VRegType::kInt32;
  }

  /// True when a type has been recorded for `vreg`.
  bool HasVRegType(int vreg) const { return vreg_types_.count(vreg) > 0; }

 private:
  std::string name_;
  MachineBasicBlock entry_;
  std::vector<std::unique_ptr<MachineBasicBlock>> basic_blocks_;
  std::vector<FrameSlot> frame_slots_;
  std::set<int> spill_slot_indices_;
  int frame_size_ = 0;
  std::set<PhysReg> used_callee_saved_regs_;
  std::unordered_map<int, VRegType> vreg_types_;
};
// ========== MIR 模块 ==========
class MachineModule {
public:
MachineModule() = default;
// 添加 MachineFunction
void AddFunction(std::unique_ptr<MachineFunction> func) {
functions_.push_back(std::move(func));
}
// 获取所有函数
const std::vector<std::unique_ptr<MachineFunction>>& GetFunctions() const {
return functions_;
}
std::vector<std::unique_ptr<MachineFunction>>& GetFunctions() {
return functions_;
}
// 根据名称查找函数
MachineFunction* GetFunction(const std::string& name) {
for (auto& func : functions_) {
if (func->GetName() == name) {
return func.get();
}
}
return nullptr;
}
const MachineFunction* GetFunction(const std::string& name) const {
for (const auto& func : functions_) {
if (func->GetName() == name) {
return func.get();
}
}
return nullptr;
}
struct GlobalDecl {
std::string name;
int size; // 字节大小
int alignment; // 对齐要求(通常为 4 或 8
bool is_zero_init; // 是否为零初始化
bool has_init_data; // 是否包含初始化数据(用于标量常量)
uint64_t init_data; // 初始化数据≤8字节
// 构造函数,默认零初始化
GlobalDecl(const std::string& n, int sz, int align, bool zero = true,
bool has_data = false, uint64_t data = 0)
: name(n), size(sz), alignment(align), is_zero_init(zero),
has_init_data(has_data), init_data(data) {}
};
void AddGlobal(const std::string& name, int size, int alignment,
bool is_zero_init = true,
bool has_init_data = false, uint64_t init_data = 0) {
globals_.emplace_back(name, size, alignment, is_zero_init,
has_init_data, init_data);
}
const std::vector<GlobalDecl>& GetGlobals() const { return globals_; }
private:
std::vector<std::unique_ptr<MachineFunction>> functions_;
std::vector<GlobalDecl> globals_;
};
// ========== Backend pipeline entry points ==========
// All five functions are only declared here; their definitions live
// elsewhere, so the notes below are based on the signatures and names alone.

// Takes an ir::Module by const reference and returns a newly owned
// MachineModule (presumably the lowered form of `module` -- TODO confirm).
std::unique_ptr<MachineModule> LowerToMIR(const ir::Module& module);
// The next three take the module by mutable reference. Presumably they run
// register allocation, MIR-level passes and stack-frame lowering in place;
// the order in which callers are expected to invoke them is not visible
// here -- TODO confirm.
void RunRegAlloc(MachineModule& module);
void RunMIRPasses(MachineModule& module);
void RunFrameLowering(MachineModule& module);
// Takes the module by const reference and an output stream; presumably writes
// the assembly text for `module` to `os` -- TODO confirm.
void PrintAsm(const MachineModule& module, std::ostream& os);
} // namespace mir