diff --git a/include/ir/IR.h b/include/ir/IR.h index b961192..f0b0738 100644 --- a/include/ir/IR.h +++ b/include/ir/IR.h @@ -1,318 +1,686 @@ -// 当前只支撑 i32、i32*、void 以及最小的内存/算术指令,演示用。 -// -// 当前已经实现: -// 1. 基础类型系统:void / i32 / i32* -// 2. Value 体系:Value / ConstantValue / ConstantInt / Function / BasicBlock / User / GlobalValue / Instruction -// 3. 最小指令集:Add / Alloca / Load / Store / Ret -// 4. BasicBlock / Function / Module 三层组织结构 -// 5. IRBuilder:便捷创建常量和最小指令 -// 6. def-use 关系的轻量实现: -// - Instruction 保存 operand 列表 -// - Value 保存 uses -// - 支持 ReplaceAllUsesWith 的简化实现 -// -// 当前尚未实现或只做了最小占位: -// 1. 完整类型系统:数组、函数类型、label 类型等 -// 2. 更完整的指令系统:br / condbr / call / phi / gep 等 -// 3. 更成熟的 Use 管理(例如 LLVM 风格的双向链式结构) -// 4. 更完整的 IR verifier 和优化基础设施 -// -// 当前需要特别说明的两个简化点: -// 1. BasicBlock 虽然已经纳入 Value 体系,但其类型目前仍用 void 作为占位, -// 后续如果补 label type,可以再改成更合理的块标签类型。 -// 2. ConstantValue 体系目前只实现了 ConstantInt,后续可以继续补 ConstantFloat、 -// ConstantArray等更完整的常量种类。 -// -// 建议的扩展顺序: -// 1. 先补更多指令和类型 -// 2. 再补控制流相关 IR -// 3. 最后再考虑把 Value/User/Use 进一步抽象成更完整的框架 - +// IR_try.h (修正版) #pragma once +#include "utils.h" +#include #include #include #include #include #include #include +#include #include +#include namespace ir { -class Type; +// 前向声明 class Value; class User; -class ConstantValue; -class ConstantInt; -class GlobalValue; -class Instruction; class BasicBlock; class Function; +class Instruction; +class ConstantInt; +class ConstantFloat; +class ConstantI1; +class ConstantArrayValue; +class Type; -// Use 表示一个 Value 的一次使用记录。 -// 当前实现设计: -// - value:被使用的值 -// - user:使用该值的 User -// - operand_index:该值在 user 操作数列表中的位置 - +// ----------------------------------------------------------------------------- +// Use +// ----------------------------------------------------------------------------- class Use { - public: - Use() = default; - Use(Value* value, User* user, size_t operand_index) - : value_(value), user_(user), operand_index_(operand_index) {} - - Value* GetValue() const { return value_; } - User* GetUser() const { return user_; } - size_t GetOperandIndex() const { return operand_index_; } - - void SetValue(Value* value) { value_ = value; } - void SetUser(User* user) { user_ = user; } - void SetOperandIndex(size_t operand_index) { operand_index_ = operand_index; } - - private: - Value* value_ = nullptr; - User* user_ = nullptr; - size_t operand_index_ = 0; +public: + Use() = default; + Use(Value* value, User* user, size_t operand_index) + : value_(value), user_(user), operand_index_(operand_index) {} + + Value* GetValue() const { return value_; } + User* GetUser() const { return user_; } + size_t GetOperandIndex() const { return operand_index_; } + + void SetValue(Value* value) { value_ = value; } + void SetUser(User* user) { user_ = user; } + void SetOperandIndex(size_t operand_index) { operand_index_ = operand_index; } + +private: + Value* value_ = nullptr; + User* user_ = nullptr; + size_t operand_index_ = 0; }; -// IR 上下文:集中管理类型、常量等共享资源,便于复用与扩展。 +// ----------------------------------------------------------------------------- +// Context +// ----------------------------------------------------------------------------- class Context { - public: - Context() = default; - ~Context(); - // 去重创建 i32 常量。 - ConstantInt* GetConstInt(int v); +public: + Context() = default; + ~Context(); - std::string NextTemp(); + ConstantInt* GetConstInt(int v); + std::string NextTemp(); - private: - std::unordered_map> const_ints_; - int temp_index_ = -1; +private: + std::unordered_map> const_ints_; + int temp_index_ = -1; }; +// ----------------------------------------------------------------------------- +// Type +// ----------------------------------------------------------------------------- class Type { - public: - enum class Kind { Void, Int32, PtrInt32 }; - explicit Type(Kind k); - // 使用静态共享对象获取类型。 - // 同一类型可直接比较返回值是否相等,例如: - // Type::GetInt32Type() == Type::GetInt32Type() - static const std::shared_ptr& GetVoidType(); - static const std::shared_ptr& GetInt32Type(); - static const std::shared_ptr& GetPtrInt32Type(); - Kind GetKind() const; - bool IsVoid() const; - bool IsInt32() const; - bool IsPtrInt32() const; - - private: - Kind kind_; +public: + enum class Kind { Void, Int1, Int32, Float, Label, Function, PtrInt32, Array }; + + explicit Type(Kind k); + + static const std::shared_ptr& GetVoidType(); + static const std::shared_ptr& GetInt1Type(); + static const std::shared_ptr& GetInt32Type(); + static const std::shared_ptr& GetFloatType(); + static const std::shared_ptr& GetLabelType(); + static const std::shared_ptr& GetFunctionType(); + static const std::shared_ptr& GetBoolType(); + static const std::shared_ptr& GetPtrInt32Type(); + static const std::shared_ptr& GetArrayType(); + + Kind GetKind() const { return kind_; } + bool IsVoid() const { return kind_ == Kind::Void; } + bool IsInt1() const { return kind_ == Kind::Int1; } + bool IsInt32() const { return kind_ == Kind::Int32; } + bool IsFloat() const { return kind_ == Kind::Float; } + bool IsLabel() const { return kind_ == Kind::Label; } + bool IsFunction() const { return kind_ == Kind::Function; } + bool IsBool() const { return kind_ == Kind::Int1; } // 通常与 Int1 相同 + bool IsPtrInt32() const { return kind_ == Kind::PtrInt32; } + bool IsArray() const { return kind_ == Kind::Array; } + + int GetSize() const; + void Print(std::ostream& os) const; + + template + std::enable_if_t, T*> As() const { + return dynamic_cast(const_cast(this)); + } + +private: + Kind kind_; }; +// ----------------------------------------------------------------------------- +// Value +// ----------------------------------------------------------------------------- class Value { - public: - Value(std::shared_ptr ty, std::string name); - virtual ~Value() = default; - const std::shared_ptr& GetType() const; - const std::string& GetName() const; - void SetName(std::string n); - bool IsVoid() const; - bool IsInt32() const; - bool IsPtrInt32() const; - bool IsConstant() const; - bool IsInstruction() const; - bool IsUser() const; - bool IsFunction() const; - void AddUse(User* user, size_t operand_index); - void RemoveUse(User* user, size_t operand_index); - const std::vector& GetUses() const; - void ReplaceAllUsesWith(Value* new_value); - - protected: - std::shared_ptr type_; - std::string name_; - std::vector uses_; -}; - -// ConstantValue 是常量体系的基类。 -// 当前只实现了 ConstantInt,后续可继续扩展更多常量种类。 +public: + Value(std::shared_ptr ty, std::string name); + virtual ~Value() = default; + + const std::shared_ptr& GetType() const { return type_; } + const std::string& GetName() const { return name_; } + void SetName(std::string n) { name_ = std::move(n); } + + bool IsVoid() const { return type_->IsVoid(); } + bool IsInt32() const { return type_->IsInt32(); } + bool IsPtrInt32() const { return type_->IsPtrInt32(); } + bool IsFloat() const { return type_->IsFloat(); } + bool IsBool() const { return type_->IsBool(); } + bool IsArray() const { return type_->IsArray(); } + bool IsLabel() const { return type_->IsLabel(); } + + // 派生类身份判断(通过虚函数或枚举,这里使用虚函数) + virtual bool IsConstant() const { return false; } + virtual bool IsInstruction() const { return false; } + virtual bool IsUser() const { return false; } + virtual bool IsFunction() const { return false; } + + void AddUse(User* user, size_t operand_index); + void RemoveUse(User* user, size_t operand_index); + const std::vector& GetUses() const { return uses_; } + void ReplaceAllUsesWith(Value* new_value); + void Print(std::ostream& os) const; + +protected: + std::shared_ptr type_; + std::string name_; + std::vector uses_; +}; + +// ----------------------------------------------------------------------------- +// isa / dyncast 辅助函数 +// ----------------------------------------------------------------------------- +template +inline std::enable_if_t, bool> isa(const Value* value) { + return T::classof(value); +} + +template +inline std::enable_if_t, T*> dyncast(Value* value) { + return isa(value) ? static_cast(value) : nullptr; +} + +template +inline std::enable_if_t, const T*> dyncast(const Value* value) { + return isa(value) ? static_cast(value) : nullptr; +} + +// ----------------------------------------------------------------------------- +// ConstantValue 系列 +// ----------------------------------------------------------------------------- class ConstantValue : public Value { - public: - ConstantValue(std::shared_ptr ty, std::string name = ""); +public: + ConstantValue(std::shared_ptr ty, std::string name = ""); + bool IsConstant() const override final { return true; } }; class ConstantInt : public ConstantValue { - public: - ConstantInt(std::shared_ptr ty, int v); - int GetValue() const { return value_; } +public: + ConstantInt(std::shared_ptr ty, int v); + int GetValue() const { return value_; } + static bool classof(const Value* v) { return v->IsConstant() && dynamic_cast(v) != nullptr; } +private: + int value_; +}; - private: - int value_{}; +class ConstantFloat : public ConstantValue { +public: + ConstantFloat(std::shared_ptr ty, float v); + float GetValue() const { return value_; } + static bool classof(const Value* v) { return v->IsConstant() && dynamic_cast(v) != nullptr; } +private: + float value_; }; -// 后续还需要扩展更多指令类型。 -enum class Opcode { Add, Sub, Mul, Alloca, Load, Store, Ret }; +class ConstantI1 : public ConstantValue { +public: + ConstantI1(std::shared_ptr ty, bool v); + bool GetValue() const { return value_; } + static bool classof(const Value* v) { return v->IsConstant() && dynamic_cast(v) != nullptr; } +private: + bool value_; +}; -// User 是所有“会使用其他 Value 作为输入”的 IR 对象的抽象基类。 -// 当前实现中只有 Instruction 继承自 User。 +class ConstantArrayValue : public Value { +public: + ConstantArrayValue(std::shared_ptr elemType, + const std::vector& elements, + const std::vector& dims, + const std::string& name = ""); + const std::vector& GetElements() const { return elements_; } + const std::vector& GetDims() const { return dims_; } + void Print(std::ostream& os) const; + static bool classof(const Value* v) { return v->IsConstant() && dynamic_cast(v) != nullptr; } +private: + std::vector elements_; + std::vector dims_; +}; + +// ----------------------------------------------------------------------------- +// Opcode 枚举 +// ----------------------------------------------------------------------------- +enum class Opcode { + Add, Sub, Mul, Div, Rem, FAdd, FSub, FMul, FDiv, FRem, + And, Or, Xor, Shl, AShr, LShr, + ICmpEQ, ICmpNE, ICmpLT, ICmpGT, ICmpLE, ICmpGE, + FCmpEQ, FCmpNE, FCmpLT, FCmpGT, FCmpLE, FCmpGE, + Neg, Not, FNeg, FtoI, IToF, + Call, CondBr, Br, Return, Unreachable, + Alloca, Load, Store, Memset, + GetElementPtr, Phi, Zext +}; + +// ----------------------------------------------------------------------------- +// User +// ----------------------------------------------------------------------------- class User : public Value { - public: - User(std::shared_ptr ty, std::string name); - size_t GetNumOperands() const; - Value* GetOperand(size_t index) const; - void SetOperand(size_t index, Value* value); +public: + User(std::shared_ptr ty, std::string name); + bool IsUser() const override final { return true; } - protected: - // 统一的 operand 入口。 - void AddOperand(Value* value); + size_t GetNumOperands() const { return operands_.size(); } + Value* GetOperand(size_t index) const; + void SetOperand(size_t index, Value* value); - private: - std::vector operands_; + void AddOperand(Value* value); + void AddOperands(const std::vector& values); + void RemoveOperand(size_t index); + void ClearAllOperands(); + +protected: + std::vector operands_; }; -// GlobalValue 是全局值/全局变量体系的空壳占位类。 -// 当前只补齐类层次,具体初始化器、打印和链接语义后续再补。 +// ----------------------------------------------------------------------------- +// GlobalValue +// ----------------------------------------------------------------------------- class GlobalValue : public User { - public: - GlobalValue(std::shared_ptr ty, std::string name); +public: + GlobalValue(std::shared_ptr type, const std::string& name, + bool isConst = false, Value* init = nullptr); + GlobalValue(std::shared_ptr type, const std::string& name, + const std::vector& dims, + bool isConst = false, Value* init = nullptr); + + bool IsConstant() const { return isConst_; } + bool HasInitializer() const { return init_ != nullptr; } + Value* GetInitializer() const { return init_; } + size_t GetNumDims() const { return dims_.size(); } + Value* GetDim(size_t i) const { return dims_[i]; } + + void SetConstant(bool c) { isConst_ = c; } + void SetInitializer(Value* v) { init_ = v; } + + static bool classof(const Value* v) { return v->IsUser() && dynamic_cast(v) != nullptr; } + +private: + bool isConst_; + Value* init_; + std::vector dims_; }; +// ----------------------------------------------------------------------------- +// Instruction 及其子类 +// ----------------------------------------------------------------------------- class Instruction : public User { - public: - Instruction(Opcode op, std::shared_ptr ty, std::string name = ""); - Opcode GetOpcode() const; - bool IsTerminator() const; - BasicBlock* GetParent() const; - void SetParent(BasicBlock* parent); +public: + Instruction(Opcode op, std::shared_ptr ty, + BasicBlock* parent = nullptr, const std::string& name = ""); + bool IsInstruction() const override final { return true; } + + Opcode GetOpcode() const { return opcode_; } + bool IsTerminator() const { + return opcode_ == Opcode::Br || opcode_ == Opcode::CondBr || + opcode_ == Opcode::Return || opcode_ == Opcode::Unreachable; + } + BasicBlock* GetParent() const { return parent_; } + void SetParent(BasicBlock* parent) { parent_ = parent; } - private: - Opcode opcode_; - BasicBlock* parent_ = nullptr; + static bool classof(const Value* v) { return v->IsInstruction(); } + +private: + Opcode opcode_; + BasicBlock* parent_; }; class BinaryInst : public Instruction { - public: - BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs, - std::string name); - Value* GetLhs() const; - Value* GetRhs() const; +public: + BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs, + BasicBlock* parent = nullptr, const std::string& name = ""); + Value* GetLhs() const { return GetOperand(0); } + Value* GetRhs() const { return GetOperand(1); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() >= Opcode::Add && + static_cast(v)->GetOpcode() <= Opcode::FCmpGE; + } +}; + +class UnaryInst : public Instruction { +public: + UnaryInst(Opcode op, std::shared_ptr ty, Value* operand, + BasicBlock* parent = nullptr, const std::string& name = ""); + Value* GetOprd() const { return GetOperand(0); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() >= Opcode::Neg && + static_cast(v)->GetOpcode() <= Opcode::IToF; + } }; class ReturnInst : public Instruction { - public: - ReturnInst(std::shared_ptr void_ty, Value* val); - Value* GetValue() const; +public: + ReturnInst(Value* val = nullptr, BasicBlock* parent = nullptr); + bool HasReturnValue() const { return GetNumOperands() > 0; } + Value* GetReturnValue() const { return HasReturnValue() ? GetOperand(0) : nullptr; } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Return; + } }; class AllocaInst : public Instruction { - public: - AllocaInst(std::shared_ptr ptr_ty, std::string name); +public: + AllocaInst(std::shared_ptr elemType, BasicBlock* parent = nullptr, + const std::string& name = ""); + AllocaInst(std::shared_ptr elemType, const std::vector& dims, + BasicBlock* parent = nullptr, const std::string& name = ""); + size_t GetNumDims() const { return dims_.size(); } + Value* GetDim(size_t i) const { return dims_[i]; } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Alloca; + } +private: + std::vector dims_; }; class LoadInst : public Instruction { - public: - LoadInst(std::shared_ptr val_ty, Value* ptr, std::string name); - Value* GetPtr() const; +public: + LoadInst(Value* ptr, BasicBlock* parent = nullptr, const std::string& name = ""); + Value* GetPtr() const { return GetOperand(0); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Load; + } }; class StoreInst : public Instruction { - public: - StoreInst(std::shared_ptr void_ty, Value* val, Value* ptr); - Value* GetValue() const; - Value* GetPtr() const; +public: + StoreInst(Value* val, Value* ptr, BasicBlock* parent = nullptr); + Value* GetValue() const { return GetOperand(0); } + Value* GetPtr() const { return GetOperand(1); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Store; + } +}; + +class UncondBrInst : public Instruction { +public: + UncondBrInst(BasicBlock* dest, const std::vector& args = {}, + BasicBlock* parent = nullptr); + BasicBlock* GetDest() const { return dyncast(GetOperand(0)); } + std::vector GetArguments() const { + std::vector result; + for (size_t i = 1; i < GetNumOperands(); ++i) + result.push_back(GetOperand(i)); + return result; + } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Br; + } +}; + +class CondBrInst : public Instruction { +public: + CondBrInst(Value* cond, BasicBlock* thenBlock, BasicBlock* elseBlock, + const std::vector& thenArgs = {}, + const std::vector& elseArgs = {}, + BasicBlock* parent = nullptr); + Value* GetCondition() const { return GetOperand(0); } + BasicBlock* GetThenBlock() const { return dyncast(GetOperand(1)); } + BasicBlock* GetElseBlock() const { return dyncast(GetOperand(2)); } + std::vector GetThenArguments() const { + std::vector result; + size_t num = GetThenBlock()->GetNumArguments(); + for (size_t i = 0; i < num; ++i) + result.push_back(GetOperand(3 + i)); + return result; + } + std::vector GetElseArguments() const { + std::vector result; + size_t num = GetThenBlock()->GetNumArguments(); + size_t start = 3 + num; + for (size_t i = 0; i < GetElseBlock()->GetNumArguments(); ++i) + result.push_back(GetOperand(start + i)); + return result; + } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::CondBr; + } +}; + +class UnreachableInst : public Instruction { +public: + explicit UnreachableInst(BasicBlock* parent = nullptr); + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Unreachable; + } }; -// BasicBlock 已纳入 Value 体系,便于后续向更完整 IR 类图靠拢。 -// 当前其类型仍使用 void 作为占位,后续可替换为专门的 label type。 +class CallInst : public Instruction { +public: + CallInst(Function* callee, const std::vector& args = {}, + BasicBlock* parent = nullptr, const std::string& name = ""); + Function* GetCallee() const { return dyncast(GetOperand(0)); } + std::vector GetArguments() const { + std::vector result; + for (size_t i = 1; i < GetNumOperands(); ++i) + result.push_back(GetOperand(i)); + return result; + } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Call; + } +}; + +class GetElementPtrInst : public Instruction { +public: + GetElementPtrInst(Value* ptr, const std::vector& indices, + const std::vector& dims = {}, + const std::vector& curDims = {}, + BasicBlock* parent = nullptr, const std::string& name = ""); + Value* GetPointer() const { return GetOperand(0); } + size_t GetNumIndices() const { return GetNumOperands() - 1; } + Value* GetIndex(size_t i) const { return GetOperand(i + 1); } + const std::vector& GetDims() const { return dims_; } + const std::vector& GetCurDims() const { return curDims_; } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::GetElementPtr; + } +private: + std::vector dims_; + std::vector curDims_; +}; + +class PhiInst : public Instruction { +public: + PhiInst(std::shared_ptr type, BasicBlock* parent = nullptr, + const std::string& name = ""); + void AddIncoming(Value* value, BasicBlock* block); + int GetNumIncomings() const { return static_cast(GetNumOperands() / 2); } + Value* GetIncomingValue(int i) const { return GetOperand(2 * i); } + BasicBlock* GetIncomingBlock(int i) const { return dyncast(GetOperand(2 * i + 1)); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Phi; + } +}; + +class ZextInst : public Instruction { +public: + ZextInst(Value* val, std::shared_ptr targetType, + BasicBlock* parent = nullptr, const std::string& name = ""); + Value* GetValue() const { return GetOperand(0); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Zext; + } +}; + +class MemsetInst : public Instruction { +public: + MemsetInst(Value* dst, Value* val, Value* len, Value* isVolatile, + BasicBlock* parent = nullptr); + Value* GetDest() const { return GetOperand(0); } + Value* GetValue() const { return GetOperand(1); } + Value* GetLength() const { return GetOperand(2); } + Value* GetIsVolatile() const { return GetOperand(3); } + static bool classof(const Value* v) { + return Instruction::classof(v) && static_cast(v)->GetOpcode() == Opcode::Memset; + } +}; + +// ----------------------------------------------------------------------------- +// BasicBlock +// ----------------------------------------------------------------------------- class BasicBlock : public Value { - public: - explicit BasicBlock(std::string name); - Function* GetParent() const; - void SetParent(Function* parent); - bool HasTerminator() const; - const std::vector>& GetInstructions() const; - const std::vector& GetPredecessors() const; - const std::vector& GetSuccessors() const; - template - T* Append(Args&&... args) { - if (HasTerminator()) { - throw std::runtime_error("BasicBlock 已有 terminator,不能继续追加指令: " + - name_); +public: + explicit BasicBlock(const std::string& name); + BasicBlock(Function* parent, const std::string& name); + + Function* GetParent() const { return parent_; } + void SetParent(Function* parent) { parent_ = parent; } + + bool HasTerminator() const; + const std::vector>& GetInstructions() const { return instructions_; } + + void AddPredecessor(BasicBlock* pred); + void AddSuccessor(BasicBlock* succ); + void RemovePredecessor(BasicBlock* pred); + void RemoveSuccessor(BasicBlock* succ); + const std::vector& GetPredecessors() const { return predecessors_; } + const std::vector& GetSuccessors() const { return successors_; } + + template + T* Append(Args&&... args) { + if (HasTerminator()) { + throw std::runtime_error("BasicBlock already has terminator"); + } + auto inst = std::make_unique(std::forward(args)...); + auto* ptr = inst.get(); + ptr->SetParent(this); + instructions_.push_back(std::move(inst)); + return ptr; } - auto inst = std::make_unique(std::forward(args)...); - auto* ptr = inst.get(); - ptr->SetParent(this); - instructions_.push_back(std::move(inst)); - return ptr; - } - - private: - Function* parent_ = nullptr; - std::vector> instructions_; - std::vector predecessors_; - std::vector successors_; -}; - -// Function 当前也采用了最小实现。 -// 需要特别注意:由于项目里还没有单独的 FunctionType, -// Function 继承自 Value 后,其 type_ 目前只保存“返回类型”, -// 并不能完整表达“返回类型 + 形参列表”这一整套函数签名。 -// 这对当前只支持 int main() 的最小 IR 足够,但后续若补普通函数、 -// 形参和调用,通常需要引入专门的函数类型表示。 + + static bool classof(const Value* v) { return dynamic_cast(v) != nullptr; } + size_t GetNumArguments() const { return 0; } // 当前版本无参数 + +private: + Function* parent_; + std::vector> instructions_; + std::vector predecessors_; + std::vector successors_; +}; + +// ----------------------------------------------------------------------------- +// Function +// ----------------------------------------------------------------------------- class Function : public Value { - public: - // 当前构造函数接收的也是返回类型,而不是完整函数类型。 - Function(std::string name, std::shared_ptr ret_type); - BasicBlock* CreateBlock(const std::string& name); - BasicBlock* GetEntry(); - const BasicBlock* GetEntry() const; - const std::vector>& GetBlocks() const; +public: + Function(std::string name, std::shared_ptr ret_type); + Function(std::string name, std::shared_ptr ret_type, + const std::vector>& param_types); + + bool IsFunction() const override final { return true; } + + std::shared_ptr GetReturnType() const { return GetType(); } + const std::vector>& GetParamTypes() const { return param_types_; } - private: - BasicBlock* entry_ = nullptr; - std::vector> blocks_; + BasicBlock* GetEntryBlock() const { return entry_; } + void SetEntryBlock(BasicBlock* bb); + + BasicBlock* CreateBlock(const std::string& name); + BasicBlock* AddBlock(std::unique_ptr block); + const std::vector>& GetBlocks() const { return blocks_; } + + static bool classof(const Value* v) { return v->IsFunction(); } + +private: + std::vector> param_types_; + BasicBlock* entry_; + std::vector> blocks_; }; +// ----------------------------------------------------------------------------- +// Module +// ----------------------------------------------------------------------------- class Module { - public: - Module() = default; - Context& GetContext(); - const Context& GetContext() const; - // 创建函数时当前只显式传入返回类型,尚未接入完整的 FunctionType。 - Function* CreateFunction(const std::string& name, - std::shared_ptr ret_type); - const std::vector>& GetFunctions() const; - - private: - Context context_; - std::vector> functions_; +public: + Module() = default; + + Context& GetContext() { return context_; } + const Context& GetContext() const { return context_; } + + Function* CreateFunction(const std::string& name, std::shared_ptr ret_type); + Function* CreateFunction(const std::string& name, std::shared_ptr ret_type, + const std::vector>& param_types); + Function* GetFunction(const std::string& name) const; + const std::vector>& GetFunctions() const { return functions_; } + + GlobalValue* CreateGlobalValue(const std::string& name, std::shared_ptr type, + bool isConst = false, Value* init = nullptr); + GlobalValue* GetGlobalValue(const std::string& name) const; + const std::vector>& GetGlobalValues() const { return globals_; } + +private: + Context context_; + std::vector> functions_; + std::map function_map_; + std::vector> globals_; + std::map global_map_; }; +// ----------------------------------------------------------------------------- +// IRBuilder +// ----------------------------------------------------------------------------- class IRBuilder { - public: - IRBuilder(Context& ctx, BasicBlock* bb); - void SetInsertPoint(BasicBlock* bb); - BasicBlock* GetInsertBlock() const; - - // 构造常量、二元运算、返回指令的最小集合。 - ConstantInt* CreateConstInt(int v); - BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs, - const std::string& name); - BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name); - AllocaInst* CreateAllocaI32(const std::string& name); - LoadInst* CreateLoad(Value* ptr, const std::string& name); - StoreInst* CreateStore(Value* val, Value* ptr); - ReturnInst* CreateRet(Value* v); - - private: - Context& ctx_; - BasicBlock* insert_block_; +public: + IRBuilder(Context& ctx, BasicBlock* bb); + + void SetInsertPoint(BasicBlock* bb); + BasicBlock* GetInsertBlock() const { return insert_block_; } + + ConstantInt* CreateConstInt(int v); + ConstantFloat* CreateConstFloat(float v); + ConstantI1* CreateConstBool(bool v); + ConstantArrayValue* CreateConstArray(std::shared_ptr elem_type, + const std::vector& elements, + const std::vector& dims, + const std::string& name = ""); + + BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs, + const std::string& name = ""); + BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateSub(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateMul(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateDiv(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateRem(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateAnd(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateOr(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateXor(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateShl(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateAShr(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateLShr(Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateICmp(Opcode op, Value* lhs, Value* rhs, const std::string& name = ""); + BinaryInst* CreateFCmp(Opcode op, Value* lhs, Value* rhs, const std::string& name = ""); + + UnaryInst* CreateNeg(Value* operand, const std::string& name = ""); + UnaryInst* CreateNot(Value* operand, const std::string& name = ""); + UnaryInst* CreateFNeg(Value* operand, const std::string& name = ""); + UnaryInst* CreateFtoI(Value* operand, const std::string& name = ""); + UnaryInst* CreateIToF(Value* operand, const std::string& name = ""); + + AllocaInst* CreateAlloca(std::shared_ptr elem_type, const std::string& name = ""); + AllocaInst* CreateAllocaArray(std::shared_ptr elem_type, const std::vector& dims, + const std::string& name = ""); + LoadInst* CreateLoad(Value* ptr, const std::string& name = ""); + StoreInst* CreateStore(Value* val, Value* ptr); + + UncondBrInst* CreateBr(BasicBlock* dest, const std::vector& args = {}); + CondBrInst* CreateCondBr(Value* cond, BasicBlock* then_bb, BasicBlock* else_bb, + const std::vector& then_args = {}, + const std::vector& else_args = {}); + ReturnInst* CreateRet(Value* val = nullptr); + UnreachableInst* CreateUnreachable(); + + CallInst* CreateCall(Function* callee, const std::vector& args, + const std::string& name = ""); + + GetElementPtrInst* CreateGEP(Value* ptr, const std::vector& indices, + const std::vector& dims = {}, + const std::vector& cur_dims = {}, + const std::string& name = ""); + PhiInst* CreatePhi(std::shared_ptr type, const std::string& name = ""); + ZextInst* CreateZext(Value* val, std::shared_ptr target_type, const std::string& name = ""); + MemsetInst* CreateMemset(Value* dst, Value* val, Value* len, Value* is_volatile); + +private: + Context& ctx_; + BasicBlock* insert_block_; }; +// ----------------------------------------------------------------------------- +// IRPrinter +// ----------------------------------------------------------------------------- class IRPrinter { - public: - void Print(const Module& module, std::ostream& os); +public: + void Print(const Module& module, std::ostream& os); }; -} // namespace ir +// ----------------------------------------------------------------------------- +// 输出运算符重载 +// ----------------------------------------------------------------------------- +inline std::ostream& operator<<(std::ostream& os, const Type& type) { + type.Print(os); + return os; +} +inline std::ostream& operator<<(std::ostream& os, const Value& value) { + value.Print(os); + return os; +} + +} // namespace ir \ No newline at end of file diff --git a/include/ir/IR_org.h b/include/ir/IR_org.h new file mode 100644 index 0000000..9ad9758 --- /dev/null +++ b/include/ir/IR_org.h @@ -0,0 +1,373 @@ +// 当前只支撑 i32、i32*、void 以及最小的内存/算术指令,演示用。 +// +// 当前已经实现: +// 1. 基础类型系统:void / i32 / i32* +// 2. Value 体系:Value / ConstantValue / ConstantInt / Function / BasicBlock / User / GlobalValue / Instruction +// 3. 最小指令集:Add / Alloca / Load / Store / Ret +// 4. BasicBlock / Function / Module 三层组织结构 +// 5. IRBuilder:便捷创建常量和最小指令 +// 6. def-use 关系的轻量实现: +// - Instruction 保存 operand 列表 +// - Value 保存 uses +// - 支持 ReplaceAllUsesWith 的简化实现 +// +// 当前尚未实现或只做了最小占位: +// 1. 完整类型系统:数组、函数类型、label 类型等 +// 2. 更完整的指令系统:br / condbr / call / phi / gep 等 +// 3. 更成熟的 Use 管理(例如 LLVM 风格的双向链式结构) +// 4. 更完整的 IR verifier 和优化基础设施 +// +// 当前需要特别说明的两个简化点: +// 1. BasicBlock 虽然已经纳入 Value 体系,但其类型目前仍用 void 作为占位, +// 后续如果补 label type,可以再改成更合理的块标签类型。 +// 2. ConstantValue 体系目前只实现了 ConstantInt,后续可以继续补 ConstantFloat、 +// ConstantArray等更完整的常量种类。 +// +// 建议的扩展顺序: +// 1. 先补更多指令和类型 +// 2. 再补控制流相关 IR +// 3. 最后再考虑把 Value/User/Use 进一步抽象成更完整的框架 + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace ir { + +class Type; +class Value; +class User; +class ConstantValue; +class ConstantInt; +class GlobalValue; +class Instruction; +class BasicBlock; +class Function; + +// Use 表示一个 Value 的一次使用记录。 +// 当前实现设计: +// - value:被使用的值 +// - user:使用该值的 User +// - operand_index:该值在 user 操作数列表中的位置 + +class Use { + public: + Use() = default; + Use(Value* value, User* user, size_t operand_index) + : value_(value), user_(user), operand_index_(operand_index) {} + + Value* GetValue() const { return value_; } + User* GetUser() const { return user_; } + size_t GetOperandIndex() const { return operand_index_; } + + void SetValue(Value* value) { value_ = value; } + void SetUser(User* user) { user_ = user; } + void SetOperandIndex(size_t operand_index) { operand_index_ = operand_index; } + + private: + Value* value_ = nullptr; + User* user_ = nullptr; + size_t operand_index_ = 0; +}; + +// IR 上下文:集中管理类型、常量等共享资源,便于复用与扩展。 +class Context { + public: + Context() = default; + ~Context(); + // 去重创建 i32 常量。 + ConstantInt* GetConstInt(int v); + + std::string NextTemp(); + + private: + std::unordered_map> const_ints_; + int temp_index_ = -1; +}; + +class Type { +public: + enum class Kind { Void, Int1, Int32, Float, Label, Function, PtrInt32, Array }; + + explicit Type(Kind k); + + // 静态工厂方法:返回对应类型的共享单例 + static const std::shared_ptr& GetVoidType(); + static const std::shared_ptr& GetInt1Type(); + static const std::shared_ptr& GetInt32Type(); + static const std::shared_ptr& GetFloatType(); + static const std::shared_ptr& GetLabelType(); + static const std::shared_ptr& GetFunctionType(); + static const std::shared_ptr& GetBoolType(); + static const std::shared_ptr& GetPtrInt32Type(); + static const std::shared_ptr& GetArrayType(); + + Kind GetKind() const; + + // 便捷类型判断 + bool IsVoid() const; + bool IsInt1() const; + bool IsInt32() const; + bool IsFloat() const; + bool IsLabel() const; + bool IsFunction() const; + bool IsBool() const; + bool IsPtrInt32() const; + bool IsArray() const; + +private: + Kind kind_; +}; + +class Value { + public: + Value(std::shared_ptr ty, std::string name); + virtual ~Value() = default; + const std::shared_ptr& GetType() const; + const std::string& GetName() const; + void SetName(std::string n); + bool IsVoid() const; + bool IsInt32() const; + bool IsPtrInt32() const; + bool IsConstant() const; + bool IsInstruction() const; + bool IsUser() const; + bool IsFunction() const; + void AddUse(User* user, size_t operand_index); + void RemoveUse(User* user, size_t operand_index); + const std::vector& GetUses() const; + void ReplaceAllUsesWith(Value* new_value); + + protected: + std::shared_ptr type_; + std::string name_; + std::vector uses_; +}; + +// ConstantValue 是常量体系的基类。 +// 当前只实现了 ConstantInt,后续可继续扩展更多常量种类。 +class ConstantValue : public Value { + public: + ConstantValue(std::shared_ptr ty, std::string name = ""); +}; + +class ConstantInt : public ConstantValue { + public: + ConstantInt(std::shared_ptr ty, int v); + int GetValue() const { return value_; } + + private: + int value_{}; +}; + +class ConstantFloat : public ConstantValue { + public: + ConstantFloat(std::shared_ptr ty, float v); + float GetValue() const { return value_; } + + private: + float value_{}; +}; + +class ConstantI1 : public ConstantValue { + public: + ConstantI1(std::shared_ptr ty, bool v); + int GetValue() const { return value_; } + + private: + bool value_{}; +}; + +class ConstantArrayValue : public Value { + public: + ConstantArrayValue() + +}; + +//暂时先设计这些 +enum class Opcode { + // 二元算术 + Add,Sub,Mul,Div,Rem,FAdd,FSub,FMul,FDiv,FRem, + // 位运算 + And,Or,Xor,Shl,AShr,LShr, + // 整数比较 + ICmpEQ,ICmpNE,ICmpLT,ICmpGT,ICmpLE,ICmpGE, + // 浮点比较 + FCmpEQ,FCmpNE,FCmpLT,FCmpGT,FCmpLE,FCmpGE, + // 一元运算 + Neg,Not,FNeg,FtoI,IToF, + // 调用与终止 + Call,CondBr,Br,Return,Unreachable, + // 内存操作 + Alloca,Load,Store,Memset, + // 其他 + GetElementPtr,Phi,Zext +}; + +// User 是所有“会使用其他 Value 作为输入”的 IR 对象的抽象基类。 +// 当前实现中只有 Instruction 继承自 User。 +class User : public Value { + public: + User(std::shared_ptr ty, std::string name); + size_t GetNumOperands() const; + Value* GetOperand(size_t index) const; + void SetOperand(size_t index, Value* value); + + protected: + void AddOperand(Value* value); + + private: + std::vector operands_; +}; + +// GlobalValue 是全局值/全局变量体系的空壳占位类。 +// 当前只补齐类层次,具体初始化器、打印和链接语义后续再补。 +class GlobalValue : public User { + public: + GlobalValue(std::shared_ptr ty, std::string name); +}; + +class Instruction : public User { + public: + Instruction(Opcode op, std::shared_ptr ty, std::string name = ""); + Opcode GetOpcode() const; + bool IsTerminator() const; + BasicBlock* GetParent() const; + void SetParent(BasicBlock* parent); + + private: + Opcode opcode_; + BasicBlock* parent_ = nullptr; +}; + +class BinaryInst : public Instruction { + public: + BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs, + std::string name); + Value* GetLhs() const; + Value* GetRhs() const; +}; + +class ReturnInst : public Instruction { + public: + ReturnInst(std::shared_ptr void_ty, Value* val); + Value* GetValue() const; +}; + +class AllocaInst : public Instruction { + public: + AllocaInst(std::shared_ptr ptr_ty, std::string name); +}; + +class LoadInst : public Instruction { + public: + LoadInst(std::shared_ptr val_ty, Value* ptr, std::string name); + Value* GetPtr() const; +}; + +class StoreInst : public Instruction { + public: + StoreInst(std::shared_ptr void_ty, Value* val, Value* ptr); + Value* GetValue() const; + Value* GetPtr() const; +}; + +// BasicBlock 已纳入 Value 体系,便于后续向更完整 IR 类图靠拢。 +// 当前其类型仍使用 void 作为占位,后续可替换为专门的 label type。 +class BasicBlock : public Value { + public: + explicit BasicBlock(std::string name); + Function* GetParent() const; + void SetParent(Function* parent); + bool HasTerminator() const; + const std::vector>& GetInstructions() const; + const std::vector& GetPredecessors() const; + const std::vector& GetSuccessors() const; + template + T* Append(Args&&... args) { + if (HasTerminator()) { + throw std::runtime_error("BasicBlock 已有 terminator,不能继续追加指令: " + + name_); + } + auto inst = std::make_unique(std::forward(args)...); + auto* ptr = inst.get(); + ptr->SetParent(this); + instructions_.push_back(std::move(inst)); + return ptr; + } + + private: + Function* parent_ = nullptr; + std::vector> instructions_; + std::vector predecessors_; + std::vector successors_; +}; + +// Function 当前也采用了最小实现。 +// 需要特别注意:由于项目里还没有单独的 FunctionType, +// Function 继承自 Value 后,其 type_ 目前只保存“返回类型”, +// 并不能完整表达“返回类型 + 形参列表”这一整套函数签名。 +// 这对当前只支持 int main() 的最小 IR 足够,但后续若补普通函数、 +// 形参和调用,通常需要引入专门的函数类型表示。 +class Function : public Value { + public: + // 当前构造函数接收的也是返回类型,而不是完整函数类型。 + Function(std::string name, std::shared_ptr ret_type); + BasicBlock* CreateBlock(const std::string& name); + BasicBlock* GetEntry(); + const BasicBlock* GetEntry() const; + const std::vector>& GetBlocks() const; + + private: + BasicBlock* entry_ = nullptr; + std::vector> blocks_; +}; + +class Module { + public: + Module() = default; + Context& GetContext(); + const Context& GetContext() const; + // 创建函数时当前只显式传入返回类型,尚未接入完整的 FunctionType。 + Function* CreateFunction(const std::string& name, + std::shared_ptr ret_type); + const std::vector>& GetFunctions() const; + + private: + Context context_; + std::vector> functions_; +}; + +class IRBuilder { + public: + IRBuilder(Context& ctx, BasicBlock* bb); + void SetInsertPoint(BasicBlock* bb); + BasicBlock* GetInsertBlock() const; + + // 构造常量、二元运算、返回指令的最小集合。 + ConstantInt* CreateConstInt(int v); + BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs, + const std::string& name); + BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name); + AllocaInst* CreateAllocaI32(const std::string& name); + LoadInst* CreateLoad(Value* ptr, const std::string& name); + StoreInst* CreateStore(Value* val, Value* ptr); + ReturnInst* CreateRet(Value* v); + + private: + Context& ctx_; + BasicBlock* insert_block_; +}; + +class IRPrinter { + public: + void Print(const Module& module, std::ostream& os); +}; + +} // namespace ir diff --git a/include/ir/utils.h b/include/ir/utils.h new file mode 100644 index 0000000..3b4a310 --- /dev/null +++ b/include/ir/utils.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +namespace ir { + +template struct range { + using iterator = IterT; + using value_type = typename std::iterator_traits::value_type; + using reference = typename std::iterator_traits::reference; + +private: + iterator b; + iterator e; + +public: + explicit range(iterator b, iterator e) : b(b), e(e) {} + iterator begin() { return b; } + iterator end() { return e; } + iterator begin() const { return b; } + iterator end() const { return e; } + auto size() const { return std::distance(b, e); } + auto empty() const { return b == e; } +}; + +//! create `range` object from iterator pair [begin, end) +template range make_range(IterT b, IterT e) { + return range(b, e); +} +//! create `range` object from a container who has `begin()` and `end()` methods +template +range make_range(ContainerT &c) { + return make_range(c.begin(), c.end()); +} +//! create `range` object from a container who has `begin()` and `end()` methods +template +range make_range(const ContainerT &c) { + return make_range(c.begin(), c.end()); +} + +} // namespace ir \ No newline at end of file