diff --git a/extlibs/frontend/AntlrDriver.h b/extlibs/frontend/AntlrDriver.h
new file mode 100644
index 00000000..ee22da95
--- /dev/null
+++ b/extlibs/frontend/AntlrDriver.h
@@ -0,0 +1,20 @@
+// Thin wrapper around ANTLR4 that exposes a simple parse entry point.
+#pragma once
+
+#include
+#include
+
+#include "SysYLexer.h"
+#include "SysYParser.h"
+#include "antlr4-runtime.h"
+
+struct AntlrResult {
+  std::unique_ptr input;
+  std::unique_ptr lexer;
+  std::unique_ptr tokens;
+  std::unique_ptr parser;
+  antlr4::tree::ParseTree* tree = nullptr;  // owned by parser
+};
+
+// Parses the given file; throws std::runtime_error when an error occurs.
+AntlrResult ParseFileWithAntlr(const std::string& path);
diff --git a/extlibs/frontend/SyntaxTreePrinter.h b/extlibs/frontend/SyntaxTreePrinter.h
new file mode 100644
index 00000000..4633b5ec
--- /dev/null
+++ b/extlibs/frontend/SyntaxTreePrinter.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include
+
+#include "antlr4-runtime.h"
+
+// Prints the ANTLR parse tree directly, as an indented tree.
+void PrintSyntaxTree(antlr4::tree::ParseTree* tree, antlr4::Parser* parser,
+                     std::ostream& os);
diff --git a/extlibs/ir/IR.h b/extlibs/ir/IR.h
new file mode 100644
index 00000000..87a35e0e
--- /dev/null
+++ b/extlibs/ir/IR.h
@@ -0,0 +1,545 @@
+// A small demonstration IR: void / i1 / i32 / float32 values plus i32* and float32* pointers.
+//
+// Implemented so far:
+// 1. Basic type system: the kinds listed in Type::Kind
+// 2. Value hierarchy: Value / ConstantValue / ConstantInt / ConstantFloat / Function / BasicBlock / User / GlobalValue / Instruction
+// 3. Instruction set: the opcodes listed in the Opcode enum
+// 4. Three-level organisation: BasicBlock / Function / Module
+// 5. IRBuilder: convenience helpers for creating constants and instructions
+// 6. A lightweight def-use implementation:
+//    - User (and therefore Instruction) keeps the operand list
+//    - Value keeps its uses
+//    - a simplified ReplaceAllUsesWith is supported
+//
+// Not implemented yet, or only present as a minimal placeholder:
+// 1. A complete type system: array types, function types, a label type, ...
+// 2. A richer instruction set beyond the opcodes in the Opcode enum
+// 3. More mature Use management (e.g. an LLVM-style doubly linked structure)
+// 4. A fuller IR verifier and optimisation infrastructure
+//
+// Two simplifications that deserve a special note:
+// 1. BasicBlock is part of the Value hierarchy, but its type is still the void placeholder;
+//    once a label type exists it can be switched to a proper block-label type.
+// 2. The ConstantValue family currently has ConstantInt and ConstantFloat only;
+//    ConstantArray and other constant kinds can be added later.
+//
+// Suggested order of extension:
+// 1. Add more instructions and types first
+// 2. Then add the control-flow related IR
+// 3. Finally consider abstracting Value/User/Use into a more complete framework
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace ir {
+
+class Type;
+class Value;
+class User;
+class ConstantValue;
+class ConstantInt;
+class ConstantFloat;
+class GlobalValue;
+class Instruction;
+class BasicBlock;
+class Function;
+class Argument;
+class GlobalVariable;
+
+
+
+// Use records a single use of a Value.
+// Current design:
+// - value: the value being used
+// - user: the User that uses the value
+// - operand_index: position of the value in the user's operand list
+
+class Use {
+ public:
+  Use() = default;
+  Use(Value* value, User* user, size_t operand_index)
+      : value_(value), user_(user), operand_index_(operand_index) {}
+
+  Value* GetValue() const { return value_; }
+  User* GetUser() const { return user_; }
+  size_t GetOperandIndex() const { return operand_index_; }
+
+  void SetValue(Value* value) { value_ = value; }
+  void SetUser(User* user) { user_ = user; }
+  void SetOperandIndex(size_t operand_index) { operand_index_ = operand_index; }
+
+ private:
+  Value* value_ = nullptr;  // non-owning
+  User* user_ = nullptr;    // non-owning
+  size_t operand_index_ = 0;
+};
+
+// IR context: central owner of shared resources such as types and constants, for reuse and extension.
+class Context {
+ public:
+  Context() = default;
+  ~Context();
+  // Creates an i32 constant, de-duplicated.
+  ConstantInt* GetConstInt(int v);
+  ConstantFloat* GetConstFloat(double v);
+  // Creates an i1 constant (0 or 1), de-duplicated.
+  ConstantInt* GetConstBool(int v);
+
+  std::string NextTemp();
+
+ private:
+  std::unordered_map> const_ints_;
+  std::unordered_map> const_floats_;
+  std::unordered_map> const_bools_;
+  int temp_index_ = -1;  // NOTE(review): presumably pre-incremented by NextTemp() so the first temp is 0 - confirm
+};
+
+class Type {
+ public:
+  enum class Kind { Void, Int1, Int32, Float32, PtrInt32, PtrFloat32 };
+  explicit Type(Kind k);
+  // Types are obtained as static shared objects.
+  // The returned values of the same type can be compared directly, e.g.:
+  // Type::GetInt32Type() == Type::GetInt32Type()
+  static const std::shared_ptr& GetVoidType();
+  static const std::shared_ptr& GetInt1Type();
+  static const std::shared_ptr&
GetInt32Type();
+  static const std::shared_ptr& GetFloat32Type();
+  static const std::shared_ptr& GetPtrInt32Type();
+  static const std::shared_ptr& GetPtrFloat32Type();
+  Kind GetKind() const;
+  bool IsVoid() const;
+  bool IsInt1() const;
+  bool IsInt32() const;
+  bool IsFloat32() const;
+  bool IsPtrInt32() const;
+  bool IsPtrFloat32() const;
+
+ private:
+  Kind kind_;
+};
+
+class Value {
+ public:
+  Value(std::shared_ptr ty, std::string name);
+  virtual ~Value() = default;
+  const std::shared_ptr& GetType() const;
+  const std::string& GetName() const;
+  void SetName(std::string n);
+  bool IsVoid() const;
+  bool IsInt32() const;
+  bool IsFloat32() const;
+  bool IsPtrInt32() const;
+  bool IsPtrFloat32() const;
+  bool IsConstant() const;
+  bool IsInstruction() const;
+  bool IsUser() const;
+  bool IsFunction() const;
+  void AddUse(User* user, size_t operand_index);
+  void RemoveUse(User* user, size_t operand_index);
+  const std::vector& GetUses() const;
+  void ReplaceAllUsesWith(Value* new_value);
+
+ protected:
+  std::shared_ptr type_;
+  std::string name_;
+  std::vector uses_;
+};
+
+// ConstantValue is the base class of the constant family.
+// ConstantInt and ConstantFloat derive from it below; more constant kinds can be added later.
+class ConstantValue : public Value {
+ public:
+  ConstantValue(std::shared_ptr ty, std::string name = "");
+};
+
+class ConstantInt : public ConstantValue {
+ public:
+  ConstantInt(std::shared_ptr ty, int v);
+  int GetValue() const { return value_; }
+
+ private:
+  int value_{};
+};
+
+class ConstantFloat : public ConstantValue {
+ public:
+  ConstantFloat(std::shared_ptr ty, double v);
+  double GetValue() const { return value_; }
+
+ private:
+  double value_{};
+};
+
+// More instruction kinds still need to be added later.
+enum class Opcode {
+  Add,
+  Sub,
+  Mul,
+  Div,
+  Mod,
+  SIToFP,
+  FPToSI,
+  ZExt,
+  Eq,
+  Ne,
+  Lt,
+  Le,
+  Gt,
+  Ge,
+  Alloca,
+  Load,
+  Store,
+  GEP,
+  Call,
+  Br,
+  CondBr,
+  Ret,
+  Phi
+};
+
+// User is the abstract base of every IR object that takes other Values as inputs.
+// In this header both GlobalValue and Instruction derive from User.
+class User : public Value {
+ public:
+  User(std::shared_ptr ty, std::string name);
+  size_t GetNumOperands() const;
+  Value* GetOperand(size_t index) const;
+  void SetOperand(size_t index, Value* value);
+  void AddOperand(Value* value);
+
+ private:
+  std::vector operands_;
+};
+
+// GlobalValue is an empty placeholder for the global value / global variable family.
+// It only completes the class hierarchy; initialisers, printing and linkage semantics come later.
+class GlobalValue : public User {
+ public:
+  GlobalValue(std::shared_ptr ty, std::string name);
+};
+
+class GlobalVariable : public GlobalValue {
+ public:
+  enum class StorageKind {
+    Scalar,
+    Array,
+  };
+
+  enum class ElemKind {
+    Int32,
+    Float32,
+  };
+
+  GlobalVariable(std::string name, int init_value);
+  GlobalVariable(std::string name, double init_value);
+  GlobalVariable(std::string name, size_t array_size);
+  GlobalVariable(std::string name, size_t array_size, ElemKind elem_kind);
+  GlobalVariable(std::string name, size_t array_size, const std::vector& init_values);
+  GlobalVariable(std::string name, size_t array_size, const std::vector& init_values);
+  StorageKind GetStorageKind() const;
+  bool IsArray() const;
+  ElemKind GetElemKind() const;
+  bool IsFloatElem() const;
+  int GetInitValue() const;
+  double GetInitFloatValue() const;
+  size_t GetArraySize() const;
+  const std::vector& GetInitValues() const;
+  const std::vector& GetInitFloatValues() const;
+  bool HasInitValues() const;
+
+ private:
+  StorageKind storage_kind_ = StorageKind::Scalar;
+  ElemKind elem_kind_ = ElemKind::Int32;
+  int init_value_ = 0;
+  double init_float_value_ = 0.0;
+  size_t array_size_ = 0;
+  std::vector init_values_;
+  std::vector init_float_values_;
+};
+
+class Instruction : public User {
+ public:
+  Instruction(Opcode op, std::shared_ptr ty, std::string name = "");
+  Opcode GetOpcode() const;
+  bool IsTerminator() const;
+  BasicBlock* GetParent() const;
+  void SetParent(BasicBlock* parent);
+
+ private:
+  Opcode opcode_;
+  BasicBlock* parent_ = nullptr;  // non-owning back pointer, set through SetParent
+};
+
+class BinaryInst : public Instruction {
+ public:
+  BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs,
+             std::string name);
+  Value* GetLhs() const;
+  Value* GetRhs() const;
+};
+
+class CastInst : public Instruction {
+ public:
+  CastInst(Opcode op, std::shared_ptr ty, Value* operand,
+           std::string name);
+  Value* GetOperandValue() const;
+};
+
+class BranchInst : public Instruction {
+ public:
+  BranchInst(std::shared_ptr void_ty, BasicBlock* target);
+  BasicBlock* GetTarget() const;
+};
+
+class CondBranchInst : public Instruction {
+ public:
+  CondBranchInst(std::shared_ptr void_ty, Value* cond, BasicBlock* true_bb,
+                 BasicBlock* false_bb);
+  Value* GetCond() const;
+  BasicBlock* GetTrueTarget() const;
+  BasicBlock* GetFalseTarget() const;
+};
+
+class CallInst : public Instruction {
+ public:
+  CallInst(std::shared_ptr ret_ty, Function* callee,
+           const std::vector& args, std::string name);
+  Function* GetCallee() const;
+  size_t GetNumArgs() const;
+  Value* GetArg(size_t index) const;
+};
+
+class ReturnInst : public Instruction {
+ public:
+  ReturnInst(std::shared_ptr void_ty, Value* val = nullptr);
+  Value* GetValue() const;
+  bool HasValue() const;
+};
+
+class AllocaInst : public Instruction {
+ public:
+  AllocaInst(std::shared_ptr elem_ty, std::string name,
+             Value* count = nullptr);
+  bool IsArrayAlloca() const;
+  Value* GetCount() const;
+  std::shared_ptr GetElementType() const;
+};
+
+class GetElementPtrInst : public Instruction {
+ public:
+  GetElementPtrInst(std::shared_ptr ptr_ty, Value* base_ptr,
+                    Value* index, std::string name);
+  Value* GetBasePtr() const;
+  Value* GetIndex() const;
+};
+
+class LoadInst : public Instruction {
+ public:
+  LoadInst(std::shared_ptr val_ty, Value* ptr, std::string name);
+  Value* GetPtr() const;
+};
+
+class StoreInst : public Instruction {
+public:
+  StoreInst(std::shared_ptr void_ty, Value* val, Value* ptr);
+  Value* GetValue() const;
+  Value* GetPtr() const;
+};
+
+class PhiInst : public Instruction {
+public:
+  PhiInst(std::shared_ptr ty, std::string name);
+  AllocaInst* GetAlloca() const { return alloca_; }
+  void
SetAlloca(AllocaInst* alloca) { alloca_ = alloca; }
+
+private:
+  AllocaInst* alloca_ = nullptr;  // non-owning; null until SetAlloca is called
+};
+
+class Argument : public Value {
+ public:
+  Argument(std::shared_ptr<Type> ty, std::string name, size_t index);
+  size_t GetIndex() const;
+
+ private:
+  size_t index_ = 0;
+};
+
+// BasicBlock is part of the Value hierarchy so the IR can grow towards a fuller class diagram.
+// Its type is still the void placeholder; a dedicated label type can replace it later.
+class BasicBlock : public Value {
+ public:
+  explicit BasicBlock(std::string name);
+  Function* GetParent() const;
+  void SetParent(Function* parent);
+  bool HasTerminator() const;
+  const std::vector<std::unique_ptr<Instruction>>& GetInstructions() const;
+  const std::vector<BasicBlock*>& GetPredecessors() const;
+  const std::vector<BasicBlock*>& GetSuccessors() const;
+  std::vector<BasicBlock*>& GetMutablePredecessors() {
+    return predecessors_;
+  }
+  std::vector<BasicBlock*>& GetMutableSuccessors() {
+    return successors_;
+  }
+  template <typename T, typename... Args>
+  T* Append(Args&&... args) {  // builds a T at the end of the block; the block owns it
+    if (HasTerminator()) {  // a terminated block must not grow any further
+      throw std::runtime_error("BasicBlock 已有 terminator,不能继续追加指令: " +
+                               name_);
+    }
+    auto inst = std::make_unique<T>(std::forward<Args>(args)...);
+    auto* ptr = inst.get();
+    ptr->SetParent(this);
+    instructions_.push_back(std::move(inst));
+    return ptr;
+  }
+  template <typename T, typename... Args>
+  T* Prepend(Args&&... args) {  // builds a T at the front; no terminator check is done here
+    auto inst = std::make_unique<T>(std::forward<Args>(args)...);
+    auto* ptr = inst.get();
+    ptr->SetParent(this);
+    instructions_.insert(instructions_.begin(), std::move(inst));
+    return ptr;
+  }
+  template <typename T, typename... Args>
+  T* InsertAlloca(Args&&... args) {  // builds a T at alloca_insert_index_, then advances that index
+    auto inst = std::make_unique<T>(std::forward<Args>(args)...);
+    auto* ptr = inst.get();
+    ptr->SetParent(this);
+    instructions_.insert(instructions_.begin() + alloca_insert_index_, std::move(inst));
+    ++alloca_insert_index_;
+    return ptr;
+  }
+  void RemoveInstruction(Instruction* inst) {  // destroys inst if this block owns it
+    size_t i = 0;
+    while (i < instructions_.size() && instructions_[i].get() != inst) ++i;
+    if (i == instructions_.size()) return;  // not in this block: nothing to do
+    // Erasing in front of the alloca slot shifts it left; follow it so InsertAlloca stays in range.
+    if (i < alloca_insert_index_) --alloca_insert_index_;
+    instructions_.erase(instructions_.begin() + i);
+  }
+  std::unique_ptr<Instruction> TakeInstruction(Instruction* inst);
+  void InsertInstructionBeforeTerminator(std::unique_ptr<Instruction> inst);
+
+ private:
+  Function* parent_ = nullptr;
+  std::vector<std::unique_ptr<Instruction>> instructions_;
+  std::vector<BasicBlock*> predecessors_;
+  std::vector<BasicBlock*> successors_;
+  size_t alloca_insert_index_ = 0;  // position at which InsertAlloca places the next instruction
+};
+
+// Function is also a minimal implementation for now.
+// Important: the project has no separate FunctionType yet, so although
+// Function derives from Value, its type_ only stores the return type and
+// cannot express the full signature (return type + parameter list).
+// Parameters are tracked separately through AddParam / GetParams; a dedicated
+// function type would normally be introduced as functions and calls grow.
+class Function : public Value {
+ public:
+  // The constructor takes the return type, not a complete function type.
+  Function(std::string name, std::shared_ptr<Type> ret_type,
+           bool is_external = false);
+  Argument* AddParam(const std::string& name, std::shared_ptr<Type> type);
+  const std::vector<std::unique_ptr<Argument>>& GetParams() const;
+  bool IsExternal() const;
+  BasicBlock* CreateBlock(const std::string& name);
+  BasicBlock* GetEntry();
+  const BasicBlock* GetEntry() const;
+  const std::vector<std::unique_ptr<BasicBlock>>& GetBlocks() const;
+
+ private:
+  bool is_external_ = false;
+  BasicBlock* entry_ = nullptr;
+  std::vector<std::unique_ptr<Argument>> params_;
+  std::vector<std::unique_ptr<BasicBlock>> blocks_;
+};
+
+class Module {
+ public:
+  Module() = default;
+  Context& GetContext();
+  const Context& GetContext() const;
+  // Only the return type is passed when creating a function; no full FunctionType exists yet.
+  Function* CreateFunction(const std::string& name,
+                           std::shared_ptr<Type> ret_type,
+                           bool is_external = false);
+  Function* GetFunction(const std::string& name) const;
+  GlobalVariable* CreateGlobalI32(const std::string& name, int init_value);
+  GlobalVariable* CreateGlobalF32(const std::string& name, double init_value);
+
GlobalVariable* CreateGlobalArrayI32(const std::string& name,
+                                       size_t array_size);
+  GlobalVariable* CreateGlobalArrayF32(const std::string& name,
+                                       size_t array_size);
+  GlobalVariable* CreateGlobalArrayI32(const std::string& name,
+                                       size_t array_size,
+                                       const std::vector& init_values);
+  GlobalVariable* CreateGlobalArrayF32(const std::string& name,
+                                       size_t array_size,
+                                       const std::vector& init_values);
+  GlobalVariable* GetGlobal(const std::string& name) const;
+  const std::vector>& GetGlobals() const;
+  const std::vector>& GetFunctions() const;
+
+ private:
+  Context context_;
+  std::vector> globals_;
+  std::vector> functions_;
+};
+
+class IRBuilder {
+ public:
+  IRBuilder(Context& ctx, BasicBlock* bb);
+  void SetInsertPoint(BasicBlock* bb);
+  BasicBlock* GetInsertBlock() const;
+
+  // Factory helpers for constants and for the instructions declared in this header.
+  ConstantInt* CreateConstInt(int v);
+  ConstantFloat* CreateConstFloat(double v);
+  BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs,
+                           const std::string& name);
+  BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name);
+  BinaryInst* CreateICmp(Opcode op, Value* lhs, Value* rhs,
+                         const std::string& name);
+  CastInst* CreateSIToFP(Value* operand, const std::string& name);
+  CastInst* CreateFPToSI(Value* operand, const std::string& name);
+  CastInst* CreateZExt(Value* operand, std::shared_ptr target_ty, const std::string& name);
+  AllocaInst* CreateAlloca(std::shared_ptr elem_ty, const std::string& name,
+                           Value* count = nullptr);
+  AllocaInst* CreateAllocaI32(const std::string& name,
+                              Value* count = nullptr);
+  AllocaInst* CreateAllocaF32(const std::string& name,
+                              Value* count = nullptr);
+  LoadInst* CreateLoad(Value* ptr, const std::string& name);
+  StoreInst* CreateStore(Value* val, Value* ptr);
+  GetElementPtrInst* CreateGEP(Value* base_ptr, Value* index,
+                               const std::string& name);
+  CallInst* CreateCall(Function* callee, const std::vector& args,
+                       const std::string& name);
+  BranchInst* CreateBr(BasicBlock* target);
+  CondBranchInst* CreateCondBr(Value* cond, BasicBlock* true_bb,
+                               BasicBlock* false_bb);
+  ReturnInst* CreateRet(Value* v);
+  ReturnInst* CreateRetVoid();
+  PhiInst* CreatePhi(std::shared_ptr ty, const std::string& name);
+
+ private:
+  Context& ctx_;
+  BasicBlock* insert_block_;
+};
+
+class IRPrinter {
+ public:
+  void Print(const Module& module, std::ostream& os);
+};
+
+}  // namespace ir
diff --git a/extlibs/irgen/IRGen.h b/extlibs/irgen/IRGen.h
new file mode 100644
index 00000000..861f6fcb
--- /dev/null
+++ b/extlibs/irgen/IRGen.h
@@ -0,0 +1,122 @@
+// Translates the syntax tree into IR.
+// The implementation is split across IRGenFunc/IRGenStmt/IRGenExp/IRGenDecl.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "SysYBaseVisitor.h"
+#include "SysYParser.h"
+#include "ir/IR.h"
+#include "sem/Sema.h"
+
+namespace ir {
+class Module;
+class Function;
+class IRBuilder;
+class Value;
+}
+
+class IRGenImpl final : public SysYBaseVisitor {
+ public:
+  IRGenImpl(ir::Module& module, const SemanticContext& sema);
+
+  std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override;
+  std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override;
+  std::any visitBlock(SysYParser::BlockContext* ctx) override;
+  std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override;
+  std::any visitDecl(SysYParser::DeclContext* ctx) override;
+  std::any visitVarDecl(SysYParser::VarDeclContext* ctx) override;
+  std::any visitStmt(SysYParser::StmtContext* ctx) override;
+  std::any visitVarDef(SysYParser::VarDefContext* ctx) override;
+  std::any visitExp(SysYParser::ExpContext* ctx) override;
+  std::any visitAddExp(SysYParser::AddExpContext* ctx) override;
+  std::any visitMulExp(SysYParser::MulExpContext* ctx) override;
+  std::any visitUnaryExp(SysYParser::UnaryExpContext* ctx) override;
+  std::any visitPrimaryExp(SysYParser::PrimaryExpContext* ctx) override;
+  std::any visitLVal(SysYParser::LValContext* ctx) override;
+  std::any visitNumber(SysYParser::NumberContext* ctx) override;
+
+ private:
+  enum class BlockFlow {
+    Continue,
+    Terminated,
+  };
+
+  BlockFlow VisitBlockItemResult(SysYParser::BlockItemContext& item);
+  ir::Value* EvalExpr(SysYParser::ExpContext& expr);
+  ir::Value* EvalBinaryOrFold(ir::Opcode op, ir::Value* lhs, ir::Value* rhs);
+  std::shared_ptr ResolveBType(SysYParser::BTypeContext* btype) const;
+  int EvalConstIntExpr(SysYParser::ExpContext& expr);
+  int EvalConstIntExpr(SysYParser::ConstExpContext& expr);
+  int EvalConstIntAddExp(SysYParser::AddExpContext& expr);
+  int EvalConstIntMulExp(SysYParser::MulExpContext& expr);
+  int EvalConstIntUnaryExp(SysYParser::UnaryExpContext& expr);
+  int EvalConstIntPrimaryExp(SysYParser::PrimaryExpContext& expr);
+  double EvalConstFloatExpr(SysYParser::ConstExpContext& expr);
+  double EvalConstFloatAddExp(SysYParser::AddExpContext& expr);
+  double EvalConstFloatMulExp(SysYParser::MulExpContext& expr);
+  double EvalConstFloatUnaryExp(SysYParser::UnaryExpContext& expr);
+  double EvalConstFloatPrimaryExp(SysYParser::PrimaryExpContext& expr);
+  std::vector EvalArrayExtents(
+      const std::vector& dims);
+  std::vector GetArrayExtentsForDecl(SysYParser::VarDefContext* decl);
+  std::vector GetArrayExtentsForConstDecl(
+      SysYParser::ConstDefContext* decl);
+  std::vector GetArrayExtentsForLVal(SysYParser::LValContext& lval,
+                                     bool& is_array);
+  ir::Value* BuildLinearizedIndex(
+      const std::vector& indices,
+      const std::vector& extents_with_first_dim) ;
+  ir::Value* CastValueTo(ir::Value* value,
+                         const std::shared_ptr& target_type);
+  ir::Value* GetLValAddress(SysYParser::LValContext& lval);
+  ir::AllocaInst* CreateEntryBlockAlloca(std::shared_ptr elem_ty,
+                                         const std::string& name,
+                                         ir::Value* count = nullptr);
+  std::string NextBlockName(const std::string& prefix);
+  void EmitCondBranch(SysYParser::CondContext& cond, ir::BasicBlock* true_bb,
+                      ir::BasicBlock* false_bb);
+  void EmitLOrBranch(SysYParser::LOrExpContext& expr, ir::BasicBlock* true_bb,
+                     ir::BasicBlock* false_bb);
+  void EmitLAndBranch(SysYParser::LAndExpContext& expr, ir::BasicBlock* true_bb,
+                      ir::BasicBlock* false_bb);
+  void EmitEqBranch(SysYParser::EqExpContext& expr, ir::BasicBlock* true_bb,
+                    ir::BasicBlock* false_bb);
+  void EmitRelBranch(SysYParser::RelExpContext& expr, ir::BasicBlock* true_bb,
+                     ir::BasicBlock* false_bb);
+  ir::Value* EvalEqValue(SysYParser::EqExpContext& expr);
+  ir::Value* EvalRelValue(SysYParser::RelExpContext& expr);
+
+  ir::Module& module_;
+  const SemanticContext& sema_;
+  ir::Function* func_;  // NOTE(review): no default initializer here - confirm the constructor sets it
+  ir::IRBuilder builder_;
+  std::unordered_map function_map_;
+  std::unordered_map const_value_map_;
+  std::vector> local_const_stack_;
+  std::vector> const_value_history_;
+  std::unordered_map>
+      array_extents_map_;
+  std::unordered_map>
+      const_array_extents_map_;
+  std::unordered_map> param_array_extents_map_;
+  std::unordered_map param_storage_map_;
+  std::unordered_map param_pointer_map_;
+  std::unordered_map global_storage_map_;
+  std::unordered_map
+      const_global_storage_map_;
+  // Name binding is Sema's job; IRGen only keeps the "declaration -> storage slot" codegen state.
+  std::unordered_map storage_map_;
+  std::unordered_map
+      const_storage_map_;
+  std::vector> loop_stack_;
+  int block_index_ = 0;
+};
+
+std::unique_ptr GenerateIR(SysYParser::CompUnitContext& tree,
+                           const SemanticContext& sema);
diff --git a/extlibs/mir/MIR.h b/extlibs/mir/MIR.h
new file mode 100644
index 00000000..dabbd02c
--- /dev/null
+++ b/extlibs/mir/MIR.h
@@ -0,0 +1,414 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace ir
+{
+    class Module;
+}
+
+namespace mir
+{
+
+    class MIRContext
+    {
+    public:
+        MIRContext() = default;
+    };
+
+    MIRContext &DefaultContext();
+
+    enum class PhysReg
+    {
+        W0,
+        W1,
+        W2,
+        W3,
+        W4,
+        W5,
+        W6,
+        W7,
+        W8,
+        W9,
+        W10,
+        W11,
+        W12,
+        W13,
+        W14,
+        W15,
+        W16,
+        W17,
+        W18,
+        W19,
+        W20,
+        W21,
+        W22,
+        W23,
+        W24,
+        W25,
+        W26,
+        W27,
+        W28,
+        W29,
+        W30,
+        X0,
+        X1,
+        X2,
+        X3,
+        X4,
+        X5,
+        X6,
+        X7,
+        X8,
+        X9,
+        X10,
+        X11,
+        X12,
+        X13,
+        X14,
+        X15,
+
X16,
+        X17,
+        X18,
+        X19,
+        X20,
+        X21,
+        X22,
+        X23,
+        X24,
+        X25,
+        X26,
+        X27,
+        X28,
+        X29,
+        X30,
+        S0,
+        S1,
+        S2,
+        S3,
+        S4,
+        S5,
+        S6,
+        S7,
+        S8,
+        S9,
+        S10,
+        S11,
+        S12,
+        S13,
+        S14,
+        S15,
+        S16,
+        S17,
+        S18,
+        S19,
+        S20,
+        S21,
+        S22,
+        S23,
+        S24,
+        S25,
+        S26,
+        S27,
+        S28,
+        S29,
+        S30,
+        S31,
+        XZR,
+        SP,
+        WZR
+    };
+
+    const char *PhysRegName(PhysReg reg);
+
+    enum class VRegClass
+    {
+        Int,
+        Float,
+        Ptr
+    };
+
+    enum class Opcode
+    {
+        Prologue,
+        Epilogue,
+        MovImm,
+        LoadStack,
+        StoreStack,
+        LoadStackAddr,
+        LoadGlobal,
+        StoreGlobal,
+        LoadGlobalAddr,
+        LoadMem,
+        StoreMem,
+        AddRR,
+        SubRR,
+        MulRR,
+        DivRR,
+        ModRR,
+        AndRR,
+        OrRR,
+        XorRR,
+        ShlRR,
+        ShrRR,
+        AsrRR,
+        Asr64RR,
+        Uxtw,
+        Sxtw,
+        CmpRR,
+        CmpImm,
+        FCmpRR,
+        CSet,
+        Csel,
+        Smull,
+        Msub,
+        NegRR,
+        FAddRR,
+        FSubRR,
+        FMulRR,
+        FDivRR,
+        Scvtf,
+        FCvtzs,
+        FMovWS,
+        Br,
+        CondBr,
+        Call,
+        Ret,
+        LoadAddr,
+        MovReg,
+    };
+
+    enum class CondCode
+    {
+        EQ,
+        NE,
+        LT,
+        LE,
+        GT,
+        GE
+    };
+
+    class Operand
+    {
+    public:
+        enum class Kind
+        {
+            Reg,
+            VReg,
+            Imm,
+            FrameIndex,
+            Label,
+            Symbol
+        };
+
+        static Operand Reg(PhysReg reg);
+        static Operand VReg(int id, VRegClass vreg_class);
+        static Operand Imm(int value);
+        static Operand FrameIndex(int index);
+        static Operand Label(int label_id);
+        static Operand Symbol(std::string symbol);
+
+        Kind GetKind() const { return kind_; }
+        PhysReg GetReg() const { return reg_; }
+        int GetImm() const { return imm_; }
+        int GetFrameIndex() const { return imm_; } // same imm_ storage as GetImm
+        int GetLabel() const { return imm_; } // same imm_ storage as GetImm
+        const std::string &GetSymbol() const { return symbol_; }
+        int GetVRegId() const { return imm_; } // same imm_ storage as GetImm
+        VRegClass GetVRegClass() const { return vreg_class_; }
+
+    private:
+        // Private: operands are created through the static factory functions above.
+        Operand(Kind kind, PhysReg reg, int imm,
+                VRegClass vreg_class = VRegClass::Int, std::string symbol = "");
+
+        Kind kind_;
+        PhysReg reg_;
+        int imm_;
+        std::string symbol_;
+        VRegClass vreg_class_;
+    };
+
+    class MachineInstr
+    {
+    public:
+        MachineInstr(Opcode opcode, std::vector operands = {});
+
+        Opcode GetOpcode() const { return opcode_; }
+        const std::vector &GetOperands() const { return operands_; }
+        std::vector &GetOperands() { return operands_; }
+
+    private:
+        Opcode opcode_;
+        std::vector operands_;
+    };
+
+    struct FrameSlot
+    {
+        int index = 0;
+        int size = 4;
+        int offset = 0;
+        bool is_stack_arg = false;
+        bool is_callee_stack_arg = false;
+    };
+
+    class MachineBasicBlock
+    {
+    public:
+        explicit MachineBasicBlock(std::string name, int label_id = -1);
+
+        const std::string &GetName() const { return name_; }
+        int GetLabelId() const { return label_id_; }
+        void SetLabelId(int label_id) { label_id_ = label_id; }
+
+        std::vector &GetInstructions() { return instructions_; }
+        const std::vector &GetInstructions() const { return instructions_; }
+
+        MachineInstr &Append(Opcode opcode,
+                             std::initializer_list operands = {});
+
+    private:
+        std::string name_;
+        int label_id_ = -1;
+        std::vector instructions_;
+    };
+
+    class MachineFunction
+    {
+    public:
+        explicit MachineFunction(std::string name);
+
+        const std::string &GetName() const { return name_; }
+
+        // Dereferences entry_ without a null check; entry_ defaults to nullptr below.
+        MachineBasicBlock &GetEntry() { return *entry_; }
+        const MachineBasicBlock &GetEntry() const { return *entry_; }
+
+        MachineBasicBlock *GetEntryPtr() { return entry_; }
+        const MachineBasicBlock *GetEntryPtr() const { return entry_; }
+
+        MachineBasicBlock &CreateBlock(std::string name);
+        MachineBasicBlock *FindBlock(const std::string &name);
+        const MachineBasicBlock *FindBlock(const std::string &name) const;
+
+        std::vector> &GetBlocks()
+        {
+            return blocks_;
+        }
+        const std::vector> &GetBlocks() const
+        {
+            return blocks_;
+        }
+
+        int CreateLabel();
+
+        int CreateFrameIndex(int size = 4);
+        int CreateStackArgFrameIndex(int size = 4);
+        int CreateCalleeStackArgFrameIndex(int size = 4);
+        FrameSlot &GetFrameSlot(int index);
+        const FrameSlot &GetFrameSlot(int index) const;
+        const std::vector &GetFrameSlots() const { return frame_slots_; }
+        std::vector &GetFrameSlots() { return frame_slots_; }
+
+        int GetFrameSize() const { return frame_size_; }
+        void SetFrameSize(int size) { frame_size_ = size; }
+
+        int CreateVReg(VRegClass vreg_class);
+        VRegClass GetVRegClass(int vreg_id) const;
+        int GetNumVRegs() const { return static_cast(vreg_classes_.size()); }
+
+        void AddCalleeSavedReg(PhysReg reg);
+        const std::vector &GetCalleeSavedRegs() const { return callee_saved_regs_; }
+
+    private:
+        std::string name_;
+        std::vector> blocks_;
+        MachineBasicBlock *entry_ = nullptr;
+
+        std::vector frame_slots_;
+        int frame_size_ = 0;
+        int next_label_id_ = 0;
+
+        std::vector vreg_classes_;
+        std::vector callee_saved_regs_;
+    };
+
+    struct MachineGlobal
+    {
+        enum class Kind
+        {
+            I32Scalar,
+            I32Array
+        };
+
+        std::string name;
+        Kind kind = Kind::I32Scalar;
+        int init_value = 0;
+        size_t array_size = 0;
+        std::vector init_values;
+    };
+
+    class MachineModule
+    {
+    public:
+        MachineModule() = default;
+
+        MachineFunction &CreateFunction(std::string name);
+        MachineFunction *GetFunction(const std::string &name);
+        const MachineFunction *GetFunction(const std::string &name) const;
+
+        std::vector> &GetFunctions()
+        {
+            return functions_;
+        }
+        const std::vector> &GetFunctions() const
+        {
+            return functions_;
+        }
+
+        // Appends an I32Scalar global with the given initial value to globals_.
+        void AddGlobalI32(std::string name, int init_value)
+        {
+            MachineGlobal g;
+            g.name = std::move(name);
+            g.kind = MachineGlobal::Kind::I32Scalar;
+            g.init_value = init_value;
+            globals_.push_back(std::move(g));
+        }
+
+        // Appends an I32Array global; init_values is stored as given and may be empty.
+        void AddGlobalArrayI32(std::string name, size_t array_size,
+                               std::vector init_values = {})
+        {
+            MachineGlobal g;
+            g.name = std::move(name);
+            g.kind = MachineGlobal::Kind::I32Array;
+            g.array_size = array_size;
+            g.init_values = std::move(init_values);
+            globals_.push_back(std::move(g));
+        }
+
+        std::vector &GetGlobals() { return globals_; }
+        const std::vector &GetGlobals() const { return globals_; }
+
+    private:
+        std::vector> functions_;
+        std::vector globals_;
+    };
+
+
std::unique_ptr LowerModuleToMIR(const ir::Module &module);
+    std::unique_ptr LowerToMIR(const ir::Module &module);
+
+    void RunRegAlloc(MachineFunction &function);
+    void RunRegAlloc(MachineModule &module);
+
+    void RunFrameLowering(MachineFunction &function);
+    void RunFrameLowering(MachineModule &module);
+
+    void RunPeephole(MachineFunction &function);
+    void RunPeephole(MachineModule &module);
+
+    void PrintAsm(const MachineFunction &function, std::ostream &os);
+    void PrintAsm(const MachineModule &module, std::ostream &os);
+
+} // namespace mir
diff --git a/extlibs/sem/Sema.h b/extlibs/sem/Sema.h
new file mode 100644
index 00000000..5a677fd0
--- /dev/null
+++ b/extlibs/sem/Sema.h
@@ -0,0 +1,92 @@
+// Semantic checking and name binding on top of the syntax tree.
+#pragma once
+
+#include
+
+#include "SysYParser.h"
+
+// Each Bind* stores an entry keyed by the use-site node (overwriting any earlier
+// entry for that node); each Resolve* returns it, or nullptr when none was bound.
+class SemanticContext {
+ public:
+  void BindVarUse(SysYParser::LValContext* use,
+                  SysYParser::VarDefContext* decl) {
+    var_uses_[use] = decl;
+  }
+
+  SysYParser::VarDefContext* ResolveVarUse(
+      const SysYParser::LValContext* use) const {
+    auto it = var_uses_.find(use);
+    return it == var_uses_.end() ? nullptr : it->second;
+  }
+
+  void BindConstArrayUse(SysYParser::LValContext* use,
+                         SysYParser::ConstDefContext* decl) {
+    const_array_uses_[use] = decl;
+  }
+
+  SysYParser::ConstDefContext* ResolveConstArrayUse(
+      const SysYParser::LValContext* use) const {
+    auto it = const_array_uses_.find(use);
+    return it == const_array_uses_.end() ? nullptr : it->second;
+  }
+
+  void BindConstScalarUse(SysYParser::LValContext* use,
+                          SysYParser::ConstDefContext* decl) {
+    const_scalar_uses_[use] = decl;
+  }
+
+  SysYParser::ConstDefContext* ResolveConstScalarUse(
+      const SysYParser::LValContext* use) const {
+    auto it = const_scalar_uses_.find(use);
+    return it == const_scalar_uses_.end() ? nullptr : it->second;
+  }
+
+  void BindConstUse(SysYParser::LValContext* use, int value) {
+    const_uses_[use] = value;
+  }
+
+  // Returns a pointer to the value stored inside const_uses_, or nullptr when absent.
+  const int* ResolveConstUse(const SysYParser::LValContext* use) const {
+    auto it = const_uses_.find(use);
+    return it == const_uses_.end() ? nullptr : &it->second;
+  }
+
+  void BindConstFloatUse(SysYParser::LValContext* use, double value) {
+    const_float_uses_[use] = value;
+  }
+
+  // Returns a pointer to the value stored inside const_float_uses_, or nullptr when absent.
+  const double* ResolveConstFloatUse(const SysYParser::LValContext* use) const {
+    auto it = const_float_uses_.find(use);
+    return it == const_float_uses_.end() ? nullptr : &it->second;
+  }
+
+  void BindCallUse(SysYParser::UnaryExpContext* call,
+                   SysYParser::FuncDefContext* decl) {
+    call_uses_[call] = decl;
+  }
+
+  SysYParser::FuncDefContext* ResolveCallUse(
+      const SysYParser::UnaryExpContext* call) const {
+    auto it = call_uses_.find(call);
+    return it == call_uses_.end() ? nullptr : it->second;
+  }
+
+ private:
+  std::unordered_map
+      var_uses_;
+  std::unordered_map const_uses_;
+  std::unordered_map const_float_uses_;
+  std::unordered_map
+      const_array_uses_;
+  std::unordered_map
+      const_scalar_uses_;
+  std::unordered_map
+      call_uses_;
+};
+
+// Currently only checks that:
+// - variables are declared before they are used
+// - local variables are not defined more than once
+SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit);
diff --git a/extlibs/sem/SymbolTable.h b/extlibs/sem/SymbolTable.h
new file mode 100644
index 00000000..61275509
--- /dev/null
+++ b/extlibs/sem/SymbolTable.h
@@ -0,0 +1,22 @@
+// Minimal symbol table: records where local variables are defined.
+#pragma once
+
+#include
+#include
+#include
+
+#include "SysYParser.h"
+
+class SymbolTable {
+ public:
+  void EnterScope();
+  void ExitScope();
+  void Add(const std::string& name, SysYParser::VarDefContext* decl);
+  bool ContainsInCurrent(const std::string& name) const;
+  bool Contains(const std::string& name) const;
+  SysYParser::VarDefContext* Lookup(const std::string& name) const;
+
+ private:
+  std::vector>
+      scopes_;
+};
diff --git a/extlibs/utils/CLI.h b/extlibs/utils/CLI.h
new file mode 100644
index
00000000..a06106b6
--- /dev/null
+++ b/extlibs/utils/CLI.h
@@ -0,0 +1,17 @@
+// Command-line parsing: supports the -S -o -O1 format required by the contest.
+#pragma once
+
+#include
+
+struct CLIOptions {
+  std::string input;
+  std::string output;  // output file path given with -o
+  bool emit_parse_tree = false;
+  bool emit_ir = false;
+  bool emit_asm = false;
+  bool show_help = false;
+  bool optimize = false;  // -O or -O1
+  int opt_level = 0;      // optimisation level: 0, 1, 2, 3
+};
+
+CLIOptions ParseCLI(int argc, char** argv);
diff --git a/extlibs/utils/Log.h b/extlibs/utils/Log.h
new file mode 100644
index 00000000..303f1a11
--- /dev/null
+++ b/extlibs/utils/Log.h
@@ -0,0 +1,20 @@
+// Lightweight logging interface.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+void LogInfo(std::string_view msg, std::ostream& os);
+void LogError(std::string_view msg, std::ostream& os);
+
+std::string FormatError(std::string_view stage, std::string_view msg);
+std::string FormatErrorAt(std::string_view stage, std::size_t line,
+                          std::size_t column, std::string_view msg);
+bool HasErrorPrefix(std::string_view msg, std::string_view stage);
+void PrintException(std::ostream& os, const std::exception& ex);
+
+// Prints the command-line help text (used for `compiler --help`).
+void PrintHelp(std::ostream& os);