diff --git a/src/ir/BasicBlock.cpp b/src/ir/BasicBlock.cpp index c85d923..ef257bc 100644 --- a/src/ir/BasicBlock.cpp +++ b/src/ir/BasicBlock.cpp @@ -3,3 +3,22 @@ // - 维护或可计算前驱/后继关系,用于 CFG 分析与优化 #include "ir/IR.h" + +#include + +namespace ir { + +BasicBlock::BasicBlock(std::string name) : name_(std::move(name)) {} + +const std::string& BasicBlock::name() const { return name_; } + +bool BasicBlock::HasTerminator() const { + return !instructions_.empty() && instructions_.back()->IsTerminator(); +} + +const std::vector>& BasicBlock::instructions() + const { + return instructions_; +} + +} // namespace ir diff --git a/src/ir/Function.cpp b/src/ir/Function.cpp index 3772494..33312b0 100644 --- a/src/ir/Function.cpp +++ b/src/ir/Function.cpp @@ -20,4 +20,12 @@ BasicBlock* Function::CreateBlock(const std::string& name) { return ptr; } +BasicBlock* Function::entry() { return entry_; } + +const BasicBlock* Function::entry() const { return entry_; } + +const std::vector>& Function::blocks() const { + return blocks_; +} + } // namespace ir diff --git a/src/ir/IR.h b/src/ir/IR.h index 1f03ee1..2876333 100644 --- a/src/ir/IR.h +++ b/src/ir/IR.h @@ -41,8 +41,8 @@ Context& DefaultContext(); class Type { public: enum class Kind { Void, Int32, PtrInt32 }; - explicit Type(Kind k) : kind_(k) {} - Kind kind() const { return kind_; } + explicit Type(Kind k); + Kind kind() const; static std::shared_ptr Void(); static std::shared_ptr Int32(); static std::shared_ptr PtrInt32(); @@ -53,14 +53,13 @@ class Type { class Value { public: - Value(std::shared_ptr ty, std::string name) - : type_(std::move(ty)), name_(std::move(name)) {} + Value(std::shared_ptr ty, std::string name); virtual ~Value() = default; - const std::shared_ptr& type() const { return type_; } - const std::string& name() const { return name_; } - void set_name(std::string n) { name_ = std::move(n); } - void AddUser(Instruction* user) { users_.push_back(user); } - const std::vector& users() const { return users_; } + const std::shared_ptr& type() const; + const std::string& name() const; + void set_name(std::string n); + void AddUser(Instruction* user); + const std::vector& users() const; protected: std::shared_ptr type_; @@ -77,16 +76,16 @@ class ConstantInt : public Value { int value_{}; }; +// 后续还需要扩展更多指令类型。 enum class Opcode { Add, Sub, Mul, Alloca, Load, Store, Ret }; class Instruction : public Value { public: - Instruction(Opcode op, std::shared_ptr ty, std::string name = "") - : Value(std::move(ty), std::move(name)), opcode_(op) {} - Opcode opcode() const { return opcode_; } - bool IsTerminator() const { return opcode_ == Opcode::Ret; } - BasicBlock* parent() const { return parent_; } - void set_parent(BasicBlock* parent) { parent_ = parent; } + Instruction(Opcode op, std::shared_ptr ty, std::string name = ""); + Opcode opcode() const; + bool IsTerminator() const; + BasicBlock* parent() const; + void set_parent(BasicBlock* parent); private: Opcode opcode_; @@ -97,8 +96,8 @@ class BinaryInst : public Instruction { public: BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs, std::string name); - Value* lhs() const { return lhs_; } - Value* rhs() const { return rhs_; } + Value* lhs() const; + Value* rhs() const; private: Value* lhs_; @@ -108,7 +107,7 @@ class BinaryInst : public Instruction { class ReturnInst : public Instruction { public: explicit ReturnInst(Value* val); - Value* value() const { return value_; } + Value* value() const; private: Value* value_; @@ -122,7 +121,7 @@ class AllocaInst : public Instruction { class LoadInst : public Instruction { public: LoadInst(Value* ptr, std::string name); - Value* ptr() const { return ptr_; } + Value* ptr() const; private: Value* ptr_; @@ -131,8 +130,8 @@ class LoadInst : public Instruction { class StoreInst : public Instruction { public: StoreInst(Value* val, Value* ptr); - Value* value() const { return value_; } - Value* ptr() const { return ptr_; } + Value* value() const; + Value* ptr() const; private: Value* value_; @@ -141,14 +140,10 @@ class StoreInst : public Instruction { class BasicBlock { public: - explicit BasicBlock(std::string name) : name_(std::move(name)) {} - const std::string& name() const { return name_; } - bool HasTerminator() const { - return !instructions_.empty() && instructions_.back()->IsTerminator(); - } - const std::vector>& instructions() const { - return instructions_; - } + explicit BasicBlock(std::string name); + const std::string& name() const; + bool HasTerminator() const; + const std::vector>& instructions() const; template T* Append(Args&&... args) { if (HasTerminator()) { @@ -172,11 +167,9 @@ class Function : public Value { // 允许显式指定返回类型,便于后续扩展多种函数签名。 Function(std::string name, std::shared_ptr ret_type); BasicBlock* CreateBlock(const std::string& name); - BasicBlock* entry() { return entry_; } - const BasicBlock* entry() const { return entry_; } - const std::vector>& blocks() const { - return blocks_; - } + BasicBlock* entry(); + const BasicBlock* entry() const; + const std::vector>& blocks() const; private: BasicBlock* entry_ = nullptr; @@ -188,9 +181,7 @@ class Module { // 创建函数时显式传入返回类型,便于在 IRGen 中根据语法树信息选择类型。 Function* CreateFunction(const std::string& name, std::shared_ptr ret_type); - const std::vector>& functions() const { - return functions_; - } + const std::vector>& functions() const; private: std::vector> functions_; @@ -198,17 +189,15 @@ class Module { class IRBuilder { public: - explicit IRBuilder(BasicBlock* bb) : insertBlock_(bb) {} - void SetInsertPoint(BasicBlock* bb) { insertBlock_ = bb; } - BasicBlock* GetInsertBlock() const { return insertBlock_; } + explicit IRBuilder(BasicBlock* bb); + void SetInsertPoint(BasicBlock* bb); + BasicBlock* GetInsertBlock() const; // 构造常量、二元运算、返回指令的最小集合。 ConstantInt* CreateConstInt(int v); BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs, const std::string& name); - BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name) { - return CreateBinary(Opcode::Add, lhs, rhs, name); - } + BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name); AllocaInst* CreateAllocaI32(const std::string& name); LoadInst* CreateLoad(Value* ptr, const std::string& name); StoreInst* CreateStore(Value* val, Value* ptr); diff --git a/src/ir/IRBuilder.cpp b/src/ir/IRBuilder.cpp index 1812f14..58f08de 100644 --- a/src/ir/IRBuilder.cpp +++ b/src/ir/IRBuilder.cpp @@ -18,6 +18,12 @@ bool IsPtrInt32Type(const std::shared_ptr& ty) { } // namespace +IRBuilder::IRBuilder(BasicBlock* bb) : insertBlock_(bb) {} + +void IRBuilder::SetInsertPoint(BasicBlock* bb) { insertBlock_ = bb; } + +BasicBlock* IRBuilder::GetInsertBlock() const { return insertBlock_; } + ConstantInt* IRBuilder::CreateConstInt(int v) { // 常量不需要挂在基本块里,由 Context 负责去重与生命周期。 return DefaultContext().GetConstInt(v); @@ -47,6 +53,11 @@ BinaryInst* IRBuilder::CreateBinary(Opcode op, Value* lhs, Value* rhs, return insertBlock_->Append(op, lhs->type(), lhs, rhs, name); } +BinaryInst* IRBuilder::CreateAdd(Value* lhs, Value* rhs, + const std::string& name) { + return CreateBinary(Opcode::Add, lhs, rhs, name); +} + AllocaInst* IRBuilder::CreateAllocaI32(const std::string& name) { if (!insertBlock_) { throw std::runtime_error("IRBuilder 未设置插入点"); diff --git a/src/ir/Instruction.cpp b/src/ir/Instruction.cpp index 596724a..a808daa 100644 --- a/src/ir/Instruction.cpp +++ b/src/ir/Instruction.cpp @@ -18,6 +18,17 @@ bool IsPtrInt32Type(const std::shared_ptr& ty) { } // namespace +Instruction::Instruction(Opcode op, std::shared_ptr ty, std::string name) + : Value(std::move(ty), std::move(name)), opcode_(op) {} + +Opcode Instruction::opcode() const { return opcode_; } + +bool Instruction::IsTerminator() const { return opcode_ == Opcode::Ret; } + +BasicBlock* Instruction::parent() const { return parent_; } + +void Instruction::set_parent(BasicBlock* parent) { parent_ = parent; } + BinaryInst::BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs, std::string name) : Instruction(op, std::move(ty), std::move(name)), lhs_(lhs), rhs_(rhs) { @@ -45,6 +56,10 @@ BinaryInst::BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, } } +Value* BinaryInst::lhs() const { return lhs_; } + +Value* BinaryInst::rhs() const { return rhs_; } + ReturnInst::ReturnInst(Value* val) : Instruction(Opcode::Ret, Type::Void(), ""), value_(val) { if (!value_) { @@ -53,6 +68,8 @@ ReturnInst::ReturnInst(Value* val) value_->AddUser(this); } +Value* ReturnInst::value() const { return value_; } + AllocaInst::AllocaInst(std::string name) : Instruction(Opcode::Alloca, Type::PtrInt32(), std::move(name)) {} @@ -67,6 +84,8 @@ LoadInst::LoadInst(Value* ptr, std::string name) ptr_->AddUser(this); } +Value* LoadInst::ptr() const { return ptr_; } + StoreInst::StoreInst(Value* val, Value* ptr) : Instruction(Opcode::Store, Type::Void(), ""), value_(val), ptr_(ptr) { if (!value_) { @@ -85,4 +104,8 @@ StoreInst::StoreInst(Value* val, Value* ptr) ptr_->AddUser(this); } +Value* StoreInst::value() const { return value_; } + +Value* StoreInst::ptr() const { return ptr_; } + } // namespace ir diff --git a/src/ir/Module.cpp b/src/ir/Module.cpp index 92ca9df..0a2df76 100644 --- a/src/ir/Module.cpp +++ b/src/ir/Module.cpp @@ -12,4 +12,8 @@ Function* Module::CreateFunction(const std::string& name, return functions_.back().get(); } +const std::vector>& Module::functions() const { + return functions_; +} + } // namespace ir diff --git a/src/ir/Type.cpp b/src/ir/Type.cpp index dc6501b..4712485 100644 --- a/src/ir/Type.cpp +++ b/src/ir/Type.cpp @@ -6,6 +6,10 @@ namespace ir { +Type::Type(Kind k) : kind_(k) {} + +Type::Kind Type::kind() const { return kind_; } + std::shared_ptr Type::Void() { return DefaultContext().Void(); } std::shared_ptr Type::Int32() { return DefaultContext().Int32(); } diff --git a/src/ir/Value.cpp b/src/ir/Value.cpp index e544895..9bd2fa7 100644 --- a/src/ir/Value.cpp +++ b/src/ir/Value.cpp @@ -5,6 +5,19 @@ namespace ir { +Value::Value(std::shared_ptr ty, std::string name) + : type_(std::move(ty)), name_(std::move(name)) {} + +const std::shared_ptr& Value::type() const { return type_; } + +const std::string& Value::name() const { return name_; } + +void Value::set_name(std::string n) { name_ = std::move(n); } + +void Value::AddUser(Instruction* user) { users_.push_back(user); } + +const std::vector& Value::users() const { return users_; } + ConstantInt::ConstantInt(int v) : Value(Type::Int32(), ""), value_(v) {} } // namespace ir diff --git a/src/irgen/IRGen.h b/src/irgen/IRGen.h index d3c9d28..f05d63f 100644 --- a/src/irgen/IRGen.h +++ b/src/irgen/IRGen.h @@ -9,6 +9,7 @@ #include "SysYParser.h" #include "ir/IR.h" +#include "sem/Sema.h" namespace antlr4 { namespace tree { @@ -25,7 +26,7 @@ class Value; class IRGenImpl { public: - explicit IRGenImpl(ir::Module& module); + IRGenImpl(ir::Module& module, const SemanticContext& sema); void Gen(SysYParser::CompUnitContext& cu); @@ -43,10 +44,12 @@ class IRGenImpl { ir::Value* GenPrimary(SysYParser::PrimaryContext& primary); ir::Module& module_; + const SemanticContext& sema_; ir::Function* func_; ir::IRBuilder builder_; - // 当前只维护函数级局部变量表;若后续引入嵌套块作用域,需要改成作用域栈。 - std::unordered_map locals_; + // 名称绑定由 Sema 负责;IRGen 只维护“声明 -> 存储槽位”的代码生成状态。 + std::unordered_map storage_map_; }; -std::unique_ptr GenerateIR(antlr4::tree::ParseTree* tree); +std::unique_ptr GenerateIR(SysYParser::CompUnitContext& tree, + const SemanticContext& sema); diff --git a/src/irgen/IRGenDecl.cpp b/src/irgen/IRGenDecl.cpp index 948c72f..72c8cfb 100644 --- a/src/irgen/IRGenDecl.cpp +++ b/src/irgen/IRGenDecl.cpp @@ -36,12 +36,11 @@ void IRGenImpl::GenDecl(SysYParser::DeclContext& decl) { } void IRGenImpl::GenVarDecl(SysYParser::VarDeclContext& decl) { - const std::string name = decl.Ident()->getText(); - if (locals_.find(name) != locals_.end()) { - throw std::runtime_error("[irgen] 重复定义变量: " + name); + if (storage_map_.find(&decl) != storage_map_.end()) { + throw std::runtime_error("[irgen] 声明重复生成存储槽位"); } auto* slot = builder_.CreateAllocaI32(ir::DefaultContext().NextTemp()); - locals_[name] = slot; + storage_map_[&decl] = slot; ir::Value* init = nullptr; if (decl.exp()) { diff --git a/src/irgen/IRGenDriver.cpp b/src/irgen/IRGenDriver.cpp index 014b60a..aaeb297 100644 --- a/src/irgen/IRGenDriver.cpp +++ b/src/irgen/IRGenDriver.cpp @@ -4,21 +4,12 @@ #include #include "SysYParser.h" -#include "antlr4-runtime.h" #include "ir/IR.h" -std::unique_ptr GenerateIR(antlr4::tree::ParseTree* tree) { - if (!tree) { - throw std::runtime_error("[irgen] 语法树为空"); - } - - auto* cu = dynamic_cast(tree); - if (!cu) { - throw std::runtime_error("[irgen] 语法树根节点不是 compUnit"); - } - +std::unique_ptr GenerateIR(SysYParser::CompUnitContext& tree, + const SemanticContext& sema) { auto module = std::make_unique(); - IRGenImpl gen(*module); - gen.Gen(*cu); + IRGenImpl gen(*module, sema); + gen.Gen(tree); return module; } diff --git a/src/irgen/IRGenExp.cpp b/src/irgen/IRGenExp.cpp index e62ca91..a7c7292 100644 --- a/src/irgen/IRGenExp.cpp +++ b/src/irgen/IRGenExp.cpp @@ -33,10 +33,15 @@ ir::Value* IRGenImpl::GenPrimary(SysYParser::PrimaryContext& primary) { return ir::DefaultContext().GetConstInt(std::stoi(primary.Number()->getText())); } if (primary.Ident()) { - const std::string name = primary.Ident()->getText(); - auto it = locals_.find(name); - if (it == locals_.end()) { - throw std::runtime_error("[irgen] 变量未找到: " + name); + auto* decl = sema_.ResolveVarUse(&primary); + if (!decl) { + throw std::runtime_error("[irgen] 变量使用缺少语义绑定: " + + primary.Ident()->getText()); + } + auto it = storage_map_.find(decl); + if (it == storage_map_.end()) { + throw std::runtime_error("[irgen] 变量声明缺少存储槽位: " + + primary.Ident()->getText()); } return builder_.CreateLoad(it->second, ir::DefaultContext().NextTemp()); } diff --git a/src/irgen/IRGenFunc.cpp b/src/irgen/IRGenFunc.cpp index 4c0500a..8b2edc8 100644 --- a/src/irgen/IRGenFunc.cpp +++ b/src/irgen/IRGenFunc.cpp @@ -19,8 +19,8 @@ void VerifyFunctionStructure(const ir::Function& func) { } // namespace -IRGenImpl::IRGenImpl(ir::Module& module) - : module_(module), func_(nullptr), builder_(nullptr) {} +IRGenImpl::IRGenImpl(ir::Module& module, const SemanticContext& sema) + : module_(module), sema_(sema), func_(nullptr), builder_(nullptr) {} void IRGenImpl::Gen(SysYParser::CompUnitContext& cu) { if (!cu.funcDef()) { @@ -39,7 +39,7 @@ void IRGenImpl::GenFuncDef(SysYParser::FuncDefContext& func) { func_ = module_.CreateFunction(func.Ident()->getText(), ir::Type::Int32()); builder_.SetInsertPoint(func_->entry()); - locals_.clear(); + storage_map_.clear(); GenBlock(*func.block()); // 语义正确性主要由 sema 保证,这里只兜底检查 IR 结构是否合法。 diff --git a/src/main.cpp b/src/main.cpp index da355f6..c798420 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -30,9 +30,9 @@ int main(int argc, char** argv) { if (!comp_unit) { throw std::runtime_error("[main] 语法树根节点不是 compUnit"); } - RunSema(*comp_unit); + auto sema = RunSema(*comp_unit); - auto module = GenerateIR(antlr.tree); + auto module = GenerateIR(*comp_unit, sema); if (opts.emit_ir) { ir::IRPrinter printer; if (need_blank_line) { diff --git a/src/sem/Sema.cpp b/src/sem/Sema.cpp index d64d9f0..e586036 100644 --- a/src/sem/Sema.cpp +++ b/src/sem/Sema.cpp @@ -7,43 +7,47 @@ namespace { -void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table); +void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table, + SemanticContext& sema); void CheckPrimary(SysYParser::PrimaryContext& primary, - const SymbolTable& table) { + const SymbolTable& table, SemanticContext& sema) { if (primary.Number()) { return; } if (primary.Ident()) { const std::string name = primary.Ident()->getText(); - if (!table.Contains(name)) { + auto* decl = table.Lookup(name); + if (!decl) { throw std::runtime_error("[sema] 使用了未定义的变量: " + name); } + sema.BindVarUse(&primary, decl); return; } if (primary.exp()) { - CheckExpr(*primary.exp(), table); + CheckExpr(*primary.exp(), table, sema); return; } throw std::runtime_error("[sema] 暂不支持的 primary 形式"); } -void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table) { +void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table, + SemanticContext& sema) { if (!exp.addExp()) { throw std::runtime_error("[sema] 非法表达式"); } const auto& terms = exp.addExp()->primary(); for (auto* term : terms) { - CheckPrimary(*term, table); + CheckPrimary(*term, table, sema); } } } // namespace -void RunSema(SysYParser::CompUnitContext& comp_unit) { +SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit) { auto* func = comp_unit.funcDef(); if (!func || !func->block()) { throw std::runtime_error("[sema] 缺少 main 函数定义"); @@ -53,6 +57,7 @@ void RunSema(SysYParser::CompUnitContext& comp_unit) { } SymbolTable table; + SemanticContext sema; bool seen_return = false; const auto& items = func->block()->blockItem(); @@ -74,14 +79,14 @@ void RunSema(SysYParser::CompUnitContext& comp_unit) { throw std::runtime_error("[sema] 重复定义变量: " + name); } if (decl->exp()) { - CheckExpr(*decl->exp(), table); + CheckExpr(*decl->exp(), table, sema); } - table.Add(name); + table.Add(name, decl); continue; } if (auto* stmt = item->stmt(); stmt && stmt->returnStmt()) { auto* ret = stmt->returnStmt(); - CheckExpr(*ret->exp(), table); + CheckExpr(*ret->exp(), table, sema); seen_return = true; if (i + 1 != items.size()) { throw std::runtime_error("[sema] return 必须是 main 函数中的最后一条语句"); @@ -94,4 +99,6 @@ void RunSema(SysYParser::CompUnitContext& comp_unit) { if (!seen_return) { throw std::runtime_error("[sema] main 函数必须包含 return 语句"); } + + return sema; } diff --git a/src/sem/Sema.h b/src/sem/Sema.h index 8f9d9a7..3dcc830 100644 --- a/src/sem/Sema.h +++ b/src/sem/Sema.h @@ -1,9 +1,30 @@ -// 基于语法树的极简语义检查。 +// 基于语法树的极简语义检查与名称绑定。 #pragma once +#include + #include "SysYParser.h" +class SemanticContext { + public: + void BindVarUse(SysYParser::PrimaryContext* use, + SysYParser::VarDeclContext* decl) { + var_uses_[use] = decl; + } + + SysYParser::VarDeclContext* ResolveVarUse( + const SysYParser::PrimaryContext* use) const { + auto it = var_uses_.find(use); + return it == var_uses_.end() ? nullptr : it->second; + } + + private: + std::unordered_map + var_uses_; +}; + // 目前仅检查: // - 变量先声明后使用 // - 局部变量不允许重复定义 -void RunSema(SysYParser::CompUnitContext& comp_unit); +SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit); diff --git a/src/sem/SymbolTable.h b/src/sem/SymbolTable.h index eaf565e..06d59f4 100644 --- a/src/sem/SymbolTable.h +++ b/src/sem/SymbolTable.h @@ -1,16 +1,24 @@ -// 极简符号表:记录局部变量是否定义。 +// 极简符号表:记录局部变量定义点。 #pragma once #include #include +#include "SysYParser.h" + class SymbolTable { public: - void Add(const std::string& name) { table_[name] = true; } + void Add(const std::string& name, SysYParser::VarDeclContext* decl) { + table_[name] = decl; + } bool Contains(const std::string& name) const { return table_.find(name) != table_.end(); } + SysYParser::VarDeclContext* Lookup(const std::string& name) const { + auto it = table_.find(name); + return it == table_.end() ? nullptr : it->second; + } private: - std::unordered_map table_; + std::unordered_map table_; };