From c2759c27cd087c9db49f2a05cfc383fcf49021b4 Mon Sep 17 00:00:00 2001 From: brkstar <1576878717@qq.com> Date: Fri, 20 Mar 2026 23:35:59 +0800 Subject: [PATCH] =?UTF-8?q?feat(frontend):=20=E5=AE=8C=E6=88=90=20Lab1=20S?= =?UTF-8?q?ysY=20=E6=96=87=E6=B3=95=E6=89=A9=E5=B1=95=E4=B8=8E=E8=AF=AD?= =?UTF-8?q?=E6=B3=95=E6=A0=91=E8=BE=93=E5=87=BA=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/irgen/IRGen.h | 37 ++--- include/sem/Sema.h | 6 +- src/antlr4/SysY.g4 | 308 +++++++++++++++++++++++++++++++------- src/irgen/IRGenDecl.cpp | 118 +++++---------- src/irgen/IRGenDriver.cpp | 2 +- src/irgen/IRGenExp.cpp | 147 +++++++++++------- src/irgen/IRGenFunc.cpp | 65 +++----- src/irgen/IRGenStmt.cpp | 30 +--- src/main.cpp | 3 + src/sem/Sema.cpp | 298 ++++++++++++++++++------------------ 10 files changed, 581 insertions(+), 433 deletions(-) diff --git a/include/irgen/IRGen.h b/include/irgen/IRGen.h index 231ba90..a76a3cc 100644 --- a/include/irgen/IRGen.h +++ b/include/irgen/IRGen.h @@ -3,12 +3,10 @@ #pragma once -#include #include #include #include -#include "SysYBaseVisitor.h" #include "SysYParser.h" #include "ir/IR.h" #include "sem/Sema.h" @@ -20,31 +18,26 @@ class IRBuilder; class Value; } -class IRGenImpl final : public SysYBaseVisitor { +class IRGenImpl { public: IRGenImpl(ir::Module& module, const SemanticContext& sema); - std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override; - std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override; - std::any visitBlockStmt(SysYParser::BlockStmtContext* ctx) override; - std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override; - std::any visitDecl(SysYParser::DeclContext* ctx) override; - std::any visitStmt(SysYParser::StmtContext* ctx) override; - std::any visitVarDef(SysYParser::VarDefContext* ctx) override; - std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override; - std::any visitParenExp(SysYParser::ParenExpContext* ctx) override; - std::any visitNumberExp(SysYParser::NumberExpContext* ctx) override; - std::any visitVarExp(SysYParser::VarExpContext* ctx) override; - std::any visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) override; + void Gen(SysYParser::CompUnitContext& cu); private: - enum class BlockFlow { - Continue, - Terminated, - }; - - BlockFlow VisitBlockItemResult(SysYParser::BlockItemContext& item); - ir::Value* EvalExpr(SysYParser::ExpContext& expr); + void GenFuncDef(SysYParser::FuncDefContext& func); + void GenBlock(SysYParser::BlockContext& block); + bool GenBlockItem(SysYParser::BlockItemContext& item); + void GenDecl(SysYParser::DeclContext& decl); + bool GenStmt(SysYParser::StmtContext& stmt); + void GenVarDecl(SysYParser::VarDeclContext& decl); + void GenReturnStmt(SysYParser::ReturnStmtContext& ret); + + ir::Value* GenExpr(SysYParser::ExpContext& expr); + ir::Value* GenAddExpr(SysYParser::AddExpContext& add); + ir::Value* GenMulExpr(SysYParser::MulExpContext& mul); + ir::Value* GenUnaryExpr(SysYParser::UnaryExpContext& unary); + ir::Value* GenPrimary(SysYParser::PrimaryContext& primary); ir::Module& module_; const SemanticContext& sema_; diff --git a/include/sem/Sema.h b/include/sem/Sema.h index 9ac057b..2f0499f 100644 --- a/include/sem/Sema.h +++ b/include/sem/Sema.h @@ -7,19 +7,19 @@ class SemanticContext { public: - void BindVarUse(SysYParser::VarContext* use, + void BindVarUse(SysYParser::LValContext* use, SysYParser::VarDefContext* decl) { var_uses_[use] = decl; } SysYParser::VarDefContext* ResolveVarUse( - const SysYParser::VarContext* use) const { + const SysYParser::LValContext* use) const { auto it = var_uses_.find(use); return it == var_uses_.end() ? nullptr : it->second; } private: - std::unordered_map var_uses_; }; diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4 index 263aeef..c904753 100644 --- a/src/antlr4/SysY.g4 +++ b/src/antlr4/SysY.g4 @@ -1,68 +1,65 @@ -// SysY 子集语法:支持形如 -// int main() { int a = 1; int b = 2; return a + b; } -// 的最小返回表达式编译。 - -// 后续需要自行添加 - grammar SysY; -/*===-------------------------------------------===*/ -/* Lexer rules */ -/*===-------------------------------------------===*/ - -INT: 'int'; -RETURN: 'return'; - -ASSIGN: '='; -ADD: '+'; - -LPAREN: '('; -RPAREN: ')'; -LBRACE: '{'; -RBRACE: '}'; -SEMICOLON: ';'; - -ID: [a-zA-Z_][a-zA-Z_0-9]*; -ILITERAL: [0-9]+; +compUnit + : (decl | funcDef)+ EOF + ; -WS: [ \t\r\n] -> skip; -LINECOMMENT: '//' ~[\r\n]* -> skip; -BLOCKCOMMENT: '/*' .*? '*/' -> skip; +decl + : constDecl + | varDecl + ; -/*===-------------------------------------------===*/ -/* Syntax rules */ -/*===-------------------------------------------===*/ +constDecl + : Const bType constDef (Comma constDef)* Semi + ; -compUnit - : funcDef EOF +varDecl + : bType varDef (Comma varDef)* Semi ; -decl - : btype varDef SEMICOLON +bType + : Int + | Float ; -btype - : INT +constDef + : Ident (L_BRACK constExp R_BRACK)* Assign constInitVal ; varDef - : lValue (ASSIGN initValue)? + : Ident (L_BRACK constExp R_BRACK)* (Assign initVal)? + ; + +constInitVal + : constExp + | L_BRACE (constInitVal (Comma constInitVal)*)? R_BRACE ; -initValue +initVal : exp + | L_BRACE (initVal (Comma initVal)*)? R_BRACE ; funcDef - : funcType ID LPAREN RPAREN blockStmt + : funcType Ident L_PAREN funcFParams? R_PAREN block ; funcType - : INT + : Void + | Int + | Float + ; + +funcFParams + : funcFParam (Comma funcFParam)* + ; + +funcFParam + : bType Ident (L_BRACK R_BRACK (L_BRACK exp R_BRACK)*)? ; -blockStmt - : LBRACE blockItem* RBRACE +block + : L_BRACE blockItem* R_BRACE ; blockItem @@ -71,28 +68,231 @@ blockItem ; stmt - : returnStmt + : assignStmt + | expStmt + | block + | ifStmt + | whileStmt + | breakStmt + | continueStmt + | returnStmt + ; + +assignStmt + : lVal Assign exp Semi + ; + +expStmt + : exp? Semi + ; + +ifStmt + : If L_PAREN cond R_PAREN stmt (Else stmt)? + ; + +whileStmt + : While L_PAREN cond R_PAREN stmt + ; + +breakStmt + : Break Semi + ; + +continueStmt + : Continue Semi ; returnStmt - : RETURN exp SEMICOLON + : Return exp? Semi ; exp - : LPAREN exp RPAREN # parenExp - | var # varExp - | number # numberExp - | exp ADD exp # additiveExp + : addExp + ; + +cond + : lOrExp + ; + +lVal + : Ident (L_BRACK exp R_BRACK)* + ; + +primary + : Number + | lVal + | L_PAREN exp R_PAREN + ; + +unaryExp + : primary + | Ident L_PAREN funcRParams? R_PAREN + | unaryOp unaryExp + ; + +unaryOp + : Add + | Sub + | Not + ; + +funcRParams + : exp (Comma exp)* + ; + +mulExp + : unaryExp ((Mul | Div | Mod) unaryExp)* + ; + +addExp + : mulExp ((Add | Sub) mulExp)* + ; + +relExp + : addExp ((Lt | Gt | Le | Ge) addExp)* + ; + +eqExp + : relExp ((Eq | Ne) relExp)* + ; + +lAndExp + : eqExp (And eqExp)* + ; + +lOrExp + : lAndExp (Or lAndExp)* + ; + +constExp + : addExp + ; + +Const : 'const'; +Int : 'int'; +Float : 'float'; +Void : 'void'; +If : 'if'; +Else : 'else'; +While : 'while'; +Break : 'break'; +Continue : 'continue'; +Return : 'return'; + +Add : '+'; +Sub : '-'; +Mul : '*'; +Div : '/'; +Mod : '%'; +Assign : '='; +Eq : '=='; +Ne : '!='; +Lt : '<'; +Gt : '>'; +Le : '<='; +Ge : '>='; +Not : '!'; +And : '&&'; +Or : '||'; +Comma : ','; +Semi : ';'; +L_PAREN : '('; +R_PAREN : ')'; +L_BRACE : '{'; +R_BRACE : '}'; +L_BRACK : '['; +R_BRACK : ']'; + +Ident + : IdentifierNondigit IdentifierChar* + ; + +Number + : HexFloatConst + | DecFloatConst + | HexIntConst + | OctIntConst + | DecIntConst + ; + +WS + : [ \t\r\n]+ -> skip + ; + +COMMENT + : '//' ~[\r\n]* -> skip + ; + +BLOCK_COMMENT + : '/*' .*? '*/' -> skip + ; + +fragment IdentifierNondigit + : [a-zA-Z_] + ; + +fragment IdentifierChar + : IdentifierNondigit + | [0-9] + ; + +fragment DecIntConst + : '0' + | [1-9] [0-9]* + ; + +fragment OctIntConst + : '0' [0-7]+ + ; + +fragment HexIntConst + : HexPrefix HexDigit+ + ; + +fragment DecFloatConst + : FractionalConst ExponentPart? + | DigitSequence ExponentPart + ; + +fragment HexFloatConst + : HexPrefix HexFractionalConst BinaryExponentPart + | HexPrefix HexDigit+ BinaryExponentPart + ; + +fragment FractionalConst + : DigitSequence? Dot DigitSequence + | DigitSequence Dot + ; + +fragment HexFractionalConst + : HexDigit* Dot HexDigit+ + | HexDigit+ Dot + ; + +fragment ExponentPart + : [eE] Sign? DigitSequence + ; + +fragment BinaryExponentPart + : [pP] Sign? DigitSequence + ; + +fragment Sign + : [+-] + ; + +fragment HexPrefix + : '0' [xX] ; -var - : ID +fragment DigitSequence + : [0-9]+ ; -lValue - : ID +fragment HexDigit + : [0-9a-fA-F] ; -number - : ILITERAL +fragment Dot + : '.' ; diff --git a/src/irgen/IRGenDecl.cpp b/src/irgen/IRGenDecl.cpp index 0eb62ae..9b7c2d9 100644 --- a/src/irgen/IRGenDecl.cpp +++ b/src/irgen/IRGenDecl.cpp @@ -6,102 +6,64 @@ #include "ir/IR.h" #include "utils/Log.h" -namespace { - -std::string GetLValueName(SysYParser::LValueContext& lvalue) { - if (!lvalue.ID()) { - throw std::runtime_error(FormatError("irgen", "非法左值")); - } - return lvalue.ID()->getText(); -} - -} // namespace - -std::any IRGenImpl::visitBlockStmt(SysYParser::BlockStmtContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少语句块")); - } - for (auto* item : ctx->blockItem()) { +void IRGenImpl::GenBlock(SysYParser::BlockContext& block) { + for (auto* item : block.blockItem()) { if (item) { - if (VisitBlockItemResult(*item) == BlockFlow::Terminated) { + if (GenBlockItem(*item)) { // 当前语法要求 return 为块内最后一条语句;命中后可停止生成。 break; } } } - return {}; -} - -IRGenImpl::BlockFlow IRGenImpl::VisitBlockItemResult( - SysYParser::BlockItemContext& item) { - return std::any_cast(item.accept(this)); } -std::any IRGenImpl::visitBlockItem(SysYParser::BlockItemContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少块内项")); +bool IRGenImpl::GenBlockItem(SysYParser::BlockItemContext& item) { + if (item.decl()) { + GenDecl(*item.decl()); + return false; } - if (ctx->decl()) { - ctx->decl()->accept(this); - return BlockFlow::Continue; - } - if (ctx->stmt()) { - return ctx->stmt()->accept(this); + if (item.stmt()) { + return GenStmt(*item.stmt()); } throw std::runtime_error(FormatError("irgen", "暂不支持的语句或声明")); } -// 变量声明的 IR 生成目前也是最小实现: -// - 先检查声明的基础类型,当前仅支持局部 int; -// - 再把 Decl 中的变量定义交给 visitVarDef 继续处理。 -// -// 和更完整的版本相比,这里还没有: -// - 一个 Decl 中多个变量定义的顺序处理; -// - const、数组、全局变量等不同声明形态; -// - 更丰富的类型系统。 -std::any IRGenImpl::visitDecl(SysYParser::DeclContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少变量声明")); - } - if (!ctx->btype() || !ctx->btype()->INT()) { - throw std::runtime_error(FormatError("irgen", "当前仅支持局部 int 变量声明")); +void IRGenImpl::GenDecl(SysYParser::DeclContext& decl) { + if (decl.varDecl()) { + GenVarDecl(*decl.varDecl()); + return; } - auto* var_def = ctx->varDef(); - if (!var_def) { - throw std::runtime_error(FormatError("irgen", "非法变量声明")); - } - var_def->accept(this); - return {}; + throw std::runtime_error(FormatError("irgen", "暂不支持的声明类型")); } - -// 当前仍是教学用的最小版本,因此这里只支持: -// - 局部 int 变量; -// - 标量初始化; -// - 一个 VarDef 对应一个槽位。 -std::any IRGenImpl::visitVarDef(SysYParser::VarDefContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少变量定义")); - } - if (!ctx->lValue()) { - throw std::runtime_error(FormatError("irgen", "变量声明缺少名称")); +void IRGenImpl::GenVarDecl(SysYParser::VarDeclContext& decl) { + if (!decl.bType() || !decl.bType()->Int()) { + throw std::runtime_error(FormatError("irgen", "当前 IR 仅支持 int 标量局部变量")); } - GetLValueName(*ctx->lValue()); - if (storage_map_.find(ctx) != storage_map_.end()) { - throw std::runtime_error(FormatError("irgen", "声明重复生成存储槽位")); - } - auto* slot = builder_.CreateAllocaI32(module_.GetContext().NextTemp()); - storage_map_[ctx] = slot; - ir::Value* init = nullptr; - if (auto* init_value = ctx->initValue()) { - if (!init_value->exp()) { - throw std::runtime_error(FormatError("irgen", "当前不支持聚合初始化")); + for (auto* def : decl.varDef()) { + if (!def) { + continue; + } + if (storage_map_.find(def) != storage_map_.end()) { + throw std::runtime_error(FormatError("irgen", "声明重复生成存储槽位")); + } + if (!def->constExp().empty()) { + throw std::runtime_error( + FormatError("irgen", "当前 IR 仅支持 int 标量局部变量")); + } + + auto* slot = builder_.CreateAllocaI32(module_.GetContext().NextTemp()); + storage_map_[def] = slot; + + ir::Value* init = builder_.CreateConstInt(0); + if (auto* init_val = def->initVal()) { + if (!init_val->exp()) { + throw std::runtime_error( + FormatError("irgen", "当前 IR 仅支持表达式初始化")); + } + init = GenExpr(*init_val->exp()); } - init = EvalExpr(*init_value->exp()); - } else { - init = builder_.CreateConstInt(0); + builder_.CreateStore(init, slot); } - builder_.CreateStore(init, slot); - return {}; } diff --git a/src/irgen/IRGenDriver.cpp b/src/irgen/IRGenDriver.cpp index ff94412..6f2a775 100644 --- a/src/irgen/IRGenDriver.cpp +++ b/src/irgen/IRGenDriver.cpp @@ -10,6 +10,6 @@ std::unique_ptr GenerateIR(SysYParser::CompUnitContext& tree, const SemanticContext& sema) { auto module = std::make_unique(); IRGenImpl gen(*module, sema); - tree.accept(&gen); + gen.Gen(tree); return module; } diff --git a/src/irgen/IRGenExp.cpp b/src/irgen/IRGenExp.cpp index cf4797c..2c57209 100644 --- a/src/irgen/IRGenExp.cpp +++ b/src/irgen/IRGenExp.cpp @@ -6,75 +6,112 @@ #include "ir/IR.h" #include "utils/Log.h" -// 表达式生成当前也只实现了很小的一个子集。 -// 目前支持: -// - 整数字面量 -// - 普通局部变量读取 -// - 括号表达式 -// - 二元加法 -// -// 还未支持: -// - 减乘除与一元运算 -// - 赋值表达式 -// - 函数调用 -// - 数组、指针、下标访问 -// - 条件与比较表达式 -// - ... -ir::Value* IRGenImpl::EvalExpr(SysYParser::ExpContext& expr) { - return std::any_cast(expr.accept(this)); +ir::Value* IRGenImpl::GenExpr(SysYParser::ExpContext& expr) { + if (!expr.addExp()) { + throw std::runtime_error(FormatError("irgen", "非法表达式")); + } + return GenAddExpr(*expr.addExp()); } +ir::Value* IRGenImpl::GenAddExpr(SysYParser::AddExpContext& add) { + const auto& terms = add.mulExp(); + if (terms.empty()) { + throw std::runtime_error(FormatError("irgen", "空加法表达式")); + } -std::any IRGenImpl::visitParenExp(SysYParser::ParenExpContext* ctx) { - if (!ctx || !ctx->exp()) { - throw std::runtime_error(FormatError("irgen", "非法括号表达式")); + ir::Value* acc = GenMulExpr(*terms[0]); + for (size_t i = 1; i < terms.size(); ++i) { + ir::Value* rhs = GenMulExpr(*terms[i]); + std::string name = module_.GetContext().NextTemp(); + auto* op = add.children[2 * i - 1]; + if (!op) { + throw std::runtime_error(FormatError("irgen", "加法表达式缺少运算符")); + } + const std::string text = op->getText(); + if (text == "+") { + acc = builder_.CreateBinary(ir::Opcode::Add, acc, rhs, name); + } else if (text == "-") { + acc = builder_.CreateBinary(ir::Opcode::Sub, acc, rhs, name); + } else { + throw std::runtime_error(FormatError("irgen", "暂不支持的加法运算符: " + text)); + } } - return EvalExpr(*ctx->exp()); + return acc; } +ir::Value* IRGenImpl::GenMulExpr(SysYParser::MulExpContext& mul) { + const auto& terms = mul.unaryExp(); + if (terms.empty()) { + throw std::runtime_error(FormatError("irgen", "空乘法表达式")); + } -std::any IRGenImpl::visitNumberExp(SysYParser::NumberExpContext* ctx) { - if (!ctx || !ctx->number() || !ctx->number()->ILITERAL()) { - throw std::runtime_error(FormatError("irgen", "当前仅支持整数字面量")); + ir::Value* acc = GenUnaryExpr(*terms[0]); + for (size_t i = 1; i < terms.size(); ++i) { + ir::Value* rhs = GenUnaryExpr(*terms[i]); + std::string name = module_.GetContext().NextTemp(); + auto* op = mul.children[2 * i - 1]; + if (!op) { + throw std::runtime_error(FormatError("irgen", "乘法表达式缺少运算符")); + } + const std::string text = op->getText(); + if (text == "*") { + acc = builder_.CreateBinary(ir::Opcode::Mul, acc, rhs, name); + continue; + } + throw std::runtime_error( + FormatError("irgen", "当前 IR 暂不支持的乘法类运算符: " + text)); } - return static_cast( - builder_.CreateConstInt(std::stoi(ctx->number()->getText()))); + return acc; } -// 变量使用的处理流程: -// 1. 先通过语义分析结果把变量使用绑定回声明; -// 2. 再通过 storage_map_ 找到该声明对应的栈槽位; -// 3. 最后生成 load,把内存中的值读出来。 -// -// 因此当前 IRGen 自己不再做名字查找,而是直接消费 Sema 的绑定结果。 -std::any IRGenImpl::visitVarExp(SysYParser::VarExpContext* ctx) { - if (!ctx || !ctx->var() || !ctx->var()->ID()) { - throw std::runtime_error(FormatError("irgen", "当前仅支持普通整型变量")); - } - auto* decl = sema_.ResolveVarUse(ctx->var()); - if (!decl) { - throw std::runtime_error( - FormatError("irgen", - "变量使用缺少语义绑定: " + ctx->var()->ID()->getText())); +ir::Value* IRGenImpl::GenUnaryExpr(SysYParser::UnaryExpContext& unary) { + if (unary.primary()) { + return GenPrimary(*unary.primary()); } - auto it = storage_map_.find(decl); - if (it == storage_map_.end()) { + + if (unary.unaryExp()) { + if (!unary.unaryOp()) { + throw std::runtime_error(FormatError("irgen", "一元表达式缺少运算符")); + } + const std::string op = unary.unaryOp()->getText(); + if (op == "+") { + return GenUnaryExpr(*unary.unaryExp()); + } + if (op == "-") { + auto* rhs = GenUnaryExpr(*unary.unaryExp()); + return builder_.CreateBinary(ir::Opcode::Sub, builder_.CreateConstInt(0), + rhs, module_.GetContext().NextTemp()); + } throw std::runtime_error( - FormatError("irgen", - "变量声明缺少存储槽位: " + ctx->var()->ID()->getText())); + FormatError("irgen", "当前 IR 暂不支持的一元运算符: " + op)); } - return static_cast( - builder_.CreateLoad(it->second, module_.GetContext().NextTemp())); -} + throw std::runtime_error(FormatError("irgen", "当前 IR 暂不支持函数调用")); +} -std::any IRGenImpl::visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) { - if (!ctx || !ctx->exp(0) || !ctx->exp(1)) { - throw std::runtime_error(FormatError("irgen", "非法加法表达式")); +ir::Value* IRGenImpl::GenPrimary(SysYParser::PrimaryContext& primary) { + if (primary.Number()) { + return builder_.CreateConstInt(std::stoi(primary.Number()->getText(), nullptr, 0)); + } + if (primary.lVal()) { + if (!primary.lVal()->exp().empty()) { + throw std::runtime_error( + FormatError("irgen", "当前 IR 暂不支持数组取值表达式")); + } + auto* decl = sema_.ResolveVarUse(primary.lVal()); + if (!decl || !primary.lVal()->Ident()) { + throw std::runtime_error(FormatError("irgen", "变量使用缺少语义绑定")); + } + auto it = storage_map_.find(decl); + if (it == storage_map_.end()) { + throw std::runtime_error( + FormatError("irgen", + "变量声明缺少存储槽位: " + primary.lVal()->Ident()->getText())); + } + return builder_.CreateLoad(it->second, module_.GetContext().NextTemp()); + } + if (primary.exp()) { + return GenExpr(*primary.exp()); } - ir::Value* lhs = EvalExpr(*ctx->exp(0)); - ir::Value* rhs = EvalExpr(*ctx->exp(1)); - return static_cast( - builder_.CreateBinary(ir::Opcode::Add, lhs, rhs, - module_.GetContext().NextTemp())); + throw std::runtime_error(FormatError("irgen", "暂不支持的表达式形式")); } diff --git a/src/irgen/IRGenFunc.cpp b/src/irgen/IRGenFunc.cpp index 4912d03..4571c14 100644 --- a/src/irgen/IRGenFunc.cpp +++ b/src/irgen/IRGenFunc.cpp @@ -19,6 +19,15 @@ void VerifyFunctionStructure(const ir::Function& func) { } } +SysYParser::FuncDefContext* FindMainFunc(SysYParser::CompUnitContext& cu) { + for (auto* func : cu.funcDef()) { + if (func && func->Ident() && func->Ident()->getText() == "main") { + return func; + } + } + return nullptr; +} + } // namespace IRGenImpl::IRGenImpl(ir::Module& module, const SemanticContext& sema) @@ -27,61 +36,31 @@ IRGenImpl::IRGenImpl(ir::Module& module, const SemanticContext& sema) func_(nullptr), builder_(module.GetContext(), nullptr) {} -// 编译单元的 IR 生成当前只实现了最小功能: -// - Module 已在 GenerateIR 中创建,这里只负责继续生成其中的内容; -// - 当前会读取编译单元中的函数定义,并交给 visitFuncDef 生成函数 IR; -// -// 当前还没有实现: -// - 多个函数定义的遍历与生成; -// - 全局变量、全局常量的 IR 生成。 -std::any IRGenImpl::visitCompUnit(SysYParser::CompUnitContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少编译单元")); +void IRGenImpl::Gen(SysYParser::CompUnitContext& cu) { + auto* main_func = FindMainFunc(cu); + if (!main_func) { + throw std::runtime_error(FormatError("irgen", "缺少 main 定义")); } - auto* func = ctx->funcDef(); - if (!func) { - throw std::runtime_error(FormatError("irgen", "缺少函数定义")); - } - func->accept(this); - return {}; + GenFuncDef(*main_func); } -// 函数 IR 生成当前实现了: -// 1. 获取函数名; -// 2. 检查函数返回类型; -// 3. 在 Module 中创建 Function; -// 4. 将 builder 插入点设置到入口基本块; -// 5. 继续生成函数体。 -// -// 当前还没有实现: -// - 通用函数返回类型处理; -// - 形参列表遍历与参数类型收集; -// - FunctionType 这样的函数类型对象; -// - Argument/形式参数 IR 对象; -// - 入口块中的参数初始化逻辑。 -// ... - -// 因此这里目前只支持最小的“无参 int 函数”生成。 -std::any IRGenImpl::visitFuncDef(SysYParser::FuncDefContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少函数定义")); - } - if (!ctx->blockStmt()) { +void IRGenImpl::GenFuncDef(SysYParser::FuncDefContext& func) { + if (!func.block()) { throw std::runtime_error(FormatError("irgen", "函数体为空")); } - if (!ctx->ID()) { + if (!func.Ident()) { throw std::runtime_error(FormatError("irgen", "缺少函数名")); } - if (!ctx->funcType() || !ctx->funcType()->INT()) { - throw std::runtime_error(FormatError("irgen", "当前仅支持无参 int 函数")); + if (!func.funcType() || !func.funcType()->Int()) { + throw std::runtime_error( + FormatError("irgen", "当前 IR 仅支持返回 int 的 main 函数")); } - func_ = module_.CreateFunction(ctx->ID()->getText(), ir::Type::GetInt32Type()); + func_ = module_.CreateFunction(func.Ident()->getText(), ir::Type::GetInt32Type()); builder_.SetInsertPoint(func_->GetEntry()); storage_map_.clear(); - ctx->blockStmt()->accept(this); + GenBlock(*func.block()); // 语义正确性主要由 sema 保证,这里只兜底检查 IR 结构是否合法。 VerifyFunctionStructure(*func_); - return {}; } diff --git a/src/irgen/IRGenStmt.cpp b/src/irgen/IRGenStmt.cpp index 751550c..67ce213 100644 --- a/src/irgen/IRGenStmt.cpp +++ b/src/irgen/IRGenStmt.cpp @@ -6,34 +6,18 @@ #include "ir/IR.h" #include "utils/Log.h" -// 语句生成当前只实现了最小子集。 -// 目前支持: -// - return ; -// -// 还未支持: -// - 赋值语句 -// - if / while 等控制流 -// - 空语句、块语句嵌套分发之外的更多语句形态 - -std::any IRGenImpl::visitStmt(SysYParser::StmtContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少语句")); - } - if (ctx->returnStmt()) { - return ctx->returnStmt()->accept(this); +bool IRGenImpl::GenStmt(SysYParser::StmtContext& stmt) { + if (stmt.returnStmt()) { + GenReturnStmt(*stmt.returnStmt()); + return true; } throw std::runtime_error(FormatError("irgen", "暂不支持的语句类型")); } - -std::any IRGenImpl::visitReturnStmt(SysYParser::ReturnStmtContext* ctx) { - if (!ctx) { - throw std::runtime_error(FormatError("irgen", "缺少 return 语句")); - } - if (!ctx->exp()) { +void IRGenImpl::GenReturnStmt(SysYParser::ReturnStmtContext& ret) { + if (!ret.exp()) { throw std::runtime_error(FormatError("irgen", "return 缺少表达式")); } - ir::Value* v = EvalExpr(*ctx->exp()); + ir::Value* v = GenExpr(*ret.exp()); builder_.CreateRet(v); - return BlockFlow::Terminated; } diff --git a/src/main.cpp b/src/main.cpp index 88ed747..fc87747 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -29,6 +29,9 @@ int main(int argc, char** argv) { } #if !COMPILER_PARSE_ONLY + if (!opts.emit_ir && !opts.emit_asm) { + return 0; + } auto* comp_unit = dynamic_cast(antlr.tree); if (!comp_unit) { throw std::runtime_error(FormatError("main", "语法树根节点不是 compUnit")); diff --git a/src/sem/Sema.cpp b/src/sem/Sema.cpp index 745374c..e4b4015 100644 --- a/src/sem/Sema.cpp +++ b/src/sem/Sema.cpp @@ -1,200 +1,190 @@ #include "sem/Sema.h" -#include #include #include -#include "SysYBaseVisitor.h" #include "sem/SymbolTable.h" #include "utils/Log.h" namespace { -std::string GetLValueName(SysYParser::LValueContext& lvalue) { - if (!lvalue.ID()) { - throw std::runtime_error(FormatError("sema", "非法左值")); - } - return lvalue.ID()->getText(); -} - -class SemaVisitor final : public SysYBaseVisitor { - public: - std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override { - if (!ctx) { - throw std::runtime_error(FormatError("sema", "缺少编译单元")); +SysYParser::FuncDefContext* FindMainFunc(SysYParser::CompUnitContext& comp_unit) { + SysYParser::FuncDefContext* main_func = nullptr; + for (auto* func : comp_unit.funcDef()) { + if (!func || !func->Ident()) { + continue; } - auto* func = ctx->funcDef(); - if (!func || !func->blockStmt()) { - throw std::runtime_error(FormatError("sema", "缺少 main 函数定义")); + if (func->Ident()->getText() != "main") { + continue; } - if (!func->ID() || func->ID()->getText() != "main") { - throw std::runtime_error(FormatError("sema", "缺少 main 函数定义")); - } - func->accept(this); - if (!seen_return_) { - throw std::runtime_error( - FormatError("sema", "main 函数必须包含 return 语句")); + if (main_func) { + throw std::runtime_error(FormatError("sema", "main 函数定义重复")); } - return {}; + main_func = func; } + return main_func; +} - std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override { - if (!ctx || !ctx->blockStmt()) { - throw std::runtime_error(FormatError("sema", "缺少 main 函数定义")); - } - if (!ctx->funcType() || !ctx->funcType()->INT()) { - throw std::runtime_error(FormatError("sema", "当前仅支持 int main")); - } - const auto& items = ctx->blockStmt()->blockItem(); - if (items.empty()) { - throw std::runtime_error( - FormatError("sema", "main 函数不能为空,且必须以 return 结束")); +void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table, + SemanticContext& sema); + +void CheckLVal(SysYParser::LValContext& lval, const SymbolTable& table, + SemanticContext& sema) { + if (!lval.Ident()) { + throw std::runtime_error(FormatError("sema", "左值缺少标识符")); + } + const std::string name = lval.Ident()->getText(); + auto* decl = table.Lookup(name); + if (!decl) { + throw std::runtime_error(FormatError("sema", "使用了未定义的变量: " + name)); + } + sema.BindVarUse(&lval, decl); + for (auto* index : lval.exp()) { + if (index) { + CheckExpr(*index, table, sema); } - ctx->blockStmt()->accept(this); - return {}; } +} - std::any visitBlockStmt(SysYParser::BlockStmtContext* ctx) override { - if (!ctx) { - throw std::runtime_error(FormatError("sema", "缺少语句块")); - } - const auto& items = ctx->blockItem(); - for (size_t i = 0; i < items.size(); ++i) { - auto* item = items[i]; - if (!item) { - continue; - } - if (seen_return_) { - throw std::runtime_error( - FormatError("sema", "return 必须是 main 函数中的最后一条语句")); - } - current_item_index_ = i; - total_items_ = items.size(); - item->accept(this); - } - return {}; +void CheckPrimary(SysYParser::PrimaryContext& primary, const SymbolTable& table, + SemanticContext& sema) { + if (primary.Number()) { + return; } - std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override { - if (!ctx) { - throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明")); - } - if (ctx->decl()) { - ctx->decl()->accept(this); - return {}; - } - if (ctx->stmt()) { - ctx->stmt()->accept(this); - return {}; - } - throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明")); + if (primary.lVal()) { + CheckLVal(*primary.lVal(), table, sema); + return; } - std::any visitDecl(SysYParser::DeclContext* ctx) override { - if (!ctx) { - throw std::runtime_error(FormatError("sema", "非法变量声明")); - } - if (!ctx->btype() || !ctx->btype()->INT()) { - throw std::runtime_error(FormatError("sema", "当前仅支持局部 int 变量声明")); - } - auto* var_def = ctx->varDef(); - if (!var_def || !var_def->lValue()) { - throw std::runtime_error(FormatError("sema", "非法变量声明")); - } - const std::string name = GetLValueName(*var_def->lValue()); - if (table_.Contains(name)) { - throw std::runtime_error(FormatError("sema", "重复定义变量: " + name)); - } - if (auto* init = var_def->initValue()) { - if (!init->exp()) { - throw std::runtime_error(FormatError("sema", "当前不支持聚合初始化")); - } - init->exp()->accept(this); - } - table_.Add(name, var_def); - return {}; + if (primary.exp()) { + CheckExpr(*primary.exp(), table, sema); + return; } - std::any visitStmt(SysYParser::StmtContext* ctx) override { - if (!ctx || !ctx->returnStmt()) { - throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明")); - } - ctx->returnStmt()->accept(this); - return {}; + throw std::runtime_error(FormatError("sema", "暂不支持的表达式形式")); +} + +void CheckUnaryExpr(SysYParser::UnaryExpContext& unary, const SymbolTable& table, + SemanticContext& sema) { + if (unary.primary()) { + CheckPrimary(*unary.primary(), table, sema); + return; } - std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override { - if (!ctx || !ctx->exp()) { - throw std::runtime_error(FormatError("sema", "return 缺少表达式")); - } - ctx->exp()->accept(this); - seen_return_ = true; - if (current_item_index_ + 1 != total_items_) { - throw std::runtime_error( - FormatError("sema", "return 必须是 main 函数中的最后一条语句")); - } - return {}; + if (unary.unaryExp()) { + CheckUnaryExpr(*unary.unaryExp(), table, sema); + return; } - std::any visitParenExp(SysYParser::ParenExpContext* ctx) override { - if (!ctx || !ctx->exp()) { - throw std::runtime_error(FormatError("sema", "非法括号表达式")); + if (unary.funcRParams()) { + for (auto* arg : unary.funcRParams()->exp()) { + if (arg) { + CheckExpr(*arg, table, sema); + } } - ctx->exp()->accept(this); - return {}; } +} - std::any visitVarExp(SysYParser::VarExpContext* ctx) override { - if (!ctx || !ctx->var()) { - throw std::runtime_error(FormatError("sema", "非法变量表达式")); +void CheckMulExpr(SysYParser::MulExpContext& mul, const SymbolTable& table, + SemanticContext& sema) { + for (auto* unary : mul.unaryExp()) { + if (unary) { + CheckUnaryExpr(*unary, table, sema); } - ctx->var()->accept(this); - return {}; } +} - std::any visitNumberExp(SysYParser::NumberExpContext* ctx) override { - if (!ctx || !ctx->number() || !ctx->number()->ILITERAL()) { - throw std::runtime_error(FormatError("sema", "当前仅支持整数字面量")); +void CheckAddExpr(SysYParser::AddExpContext& add, const SymbolTable& table, + SemanticContext& sema) { + for (auto* mul : add.mulExp()) { + if (mul) { + CheckMulExpr(*mul, table, sema); } - return {}; } +} - std::any visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) override { - if (!ctx || !ctx->exp(0) || !ctx->exp(1)) { - throw std::runtime_error(FormatError("sema", "暂不支持的表达式形式")); - } - ctx->exp(0)->accept(this); - ctx->exp(1)->accept(this); - return {}; +void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table, + SemanticContext& sema) { + if (!exp.addExp()) { + throw std::runtime_error(FormatError("sema", "非法表达式")); + } + CheckAddExpr(*exp.addExp(), table, sema); +} + +} // namespace + +SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit) { + auto* func = FindMainFunc(comp_unit); + if (!func || !func->block()) { + throw std::runtime_error(FormatError("sema", "缺少 main 函数定义")); + } + + SymbolTable table; + SemanticContext sema; + bool seen_return = false; + + const auto& items = func->block()->blockItem(); + if (items.empty()) { + throw std::runtime_error( + FormatError("sema", "main 函数不能为空,且必须以 return 结束")); } - std::any visitVar(SysYParser::VarContext* ctx) override { - if (!ctx || !ctx->ID()) { - throw std::runtime_error(FormatError("sema", "非法变量引用")); + for (size_t i = 0; i < items.size(); ++i) { + auto* item = items[i]; + if (!item) { + continue; } - const std::string name = ctx->ID()->getText(); - auto* decl = table_.Lookup(name); - if (!decl) { - throw std::runtime_error(FormatError("sema", "使用了未定义的变量: " + name)); + if (seen_return) { + throw std::runtime_error( + FormatError("sema", "return 必须是 main 函数中的最后一条语句")); + } + + if (auto* decl = item->decl() ? item->decl()->varDecl() : nullptr) { + for (auto* def : decl->varDef()) { + if (!def || !def->Ident()) { + continue; + } + const std::string name = def->Ident()->getText(); + if (table.Contains(name)) { + throw std::runtime_error(FormatError("sema", "重复定义变量: " + name)); + } + if (!def->constExp().empty()) { + throw std::runtime_error( + FormatError("sema", "当前 IR 仅支持标量局部变量")); + } + if (auto* init = def->initVal()) { + if (!init->exp()) { + throw std::runtime_error( + FormatError("sema", "当前 IR 仅支持标量表达式初始化")); + } + CheckExpr(*init->exp(), table, sema); + } + table.Add(name, def); + } + continue; } - sema_.BindVarUse(ctx, decl); - return {}; - } - SemanticContext TakeSemanticContext() { return std::move(sema_); } + if (auto* stmt = item->stmt(); stmt && stmt->returnStmt()) { + auto* ret = stmt->returnStmt(); + if (!ret->exp()) { + throw std::runtime_error(FormatError("sema", "main 函数必须返回一个值")); + } + CheckExpr(*ret->exp(), table, sema); + seen_return = true; + if (i + 1 != items.size()) { + throw std::runtime_error( + FormatError("sema", "return 必须是 main 函数中的最后一条语句")); + } + continue; + } - private: - SymbolTable table_; - SemanticContext sema_; - bool seen_return_ = false; - size_t current_item_index_ = 0; - size_t total_items_ = 0; -}; + throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明")); + } -} // namespace + if (!seen_return) { + throw std::runtime_error(FormatError("sema", "main 函数必须包含 return 语句")); + } -SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit) { - SemaVisitor visitor; - comp_unit.accept(&visitor); - return visitor.TakeSemanticContext(); + return sema; }