diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4
index c22ab5f..82a624f 100644
--- a/src/antlr4/SysY.g4
+++ b/src/antlr4/SysY.g4
@@ -1,35 +1,73 @@
+// SysY subset grammar: supports programs of the form
+//   int main() { int a = 1; int b = 2; return a + b; }
+// i.e. the minimal case of compiling a returned expression.
+
+// Extend with further rules as needed.
 grammar SysY;
 
-// 说明:
-// - 这是一个“最小可用”的 SysY.g4,用于避免空文件导致的 ANTLR 解析报错。
-// - 后续请按 SysY 语言规范逐步补全 lexer/parser 规则。
-// - 本工程约定:ANTLR 生成的 C++ 源码/头文件不进入仓库,统一生成到构建目录(例如 build/generated/antlr4/)。
+compUnit
+  : funcDef EOF
+  ;
+
+funcDef
+  : Int Main L_PAREN R_PAREN block
+  ;
+
+block
+  : L_BRACE stmt* R_BRACE
+  ;
+
+stmt
+  : varDecl
+  | returnStmt
+  ;
+
+varDecl
+  : Int Ident (Assign exp)? Semi
+  ;
+
+returnStmt
+  : Return exp Semi
+  ;
+
+exp
+  : addExp
+  ;
 
-compilationUnit
-  : (statement)* EOF
-  ;
+addExp
+  : primary (AddOp primary)*
+  ;
 
-statement
-  : 'return' expression? ';'
-  | ';'
-  ;
+primary
+  : Number
+  | Ident
+  | L_PAREN exp R_PAREN
+  ;
 
-expression
-  : IntegerLiteral
-  | Identifier
-  ;
+Int : 'int';
+Return : 'return';
+Main : 'main';
 
-// -------- lexer --------
+AddOp : '+';  // NOTE(review): '-' is not lexed, so AstBuilder's "-" branch is unreachable
+Assign : '=';
+Semi : ';';
+L_PAREN : '(';
+R_PAREN : ')';
+L_BRACE : '{';
+R_BRACE : '}';
 
-IntegerLiteral
-  : [0-9]+
-  ;
+Ident
+  : [a-zA-Z_][a-zA-Z_0-9]*
+  ;
 
-Identifier
-  : [a-zA-Z_] [a-zA-Z0-9_]*
-  ;
+Number
+  : [0-9]+
+  ;
 
-Whitespace
-  : [ \t\r\n]+ -> skip
-  ;
+WS
+  : [ \t\r\n]+ -> skip
+  ;
 
+COMMENT
+  : '//' ~[\r\n]* -> skip
+  ;
diff --git a/src/ast/AstNodes.cpp b/src/ast/AstNodes.cpp
index 39c5b37..5493907 100644
--- a/src/ast/AstNodes.cpp
+++ b/src/ast/AstNodes.cpp
@@ -1,4 +1,5 @@
+
 // AST 节点定义与实现:
 // - 表达式、语句、声明、函数、类型等节点
 // - 支持后续阶段在节点上附加信息(类型、符号绑定、常量值等)
-
+#include "ast/AstNodes.h"
diff --git a/src/ast/AstNodes.h b/src/ast/AstNodes.h
new file mode 100644
index 0000000..5433cd1
--- /dev/null
+++ b/src/ast/AstNodes.h
@@ -0,0 +1,83 @@
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace ast {
+
+// Binary operators an expression node can carry.
+enum class BinaryOp { Add, Sub, Mul, Div };
+
+// Polymorphic base of all expression nodes.
+struct Expr {
+  virtual ~Expr() = default;
+};
+
+// Integer literal.
+struct NumberExpr : Expr {
+  int value{};
+  explicit NumberExpr(int v) : value(v) {}
+};
+
+// Reference to a variable by name.
+struct VarExpr : Expr {
+  std::string name;
+  explicit VarExpr(std::string n) : name(std::move(n)) {}
+};
+
+// Binary operation `lhs op rhs`.
+struct BinaryExpr : Expr {
+  BinaryOp op;
+  std::shared_ptr<Expr> lhs;
+  std::shared_ptr<Expr> rhs;
+  BinaryExpr(BinaryOp op, std::shared_ptr<Expr> lhs, std::shared_ptr<Expr> rhs)
+      : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
+};
+
+// Polymorphic base of all statement nodes.
+struct Stmt {
+  virtual ~Stmt() = default;
+};
+
+// `return <value>;`
+struct ReturnStmt : Stmt {
+  std::shared_ptr<Expr> value;
+  explicit ReturnStmt(std::shared_ptr<Expr> v) : value(std::move(v)) {}
+};
+
+// Variable declaration with an optional initializer.
+struct VarDecl {
+  std::string name;
+  std::shared_ptr<Expr> init;  // nullptr if no initializer
+  VarDecl(std::string n, std::shared_ptr<Expr> i)
+      : name(std::move(n)), init(std::move(i)) {}
+};
+
+// Function body. Declarations and statements are kept in separate lists, so
+// their relative source order is not recorded.
+struct Block {
+  std::vector<std::shared_ptr<VarDecl>> varDecls;
+  std::vector<std::shared_ptr<Stmt>> stmts;
+};
+
+// Function definition: a name plus its body.
+struct FuncDef {
+  std::string name;
+  std::shared_ptr<Block> body;
+  FuncDef(std::string n, std::shared_ptr<Block> b)
+      : name(std::move(n)), body(std::move(b)) {}
+};
+
+// Root of the AST: holds the single function definition.
+struct CompUnit {
+  std::shared_ptr<FuncDef> func;
+  explicit CompUnit(std::shared_ptr<FuncDef> f) : func(std::move(f)) {}
+};
+
+// Prints a readable dump of the AST to std::cout (debugging aid).
+void PrintAST(const CompUnit& cu);
+
+}  // namespace ast
diff --git a/src/ast/AstPrinter.cpp b/src/ast/AstPrinter.cpp
index 1e37684..d3e71b1 100644
--- a/src/ast/AstPrinter.cpp
+++ b/src/ast/AstPrinter.cpp
@@ -1,4 +1,77 @@
-// AST 调试打印:
-// - 以可读形式打印 AST 结构
-// - 用于验证 AST 构建与语义分析结果,便于定位问题
+// Minimal AST debug printer, handy for checking the front end.
+#include "ast/AstNodes.h"
+
+#include <iostream>
+
+namespace ast {
+
+static void PrintExpr(const Expr* expr);
+
+// Writes the indent string `depth` times to std::cout.
+static void PrintIndent(int depth) {
+  for (int i = 0; i < depth; ++i) std::cout << " ";
+}
+
+// Prints `expr` to std::cout; binary expressions are fully parenthesized.
+// A null pointer or an unrecognized node type prints nothing.
+static void PrintExpr(const Expr* expr) {
+  if (auto num = dynamic_cast<const NumberExpr*>(expr)) {
+    std::cout << num->value;
+  } else if (auto var = dynamic_cast<const VarExpr*>(expr)) {
+    std::cout << var->name;
+  } else if (auto bin = dynamic_cast<const BinaryExpr*>(expr)) {
+    std::cout << "(";
+    PrintExpr(bin->lhs.get());
+    const char* op = "?";
+    switch (bin->op) {
+      case BinaryOp::Add:
+        op = "+";
+        break;
+      case BinaryOp::Sub:
+        op = "-";
+        break;
+      case BinaryOp::Mul:
+        op = "*";
+        break;
+      case BinaryOp::Div:
+        op = "/";
+        break;
+    }
+    std::cout << " " << op << " ";
+    PrintExpr(bin->rhs.get());
+    std::cout << ")";
+  }
+}
+
+// Dumps the compilation unit: the function header, each variable declaration,
+// then each return statement. Other statement kinds are skipped.
+void PrintAST(const CompUnit& cu) {
+  if (!cu.func) return;
+  std::cout << "func " << cu.func->name << " () {\n";
+  const auto& body = cu.func->body;
+  if (!body) {
+    std::cout << "}\n";
+    return;
+  }
+  for (const auto& decl : body->varDecls) {
+    PrintIndent(1);
+    std::cout << "var " << decl->name;
+    if (decl->init) {
+      std::cout << " = ";
+      PrintExpr(decl->init.get());
+    }
+    std::cout << ";\n";
+  }
+  for (const auto& stmt : body->stmts) {
+    if (auto ret = dynamic_cast<const ReturnStmt*>(stmt.get())) {
+      PrintIndent(1);
+      std::cout << "return ";
+      PrintExpr(ret->value.get());
+      std::cout << ";\n";
+    }
+  }
+  std::cout << "}\n";
+}
+
+}  // namespace ast
 
diff --git a/src/frontend/AntlrDriver.cpp b/src/frontend/AntlrDriver.cpp
index 571511a..29eb1c3 100644
--- a/src/frontend/AntlrDriver.cpp
+++ b/src/frontend/AntlrDriver.cpp
@@ -1,5 +1,45 @@
-// 前端解析驱动:
-// - 读取源代码
-// - 调用 ANTLR 生成的 lexer/parser 得到 parse tree
-// - 对外提供“可用的解析入口”(语法正确性由测试保证)
+// Runs the ANTLR-generated lexer/parser and returns the parse tree.
+#include "frontend/AntlrDriver.h"
 
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+
+#include "SysYLexer.h"
+#include "SysYParser.h"
+#include "antlr4-runtime.h"
+
+// Reads `path`, parses it from the `compUnit` rule and returns the parse tree
+// together with the ANTLR objects that were created to produce it.
+// Throws std::runtime_error if the file cannot be opened or if the lexer or
+// parser reported a syntax error.
+AntlrResult ParseFileWithAntlr(const std::string& path) {
+  std::ifstream fin(path);
+  if (!fin.is_open()) {
+    throw std::runtime_error("无法打开输入文件: " + path);
+  }
+  std::ostringstream ss;
+  ss << fin.rdbuf();
+
+  auto input = std::make_unique<antlr4::ANTLRInputStream>(ss.str());
+  auto lexer = std::make_unique<SysYLexer>(input.get());
+  auto tokens = std::make_unique<antlr4::CommonTokenStream>(lexer.get());
+  auto parser = std::make_unique<SysYParser>(tokens.get());
+  parser->removeErrorListeners();
+  auto tree = parser->compUnit();
+
+  // ANTLR recovers from syntax errors and still returns a tree, so a malformed
+  // program would otherwise reach the AST builder with missing children.
+  if (lexer->getNumberOfSyntaxErrors() > 0 ||
+      parser->getNumberOfSyntaxErrors() > 0) {
+    throw std::runtime_error("源文件存在语法错误: " + path);
+  }
+
+  AntlrResult result;
+  result.input = std::move(input);
+  result.lexer = std::move(lexer);
+  result.tokens = std::move(tokens);
+  result.parser = std::move(parser);
+  result.tree = tree;
+  return result;
+}
diff --git a/src/frontend/AntlrDriver.h b/src/frontend/AntlrDriver.h
new file mode 100644
index 0000000..ee22da9
--- /dev/null
+++ b/src/frontend/AntlrDriver.h
@@ -0,0 +1,22 @@
+// Thin wrapper around ANTLR4 that provides a simple parsing entry point.
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "SysYLexer.h"
+#include "SysYParser.h"
+#include "antlr4-runtime.h"
+
+// Bundles the parse tree with the ANTLR objects created to produce it.
+// Members are declared in creation order, so they are destroyed parser-first.
+struct AntlrResult {
+  std::unique_ptr<antlr4::ANTLRInputStream> input;
+  std::unique_ptr<SysYLexer> lexer;
+  std::unique_ptr<antlr4::CommonTokenStream> tokens;
+  std::unique_ptr<SysYParser> parser;
+  antlr4::tree::ParseTree* tree = nullptr;  // owned by parser
+};
+
+// Parses the given file; throws std::runtime_error on failure.
+AntlrResult ParseFileWithAntlr(const std::string& path);
diff --git a/src/frontend/AstBuilder.cpp b/src/frontend/AstBuilder.cpp
index e57235e..da6b3d9 100644
--- a/src/frontend/AstBuilder.cpp
+++ b/src/frontend/AstBuilder.cpp
@@ -1,4 +1,129 @@
-// AST 构建:
-// - 将 ANTLR parse tree 转换为 AST(对应 src/ast/*)
-// - 在 AST 节点上保留必要的定位信息(可选,用于调试/日志)
+// Converts the ANTLR parse tree into the AST.
+#include "frontend/AstBuilder.h"
 
+#include <any>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+#include "SysYBaseVisitor.h"
+#include "SysYParser.h"
+#include "ast/AstNodes.h"
+#include "antlr4-runtime.h"
+
+namespace {
+
+using ast::BinaryExpr;
+using ast::BinaryOp;
+using ast::Block;
+using ast::CompUnit;
+using ast::FuncDef;
+using ast::NumberExpr;
+using ast::ReturnStmt;
+using ast::VarDecl;
+using ast::VarExpr;
+
+// Moves the T held by `value` out of it; throws std::runtime_error when the
+// std::any holds a different type (or nothing).
+template <typename T>
+T Take(std::any&& value) {
+  if (auto* ptr = std::any_cast<T>(&value)) {
+    return std::move(*ptr);
+  }
+  throw std::runtime_error("AST 构建失败:类型不匹配");
+}
+
+// Parse-tree visitor; each visit method returns a std::any that wraps a
+// std::shared_ptr to the AST node it built.
+class Builder : public SysYBaseVisitor {
+ public:
+  std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override {
+    auto func = Take<std::shared_ptr<FuncDef>>(visit(ctx->funcDef()));
+    return std::make_shared<CompUnit>(std::move(func));
+  }
+
+  std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override {
+    auto body = Take<std::shared_ptr<Block>>(visit(ctx->block()));
+    return std::make_shared<FuncDef>("main", std::move(body));
+  }
+
+  std::any visitBlock(SysYParser::BlockContext* ctx) override {
+    auto block = std::make_shared<Block>();
+    for (auto stmtCtx : ctx->stmt()) {
+      if (stmtCtx->varDecl()) {
+        block->varDecls.emplace_back(
+            Take<std::shared_ptr<VarDecl>>(visit(stmtCtx->varDecl())));
+      } else if (stmtCtx->returnStmt()) {
+        block->stmts.emplace_back(
+            Take<std::shared_ptr<ReturnStmt>>(visit(stmtCtx->returnStmt())));
+      }
+    }
+    return block;
+  }
+
+  std::any visitVarDecl(SysYParser::VarDeclContext* ctx) override {
+    std::shared_ptr<ast::Expr> init;
+    if (ctx->exp()) {
+      init = Take<std::shared_ptr<ast::Expr>>(visit(ctx->exp()));
+    }
+    return std::make_shared<VarDecl>(ctx->Ident()->getText(), std::move(init));
+  }
+
+  std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override {
+    auto expr = Take<std::shared_ptr<ast::Expr>>(visit(ctx->exp()));
+    return std::make_shared<ReturnStmt>(std::move(expr));
+  }
+
+  std::any visitExp(SysYParser::ExpContext* ctx) override {
+    return visit(ctx->addExp());
+  }
+
+  // Folds `primary (AddOp primary)*` into a left-associative BinaryExpr chain.
+  std::any visitAddExp(SysYParser::AddExpContext* ctx) override {
+    auto node = Take<std::shared_ptr<ast::Expr>>(visit(ctx->primary(0)));
+    for (size_t i = 1; i < ctx->primary().size(); ++i) {
+      auto rhs = Take<std::shared_ptr<ast::Expr>>(visit(ctx->primary(i)));
+      auto opToken = ctx->AddOp(i - 1);
+      BinaryOp op = BinaryOp::Add;
+      if (opToken->getText() == "-") op = BinaryOp::Sub;
+      node = std::make_shared<BinaryExpr>(op, std::move(node), std::move(rhs));
+    }
+    return node;
+  }
+
+  std::any visitPrimary(SysYParser::PrimaryContext* ctx) override {
+    if (ctx->Number()) {
+      const std::string text = ctx->Number()->getText();
+      int value = 0;
+      try {
+        value = std::stoi(text);
+      } catch (const std::out_of_range&) {
+        // std::stoi rejects literals that do not fit in int; report that as
+        // std::runtime_error like the other failures in this file.
+        throw std::runtime_error("整数字面量超出 int 范围: " + text);
+      }
+      std::shared_ptr<ast::Expr> expr = std::make_shared<NumberExpr>(value);
+      return expr;
+    }
+    if (ctx->Ident()) {
+      std::shared_ptr<ast::Expr> expr =
+          std::make_shared<VarExpr>(ctx->Ident()->getText());
+      return expr;
+    }
+    return visit(ctx->exp());
+  }
+};
+
+}  // namespace
+
+// Builds the AST for `tree`. Throws std::runtime_error if `tree` is null or a
+// visitor result does not have the expected type.
+std::shared_ptr<ast::CompUnit> BuildAst(antlr4::tree::ParseTree* tree) {
+  if (!tree) {
+    throw std::runtime_error("parse tree 为空");
+  }
+  Builder visitor;
+  auto result = visitor.visit(tree);
+  return Take<std::shared_ptr<CompUnit>>(std::move(result));
+}
diff --git a/src/frontend/AstBuilder.h b/src/frontend/AstBuilder.h
new file mode 100644
index 0000000..7607667
--- /dev/null
+++ b/src/frontend/AstBuilder.h
@@ -0,0 +1,17 @@
+// Converts the ANTLR parse tree into the project's own AST.
+#pragma once
+
+#include <memory>
+
+namespace antlr4 {
+namespace tree {
+class ParseTree;
+}
+}  // namespace antlr4
+
+namespace ast {
+struct CompUnit;
+}
+
+// Builds the AST rooted at `tree`; throws std::runtime_error if `tree` is null.
+std::shared_ptr<ast::CompUnit> BuildAst(antlr4::tree::ParseTree* tree);