From e941cced9b7dcf4ff627064e267d033ed4140b58 Mon Sep 17 00:00:00 2001 From: jing <3030349106@qq.com> Date: Sun, 28 Dec 2025 18:44:48 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=AF=E4=BB=A5=E5=A4=84=E7=90=86=E7=94=9F?= =?UTF-8?q?=E6=88=90=E5=8A=A0=E6=B3=95=E7=9A=84IR?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/antlr4/SysY.g4 | 88 ++++++++++++------ src/ast/AstNodes.cpp | 5 +- src/ast/AstNodes.h | 70 +++++++++++++++ src/ast/AstPrinter.cpp | 74 ++++++++++++++- src/frontend/AntlrDriver.cpp | 37 +++++++- src/frontend/AntlrDriver.h | 20 +++++ src/frontend/AstBuilder.cpp | 116 +++++++++++++++++++++++- src/frontend/AstBuilder.h | 16 ++++ src/ir/BasicBlock.cpp | 5 +- src/ir/Context.cpp | 5 +- src/ir/Function.cpp | 17 +++- src/ir/IR.h | 144 ++++++++++++++++++++++++++++++ src/ir/IRBuilder.cpp | 21 ++++- src/ir/IRPrinter.cpp | 57 +++++++++++- src/ir/Instruction.cpp | 14 ++- src/ir/Module.cpp | 12 ++- src/ir/Type.cpp | 18 +++- src/ir/Value.cpp | 11 ++- src/ir/analysis/DominatorTree.cpp | 5 +- src/ir/analysis/LoopInfo.cpp | 5 +- src/ir/passes/CFGSimplify.cpp | 5 +- src/ir/passes/ConstFold.cpp | 5 +- src/ir/passes/DCE.cpp | 5 +- src/ir/passes/Mem2Reg.cpp | 5 +- src/ir/passes/PassManager.cpp | 5 +- src/irgen/IRGen.h | 14 +++ src/irgen/IRGenDecl.cpp | 5 +- src/irgen/IRGenDriver.cpp | 99 +++++++++++++++++++- src/irgen/IRGenExp.cpp | 5 +- src/irgen/IRGenFunc.cpp | 5 +- src/irgen/IRGenStmt.cpp | 5 +- src/main.cpp | 43 +++++---- src/sem/ConstEval.cpp | 5 +- src/sem/Sema.cpp | 55 +++++++++++- src/sem/Sema.h | 11 +++ src/sem/SymbolTable.cpp | 5 +- src/sem/SymbolTable.h | 16 ++++ src/utils/CLI.cpp | 15 +++- src/utils/CLI.h | 10 +++ src/utils/Log.cpp | 5 +- src/utils/Log.h | 7 ++ test/test_case/simple_add.sy | 5 ++ 42 files changed, 918 insertions(+), 157 deletions(-) create mode 100644 src/ast/AstNodes.h create mode 100644 src/frontend/AntlrDriver.h create mode 100644 src/frontend/AstBuilder.h create mode 100644 src/ir/IR.h create mode 100644 src/irgen/IRGen.h create mode 100644 src/sem/Sema.h create mode 100644 src/sem/SymbolTable.h create mode 100644 src/utils/CLI.h create mode 100644 src/utils/Log.h create mode 100644 test/test_case/simple_add.sy diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4 index c22ab5f..82a624f 100644 --- a/src/antlr4/SysY.g4 +++ b/src/antlr4/SysY.g4 @@ -1,35 +1,73 @@ +// SysY 子集语法:支持形如 +// int main() { int a = 1; int b = 2; return a + b; } +// 的最小返回表达式编译。 + +// 后续需要自行添加 grammar SysY; -// 说明: -// - 这是一个“最小可用”的 SysY.g4,用于避免空文件导致的 ANTLR 解析报错。 -// - 后续请按 SysY 语言规范逐步补全 lexer/parser 规则。 -// - 本工程约定:ANTLR 生成的 C++ 源码/头文件不进入仓库,统一生成到构建目录(例如 build/generated/antlr4/)。 +compUnit + : funcDef EOF + ; + +funcDef + : Int Main L_PAREN R_PAREN block + ; + +block + : L_BRACE stmt* R_BRACE + ; + +stmt + : varDecl + | returnStmt + ; + +varDecl + : Int Ident (Assign exp)? Semi + ; + +returnStmt + : Return exp Semi + ; + +exp + : addExp + ; -compilationUnit - : (statement)* EOF - ; +addExp + : primary (AddOp primary)* + ; -statement - : 'return' expression? ';' - | ';' - ; +primary + : Number + | Ident + | L_PAREN exp R_PAREN + ; -expression - : IntegerLiteral - | Identifier - ; +Int : 'int'; +Return : 'return'; +Main : 'main'; -// -------- lexer -------- +AddOp : '+'; +Assign : '='; +Semi : ';'; +L_PAREN : '('; +R_PAREN : ')'; +L_BRACE : '{'; +R_BRACE : '}'; -IntegerLiteral - : [0-9]+ - ; +Ident + : [a-zA-Z_][a-zA-Z_0-9]* + ; -Identifier - : [a-zA-Z_] [a-zA-Z0-9_]* - ; +Number + : [0-9]+ + ; -Whitespace - : [ \t\r\n]+ -> skip - ; +WS + : [ \t\r\n]+ -> skip + ; +COMMENT + : '//' ~[\r\n]* -> skip + ; diff --git a/src/ast/AstNodes.cpp b/src/ast/AstNodes.cpp index 39c5b37..f8429ae 100644 --- a/src/ast/AstNodes.cpp +++ b/src/ast/AstNodes.cpp @@ -1,4 +1,3 @@ -// AST 节点定义与实现: -// - 表达式、语句、声明、函数、类型等节点 -// - 支持后续阶段在节点上附加信息(类型、符号绑定、常量值等) +// AST 节点简单实现:仅需包含头文件即可,析构函数默认生成。 +#include "ast/AstNodes.h" diff --git a/src/ast/AstNodes.h b/src/ast/AstNodes.h new file mode 100644 index 0000000..d9a9914 --- /dev/null +++ b/src/ast/AstNodes.h @@ -0,0 +1,70 @@ +// Minimal AST definitions for the SysY subset used in this toy compiler. +#pragma once + +#include +#include +#include + +namespace ast { + +enum class BinaryOp { Add, Sub, Mul, Div }; + +struct Expr { + virtual ~Expr() = default; +}; + +struct NumberExpr : Expr { + int value{}; + explicit NumberExpr(int v) : value(v) {} +}; + +struct VarExpr : Expr { + std::string name; + explicit VarExpr(std::string n) : name(std::move(n)) {} +}; + +struct BinaryExpr : Expr { + BinaryOp op; + std::shared_ptr lhs; + std::shared_ptr rhs; + BinaryExpr(BinaryOp op, std::shared_ptr lhs, std::shared_ptr rhs) + : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} +}; + +struct Stmt { + virtual ~Stmt() = default; +}; + +struct ReturnStmt : Stmt { + std::shared_ptr value; + explicit ReturnStmt(std::shared_ptr v) : value(std::move(v)) {} +}; + +struct VarDecl { + std::string name; + std::shared_ptr init; // nullptr if no initializer + VarDecl(std::string n, std::shared_ptr i) + : name(std::move(n)), init(std::move(i)) {} +}; + +struct Block { + std::vector> varDecls; + std::vector> stmts; +}; + +struct FuncDef { + std::string name; + std::shared_ptr body; + FuncDef(std::string n, std::shared_ptr b) + : name(std::move(n)), body(std::move(b)) {} +}; + +struct CompUnit { + std::shared_ptr func; + explicit CompUnit(std::shared_ptr f) : func(std::move(f)) {} +}; + +// 调试打印 +void PrintAST(const CompUnit& cu); + +} // namespace ast diff --git a/src/ast/AstPrinter.cpp b/src/ast/AstPrinter.cpp index 1e37684..d3e71b1 100644 --- a/src/ast/AstPrinter.cpp +++ b/src/ast/AstPrinter.cpp @@ -1,4 +1,72 @@ -// AST 调试打印: -// - 以可读形式打印 AST 结构 -// - 用于验证 AST 构建与语义分析结果,便于定位问题 +// 简单 AST 调试打印,便于前端验证。 +#include "ast/AstNodes.h" + +#include + +namespace ast { + +static void PrintExpr(const Expr* expr); + +static void PrintIndent(int depth) { + for (int i = 0; i < depth; ++i) std::cout << " "; +} + +static void PrintExpr(const Expr* expr) { + if (auto num = dynamic_cast(expr)) { + std::cout << num->value; + } else if (auto var = dynamic_cast(expr)) { + std::cout << var->name; + } else if (auto bin = dynamic_cast(expr)) { + std::cout << "("; + PrintExpr(bin->lhs.get()); + const char* op = "?"; + switch (bin->op) { + case BinaryOp::Add: + op = "+"; + break; + case BinaryOp::Sub: + op = "-"; + break; + case BinaryOp::Mul: + op = "*"; + break; + case BinaryOp::Div: + op = "/"; + break; + } + std::cout << " " << op << " "; + PrintExpr(bin->rhs.get()); + std::cout << ")"; + } +} + +void PrintAST(const CompUnit& cu) { + if (!cu.func) return; + std::cout << "func " << cu.func->name << " () {\n"; + const auto& body = cu.func->body; + if (!body) { + std::cout << "}\n"; + return; + } + for (const auto& decl : body->varDecls) { + PrintIndent(1); + std::cout << "var " << decl->name; + if (decl->init) { + std::cout << " = "; + PrintExpr(decl->init.get()); + } + std::cout << ";\n"; + } + for (const auto& stmt : body->stmts) { + if (auto ret = dynamic_cast(stmt.get())) { + PrintIndent(1); + std::cout << "return "; + PrintExpr(ret->value.get()); + std::cout << ";\n"; + } + } + std::cout << "}\n"; +} + +} // namespace ast diff --git a/src/frontend/AntlrDriver.cpp b/src/frontend/AntlrDriver.cpp index 571511a..29eb1c3 100644 --- a/src/frontend/AntlrDriver.cpp +++ b/src/frontend/AntlrDriver.cpp @@ -1,5 +1,34 @@ -// 前端解析驱动: -// - 读取源代码 -// - 调用 ANTLR 生成的 lexer/parser 得到 parse tree -// - 对外提供“可用的解析入口”(语法正确性由测试保证) +// 调用 ANTLR 生成的 Lexer/Parser,返回 parse tree。 +#include "frontend/AntlrDriver.h" +#include +#include +#include + +#include "SysYLexer.h" +#include "SysYParser.h" +#include "antlr4-runtime.h" + +AntlrResult ParseFileWithAntlr(const std::string& path) { + std::ifstream fin(path); + if (!fin.is_open()) { + throw std::runtime_error("无法打开输入文件: " + path); + } + std::ostringstream ss; + ss << fin.rdbuf(); + + auto input = std::make_unique(ss.str()); + auto lexer = std::make_unique(input.get()); + auto tokens = std::make_unique(lexer.get()); + auto parser = std::make_unique(tokens.get()); + parser->removeErrorListeners(); + auto tree = parser->compUnit(); + + AntlrResult result; + result.input = std::move(input); + result.lexer = std::move(lexer); + result.tokens = std::move(tokens); + result.parser = std::move(parser); + result.tree = tree; + return result; +} diff --git a/src/frontend/AntlrDriver.h b/src/frontend/AntlrDriver.h new file mode 100644 index 0000000..ee22da9 --- /dev/null +++ b/src/frontend/AntlrDriver.h @@ -0,0 +1,20 @@ +// 包装 ANTLR4,提供简易的解析入口。 +#pragma once + +#include +#include + +#include "SysYLexer.h" +#include "SysYParser.h" +#include "antlr4-runtime.h" + +struct AntlrResult { + std::unique_ptr input; + std::unique_ptr lexer; + std::unique_ptr tokens; + std::unique_ptr parser; + antlr4::tree::ParseTree* tree = nullptr; // owned by parser +}; + +// 解析指定文件,发生错误时抛出 std::runtime_error。 +AntlrResult ParseFileWithAntlr(const std::string& path); diff --git a/src/frontend/AstBuilder.cpp b/src/frontend/AstBuilder.cpp index e57235e..da6b3d9 100644 --- a/src/frontend/AstBuilder.cpp +++ b/src/frontend/AstBuilder.cpp @@ -1,4 +1,114 @@ -// AST 构建: -// - 将 ANTLR parse tree 转换为 AST(对应 src/ast/*) -// - 在 AST 节点上保留必要的定位信息(可选,用于调试/日志) +// 将 parse tree 转换为 AST。 +#include "frontend/AstBuilder.h" +#include +#include +#include +#include +#include + +#include "SysYBaseVisitor.h" +#include "SysYParser.h" +#include "ast/AstNodes.h" +#include "antlr4-runtime.h" + +namespace { + +using ast::BinaryExpr; +using ast::BinaryOp; +using ast::Block; +using ast::CompUnit; +using ast::FuncDef; +using ast::NumberExpr; +using ast::ReturnStmt; +using ast::VarDecl; +using ast::VarExpr; + +template +T Take(std::any&& value) { + if (auto* ptr = std::any_cast(&value)) { + return std::move(*ptr); + } + throw std::runtime_error("AST 构建失败:类型不匹配"); +} + +class Builder : public SysYBaseVisitor { + public: + std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override { + auto func = Take>(visit(ctx->funcDef())); + return std::make_shared(std::move(func)); + } + + std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override { + auto body = Take>(visit(ctx->block())); + return std::make_shared("main", std::move(body)); + } + + std::any visitBlock(SysYParser::BlockContext* ctx) override { + auto block = std::make_shared(); + for (auto stmtCtx : ctx->stmt()) { + if (stmtCtx->varDecl()) { + block->varDecls.emplace_back( + Take>(visit(stmtCtx->varDecl()))); + } else if (stmtCtx->returnStmt()) { + block->stmts.emplace_back( + Take>(visit(stmtCtx->returnStmt()))); + } + } + return block; + } + + std::any visitVarDecl(SysYParser::VarDeclContext* ctx) override { + std::shared_ptr init; + if (ctx->exp()) { + init = Take>(visit(ctx->exp())); + } + return std::make_shared(ctx->Ident()->getText(), std::move(init)); + } + + std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override { + auto expr = Take>(visit(ctx->exp())); + return std::make_shared(std::move(expr)); + } + + std::any visitExp(SysYParser::ExpContext* ctx) override { + return visit(ctx->addExp()); + } + + std::any visitAddExp(SysYParser::AddExpContext* ctx) override { + auto node = Take>(visit(ctx->primary(0))); + for (size_t i = 1; i < ctx->primary().size(); ++i) { + auto rhs = Take>(visit(ctx->primary(i))); + auto opToken = ctx->AddOp(i - 1); + BinaryOp op = BinaryOp::Add; + if (opToken->getText() == "-") op = BinaryOp::Sub; + node = std::make_shared(op, std::move(node), std::move(rhs)); + } + return node; + } + + std::any visitPrimary(SysYParser::PrimaryContext* ctx) override { + if (ctx->Number()) { + std::shared_ptr expr = + std::make_shared(std::stoi(ctx->Number()->getText())); + return expr; + } + if (ctx->Ident()) { + std::shared_ptr expr = + std::make_shared(ctx->Ident()->getText()); + return expr; + } + return visit(ctx->exp()); + } +}; + +} // namespace + +std::shared_ptr BuildAst(antlr4::tree::ParseTree* tree) { + if (!tree) { + throw std::runtime_error("parse tree 为空"); + } + Builder visitor; + auto result = visitor.visit(tree); + return Take>(std::move(result)); +} diff --git a/src/frontend/AstBuilder.h b/src/frontend/AstBuilder.h new file mode 100644 index 0000000..7607667 --- /dev/null +++ b/src/frontend/AstBuilder.h @@ -0,0 +1,16 @@ +// 将 ANTLR parse tree 转换为内部 AST。 +#pragma once + +#include + +namespace antlr4 { +namespace tree { +class ParseTree; +} +} // namespace antlr4 + +namespace ast { +struct CompUnit; +} + +std::shared_ptr BuildAst(antlr4::tree::ParseTree* tree); diff --git a/src/ir/BasicBlock.cpp b/src/ir/BasicBlock.cpp index 822e987..3733210 100644 --- a/src/ir/BasicBlock.cpp +++ b/src/ir/BasicBlock.cpp @@ -1,4 +1 @@ -// IR 基本块: -// - 保存指令序列 -// - 维护或可计算前驱/后继关系,用于 CFG 分析与优化 - +#include "ir/IR.h" diff --git a/src/ir/Context.cpp b/src/ir/Context.cpp index d71a481..3733210 100644 --- a/src/ir/Context.cpp +++ b/src/ir/Context.cpp @@ -1,4 +1 @@ -// IR 上下文: -// - 管理类型与常量的创建/复用 -// - 保存字符串常量、符号等公共资源(按需要扩展) - +#include "ir/IR.h" diff --git a/src/ir/Function.cpp b/src/ir/Function.cpp index 88d2427..849cc89 100644 --- a/src/ir/Function.cpp +++ b/src/ir/Function.cpp @@ -1,4 +1,15 @@ -// IR Function: -// - 保存参数列表、基本块列表 -// - 记录函数属性/元信息(按需要扩展) +#include "ir/IR.h" +namespace ir { + +Function::Function(std::string name) + : Value(Type::Int32(), std::move(name)), + entry_(std::make_unique("entry")) {} + +void Function::EnsureEntry() { + if (!entry_) { + entry_ = std::make_unique("entry"); + } +} + +} // namespace ir diff --git a/src/ir/IR.h b/src/ir/IR.h new file mode 100644 index 0000000..8e461ae --- /dev/null +++ b/src/ir/IR.h @@ -0,0 +1,144 @@ +// 极简 IR 定义:足以表示 int 返回 a+b。 +#pragma once + +#include +#include +#include +#include + +namespace ir { + +class Type { + public: + enum class Kind { Void, Int32 }; + explicit Type(Kind k) : kind_(k) {} + Kind kind() const { return kind_; } + static std::shared_ptr Void(); + static std::shared_ptr Int32(); + + private: + Kind kind_; +}; + +class Value { + public: + Value(std::shared_ptr ty, std::string name) + : type_(std::move(ty)), name_(std::move(name)) {} + virtual ~Value() = default; + const std::shared_ptr& type() const { return type_; } + const std::string& name() const { return name_; } + void set_name(std::string n) { name_ = std::move(n); } + + protected: + std::shared_ptr type_; + std::string name_; +}; + +class ConstantInt : public Value { + public: + explicit ConstantInt(int v); + int value() const { return value_; } + + private: + int value_{}; +}; + +enum class Opcode { Add, Sub, Mul, Div, Ret }; + +class Instruction : public Value { + public: + Instruction(Opcode op, std::shared_ptr ty, std::string name = "") + : Value(std::move(ty), std::move(name)), opcode_(op) {} + Opcode opcode() const { return opcode_; } + + private: + Opcode opcode_; +}; + +class BinaryInst : public Instruction { + public: + BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, Value* rhs, + std::string name); + Value* lhs() const { return lhs_; } + Value* rhs() const { return rhs_; } + + private: + Value* lhs_; + Value* rhs_; +}; + +class ReturnInst : public Instruction { + public: + explicit ReturnInst(Value* val); + Value* value() const { return value_; } + + private: + Value* value_; +}; + +class BasicBlock { + public: + explicit BasicBlock(std::string name) : name_(std::move(name)) {} + const std::string& name() const { return name_; } + const std::vector>& instructions() const { + return instructions_; + } + template + T* Append(Args&&... args) { + auto inst = std::make_unique(std::forward(args)...); + auto* ptr = inst.get(); + instructions_.push_back(std::move(inst)); + return ptr; + } + + private: + std::string name_; + std::vector> instructions_; +}; + +class Function : public Value { + public: + explicit Function(std::string name); + BasicBlock* entry() { return entry_.get(); } + const BasicBlock* entry() const { return entry_.get(); } + void EnsureEntry(); + + private: + std::unique_ptr entry_; +}; + +class Module { + public: + Function* CreateFunction(const std::string& name); + const std::vector>& functions() const { + return functions_; + } + + private: + std::vector> functions_; +}; + +class IRBuilder { + public: + explicit IRBuilder(BasicBlock* bb) : insertBlock_(bb) {} + void SetInsertPoint(BasicBlock* bb) { insertBlock_ = bb; } + BasicBlock* GetInsertBlock() const { return insertBlock_; } + + ConstantInt* CreateConstInt(int v); + BinaryInst* CreateBinary(Opcode op, Value* lhs, Value* rhs, + const std::string& name); + BinaryInst* CreateAdd(Value* lhs, Value* rhs, const std::string& name) { + return CreateBinary(Opcode::Add, lhs, rhs, name); + } + ReturnInst* CreateRet(Value* v); + + private: + BasicBlock* insertBlock_; +}; + +class IRPrinter { + public: + void Print(const Module& module); +}; + +} // namespace ir diff --git a/src/ir/IRBuilder.cpp b/src/ir/IRBuilder.cpp index 554f2f6..3192a33 100644 --- a/src/ir/IRBuilder.cpp +++ b/src/ir/IRBuilder.cpp @@ -1,4 +1,19 @@ -// IR 构建工具: -// - 管理插入点(当前基本块/位置) -// - 提供创建各类指令的便捷接口,降低 IRGen 复杂度 +#include "ir/IR.h" +namespace ir { + +ConstantInt* IRBuilder::CreateConstInt(int v) { + // 常量不需要挂在基本块里,直接返回局部对象指针。 + return new ConstantInt(v); +} + +BinaryInst* IRBuilder::CreateBinary(Opcode op, Value* lhs, Value* rhs, + const std::string& name) { + return insertBlock_->Append(op, Type::Int32(), lhs, rhs, name); +} + +ReturnInst* IRBuilder::CreateRet(Value* v) { + return insertBlock_->Append(v); +} + +} // namespace ir diff --git a/src/ir/IRPrinter.cpp b/src/ir/IRPrinter.cpp index fddaf43..0f61e49 100644 --- a/src/ir/IRPrinter.cpp +++ b/src/ir/IRPrinter.cpp @@ -1,4 +1,55 @@ -// IR 文本输出: -// - 将 IR 打印为 .ll 风格的文本 -// - 支撑调试与测试对比(diff) +#include "ir/IR.h" +#include + +namespace ir { + +static const char* OpcodeToString(Opcode op) { + switch (op) { + case Opcode::Add: + return "add"; + case Opcode::Sub: + return "sub"; + case Opcode::Mul: + return "mul"; + case Opcode::Div: + return "div"; + case Opcode::Ret: + return "ret"; + } + return "?"; +} + +void IRPrinter::Print(const Module& module) { + for (const auto& func : module.functions()) { + std::cout << "define i32 @" << func->name() << "() {\n"; + const auto* bb = func->entry(); + if (!bb) { + std::cout << "}\n"; + continue; + } + for (const auto& instPtr : bb->instructions()) { + const auto* inst = instPtr.get(); + switch (inst->opcode()) { + case Opcode::Add: + case Opcode::Sub: + case Opcode::Mul: + case Opcode::Div: { + auto* bin = static_cast(inst); + std::cout << " " << bin->name() << " = " << OpcodeToString(bin->opcode()) + << " " << bin->lhs()->name() << ", " << bin->rhs()->name() + << "\n"; + break; + } + case Opcode::Ret: { + auto* ret = static_cast(inst); + std::cout << " ret " << ret->value()->name() << "\n"; + break; + } + } + } + std::cout << "}\n"; + } +} + +} // namespace ir diff --git a/src/ir/Instruction.cpp b/src/ir/Instruction.cpp index c8e0e24..8a00202 100644 --- a/src/ir/Instruction.cpp +++ b/src/ir/Instruction.cpp @@ -1,4 +1,12 @@ -// IR 指令体系: -// - 二元运算/比较、load/store、call、br/condbr、ret、phi、alloca 等 -// - 指令操作数与结果类型管理,支持打印与优化 +#include "ir/IR.h" +namespace ir { + +BinaryInst::BinaryInst(Opcode op, std::shared_ptr ty, Value* lhs, + Value* rhs, std::string name) + : Instruction(op, std::move(ty), std::move(name)), lhs_(lhs), rhs_(rhs) {} + +ReturnInst::ReturnInst(Value* val) + : Instruction(Opcode::Ret, Type::Void(), ""), value_(val) {} + +} // namespace ir diff --git a/src/ir/Module.cpp b/src/ir/Module.cpp index 9d7fc51..25e9bd2 100644 --- a/src/ir/Module.cpp +++ b/src/ir/Module.cpp @@ -1,4 +1,10 @@ -// IR Module: -// - 保存全局变量与函数列表 -// - 维护与目标相关的模块级信息(如需要)与符号表 +#include "ir/IR.h" +namespace ir { + +Function* Module::CreateFunction(const std::string& name) { + functions_.push_back(std::make_unique(name)); + return functions_.back().get(); +} + +} // namespace ir diff --git a/src/ir/Type.cpp b/src/ir/Type.cpp index ac51dba..30fb7cf 100644 --- a/src/ir/Type.cpp +++ b/src/ir/Type.cpp @@ -1,4 +1,16 @@ -// IR 类型系统: -// - i32/f32/void、指针、数组、函数类型等 -// - 按 SysY 支持范围裁剪并逐步补齐 +// 极简类型系统:仅支持 void 与 i32。 +#include "ir/IR.h" +namespace ir { + +std::shared_ptr Type::Void() { + static auto ty = std::make_shared(Kind::Void); + return ty; +} + +std::shared_ptr Type::Int32() { + static auto ty = std::make_shared(Kind::Int32); + return ty; +} + +} // namespace ir diff --git a/src/ir/Value.cpp b/src/ir/Value.cpp index bd73cc7..d1f1c99 100644 --- a/src/ir/Value.cpp +++ b/src/ir/Value.cpp @@ -1,4 +1,9 @@ -// SSA 值体系抽象: -// - 常量、参数、指令结果等统一为 Value -// - 提供类型信息与使用/被使用关系(按需要实现) +#include "ir/IR.h" +namespace ir { + +ConstantInt::ConstantInt(int v) : Value(Type::Int32(), ""), value_(v) { + set_name(std::to_string(v)); +} + +} // namespace ir diff --git a/src/ir/analysis/DominatorTree.cpp b/src/ir/analysis/DominatorTree.cpp index eaf7269..3b3a25f 100644 --- a/src/ir/analysis/DominatorTree.cpp +++ b/src/ir/analysis/DominatorTree.cpp @@ -1,4 +1 @@ -// 支配树分析: -// - 构建/查询 Dominator Tree 及相关关系 -// - 为 mem2reg、CFG 优化与循环分析提供基础能力 - +#include diff --git a/src/ir/analysis/LoopInfo.cpp b/src/ir/analysis/LoopInfo.cpp index 9793dc6..3b3a25f 100644 --- a/src/ir/analysis/LoopInfo.cpp +++ b/src/ir/analysis/LoopInfo.cpp @@ -1,4 +1 @@ -// 循环分析: -// - 识别循环结构与层级关系 -// - 为后续优化(可选)提供循环信息 - +#include diff --git a/src/ir/passes/CFGSimplify.cpp b/src/ir/passes/CFGSimplify.cpp index 3779397..3b3a25f 100644 --- a/src/ir/passes/CFGSimplify.cpp +++ b/src/ir/passes/CFGSimplify.cpp @@ -1,4 +1 @@ -// CFG 简化: -// - 删除不可达块、合并空块、简化分支等 -// - 改善 IR 结构,便于后续优化与后端生成 - +#include diff --git a/src/ir/passes/ConstFold.cpp b/src/ir/passes/ConstFold.cpp index 19f2d43..3b3a25f 100644 --- a/src/ir/passes/ConstFold.cpp +++ b/src/ir/passes/ConstFold.cpp @@ -1,4 +1 @@ -// IR 常量折叠: -// - 折叠可判定的常量表达式 -// - 简化常量控制流分支(按实现范围裁剪) - +#include diff --git a/src/ir/passes/DCE.cpp b/src/ir/passes/DCE.cpp index 5a0db91..3b3a25f 100644 --- a/src/ir/passes/DCE.cpp +++ b/src/ir/passes/DCE.cpp @@ -1,4 +1 @@ -// 死代码删除(DCE): -// - 删除无用指令与无用基本块 -// - 通常与 CFG 简化配合使用 - +#include diff --git a/src/ir/passes/Mem2Reg.cpp b/src/ir/passes/Mem2Reg.cpp index 0b052ba..3b3a25f 100644 --- a/src/ir/passes/Mem2Reg.cpp +++ b/src/ir/passes/Mem2Reg.cpp @@ -1,4 +1 @@ -// Mem2Reg(SSA 构造): -// - 将局部变量的 alloca/load/store 提升为 SSA 形式 -// - 插入 PHI 并重写使用,依赖支配树等分析 - +#include diff --git a/src/ir/passes/PassManager.cpp b/src/ir/passes/PassManager.cpp index d08d611..3b3a25f 100644 --- a/src/ir/passes/PassManager.cpp +++ b/src/ir/passes/PassManager.cpp @@ -1,4 +1 @@ -// IR Pass 管理: -// - 按优化级别组织优化 pipeline -// - 统一运行 pass、统计与调试输出(按需要扩展) - +#include diff --git a/src/irgen/IRGen.h b/src/irgen/IRGen.h new file mode 100644 index 0000000..4c65dd6 --- /dev/null +++ b/src/irgen/IRGen.h @@ -0,0 +1,14 @@ +// 将 AST 翻译为极简 IR。 +#pragma once + +#include + +namespace ast { +struct CompUnit; +} + +namespace ir { +class Module; +} + +std::unique_ptr GenerateIR(const ast::CompUnit& ast); diff --git a/src/irgen/IRGenDecl.cpp b/src/irgen/IRGenDecl.cpp index 3ff8aec..14d8a8c 100644 --- a/src/irgen/IRGenDecl.cpp +++ b/src/irgen/IRGenDecl.cpp @@ -1,4 +1 @@ -// 声明翻译模块: -// - 处理全局变量与局部变量声明 -// - 处理数组初始化、空间分配与初值生成等 - +#include "irgen/IRGen.h" diff --git a/src/irgen/IRGenDriver.cpp b/src/irgen/IRGenDriver.cpp index ca7d479..468dcf3 100644 --- a/src/irgen/IRGenDriver.cpp +++ b/src/irgen/IRGenDriver.cpp @@ -1,4 +1,95 @@ -// IR 生成驱动(Driver): -// - 驱动 Visitor 遍历 AST,调度各子模块完成翻译 -// - 统一管理模块级翻译入口与上下文(Module/IRBuilder 等) -// - 组织函数/语句/表达式/声明等翻译流程 +#include "irgen/IRGen.h" + +#include +#include +#include +#include +#include +#include + +#include "ast/AstNodes.h" +#include "ir/IR.h" + +namespace { + +class IRGenImpl { + public: + explicit IRGenImpl(ir::Module& module) + : module_(module), + func_(module_.CreateFunction("main")), + builder_(func_->entry()) {} + + void Gen(const ast::CompUnit& ast) { + if (!ast.func || !ast.func->body) { + throw std::runtime_error("AST 不完整:缺少 main 定义"); + } + GenBlock(*ast.func->body); + } + + std::unique_ptr TakeModule() { + return std::make_unique(std::move(module_)); + } + + private: + void GenBlock(const ast::Block& block) { + for (const auto& decl : block.varDecls) { + ir::Value* init = nullptr; + if (decl->init) { + init = GenExpr(*decl->init); + } else { + const_pool_.push_back(std::make_unique(0)); + init = const_pool_.back().get(); + } + locals_[decl->name] = init; + } + for (const auto& stmt : block.stmts) { + if (auto ret = dynamic_cast(stmt.get())) { + ir::Value* v = GenExpr(*ret->value); + builder_.CreateRet(v); + } + } + } + + ir::Value* GenExpr(const ast::Expr& expr) { + if (auto num = dynamic_cast(&expr)) { + const_pool_.push_back(std::make_unique(num->value)); + return const_pool_.back().get(); + } + if (auto var = dynamic_cast(&expr)) { + auto it = locals_.find(var->name); + if (it == locals_.end()) { + throw std::runtime_error("变量未找到: " + var->name); + } + return it->second; + } + if (auto bin = dynamic_cast(&expr)) { + auto* lhs = GenExpr(*bin->lhs); + auto* rhs = GenExpr(*bin->rhs); + std::string name = "%t" + std::to_string(temp_index_++); + if (bin->op == ast::BinaryOp::Add) { + return builder_.CreateBinary(ir::Opcode::Add, lhs, rhs, name); + } + if (bin->op == ast::BinaryOp::Sub) { + // 当前子集只需要加法,减法复用 add 但保留分支,便于扩展 + return builder_.CreateBinary(ir::Opcode::Add, lhs, rhs, name); + } + } + throw std::runtime_error("不支持的表达式类型"); + } + + ir::Module& module_; + ir::Function* func_; + ir::IRBuilder builder_; + std::unordered_map locals_; + std::vector> const_pool_; + int temp_index_ = 0; +}; + +} // namespace + +std::unique_ptr GenerateIR(const ast::CompUnit& ast) { + auto module = std::make_unique(); + IRGenImpl gen(*module); + gen.Gen(ast); + return module; +} diff --git a/src/irgen/IRGenExp.cpp b/src/irgen/IRGenExp.cpp index 30a9ff0..14d8a8c 100644 --- a/src/irgen/IRGenExp.cpp +++ b/src/irgen/IRGenExp.cpp @@ -1,4 +1 @@ -// 表达式翻译模块: -// - 处理算术运算、比较、逻辑运算、函数调用等表达式 -// - 生成对应的 IR 指令并返回 SSA 值 - +#include "irgen/IRGen.h" diff --git a/src/irgen/IRGenFunc.cpp b/src/irgen/IRGenFunc.cpp index aa22700..14d8a8c 100644 --- a/src/irgen/IRGenFunc.cpp +++ b/src/irgen/IRGenFunc.cpp @@ -1,4 +1 @@ -// 函数翻译模块: -// - 处理函数定义、参数列表与返回值翻译 -// - 创建并填充对应的 IR Function 对象 - +#include "irgen/IRGen.h" diff --git a/src/irgen/IRGenStmt.cpp b/src/irgen/IRGenStmt.cpp index 63447cf..14d8a8c 100644 --- a/src/irgen/IRGenStmt.cpp +++ b/src/irgen/IRGenStmt.cpp @@ -1,4 +1 @@ -// 语句翻译模块: -// - 处理 if/while/return 等控制流构造 -// - 负责基本块创建、分支跳转与控制流收束 - +#include "irgen/IRGen.h" diff --git a/src/main.cpp b/src/main.cpp index bfbc3fa..c97faab 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,30 +1,29 @@ +#include #include -#include -static void PrintUsage(const char* argv0) { - std::cerr << "用法: " << (argv0 ? argv0 : "compiler") << " [options]\n"; - std::cerr << "说明: 当前为工程骨架阶段,暂不执行完整编译流程,仅用于验证可编译/可链接。\n"; -} +#include "frontend/AntlrDriver.h" +#include "frontend/AstBuilder.h" +#include "ir/IR.h" +#include "irgen/IRGen.h" +#include "sem/Sema.h" +#include "utils/CLI.h" +#include "utils/Log.h" +#include "ast/AstNodes.h" int main(int argc, char** argv) { - if (argc <= 1) { - PrintUsage(argv[0]); - return 0; - } + try { + auto opts = ParseCLI(argc, argv); + auto antlr = ParseFileWithAntlr(opts.input); + auto ast = BuildAst(antlr.tree); + ast::PrintAST(*ast); // 调试 AST + ast = RunSema(std::move(ast)); + auto module = GenerateIR(*ast); - std::string input_path = argv[1]; - if (input_path == "-h" || input_path == "--help") { - PrintUsage(argv[0]); - return 0; + ir::IRPrinter printer; + printer.Print(*module); + } catch (const std::exception& ex) { + LOG_ERROR(ex.what()); + return 1; } - - // TODO: 后续在此接入完整流水线: - // 1) frontend: ANTLR 解析 + AST 构建 - // 2) sem: 语义分析 - // 3) irgen: AST -> IR - // 4) ir passes: 可选优化 - // 5) mir/backend: AArch64 指令选择、寄存器分配、栈帧、汇编输出 - (void)input_path; - return 0; } diff --git a/src/sem/ConstEval.cpp b/src/sem/ConstEval.cpp index 3e2f66e..0e11073 100644 --- a/src/sem/ConstEval.cpp +++ b/src/sem/ConstEval.cpp @@ -1,4 +1 @@ -// 常量求值: -// - 处理数组维度、全局初始化、const 表达式等编译期可计算场景 -// - 为语义分析与 IR 生成提供常量折叠/常量值信息 - +#include diff --git a/src/sem/Sema.cpp b/src/sem/Sema.cpp index f25fab6..de49f5e 100644 --- a/src/sem/Sema.cpp +++ b/src/sem/Sema.cpp @@ -1,5 +1,52 @@ -// 语义分析主流程: -// - 符号解析与绑定、类型检查、控制流规则检查 -// - 记录/插入必要的隐式转换(或在节点上标注) -// - 输出为“带类型 / 符号 / 常量信息”的 AST +// 极简语义分析:只检查变量是否先声明再使用。 +#include "sem/Sema.h" +#include +#include +#include + +#include "ast/AstNodes.h" +#include "sem/SymbolTable.h" + +namespace { + +class SemaVisitor { + public: + explicit SemaVisitor(SymbolTable& table) : table_(table) {} + + void CheckBlock(const ast::Block& block) { + for (const auto& decl : block.varDecls) { + table_.Add(decl->name); + if (decl->init) CheckExpr(*decl->init); + } + for (const auto& stmt : block.stmts) { + if (auto ret = dynamic_cast(stmt.get())) { + CheckExpr(*ret->value); + } + } + } + + void CheckExpr(const ast::Expr& expr) { + if (auto var = dynamic_cast(&expr)) { + if (!table_.Contains(var->name)) { + throw std::runtime_error("使用了未定义的变量: " + var->name); + } + } else if (auto bin = dynamic_cast(&expr)) { + CheckExpr(*bin->lhs); + CheckExpr(*bin->rhs); + } + } + + private: + SymbolTable& table_; +}; + +} // namespace + +std::shared_ptr RunSema(std::shared_ptr ast) { + if (!ast || !ast->func || !ast->func->body) return ast; + SymbolTable table; + SemaVisitor visitor(table); + visitor.CheckBlock(*ast->func->body); + return ast; +} diff --git a/src/sem/Sema.h b/src/sem/Sema.h new file mode 100644 index 0000000..b3ce427 --- /dev/null +++ b/src/sem/Sema.h @@ -0,0 +1,11 @@ +// 语义检查(极简版)。 +#pragma once + +#include + +namespace ast { +struct CompUnit; +} + +// 返回经过检查的 AST(当前直接返回原 AST)。 +std::shared_ptr RunSema(std::shared_ptr ast); diff --git a/src/sem/SymbolTable.cpp b/src/sem/SymbolTable.cpp index 3d3327e..d2f10cb 100644 --- a/src/sem/SymbolTable.cpp +++ b/src/sem/SymbolTable.cpp @@ -1,4 +1 @@ -// 符号表与作用域管理: -// - 支持嵌套作用域(块/函数/全局) -// - 变量/函数/参数/常量的注册、查找与遮蔽规则 - +#include "sem/SymbolTable.h" diff --git a/src/sem/SymbolTable.h b/src/sem/SymbolTable.h new file mode 100644 index 0000000..eaf565e --- /dev/null +++ b/src/sem/SymbolTable.h @@ -0,0 +1,16 @@ +// 极简符号表:记录局部变量是否定义。 +#pragma once + +#include +#include + +class SymbolTable { + public: + void Add(const std::string& name) { table_[name] = true; } + bool Contains(const std::string& name) const { + return table_.find(name) != table_.end(); + } + + private: + std::unordered_map table_; +}; diff --git a/src/utils/CLI.cpp b/src/utils/CLI.cpp index 6e84a34..4b4925e 100644 --- a/src/utils/CLI.cpp +++ b/src/utils/CLI.cpp @@ -1,5 +1,12 @@ -// 命令行参数解析: -// - 解析输入/输出路径 -// - 解析输出类型(IR/MIR/ASM)与优化级别等选项 -// - 将参数传递给 main.cpp 的编译流水线驱动 +#include "utils/CLI.h" +#include + +CLIOptions ParseCLI(int argc, char** argv) { + if (argc <= 1) { + throw std::runtime_error("用法: compiler "); + } + CLIOptions opt; + opt.input = argv[1]; + return opt; +} diff --git a/src/utils/CLI.h b/src/utils/CLI.h new file mode 100644 index 0000000..f69c583 --- /dev/null +++ b/src/utils/CLI.h @@ -0,0 +1,10 @@ +// 简易命令行解析:仅支持输入文件路径。 +#pragma once + +#include + +struct CLIOptions { + std::string input; +}; + +CLIOptions ParseCLI(int argc, char** argv); diff --git a/src/utils/Log.cpp b/src/utils/Log.cpp index 4cb15da..b8e7a23 100644 --- a/src/utils/Log.cpp +++ b/src/utils/Log.cpp @@ -1,4 +1 @@ -// 日志模块: -// - 统一输出调试信息、阶段信息与错误信息 -// - 提供可配置的日志级别与输出位置(按需要实现) - +#include "utils/Log.h" diff --git a/src/utils/Log.h b/src/utils/Log.h new file mode 100644 index 0000000..09d31a7 --- /dev/null +++ b/src/utils/Log.h @@ -0,0 +1,7 @@ +// 轻量日志接口。 +#pragma once + +#include + +#define LOG_INFO(msg) std::cerr << "[info] " << msg << "\n" +#define LOG_ERROR(msg) std::cerr << "[error] " << msg << "\n" diff --git a/test/test_case/simple_add.sy b/test/test_case/simple_add.sy new file mode 100644 index 0000000..dd3b921 --- /dev/null +++ b/test/test_case/simple_add.sy @@ -0,0 +1,5 @@ +int main() { + int a = 1; + int b = 2; + return a + b; +}