feat(frontend): 完成 Lab1 SysY 文法扩展与语法树输出支持

brkstar 3 weeks ago
parent be143f5326
commit c2759c27cd

@ -3,12 +3,10 @@
#pragma once
#include <any>
#include <memory>
#include <string>
#include <unordered_map>
#include "SysYBaseVisitor.h"
#include "SysYParser.h"
#include "ir/IR.h"
#include "sem/Sema.h"
@ -20,31 +18,26 @@ class IRBuilder;
class Value;
}
class IRGenImpl final : public SysYBaseVisitor {
class IRGenImpl {
public:
IRGenImpl(ir::Module& module, const SemanticContext& sema);
std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override;
std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override;
std::any visitBlockStmt(SysYParser::BlockStmtContext* ctx) override;
std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override;
std::any visitDecl(SysYParser::DeclContext* ctx) override;
std::any visitStmt(SysYParser::StmtContext* ctx) override;
std::any visitVarDef(SysYParser::VarDefContext* ctx) override;
std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override;
std::any visitParenExp(SysYParser::ParenExpContext* ctx) override;
std::any visitNumberExp(SysYParser::NumberExpContext* ctx) override;
std::any visitVarExp(SysYParser::VarExpContext* ctx) override;
std::any visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) override;
void Gen(SysYParser::CompUnitContext& cu);
private:
enum class BlockFlow {
Continue,
Terminated,
};
BlockFlow VisitBlockItemResult(SysYParser::BlockItemContext& item);
ir::Value* EvalExpr(SysYParser::ExpContext& expr);
void GenFuncDef(SysYParser::FuncDefContext& func);
void GenBlock(SysYParser::BlockContext& block);
bool GenBlockItem(SysYParser::BlockItemContext& item);
void GenDecl(SysYParser::DeclContext& decl);
bool GenStmt(SysYParser::StmtContext& stmt);
void GenVarDecl(SysYParser::VarDeclContext& decl);
void GenReturnStmt(SysYParser::ReturnStmtContext& ret);
ir::Value* GenExpr(SysYParser::ExpContext& expr);
ir::Value* GenAddExpr(SysYParser::AddExpContext& add);
ir::Value* GenMulExpr(SysYParser::MulExpContext& mul);
ir::Value* GenUnaryExpr(SysYParser::UnaryExpContext& unary);
ir::Value* GenPrimary(SysYParser::PrimaryContext& primary);
ir::Module& module_;
const SemanticContext& sema_;

@ -7,19 +7,19 @@
// Result of semantic analysis that later stages consume: an association from
// each variable-use parse node to the VarDef parse node it refers to.
class SemanticContext {
public:
// Records that `use` refers to `decl`. Binding the same `use` again replaces
// the previously stored declaration.
void BindVarUse(SysYParser::VarContext* use,
void BindVarUse(SysYParser::LValContext* use,
SysYParser::VarDefContext* decl) {
var_uses_[use] = decl;
}
// Returns the declaration previously bound to `use`, or nullptr when no
// binding was recorded for that node.
SysYParser::VarDefContext* ResolveVarUse(
const SysYParser::VarContext* use) const {
const SysYParser::LValContext* use) const {
auto it = var_uses_.find(use);
return it == var_uses_.end() ? nullptr : it->second;
}
private:
// Use-site node -> declaring VarDef node, keyed by the node's address.
std::unordered_map<const SysYParser::VarContext*,
std::unordered_map<const SysYParser::LValContext*,
SysYParser::VarDefContext*>
var_uses_;
};

@ -1,68 +1,65 @@
// SysY 子集语法:支持形如
// int main() { int a = 1; int b = 2; return a + b; }
// 的最小返回表达式编译。
// 后续需要自行添加
grammar SysY;
/*===-------------------------------------------===*/
/* Lexer rules */
/*===-------------------------------------------===*/
INT: 'int';
RETURN: 'return';
ASSIGN: '=';
ADD: '+';
LPAREN: '(';
RPAREN: ')';
LBRACE: '{';
RBRACE: '}';
SEMICOLON: ';';
ID: [a-zA-Z_][a-zA-Z_0-9]*;
ILITERAL: [0-9]+;
compUnit
: (decl | funcDef)+ EOF
;
WS: [ \t\r\n] -> skip;
LINECOMMENT: '//' ~[\r\n]* -> skip;
BLOCKCOMMENT: '/*' .*? '*/' -> skip;
decl
: constDecl
| varDecl
;
/*===-------------------------------------------===*/
/* Syntax rules */
/*===-------------------------------------------===*/
constDecl
: Const bType constDef (Comma constDef)* Semi
;
compUnit
: funcDef EOF
varDecl
: bType varDef (Comma varDef)* Semi
;
decl
: btype varDef SEMICOLON
bType
: Int
| Float
;
btype
: INT
constDef
: Ident (L_BRACK constExp R_BRACK)* Assign constInitVal
;
varDef
: lValue (ASSIGN initValue)?
: Ident (L_BRACK constExp R_BRACK)* (Assign initVal)?
;
constInitVal
: constExp
| L_BRACE (constInitVal (Comma constInitVal)*)? R_BRACE
;
initValue
initVal
: exp
| L_BRACE (initVal (Comma initVal)*)? R_BRACE
;
funcDef
: funcType ID LPAREN RPAREN blockStmt
: funcType Ident L_PAREN funcFParams? R_PAREN block
;
funcType
: INT
: Void
| Int
| Float
;
funcFParams
: funcFParam (Comma funcFParam)*
;
funcFParam
: bType Ident (L_BRACK R_BRACK (L_BRACK exp R_BRACK)*)?
;
blockStmt
: LBRACE blockItem* RBRACE
block
: L_BRACE blockItem* R_BRACE
;
blockItem
@ -71,28 +68,231 @@ blockItem
;
stmt
: returnStmt
: assignStmt
| expStmt
| block
| ifStmt
| whileStmt
| breakStmt
| continueStmt
| returnStmt
;
assignStmt
: lVal Assign exp Semi
;
expStmt
: exp? Semi
;
ifStmt
: If L_PAREN cond R_PAREN stmt (Else stmt)?
;
whileStmt
: While L_PAREN cond R_PAREN stmt
;
breakStmt
: Break Semi
;
continueStmt
: Continue Semi
;
returnStmt
: RETURN exp SEMICOLON
: Return exp? Semi
;
exp
: LPAREN exp RPAREN # parenExp
| var # varExp
| number # numberExp
| exp ADD exp # additiveExp
: addExp
;
cond
: lOrExp
;
lVal
: Ident (L_BRACK exp R_BRACK)*
;
primary
: Number
| lVal
| L_PAREN exp R_PAREN
;
unaryExp
: primary
| Ident L_PAREN funcRParams? R_PAREN
| unaryOp unaryExp
;
unaryOp
: Add
| Sub
| Not
;
funcRParams
: exp (Comma exp)*
;
mulExp
: unaryExp ((Mul | Div | Mod) unaryExp)*
;
addExp
: mulExp ((Add | Sub) mulExp)*
;
relExp
: addExp ((Lt | Gt | Le | Ge) addExp)*
;
eqExp
: relExp ((Eq | Ne) relExp)*
;
lAndExp
: eqExp (And eqExp)*
;
lOrExp
: lAndExp (Or lAndExp)*
;
constExp
: addExp
;
Const : 'const';
Int : 'int';
Float : 'float';
Void : 'void';
If : 'if';
Else : 'else';
While : 'while';
Break : 'break';
Continue : 'continue';
Return : 'return';
Add : '+';
Sub : '-';
Mul : '*';
Div : '/';
Mod : '%';
Assign : '=';
Eq : '==';
Ne : '!=';
Lt : '<';
Gt : '>';
Le : '<=';
Ge : '>=';
Not : '!';
And : '&&';
Or : '||';
Comma : ',';
Semi : ';';
L_PAREN : '(';
R_PAREN : ')';
L_BRACE : '{';
R_BRACE : '}';
L_BRACK : '[';
R_BRACK : ']';
Ident
: IdentifierNondigit IdentifierChar*
;
Number
: HexFloatConst
| DecFloatConst
| HexIntConst
| OctIntConst
| DecIntConst
;
WS
: [ \t\r\n]+ -> skip
;
COMMENT
: '//' ~[\r\n]* -> skip
;
BLOCK_COMMENT
: '/*' .*? '*/' -> skip
;
fragment IdentifierNondigit
: [a-zA-Z_]
;
fragment IdentifierChar
: IdentifierNondigit
| [0-9]
;
fragment DecIntConst
: '0'
| [1-9] [0-9]*
;
fragment OctIntConst
: '0' [0-7]+
;
fragment HexIntConst
: HexPrefix HexDigit+
;
fragment DecFloatConst
: FractionalConst ExponentPart?
| DigitSequence ExponentPart
;
fragment HexFloatConst
: HexPrefix HexFractionalConst BinaryExponentPart
| HexPrefix HexDigit+ BinaryExponentPart
;
fragment FractionalConst
: DigitSequence? Dot DigitSequence
| DigitSequence Dot
;
fragment HexFractionalConst
: HexDigit* Dot HexDigit+
| HexDigit+ Dot
;
fragment ExponentPart
: [eE] Sign? DigitSequence
;
fragment BinaryExponentPart
: [pP] Sign? DigitSequence
;
fragment Sign
: [+-]
;
fragment HexPrefix
: '0' [xX]
;
var
: ID
fragment DigitSequence
: [0-9]+
;
lValue
: ID
fragment HexDigit
: [0-9a-fA-F]
;
number
: ILITERAL
fragment Dot
: '.'
;

@ -6,102 +6,64 @@
#include "ir/IR.h"
#include "utils/Log.h"
namespace {
std::string GetLValueName(SysYParser::LValueContext& lvalue) {
if (!lvalue.ID()) {
throw std::runtime_error(FormatError("irgen", "非法左值"));
}
return lvalue.ID()->getText();
}
} // namespace
std::any IRGenImpl::visitBlockStmt(SysYParser::BlockStmtContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少语句块"));
}
for (auto* item : ctx->blockItem()) {
// Generates IR for the items of `block` in source order, skipping null
// entries, and stops as soon as one item reports that it terminated the block.
void IRGenImpl::GenBlock(SysYParser::BlockContext& block) {
for (auto* item : block.blockItem()) {
if (item) {
if (VisitBlockItemResult(*item) == BlockFlow::Terminated) {
if (GenBlockItem(*item)) {
// The current grammar requires `return` to be the last statement in a
// block, so generation can stop once one has been emitted.
break;
}
}
}
return {};
}
IRGenImpl::BlockFlow IRGenImpl::VisitBlockItemResult(
SysYParser::BlockItemContext& item) {
return std::any_cast<BlockFlow>(item.accept(this));
}
std::any IRGenImpl::visitBlockItem(SysYParser::BlockItemContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少块内项"));
bool IRGenImpl::GenBlockItem(SysYParser::BlockItemContext& item) {
if (item.decl()) {
GenDecl(*item.decl());
return false;
}
if (ctx->decl()) {
ctx->decl()->accept(this);
return BlockFlow::Continue;
}
if (ctx->stmt()) {
return ctx->stmt()->accept(this);
if (item.stmt()) {
return GenStmt(*item.stmt());
}
throw std::runtime_error(FormatError("irgen", "暂不支持的语句或声明"));
}
// 变量声明的 IR 生成目前也是最小实现:
// - 先检查声明的基础类型,当前仅支持局部 int
// - 再把 Decl 中的变量定义交给 visitVarDef 继续处理。
//
// 和更完整的版本相比,这里还没有:
// - 一个 Decl 中多个变量定义的顺序处理;
// - const、数组、全局变量等不同声明形态
// - 更丰富的类型系统。
std::any IRGenImpl::visitDecl(SysYParser::DeclContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少变量声明"));
}
if (!ctx->btype() || !ctx->btype()->INT()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持局部 int 变量声明"));
void IRGenImpl::GenDecl(SysYParser::DeclContext& decl) {
if (decl.varDecl()) {
GenVarDecl(*decl.varDecl());
return;
}
auto* var_def = ctx->varDef();
if (!var_def) {
throw std::runtime_error(FormatError("irgen", "非法变量声明"));
}
var_def->accept(this);
return {};
throw std::runtime_error(FormatError("irgen", "暂不支持的声明类型"));
}
// 当前仍是教学用的最小版本,因此这里只支持:
// - 局部 int 变量;
// - 标量初始化;
// - 一个 VarDef 对应一个槽位。
std::any IRGenImpl::visitVarDef(SysYParser::VarDefContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少变量定义"));
}
if (!ctx->lValue()) {
throw std::runtime_error(FormatError("irgen", "变量声明缺少名称"));
void IRGenImpl::GenVarDecl(SysYParser::VarDeclContext& decl) {
if (!decl.bType() || !decl.bType()->Int()) {
throw std::runtime_error(FormatError("irgen", "当前 IR 仅支持 int 标量局部变量"));
}
GetLValueName(*ctx->lValue());
if (storage_map_.find(ctx) != storage_map_.end()) {
throw std::runtime_error(FormatError("irgen", "声明重复生成存储槽位"));
}
auto* slot = builder_.CreateAllocaI32(module_.GetContext().NextTemp());
storage_map_[ctx] = slot;
ir::Value* init = nullptr;
if (auto* init_value = ctx->initValue()) {
if (!init_value->exp()) {
throw std::runtime_error(FormatError("irgen", "当前不支持聚合初始化"));
for (auto* def : decl.varDef()) {
if (!def) {
continue;
}
if (storage_map_.find(def) != storage_map_.end()) {
throw std::runtime_error(FormatError("irgen", "声明重复生成存储槽位"));
}
if (!def->constExp().empty()) {
throw std::runtime_error(
FormatError("irgen", "当前 IR 仅支持 int 标量局部变量"));
}
auto* slot = builder_.CreateAllocaI32(module_.GetContext().NextTemp());
storage_map_[def] = slot;
ir::Value* init = builder_.CreateConstInt(0);
if (auto* init_val = def->initVal()) {
if (!init_val->exp()) {
throw std::runtime_error(
FormatError("irgen", "当前 IR 仅支持表达式初始化"));
}
init = GenExpr(*init_val->exp());
}
init = EvalExpr(*init_value->exp());
} else {
init = builder_.CreateConstInt(0);
builder_.CreateStore(init, slot);
}
builder_.CreateStore(init, slot);
return {};
}

@ -10,6 +10,6 @@ std::unique_ptr<ir::Module> GenerateIR(SysYParser::CompUnitContext& tree,
const SemanticContext& sema) {
auto module = std::make_unique<ir::Module>();
IRGenImpl gen(*module, sema);
tree.accept(&gen);
gen.Gen(tree);
return module;
}

@ -6,75 +6,112 @@
#include "ir/IR.h"
#include "utils/Log.h"
// 表达式生成当前也只实现了很小的一个子集。
// 目前支持:
// - 整数字面量
// - 普通局部变量读取
// - 括号表达式
// - 二元加法
//
// 还未支持:
// - 减乘除与一元运算
// - 赋值表达式
// - 函数调用
// - 数组、指针、下标访问
// - 条件与比较表达式
// - ...
ir::Value* IRGenImpl::EvalExpr(SysYParser::ExpContext& expr) {
return std::any_cast<ir::Value*>(expr.accept(this));
// Lowers an `exp` node by handing its addExp child to GenAddExpr and returning
// the resulting value. Throws std::runtime_error when the addExp child is
// absent.
ir::Value* IRGenImpl::GenExpr(SysYParser::ExpContext& expr) {
  if (auto* add_expr = expr.addExp()) {
    return GenAddExpr(*add_expr);
  }
  throw std::runtime_error(FormatError("irgen", "非法表达式"));
}
// Lowers an addExp node (mulExp operands joined by '+' or '-') into a chain of
// binary instructions, combining operands left to right and returning the
// final accumulated value.
// Throws std::runtime_error when there are no operands, when an operator node
// is null, or when the operator text is neither "+" nor "-".
ir::Value* IRGenImpl::GenAddExpr(SysYParser::AddExpContext& add) {
const auto& terms = add.mulExp();
if (terms.empty()) {
throw std::runtime_error(FormatError("irgen", "空加法表达式"));
}
std::any IRGenImpl::visitParenExp(SysYParser::ParenExpContext* ctx) {
if (!ctx || !ctx->exp()) {
throw std::runtime_error(FormatError("irgen", "非法括号表达式"));
// Seed the accumulator with the first operand; each later operand is folded
// into it.
ir::Value* acc = GenMulExpr(*terms[0]);
for (size_t i = 1; i < terms.size(); ++i) {
ir::Value* rhs = GenMulExpr(*terms[i]);
std::string name = module_.GetContext().NextTemp();
// Per the addExp grammar rule the children alternate operand, operator,
// operand, ..., so the operator joining terms[i-1] and terms[i] sits at
// child index 2*i-1.
auto* op = add.children[2 * i - 1];
if (!op) {
throw std::runtime_error(FormatError("irgen", "加法表达式缺少运算符"));
}
const std::string text = op->getText();
if (text == "+") {
acc = builder_.CreateBinary(ir::Opcode::Add, acc, rhs, name);
} else if (text == "-") {
acc = builder_.CreateBinary(ir::Opcode::Sub, acc, rhs, name);
} else {
throw std::runtime_error(FormatError("irgen", "暂不支持的加法运算符: " + text));
}
}
return EvalExpr(*ctx->exp());
return acc;
}
// Lowers a mulExp node (unaryExp operands joined by multiplicative operators)
// left to right and returns the accumulated value.
// Only "*" is lowered here; the grammar also admits '/' and '%', which reach
// the final throw inside the loop.
// Throws std::runtime_error when there are no operands, when an operator node
// is null, or when the operator is anything other than "*".
ir::Value* IRGenImpl::GenMulExpr(SysYParser::MulExpContext& mul) {
const auto& terms = mul.unaryExp();
if (terms.empty()) {
throw std::runtime_error(FormatError("irgen", "空乘法表达式"));
}
std::any IRGenImpl::visitNumberExp(SysYParser::NumberExpContext* ctx) {
if (!ctx || !ctx->number() || !ctx->number()->ILITERAL()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持整数字面量"));
ir::Value* acc = GenUnaryExpr(*terms[0]);
for (size_t i = 1; i < terms.size(); ++i) {
ir::Value* rhs = GenUnaryExpr(*terms[i]);
std::string name = module_.GetContext().NextTemp();
// Per the mulExp grammar rule the children alternate operand, operator,
// operand, ..., so the operator before terms[i] sits at child index 2*i-1.
auto* op = mul.children[2 * i - 1];
if (!op) {
throw std::runtime_error(FormatError("irgen", "乘法表达式缺少运算符"));
}
const std::string text = op->getText();
if (text == "*") {
acc = builder_.CreateBinary(ir::Opcode::Mul, acc, rhs, name);
continue;
}
throw std::runtime_error(
FormatError("irgen", "当前 IR 暂不支持的乘法类运算符: " + text));
}
return static_cast<ir::Value*>(
builder_.CreateConstInt(std::stoi(ctx->number()->getText())));
return acc;
}
// 变量使用的处理流程:
// 1. 先通过语义分析结果把变量使用绑定回声明;
// 2. 再通过 storage_map_ 找到该声明对应的栈槽位;
// 3. 最后生成 load,把内存中的值读出来。
//
// 因此当前 IRGen 自己不再做名字查找,而是直接消费 Sema 的绑定结果。
std::any IRGenImpl::visitVarExp(SysYParser::VarExpContext* ctx) {
if (!ctx || !ctx->var() || !ctx->var()->ID()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持普通整型变量"));
}
auto* decl = sema_.ResolveVarUse(ctx->var());
if (!decl) {
throw std::runtime_error(
FormatError("irgen",
"变量使用缺少语义绑定: " + ctx->var()->ID()->getText()));
// Lowers a unaryExp node.
// - A primary operand is forwarded to GenPrimary.
// - A prefixed operand (unaryOp unaryExp) supports "+" (the operand's value is
//   returned unchanged) and "-" (emitted as the subtraction 0 - operand); any
//   other operator throws std::runtime_error.
// - The remaining grammar alternative, a function call, is rejected with
//   std::runtime_error.
ir::Value* IRGenImpl::GenUnaryExpr(SysYParser::UnaryExpContext& unary) {
if (unary.primary()) {
return GenPrimary(*unary.primary());
}
auto it = storage_map_.find(decl);
if (it == storage_map_.end()) {
if (unary.unaryExp()) {
if (!unary.unaryOp()) {
throw std::runtime_error(FormatError("irgen", "一元表达式缺少运算符"));
}
const std::string op = unary.unaryOp()->getText();
if (op == "+") {
return GenUnaryExpr(*unary.unaryExp());
}
if (op == "-") {
// Negation is expressed as a Sub instruction with a constant 0 on the left.
auto* rhs = GenUnaryExpr(*unary.unaryExp());
return builder_.CreateBinary(ir::Opcode::Sub, builder_.CreateConstInt(0),
rhs, module_.GetContext().NextTemp());
}
throw std::runtime_error(
FormatError("irgen",
"变量声明缺少存储槽位: " + ctx->var()->ID()->getText()));
FormatError("irgen", "当前 IR 暂不支持的一元运算符: " + op));
}
return static_cast<ir::Value*>(
builder_.CreateLoad(it->second, module_.GetContext().NextTemp()));
}
throw std::runtime_error(FormatError("irgen", "当前 IR 暂不支持函数调用"));
}
std::any IRGenImpl::visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) {
if (!ctx || !ctx->exp(0) || !ctx->exp(1)) {
throw std::runtime_error(FormatError("irgen", "非法加法表达式"));
// Lowers a primary expression:
// - a Number token becomes an integer constant;
// - an lVal without subscripts becomes a load from the storage slot recorded
//   for its declaration (subscripted lVals are rejected);
// - a parenthesised expression is forwarded to GenExpr.
// Throws std::runtime_error for subscripted lVals, for a use that has no
// semantic binding or no storage slot, and for any other form.
ir::Value* IRGenImpl::GenPrimary(SysYParser::PrimaryContext& primary) {
if (primary.Number()) {
// Base 0 makes std::stoi pick the radix from the literal's prefix
// (0x/0X hex, leading 0 octal, otherwise decimal).
// NOTE(review): the grammar's Number token also matches floating-point
// literals; std::stoi would convert only their leading integer part, and it
// throws std::out_of_range for values that do not fit in int. Confirm both
// cases are handled or rejected before this point.
return builder_.CreateConstInt(std::stoi(primary.Number()->getText(), nullptr, 0));
}
if (primary.lVal()) {
if (!primary.lVal()->exp().empty()) {
throw std::runtime_error(
FormatError("irgen", "当前 IR 暂不支持数组取值表达式"));
}
// Name lookup is not repeated here; the binding recorded in the semantic
// context is used to find the declaration.
auto* decl = sema_.ResolveVarUse(primary.lVal());
if (!decl || !primary.lVal()->Ident()) {
throw std::runtime_error(FormatError("irgen", "变量使用缺少语义绑定"));
}
auto it = storage_map_.find(decl);
if (it == storage_map_.end()) {
throw std::runtime_error(
FormatError("irgen",
"变量声明缺少存储槽位: " + primary.lVal()->Ident()->getText()));
}
return builder_.CreateLoad(it->second, module_.GetContext().NextTemp());
}
if (primary.exp()) {
return GenExpr(*primary.exp());
}
ir::Value* lhs = EvalExpr(*ctx->exp(0));
ir::Value* rhs = EvalExpr(*ctx->exp(1));
return static_cast<ir::Value*>(
builder_.CreateBinary(ir::Opcode::Add, lhs, rhs,
module_.GetContext().NextTemp()));
throw std::runtime_error(FormatError("irgen", "暂不支持的表达式形式"));
}

@ -19,6 +19,15 @@ void VerifyFunctionStructure(const ir::Function& func) {
}
}
// Returns the first function definition in `cu` whose identifier text is
// "main", or nullptr when there is none. Null entries and definitions without
// an identifier token are skipped.
SysYParser::FuncDefContext* FindMainFunc(SysYParser::CompUnitContext& cu) {
  for (auto* candidate : cu.funcDef()) {
    if (!candidate) {
      continue;
    }
    if (!candidate->Ident()) {
      continue;
    }
    if (candidate->Ident()->getText() == "main") {
      return candidate;
    }
  }
  return nullptr;
}
} // namespace
IRGenImpl::IRGenImpl(ir::Module& module, const SemanticContext& sema)
@ -27,61 +36,31 @@ IRGenImpl::IRGenImpl(ir::Module& module, const SemanticContext& sema)
func_(nullptr),
builder_(module.GetContext(), nullptr) {}
// 编译单元的 IR 生成当前只实现了最小功能:
// - Module 已在 GenerateIR 中创建,这里只负责继续生成其中的内容;
// - 当前会读取编译单元中的函数定义,并交给 visitFuncDef 生成函数 IR
//
// 当前还没有实现:
// - 多个函数定义的遍历与生成;
// - 全局变量、全局常量的 IR 生成。
std::any IRGenImpl::visitCompUnit(SysYParser::CompUnitContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少编译单元"));
// Entry point of IR generation for a compilation unit: looks up the `main`
// function definition via FindMainFunc and generates IR for that function.
// Throws std::runtime_error when no `main` definition is found.
void IRGenImpl::Gen(SysYParser::CompUnitContext& cu) {
auto* main_func = FindMainFunc(cu);
if (!main_func) {
throw std::runtime_error(FormatError("irgen", "缺少 main 定义"));
}
auto* func = ctx->funcDef();
if (!func) {
throw std::runtime_error(FormatError("irgen", "缺少函数定义"));
}
func->accept(this);
return {};
GenFuncDef(*main_func);
}
// 函数 IR 生成当前实现了:
// 1. 获取函数名;
// 2. 检查函数返回类型;
// 3. 在 Module 中创建 Function
// 4. 将 builder 插入点设置到入口基本块;
// 5. 继续生成函数体。
//
// 当前还没有实现:
// - 通用函数返回类型处理;
// - 形参列表遍历与参数类型收集;
// - FunctionType 这样的函数类型对象;
// - Argument/形式参数 IR 对象;
// - 入口块中的参数初始化逻辑。
// ...
// 因此这里目前只支持最小的“无参 int 函数”生成。
std::any IRGenImpl::visitFuncDef(SysYParser::FuncDefContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少函数定义"));
}
if (!ctx->blockStmt()) {
void IRGenImpl::GenFuncDef(SysYParser::FuncDefContext& func) {
if (!func.block()) {
throw std::runtime_error(FormatError("irgen", "函数体为空"));
}
if (!ctx->ID()) {
if (!func.Ident()) {
throw std::runtime_error(FormatError("irgen", "缺少函数名"));
}
if (!ctx->funcType() || !ctx->funcType()->INT()) {
throw std::runtime_error(FormatError("irgen", "当前仅支持无参 int 函数"));
if (!func.funcType() || !func.funcType()->Int()) {
throw std::runtime_error(
FormatError("irgen", "当前 IR 仅支持返回 int 的 main 函数"));
}
func_ = module_.CreateFunction(ctx->ID()->getText(), ir::Type::GetInt32Type());
func_ = module_.CreateFunction(func.Ident()->getText(), ir::Type::GetInt32Type());
builder_.SetInsertPoint(func_->GetEntry());
storage_map_.clear();
ctx->blockStmt()->accept(this);
GenBlock(*func.block());
// 语义正确性主要由 sema 保证,这里只兜底检查 IR 结构是否合法。
VerifyFunctionStructure(*func_);
return {};
}

@ -6,34 +6,18 @@
#include "ir/IR.h"
#include "utils/Log.h"
// 语句生成当前只实现了最小子集。
// 目前支持:
// - return <exp>;
//
// 还未支持:
// - 赋值语句
// - if / while 等控制流
// - 空语句、块语句嵌套分发之外的更多语句形态
std::any IRGenImpl::visitStmt(SysYParser::StmtContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少语句"));
}
if (ctx->returnStmt()) {
return ctx->returnStmt()->accept(this);
// Generates IR for a single statement. Only `return` statements are handled:
// one is emitted through GenReturnStmt and true is returned. Every other
// statement form raises std::runtime_error.
bool IRGenImpl::GenStmt(SysYParser::StmtContext& stmt) {
  auto* ret_stmt = stmt.returnStmt();
  if (!ret_stmt) {
    throw std::runtime_error(FormatError("irgen", "暂不支持的语句类型"));
  }
  GenReturnStmt(*ret_stmt);
  return true;
}
std::any IRGenImpl::visitReturnStmt(SysYParser::ReturnStmtContext* ctx) {
if (!ctx) {
throw std::runtime_error(FormatError("irgen", "缺少 return 语句"));
}
if (!ctx->exp()) {
void IRGenImpl::GenReturnStmt(SysYParser::ReturnStmtContext& ret) {
if (!ret.exp()) {
throw std::runtime_error(FormatError("irgen", "return 缺少表达式"));
}
ir::Value* v = EvalExpr(*ctx->exp());
ir::Value* v = GenExpr(*ret.exp());
builder_.CreateRet(v);
return BlockFlow::Terminated;
}

@ -29,6 +29,9 @@ int main(int argc, char** argv) {
}
#if !COMPILER_PARSE_ONLY
if (!opts.emit_ir && !opts.emit_asm) {
return 0;
}
auto* comp_unit = dynamic_cast<SysYParser::CompUnitContext*>(antlr.tree);
if (!comp_unit) {
throw std::runtime_error(FormatError("main", "语法树根节点不是 compUnit"));

@ -1,200 +1,190 @@
#include "sem/Sema.h"
#include <any>
#include <stdexcept>
#include <string>
#include "SysYBaseVisitor.h"
#include "sem/SymbolTable.h"
#include "utils/Log.h"
namespace {
std::string GetLValueName(SysYParser::LValueContext& lvalue) {
if (!lvalue.ID()) {
throw std::runtime_error(FormatError("sema", "非法左值"));
}
return lvalue.ID()->getText();
}
class SemaVisitor final : public SysYBaseVisitor {
public:
std::any visitCompUnit(SysYParser::CompUnitContext* ctx) override {
if (!ctx) {
throw std::runtime_error(FormatError("sema", "缺少编译单元"));
SysYParser::FuncDefContext* FindMainFunc(SysYParser::CompUnitContext& comp_unit) {
SysYParser::FuncDefContext* main_func = nullptr;
for (auto* func : comp_unit.funcDef()) {
if (!func || !func->Ident()) {
continue;
}
auto* func = ctx->funcDef();
if (!func || !func->blockStmt()) {
throw std::runtime_error(FormatError("sema", "缺少 main 函数定义"));
if (func->Ident()->getText() != "main") {
continue;
}
if (!func->ID() || func->ID()->getText() != "main") {
throw std::runtime_error(FormatError("sema", "缺少 main 函数定义"));
}
func->accept(this);
if (!seen_return_) {
throw std::runtime_error(
FormatError("sema", "main 函数必须包含 return 语句"));
if (main_func) {
throw std::runtime_error(FormatError("sema", "main 函数定义重复"));
}
return {};
main_func = func;
}
return main_func;
}
std::any visitFuncDef(SysYParser::FuncDefContext* ctx) override {
if (!ctx || !ctx->blockStmt()) {
throw std::runtime_error(FormatError("sema", "缺少 main 函数定义"));
}
if (!ctx->funcType() || !ctx->funcType()->INT()) {
throw std::runtime_error(FormatError("sema", "当前仅支持 int main"));
}
const auto& items = ctx->blockStmt()->blockItem();
if (items.empty()) {
throw std::runtime_error(
FormatError("sema", "main 函数不能为空,且必须以 return 结束"));
void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table,
SemanticContext& sema);
// Semantic check for an lVal node: resolves its identifier in `table`, records
// the use -> declaration binding in `sema`, then checks every subscript
// expression.
// Throws std::runtime_error when the identifier token is missing or when the
// name is not found in `table`.
void CheckLVal(SysYParser::LValContext& lval, const SymbolTable& table,
SemanticContext& sema) {
if (!lval.Ident()) {
throw std::runtime_error(FormatError("sema", "左值缺少标识符"));
}
const std::string name = lval.Ident()->getText();
auto* decl = table.Lookup(name);
if (!decl) {
throw std::runtime_error(FormatError("sema", "使用了未定义的变量: " + name));
}
// The binding is stored before the subscripts are visited.
sema.BindVarUse(&lval, decl);
for (auto* index : lval.exp()) {
if (index) {
CheckExpr(*index, table, sema);
}
ctx->blockStmt()->accept(this);
return {};
}
}
std::any visitBlockStmt(SysYParser::BlockStmtContext* ctx) override {
if (!ctx) {
throw std::runtime_error(FormatError("sema", "缺少语句块"));
}
const auto& items = ctx->blockItem();
for (size_t i = 0; i < items.size(); ++i) {
auto* item = items[i];
if (!item) {
continue;
}
if (seen_return_) {
throw std::runtime_error(
FormatError("sema", "return 必须是 main 函数中的最后一条语句"));
}
current_item_index_ = i;
total_items_ = items.size();
item->accept(this);
}
return {};
void CheckPrimary(SysYParser::PrimaryContext& primary, const SymbolTable& table,
SemanticContext& sema) {
if (primary.Number()) {
return;
}
std::any visitBlockItem(SysYParser::BlockItemContext* ctx) override {
if (!ctx) {
throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明"));
}
if (ctx->decl()) {
ctx->decl()->accept(this);
return {};
}
if (ctx->stmt()) {
ctx->stmt()->accept(this);
return {};
}
throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明"));
if (primary.lVal()) {
CheckLVal(*primary.lVal(), table, sema);
return;
}
std::any visitDecl(SysYParser::DeclContext* ctx) override {
if (!ctx) {
throw std::runtime_error(FormatError("sema", "非法变量声明"));
}
if (!ctx->btype() || !ctx->btype()->INT()) {
throw std::runtime_error(FormatError("sema", "当前仅支持局部 int 变量声明"));
}
auto* var_def = ctx->varDef();
if (!var_def || !var_def->lValue()) {
throw std::runtime_error(FormatError("sema", "非法变量声明"));
}
const std::string name = GetLValueName(*var_def->lValue());
if (table_.Contains(name)) {
throw std::runtime_error(FormatError("sema", "重复定义变量: " + name));
}
if (auto* init = var_def->initValue()) {
if (!init->exp()) {
throw std::runtime_error(FormatError("sema", "当前不支持聚合初始化"));
}
init->exp()->accept(this);
}
table_.Add(name, var_def);
return {};
if (primary.exp()) {
CheckExpr(*primary.exp(), table, sema);
return;
}
std::any visitStmt(SysYParser::StmtContext* ctx) override {
if (!ctx || !ctx->returnStmt()) {
throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明"));
}
ctx->returnStmt()->accept(this);
return {};
throw std::runtime_error(FormatError("sema", "暂不支持的表达式形式"));
}
void CheckUnaryExpr(SysYParser::UnaryExpContext& unary, const SymbolTable& table,
SemanticContext& sema) {
if (unary.primary()) {
CheckPrimary(*unary.primary(), table, sema);
return;
}
std::any visitReturnStmt(SysYParser::ReturnStmtContext* ctx) override {
if (!ctx || !ctx->exp()) {
throw std::runtime_error(FormatError("sema", "return 缺少表达式"));
}
ctx->exp()->accept(this);
seen_return_ = true;
if (current_item_index_ + 1 != total_items_) {
throw std::runtime_error(
FormatError("sema", "return 必须是 main 函数中的最后一条语句"));
}
return {};
if (unary.unaryExp()) {
CheckUnaryExpr(*unary.unaryExp(), table, sema);
return;
}
std::any visitParenExp(SysYParser::ParenExpContext* ctx) override {
if (!ctx || !ctx->exp()) {
throw std::runtime_error(FormatError("sema", "非法括号表达式"));
if (unary.funcRParams()) {
for (auto* arg : unary.funcRParams()->exp()) {
if (arg) {
CheckExpr(*arg, table, sema);
}
}
ctx->exp()->accept(this);
return {};
}
}
std::any visitVarExp(SysYParser::VarExpContext* ctx) override {
if (!ctx || !ctx->var()) {
throw std::runtime_error(FormatError("sema", "非法变量表达式"));
void CheckMulExpr(SysYParser::MulExpContext& mul, const SymbolTable& table,
SemanticContext& sema) {
for (auto* unary : mul.unaryExp()) {
if (unary) {
CheckUnaryExpr(*unary, table, sema);
}
ctx->var()->accept(this);
return {};
}
}
std::any visitNumberExp(SysYParser::NumberExpContext* ctx) override {
if (!ctx || !ctx->number() || !ctx->number()->ILITERAL()) {
throw std::runtime_error(FormatError("sema", "当前仅支持整数字面量"));
void CheckAddExpr(SysYParser::AddExpContext& add, const SymbolTable& table,
SemanticContext& sema) {
for (auto* mul : add.mulExp()) {
if (mul) {
CheckMulExpr(*mul, table, sema);
}
return {};
}
}
std::any visitAdditiveExp(SysYParser::AdditiveExpContext* ctx) override {
if (!ctx || !ctx->exp(0) || !ctx->exp(1)) {
throw std::runtime_error(FormatError("sema", "暂不支持的表达式形式"));
}
ctx->exp(0)->accept(this);
ctx->exp(1)->accept(this);
return {};
// Semantic check for an `exp` node: forwards its addExp child to CheckAddExpr
// together with the same symbol table and semantic context. Throws
// std::runtime_error when the addExp child is missing.
void CheckExpr(SysYParser::ExpContext& exp, const SymbolTable& table,
               SemanticContext& sema) {
  auto* add_expr = exp.addExp();
  if (add_expr == nullptr) {
    throw std::runtime_error(FormatError("sema", "非法表达式"));
  }
  CheckAddExpr(*add_expr, table, sema);
}
} // namespace
SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit) {
auto* func = FindMainFunc(comp_unit);
if (!func || !func->block()) {
throw std::runtime_error(FormatError("sema", "缺少 main 函数定义"));
}
SymbolTable table;
SemanticContext sema;
bool seen_return = false;
const auto& items = func->block()->blockItem();
if (items.empty()) {
throw std::runtime_error(
FormatError("sema", "main 函数不能为空,且必须以 return 结束"));
}
std::any visitVar(SysYParser::VarContext* ctx) override {
if (!ctx || !ctx->ID()) {
throw std::runtime_error(FormatError("sema", "非法变量引用"));
for (size_t i = 0; i < items.size(); ++i) {
auto* item = items[i];
if (!item) {
continue;
}
const std::string name = ctx->ID()->getText();
auto* decl = table_.Lookup(name);
if (!decl) {
throw std::runtime_error(FormatError("sema", "使用了未定义的变量: " + name));
if (seen_return) {
throw std::runtime_error(
FormatError("sema", "return 必须是 main 函数中的最后一条语句"));
}
if (auto* decl = item->decl() ? item->decl()->varDecl() : nullptr) {
for (auto* def : decl->varDef()) {
if (!def || !def->Ident()) {
continue;
}
const std::string name = def->Ident()->getText();
if (table.Contains(name)) {
throw std::runtime_error(FormatError("sema", "重复定义变量: " + name));
}
if (!def->constExp().empty()) {
throw std::runtime_error(
FormatError("sema", "当前 IR 仅支持标量局部变量"));
}
if (auto* init = def->initVal()) {
if (!init->exp()) {
throw std::runtime_error(
FormatError("sema", "当前 IR 仅支持标量表达式初始化"));
}
CheckExpr(*init->exp(), table, sema);
}
table.Add(name, def);
}
continue;
}
sema_.BindVarUse(ctx, decl);
return {};
}
SemanticContext TakeSemanticContext() { return std::move(sema_); }
if (auto* stmt = item->stmt(); stmt && stmt->returnStmt()) {
auto* ret = stmt->returnStmt();
if (!ret->exp()) {
throw std::runtime_error(FormatError("sema", "main 函数必须返回一个值"));
}
CheckExpr(*ret->exp(), table, sema);
seen_return = true;
if (i + 1 != items.size()) {
throw std::runtime_error(
FormatError("sema", "return 必须是 main 函数中的最后一条语句"));
}
continue;
}
private:
SymbolTable table_;
SemanticContext sema_;
bool seen_return_ = false;
size_t current_item_index_ = 0;
size_t total_items_ = 0;
};
throw std::runtime_error(FormatError("sema", "暂不支持的语句或声明"));
}
} // namespace
if (!seen_return) {
throw std::runtime_error(FormatError("sema", "main 函数必须包含 return 语句"));
}
SemanticContext RunSema(SysYParser::CompUnitContext& comp_unit) {
SemaVisitor visitor;
comp_unit.accept(&visitor);
return visitor.TakeSemanticContext();
return sema;
}

Loading…
Cancel
Save