From 15cbb3743576210dce56888c4b7878be1cc522d1 Mon Sep 17 00:00:00 2001
From: hp <1278334840@qq.com>
Date: Mon, 23 Mar 2026 16:24:59 +0800
Subject: [PATCH] lab1

Extend the SysY grammar from the minimal
"int main() { int a = 1; int b = 2; return a + b; }" subset to the full
language: const/variable declarations, float and void, arrays with brace
initializers, function parameters and calls, if/else, while, break,
continue, and the complete operator set. Add build.sh, which regenerates
the ANTLR4 C++ parser and then configures and builds with CMake.

---
Review notes (text between the three-dash marker and the first file diff
is ignored by git am):
 * build.sh now stops at the first failing step, runs from its own
   directory, and is committed executable.
 * SysY.g4 keeps an English header comment instead of losing its
   documentation entirely.
 * Line breaks and intra-line spacing were reconstructed from a
   whitespace-collapsed copy of this patch; the post-image blob ids on the
   "index" lines were not recomputed. Regenerate with git format-patch
   after applying.

 build.sh           |  17 +++++
 src/antlr4/SysY.g4 | 166 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 147 insertions(+), 36 deletions(-)
 create mode 100755 build.sh

diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..a31b154
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Regenerate the ANTLR4 C++ parser from SysY.g4, then configure and build.
+# Abort on the first failing step so a broken grammar never reaches CMake.
+set -euo pipefail
+
+# All paths below are relative to the repository root (this script's directory).
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+mkdir -p build/generated/antlr4
+java -jar third_party/antlr-4.13.2-complete.jar \
+  -Dlanguage=Cpp \
+  -visitor -no-listener \
+  -Xexact-output-dir \
+  -o build/generated/antlr4 \
+  src/antlr4/SysY.g4
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON
+cmake --build build -j "$(nproc)"
diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4
index c8f4794..0907727 100644
--- a/src/antlr4/SysY.g4
+++ b/src/antlr4/SysY.g4
@@ -1,41 +1,77 @@
-// SysY 完整语法文法
-// 支持完整的 SysY 语言子集,包括:
-// - int/float/void 类型
-// - 全局/局部变量和常量声明
-// - 数组声明和初始化(一维和多维)
-// - 函数定义和调用
-// - if-else, while, break, continue
-// - 各种运算符(算术、关系、逻辑、一元)
-// - 库函数调用
-
-// SysY 子集语法:支持形如
-// int main() { int a = 1; int b = 2; return a + b; }
-// 的最小返回表达式编译。
-
-// 后续需要自行添加
-
+// SysY grammar (ANTLR4): int/float/void types, variable and constant
+// declarations, multi-dimensional arrays with brace initializers, function
+// definitions and calls, if/else, while, break, continue, return, and the
+// arithmetic, relational, equality, logical and unary operators.
+// Integer literals may be decimal, octal or hex; floats decimal or hex.
+
 grammar SysY;
 
 /*===-------------------------------------------===*/
 /* Lexer rules                                     */
 /*===-------------------------------------------===*/
 
+CONST: 'const';
 INT: 'int';
+FLOAT: 'float';
+VOID: 'void';
+IF: 'if';
+ELSE: 'else';
+WHILE: 'while';
+BREAK: 'break';
+CONTINUE: 'continue';
 RETURN: 'return';
 
 ASSIGN: '=';
 ADD: '+';
+SUB: '-';
+MUL: '*';
+DIV: '/';
+MOD: '%';
+EQ: '==';
+NEQ: '!=';
+LT: '<';
+GT: '>';
+LE: '<=';
+GE: '>=';
+NOT: '!';
+AND: '&&';
+OR: '||';
 
 LPAREN: '(';
 RPAREN: ')';
+LBRACKET: '[';
+RBRACKET: ']';
 LBRACE: '{';
 RBRACE: '}';
+COMMA: ',';
 SEMICOLON: ';';
 
 ID: [a-zA-Z_][a-zA-Z_0-9]*;
-ILITERAL: [0-9]+;
 
-WS: [ \t\r\n] -> skip;
+ILITERAL: DEC_LIT | OCT_LIT | HEX_LIT;
+fragment DEC_LIT: [1-9][0-9]* | '0';
+fragment OCT_LIT: '0'[0-7]+;
+fragment HEX_LIT: ('0x' | '0X') [0-9a-fA-F]+;
+
+FLITERAL: DEC_FLOAT_LIT | HEX_FLOAT_LIT;
+fragment DEC_FLOAT_LIT
+  : [0-9]+ '.' [0-9]* EXPONENT?
+  | '.' [0-9]+ EXPONENT?
+  | [0-9]+ EXPONENT
+  ;
+fragment EXPONENT: ('e'|'E') ('+'|'-')? [0-9]+;
+
+fragment HEX_FLOAT_LIT
+  : ('0x'|'0X') HEX_MANTISSA HEX_EXPONENT
+  ;
+fragment HEX_MANTISSA
+  : [0-9a-fA-F]+ '.' [0-9a-fA-F]*
+  | '.' [0-9a-fA-F]+
+  | [0-9a-fA-F]+
+  ;
+fragment HEX_EXPONENT: ('p'|'P') ('+'|'-')? [0-9]+;
+
+WS: [ \t\r\n]+ -> skip;
 LINECOMMENT: '//' ~[\r\n]* -> skip;
 BLOCKCOMMENT: '/*' .*? '*/' -> skip;
 
@@ -44,31 +80,62 @@ BLOCKCOMMENT: '/*' .*? '*/' -> skip;
 /*===-------------------------------------------===*/
 
 compUnit
-  : funcDef EOF
+  : (decl | funcDef)+ EOF
   ;
 
 decl
-  : btype varDef SEMICOLON
+  : constDecl
+  | varDecl
+  ;
+
+constDecl
+  : CONST btype constDef (COMMA constDef)* SEMICOLON
   ;
 
 btype
   : INT
+  | FLOAT
+  ;
+
+constDef
+  : ID (LBRACKET constExp RBRACKET)* ASSIGN constInitVal
+  ;
+
+constInitVal
+  : constExp
+  | LBRACE (constInitVal (COMMA constInitVal)*)? RBRACE
+  ;
+
+varDecl
+  : btype varDef (COMMA varDef)* SEMICOLON
   ;
 
 varDef
-  : lValue (ASSIGN initValue)?
+  : ID (LBRACKET constExp RBRACKET)*
+  | ID (LBRACKET constExp RBRACKET)* ASSIGN initVal
   ;
 
-initValue
+initVal
   : exp
+  | LBRACE (initVal (COMMA initVal)*)? RBRACE
   ;
 
 funcDef
-  : funcType ID LPAREN RPAREN blockStmt
+  : funcType ID LPAREN funcFParams? RPAREN blockStmt
   ;
 
 funcType
-  : INT
+  : VOID
+  | INT
+  | FLOAT
+  ;
+
+funcFParams
+  : funcFParam (COMMA funcFParam)*
+  ;
+
+funcFParam
+  : btype ID (LBRACKET RBRACKET (LBRACKET exp RBRACKET)*)?
   ;
 
 blockStmt
@@ -81,28 +148,55 @@ blockItem
   ;
 
 stmt
-  : returnStmt
-  ;
-
-returnStmt
-  : RETURN exp SEMICOLON
+  : lValue ASSIGN exp SEMICOLON              # assignStmt
+  | exp? SEMICOLON                           # exprStmt
+  | blockStmt                                # blockStmtNode
+  | IF LPAREN cond RPAREN stmt (ELSE stmt)?  # ifStmt
+  | WHILE LPAREN cond RPAREN stmt            # whileStmt
+  | BREAK SEMICOLON                          # breakStmt
+  | CONTINUE SEMICOLON                       # continueStmt
+  | RETURN exp? SEMICOLON                    # returnStmt
   ;
 
+// Operator alternatives are listed from highest to lowest precedence; ANTLR4
+// resolves the left-recursive alternatives in that order (left-associative).
 exp
-  : LPAREN exp RPAREN  # parenExp
-  | var                # varExp
-  | number             # numberExp
-  | exp ADD exp        # additiveExp
+  : LPAREN exp RPAREN               # parenExp
+  | lValue                          # lvalExp
+  | number                          # numberExp
+  | ID LPAREN funcRParams? RPAREN   # funcCallExp
+  | unaryOp exp                     # unaryOpExp
+  | exp (MUL | DIV | MOD) exp       # mulExp
+  | exp (ADD | SUB) exp             # addExp
+  | exp (LT | GT | LE | GE) exp     # relExp
+  | exp (EQ | NEQ) exp              # eqExp
+  | exp AND exp                     # lAndExp
+  | exp OR exp                      # lOrExp
   ;
 
-var
-  : ID
+cond
+  : exp
   ;
 
 lValue
-  : ID
+  : ID (LBRACKET exp RBRACKET)*
   ;
 
 number
   : ILITERAL
+  | FLITERAL
+  ;
+
+unaryOp
+  : ADD
+  | SUB
+  | NOT
+  ;
+
+funcRParams
+  : exp (COMMA exp)*
+  ;
+
+constExp
+  : exp
   ;