From 3d0361e64827893ffaa9c49e54b289508b28e356 Mon Sep 17 00:00:00 2001
From: olivame
Date: Mon, 23 Mar 2026 15:26:32 +0800
Subject: [PATCH] feat(frontend): complete lab1 SysY grammar support

---
Reviewer notes (free-form commentary between "---" and the first "diff --git"):
  * scripts/test_lab1.sh (new): runs ./build/bin/compiler --emit-parse-tree on
    every *.sy file found under a directory (default test/test_case) and exits
    with status 1 at the first file the compiler rejects.
  * src/antlr4/SysY.g4: replaces the single-function grammar with rules for
    const/variable declarations, arrays, function parameters, control-flow
    statements, layered expression precedence, and integer/float literals.
  * NOTE(review): leading indentation and banner padding inside the hunks
    could not be verified in this copy -- confirm against the target tree, or
    apply with whitespace differences ignored.

 scripts/test_lab1.sh |  32 +++++++
 src/antlr4/SysY.g4   | 219 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 223 insertions(+), 28 deletions(-)
 create mode 100755 scripts/test_lab1.sh

diff --git a/scripts/test_lab1.sh b/scripts/test_lab1.sh
new file mode 100755
index 0000000..619ebb1
--- /dev/null
+++ b/scripts/test_lab1.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -euo pipefail  # exit on command failure, unset variable, or a failing pipeline stage
+
+case_dir="${1:-test/test_case}"  # first argument, or test/test_case when omitted
+
+if [[ ! -d "$case_dir" ]]; then
+  echo "测试目录不存在: $case_dir" >&2
+  exit 1
+fi
+
+compiler="./build/bin/compiler"  # relative path, so it is resolved against the current directory
+if [[ ! -x "$compiler" ]]; then
+  echo "未找到编译器: $compiler ,请先构建 parse-only 版本。" >&2
+  exit 1
+fi
+
+mapfile -t cases < <(find "$case_dir" -name '*.sy' | sort)  # every *.sy path under case_dir, sorted
+if [[ ${#cases[@]} -eq 0 ]]; then
+  echo "未找到任何 .sy 测试文件: $case_dir" >&2
+  exit 1
+fi
+
+for f in "${cases[@]}"; do
+  echo "TEST $f"
+  "$compiler" --emit-parse-tree "$f" >/dev/null || {  # stdout is discarded; only the exit status is checked
+    echo "FAIL $f" >&2
+    exit 1  # stop at the first failing case
+  }
+done
+
+echo "ALL_PARSE_OK (${#cases[@]} cases)"
diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4
index 263aeef..5f6b0c8 100644
--- a/src/antlr4/SysY.g4
+++ b/src/antlr4/SysY.g4
@@ -1,67 +1,155 @@
-// SysY 子集语法:支持形如
-// int main() { int a = 1; int b = 2; return a + b; }
-// 的最小返回表达式编译。
-
-// 后续需要自行添加
-
 grammar SysY;

 /*===-------------------------------------------===*/
 /* Lexer rules */
 /*===-------------------------------------------===*/

+CONST: 'const';  // keywords are declared above ID: when two rules match the same text, ANTLR picks the earlier one
 INT: 'int';
+FLOAT: 'float';
+VOID: 'void';
+IF: 'if';
+ELSE: 'else';
+WHILE: 'while';
+BREAK: 'break';
+CONTINUE: 'continue';
 RETURN: 'return';

+LE: '<=';  // two-character operators
+GE: '>=';
+EQ: '==';
+NE: '!=';
+AND: '&&';
+OR: '||';
+
 ASSIGN: '=';
+LT: '<';
+GT: '>';
 ADD: '+';
+SUB: '-';
+MUL: '*';
+DIV: '/';
+MOD: '%';
+NOT: '!';

 LPAREN: '(';
 RPAREN: ')';
+LBRACK: '[';
+RBRACK: ']';
 LBRACE: '{';
 RBRACE: '}';
+COMMA: ',';
 SEMICOLON: ';';

 ID: [a-zA-Z_][a-zA-Z_0-9]*;
-ILITERAL: [0-9]+;
+
+HEX_FLOAT_LITERAL  // hexadecimal float: the p/P exponent is required in all three forms
+  : ('0x' | '0X') HEX_DIGIT* '.' HEX_DIGIT+ BINARY_EXPONENT  // digits before the point are optional
+  | ('0x' | '0X') HEX_DIGIT+ '.' HEX_DIGIT* BINARY_EXPONENT  // digits after the point are optional
+  | ('0x' | '0X') HEX_DIGIT+ BINARY_EXPONENT  // no point at all
+  ;
+
+DEC_FLOAT_LITERAL
+  : DEC_DIGIT+ '.' DEC_DIGIT* DEC_EXPONENT?  // e.g. 1.  1.5  1.5e3
+  | '.' DEC_DIGIT+ DEC_EXPONENT?  // e.g. .5  .5e3
+  | DEC_DIGIT+ DEC_EXPONENT  // e.g. 1e3: without a point the exponent is mandatory
+  ;
+
+HEX_INT_LITERAL
+  : ('0x' | '0X') HEX_DIGIT+
+  ;
+
+OCT_INT_LITERAL
+  : '0' OCT_DIGIT+  // a leading 0 followed by at least one octal digit
+  ;
+
+DEC_INT_LITERAL
+  : '0'  // a lone 0 is a decimal literal
+  | [1-9] DEC_DIGIT*
+  ;

 WS: [ \t\r\n] -> skip;
 LINECOMMENT: '//' ~[\r\n]* -> skip;
 BLOCKCOMMENT: '/*' .*? '*/' -> skip;

+fragment DEC_DIGIT: [0-9];  // fragments are building blocks for the literal rules above, not tokens of their own
+fragment OCT_DIGIT: [0-7];
+fragment HEX_DIGIT: [0-9a-fA-F];
+fragment DEC_EXPONENT: [eE] [+-]? DEC_DIGIT+;
+fragment BINARY_EXPONENT: [pP] [+-]? DEC_DIGIT+;
+
 /*===-------------------------------------------===*/
 /* Syntax rules */
 /*===-------------------------------------------===*/

 compUnit
-  : funcDef EOF
+  : topLevelItem* EOF  // any number of declarations and function definitions, then end of input
+  ;
+
+topLevelItem
+  : decl
+  | funcDef
   ;

 decl
-  : btype varDef SEMICOLON
+  : constDecl
+  | varDecl
+  ;
+
+constDecl
+  : CONST bType constDef (COMMA constDef)* SEMICOLON  // one or more comma-separated definitions
   ;

-btype
+varDecl
+  : bType varDef (COMMA varDef)* SEMICOLON  // one or more comma-separated definitions
+  ;
+
+bType
   : INT
+  | FLOAT
+  ;
+
+constDef
+  : ID constIndex* ASSIGN constInitVal  // the initializer is mandatory for constants
   ;

 varDef
-  : lValue (ASSIGN initValue)?
+  : ID constIndex* (ASSIGN initVal)?  // the initializer is optional for variables
+  ;
+
+constIndex
+  : LBRACK constExp RBRACK  // one array dimension
   ;

-initValue
+constInitVal
+  : constExp
+  | LBRACE (constInitVal (COMMA constInitVal)*)? RBRACE  // brace list; may be empty and may nest
+  ;
+
+initVal
   : exp
+  | LBRACE (initVal (COMMA initVal)*)? RBRACE  // brace list; may be empty and may nest
   ;

 funcDef
-  : funcType ID LPAREN RPAREN blockStmt
+  : funcType ID LPAREN funcFParams? RPAREN block  // the parameter list is optional
   ;

 funcType
-  : INT
+  : VOID
+  | INT
+  | FLOAT
+  ;
+
+funcFParams
+  : funcFParam (COMMA funcFParam)*
+  ;
+
+funcFParam
+  : bType ID (LBRACK RBRACK (LBRACK exp RBRACK)*)?  // array parameter: first dimension is written [], later ones carry an exp
   ;

-blockStmt
+block
   : LBRACE blockItem* RBRACE
   ;

@@ -71,28 +159,103 @@ blockItem
   ;

 stmt
-  : returnStmt
+  : lVal ASSIGN exp SEMICOLON  // assignment
+  | exp? SEMICOLON  // expression statement; also matches a bare ';'
+  | block
+  | IF LPAREN cond RPAREN stmt (ELSE stmt)?
+  | WHILE LPAREN cond RPAREN stmt
+  | BREAK SEMICOLON
+  | CONTINUE SEMICOLON
+  | RETURN exp? SEMICOLON  // the return value is optional
   ;

-returnStmt
-  : RETURN exp SEMICOLON
+exp
+  : addExp  // arithmetic only; relational and logical operators are reachable through cond
   ;

-exp
-  : LPAREN exp RPAREN # parenExp
-  | var # varExp
-  | number # numberExp
-  | exp ADD exp # additiveExp
+cond
+  : lOrExp  // entry point of the relational/equality/logical layers
   ;

-var
-  : ID
+lVal
+  : ID (LBRACK exp RBRACK)*  // identifier followed by zero or more index expressions
   ;

-lValue
-  : ID
+primaryExp
+  : LPAREN exp RPAREN
+  | lVal
+  | number
   ;

 number
-  : ILITERAL
+  : intConst
+  | floatConst
+  ;
+
+intConst
+  : DEC_INT_LITERAL
+  | OCT_INT_LITERAL
+  | HEX_INT_LITERAL
+  ;
+
+floatConst
+  : DEC_FLOAT_LITERAL
+  | HEX_FLOAT_LITERAL
+  ;
+
+unaryExp
+  : primaryExp
+  | ID LPAREN funcRParams? RPAREN  // function call; the argument list is optional
+  | unaryOp unaryExp  // prefix operators may be stacked
+  ;
+
+unaryOp
+  : ADD
+  | SUB
+  | NOT  // accepted in any unaryExp; this grammar does not restrict it to cond
+  ;
+
+funcRParams
+  : exp (COMMA exp)*
+  ;
+
+mulExp  // left-recursive: a * b / c groups as (a * b) / c
+  : unaryExp
+  | mulExp MUL unaryExp
+  | mulExp DIV unaryExp
+  | mulExp MOD unaryExp
+  ;
+
+addExp  // operands are mulExp, so * / % bind tighter than + -
+  : mulExp
+  | addExp ADD mulExp
+  | addExp SUB mulExp
+  ;
+
+relExp  // operands are addExp, so arithmetic binds tighter than comparisons
+  : addExp
+  | relExp LT addExp
+  | relExp GT addExp
+  | relExp LE addExp
+  | relExp GE addExp
+  ;
+
+eqExp
+  : relExp
+  | eqExp EQ relExp
+  | eqExp NE relExp
+  ;
+
+lAndExp
+  : eqExp
+  | lAndExp AND eqExp
+  ;
+
+lOrExp  // operands are lAndExp, so && binds tighter than ||
+  : lAndExp
+  | lOrExp OR lAndExp
+  ;
+
+constExp
+  : addExp  // same syntax as exp; the grammar itself does not check that the value is constant
   ;