diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4
index 263aeef..17cd890 100644
--- a/src/antlr4/SysY.g4
+++ b/src/antlr4/SysY.g4
@@ -1,68 +1,172 @@
-// SysY 子集语法:支持形如
-// int main() { int a = 1; int b = 2; return a + b; }
-// 的最小返回表达式编译。
+// SysY subset grammar
+grammar SysY;
 
-// 后续需要自行添加
+//---- Lexer rules (highest priority first) ----//
+// Keywords (declared before Ident so they win the tie on equal-length matches)
+Void    : 'void';
+Int     : 'int';
+Float   : 'float';
+Const   : 'const';
+If      : 'if';
+Else    : 'else';
+While   : 'while';
+Break   : 'break';
+Continue: 'continue';
+Return  : 'return';
 
-grammar SysY;
+// Two-character operators (longest match preferred)
+LeOp    : '<=';
+GeOp    : '>=';
+EqOp    : '==';
+NeOp    : '!=';
+AndOp   : '&&';
+OrOp    : '||';
 
-/*===-------------------------------------------===*/
-/* Lexer rules */
-/*===-------------------------------------------===*/
+// Single-character operators
+AddOp   : '+';
+SubOp   : '-';
+MulOp   : '*';
+DivOp   : '/';
+QuoOp   : '%';
+NotOp   : '!';
+LOp     : '<';
+GOp     : '>';
+Assign  : '=';
 
-INT: 'int';
-RETURN: 'return';
+// Separators
+Semi    : ';';
+Comma   : ',';
+L_PAREN : '(';
+R_PAREN : ')';
+L_BRACK : '[';
+R_BRACK : ']';
+L_BRACE : '{';
+R_BRACE : '}';
 
-ASSIGN: '=';
-ADD: '+';
+// Numeric constant classes
+// Number... change
+// Float rules come first
+// Hexadecimal float first; the binary exponent ([pP] ...) is mandatory in every form
+HEX_FLOAT
+    : '0' [xX] (
+        // Form 1: digits, a dot, optional fraction digits, exponent (0x1.921fb6p+1, 0x1.p3)
+        HEX_DIGIT+ '.' HEX_DIGIT* [pP] [+-]? DECIMAL_DIGIT+
+      | // Form 2: no dot, exponent only (0x1p+1)
+        HEX_DIGIT+ [pP] [+-]? DECIMAL_DIGIT+
+      | // Form 3: leading dot (0x.AP-3)
+        '.' HEX_DIGIT+ [pP] [+-]? DECIMAL_DIGIT+
+      )
+    ;
+fragment HEX_DIGIT: [0-9a-fA-F];
+fragment DECIMAL_DIGIT: [0-9];
 
-LPAREN: '(';
-RPAREN: ')';
-LBRACE: '{';
-RBRACE: '}';
-SEMICOLON: ';';
+// Decimal float
+DEC_FLOAT
+    : [0-9]+ '.' [0-9]* EXP?   // 1.2 / 1. / 1.2e10 / ...
+    | '.' [0-9]+ EXP?          // .1 / .1e2
+    | [0-9]+ EXP               // 1e2 / 1e-2
+    ;
+fragment EXP: [eE] [+-]? [0-9]+;
 
-ID: [a-zA-Z_][a-zA-Z_0-9]*;
-ILITERAL: [0-9]+;
+HEX_INT: '0' [xX] [0-9a-fA-F]+;   // hexadecimal
+OCTAL_INT: '0' [0-7]*;            // octal
+DECIMAL_INT: [1-9][0-9]*;         // decimal
+ZERO: '0';                        // a lone 0. NOTE(review): OCTAL_INT also matches "0" and is declared first, so this token looks unreachable
 
-WS: [ \t\r\n] -> skip;
-LINECOMMENT: '//' ~[\r\n]* -> skip;
-BLOCKCOMMENT: '/*' .*? '*/' -> skip;
+// TODO: remove the Number compatibility rule once the IR is complete. NOTE(review): every alternative is an earlier token rule that wins the tie, so Number is presumably never emitted
+Number
+    : HEX_FLOAT
+    | DEC_FLOAT
+    | HEX_INT
+    | OCTAL_INT
+    | DECIMAL_INT
+    | ZERO
+    ;
+
+// Identifier (kept after the keywords)
+Ident
+    : [a-zA-Z_][a-zA-Z_0-9]*
+    ;
 
-/*===-------------------------------------------===*/
-/* Syntax rules */
-/*===-------------------------------------------===*/
+// Comments and whitespace
+WS
+    : [ \t\r\n]+ -> skip
+    ;
+
+COMMENT
+    : '//' ~[\r\n]* -> skip
+    ;
+
+BLOCK_COMMENT
+    : '/*' .*? '*/' -> skip
+    ;
 
+//---- Parser rules ----//
 compUnit
-    : funcDef EOF
+    : (funcDef|decl|program) EOF   // NOTE(review): a lone funcDef or decl also matches program; confirm all three shapes are handled
+    ;
+
+program
+    : (decl|funcDef)+
     ;
 
 decl
-    : btype varDef SEMICOLON
+    : varDecl
+    | constDecl
+    ;
+
+constDecl
+    : Const bType constDef (Comma constDef)* Semi
+    ;
+
+bType
+    : Int
+    | Float
     ;
 
-btype
-    : INT
+constDef
+    : Ident (L_BRACK constExp R_BRACK)* Assign constInitVal
+    ;
+
+constInitVal
+    : constExp
+    | L_BRACE (constInitVal (Comma constInitVal)*)? R_BRACE
+    ;
+
+varDecl
+    : bType varDef (Comma varDef)* Semi
+    | Int Ident (Assign exp)? Semi   // NOTE(review): already covered by the alternative above; confirm it is still needed
     ;
 
 varDef
-    : lValue (ASSIGN initValue)?
+    : Ident (L_BRACK constExp R_BRACK)* (Assign initVal)?
     ;
 
-initValue
+initVal
     : exp
+    | L_BRACE (initVal (Comma initVal)*)? R_BRACE
     ;
 
 funcDef
-    : funcType ID LPAREN RPAREN blockStmt
+    : funcType Ident L_PAREN (funcFParams)? R_PAREN block
     ;
 
 funcType
-    : INT
+    : Void
+    | Int
+    | Float
+    ;
+
+funcFParams
+    : funcFParam (Comma funcFParam)*
+    ;
+
+funcFParam
+    : bType Ident (L_BRACK R_BRACK (L_BRACK exp R_BRACK)*)?
     ;
 
-blockStmt
-    : LBRACE blockItem* RBRACE
+block
+    : L_BRACE (blockItem)* R_BRACE
     ;
 
 blockItem
@@ -71,28 +175,92 @@ blockItem
     ;
 
 stmt
-    : returnStmt
+    : lVal Assign exp Semi
+    | (exp)? Semi
+    | block
+    | If L_PAREN cond R_PAREN stmt (Else stmt)?
+    | While L_PAREN cond R_PAREN stmt
+    | Break Semi
+    | Continue Semi
+    | returnStmt
     ;
 
 returnStmt
-    : RETURN exp SEMICOLON
+    : Return (exp)? Semi
     ;
 
 exp
-    : LPAREN exp RPAREN # parenExp
-    | var # varExp
-    | number # numberExp
-    | exp ADD exp # additiveExp
+    : addExp
+    ;
+
+cond
+    : lOrExp
+    ;
+
+lVal
+    : Ident (L_BRACK exp R_BRACK)*
+    ;
+
+primary
+    : L_PAREN exp R_PAREN
+    | lVal
+    | Number        // keeps old code working
+    | HEX_FLOAT
+    | DEC_FLOAT
+    | HEX_INT
+    | OCTAL_INT
+    | DECIMAL_INT
+    | ZERO
+    | Ident         // NOTE(review): a bare Ident already matches lVal above
+    ;
+
+unaryExp
+    : primary
+    | Ident L_PAREN (funcRParams)? R_PAREN
+    | unaryOp unaryExp
     ;
 
-var
-    : ID
+unaryOp
+    : AddOp
+    | SubOp
+    | NotOp
     ;
 
-lValue
-    : ID
+funcRParams
+    : exp (Comma exp)*
     ;
 
-number
-    : ILITERAL
+mulExp
+    : unaryExp
+    | mulExp (MulOp|DivOp|QuoOp) unaryExp
     ;
+
+addExp
+    : mulExp
+    | addExp (AddOp|SubOp) mulExp
+    | primary (AddOp primary)*   // NOTE(review): overlaps with the mulExp alternative above; confirm it is still needed
+    ;
+
+relExp
+    : addExp
+    | relExp (LOp|GOp|LeOp|GeOp) addExp
+    ;
+
+eqExp
+    : relExp
+    | eqExp (EqOp|NeOp) relExp
+    ;
+
+lAndExp
+    : eqExp
+    | lAndExp AndOp eqExp
+    ;
+
+lOrExp
+    : lAndExp
+    | lOrExp OrOp lAndExp
+    ;
+
+constExp
+    : addExp
+    ;