diff --git a/.gitignore b/.gitignore
index 1ee33a1..5d98489 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,3 +68,8 @@ Thumbs.db
 # Project outputs
 # =========================
 test/test_result/
+
+
+# Added by cargo
+
+/target
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 74dcb27..3ac5b22 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.20)
 
 project(compiler LANGUAGES C CXX)
 
+find_package(Java REQUIRED COMPONENTS Runtime)
+
 # 统一 C++ 标准(按实验环境可调整)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -39,6 +41,11 @@ endif()
 
 option(COMPILER_PARSE_ONLY "Build only the frontend parser pipeline" OFF)
 
+set(ANTLR4_JAR "${PROJECT_SOURCE_DIR}/third_party/antlr-4.13.2-complete.jar")
+if(NOT EXISTS "${ANTLR4_JAR}")
+  message(FATAL_ERROR "ANTLR jar not found: ${ANTLR4_JAR}")
+endif()
+
 # 使用仓库内 third_party 提供的 ANTLR4 C++ runtime(较新版本)
 # third_party 目录结构以仓库为准:runtime 源码位于 third_party/antlr4-runtime-4.13.2/runtime/src
 set(ANTLR4_RUNTIME_SRC_DIR "${PROJECT_SOURCE_DIR}/third_party/antlr4-runtime-4.13.2/runtime/src")
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..bc78ab8
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "nudt-compiler-cpp"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..174e3dc
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "nudt-compiler-cpp"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4
new file mode 100644
index 0000000..3027924
--- /dev/null
+++ b/src/antlr4/SysY.g4
@@ -0,0 +1,228 @@
+grammar SysY;
+
+/*===-------------------------------------------===*/
+/* Lexer rules */
+/*===-------------------------------------------===*/
+
+CONST: 'const';
+INT: 'int';
+FLOAT: 'float';
+VOID: 'void';
+IF: 'if';
+ELSE: 'else';
+WHILE: 'while';
+BREAK: 'break';
+CONTINUE: 'continue';
+RETURN: 'return';
+
+ASSIGN: '=';
+EQ: '==';
+NE: '!=';
+LT: '<';
+GT: '>';
+LE: '<=';
+GE: '>=';
+
+ADD: '+';
+SUB: '-';
+MUL: '*';
+DIV: '/';
+MOD: '%';
+NOT: '!';
+LAND: '&&';
+LOR: '||';
+
+LPAREN: '(';
+RPAREN: ')';
+LBRACE: '{';
+RBRACE: '}';
+LBRACK: '[';
+RBRACK: ']';
+COMMA: ',';
+SEMICOLON: ';';
+
+FLOAT_CONST
+    : DEC_FLOAT_CONST
+    | HEX_FLOAT_CONST
+    ;
+
+INT_CONST
+    : HEX_PREFIX HEX_DIGIT+
+    | '0' [0-7]+
+    | '0'
+    | [1-9] DIGIT*
+    ;
+
+ID: [a-zA-Z_][a-zA-Z_0-9]*;
+
+WS: [ \t\r\n] -> skip;
+LINECOMMENT: '//' ~[\r\n]* -> skip;
+BLOCKCOMMENT: '/*' .*? '*/' -> skip;
+
+fragment DEC_FLOAT_CONST
+    : DIGIT+ '.' DIGIT* EXP_PART?
+    | '.' DIGIT+ EXP_PART?
+    | DIGIT+ EXP_PART
+    ;
+
+fragment HEX_FLOAT_CONST
+    : HEX_PREFIX HEX_DIGIT+ '.' HEX_DIGIT* BIN_EXP_PART
+    | HEX_PREFIX '.' HEX_DIGIT+ BIN_EXP_PART
+    | HEX_PREFIX HEX_DIGIT+ BIN_EXP_PART
+    ;
+
+fragment EXP_PART: [eE] [+-]? DIGIT+;
+fragment BIN_EXP_PART: [pP] [+-]? DIGIT+;
+fragment HEX_PREFIX: '0' [xX];
+fragment HEX_DIGIT: [0-9a-fA-F];
+fragment DIGIT: [0-9];
+
+/*===-------------------------------------------===*/
+/* Syntax rules */
+/*===-------------------------------------------===*/
+
+compUnit
+    : (decl | funcDef)+ EOF
+    ;
+
+decl
+    : constDecl
+    | varDecl
+    ;
+
+constDecl
+    : CONST bType constDef (COMMA constDef)* SEMICOLON
+    ;
+
+varDecl
+    : bType varDef (COMMA varDef)* SEMICOLON
+    ;
+
+bType
+    : INT
+    | FLOAT
+    ;
+
+constDef
+    : ID (LBRACK constExp RBRACK)* ASSIGN constInitVal
+    ;
+
+constInitVal
+    : constExp
+    | LBRACE (constInitVal (COMMA constInitVal)*)? RBRACE
+    ;
+
+varDef
+    : ID (LBRACK constExp RBRACK)* (ASSIGN initVal)?
+    ;
+
+initVal
+    : exp
+    | LBRACE (initVal (COMMA initVal)*)? RBRACE
+    ;
+
+funcDef
+    : funcType ID LPAREN funcFParams? RPAREN block
+    ;
+
+funcType
+    : VOID
+    | INT
+    | FLOAT
+    ;
+
+funcFParams
+    : funcFParam (COMMA funcFParam)*
+    ;
+
+funcFParam
+    : bType ID (LBRACK RBRACK (LBRACK exp RBRACK)*)?
+    ;
+
+block
+    : LBRACE blockItem* RBRACE
+    ;
+
+blockItem
+    : decl
+    | stmt
+    ;
+
+stmt
+    : lVal ASSIGN exp SEMICOLON
+    | exp? SEMICOLON
+    | block
+    | IF LPAREN cond RPAREN stmt (ELSE stmt)?
+    | WHILE LPAREN cond RPAREN stmt
+    | BREAK SEMICOLON
+    | CONTINUE SEMICOLON
+    | RETURN exp? SEMICOLON
+    ;
+
+exp
+    : addExp
+    ;
+
+cond
+    : lOrExp
+    ;
+
+lVal
+    : ID (LBRACK exp RBRACK)*
+    ;
+
+primaryExp
+    : LPAREN exp RPAREN
+    | lVal
+    | number
+    ;
+
+number
+    : INT_CONST
+    | FLOAT_CONST
+    ;
+
+unaryExp
+    : primaryExp
+    | ID LPAREN funcRParams? RPAREN
+    | unaryOp unaryExp
+    ;
+
+unaryOp
+    : ADD
+    | SUB
+    | NOT
+    ;
+
+funcRParams
+    : exp (COMMA exp)*
+    ;
+
+mulExp
+    : unaryExp ((MUL | DIV | MOD) unaryExp)*
+    ;
+
+addExp
+    : mulExp ((ADD | SUB) mulExp)*
+    ;
+
+relExp
+    : addExp ((LT | GT | LE | GE) addExp)*
+    ;
+
+eqExp
+    : relExp ((EQ | NE) relExp)*
+    ;
+
+lAndExp
+    : eqExp (LAND eqExp)*
+    ;
+
+lOrExp
+    : lAndExp (LOR lAndExp)*
+    ;
+
+constExp
+    : addExp
+    ;
+
diff --git a/src/frontend/CMakeLists.txt b/src/frontend/CMakeLists.txt
index 524fcd6..de5b7f2 100644
--- a/src/frontend/CMakeLists.txt
+++ b/src/frontend/CMakeLists.txt
@@ -3,15 +3,53 @@ add_library(frontend STATIC
   SyntaxTreePrinter.cpp
 )
 
+# Grammar that drives ANTLR4 code generation.
+set(ANTLR4_GRAMMAR "${PROJECT_SOURCE_DIR}/src/antlr4/SysY.g4")
+# Everything the ANTLR4 tool writes for SysY.g4 with -visitor -no-listener.
+# The C++ target also emits a .cpp beside each visitor header; they are listed
+# as outputs so the build system tracks them, but are not compiled below.
+set(ANTLR4_GENERATED_FILES
+  "${ANTLR4_GENERATED_DIR}/SysYLexer.cpp"
+  "${ANTLR4_GENERATED_DIR}/SysYLexer.h"
+  "${ANTLR4_GENERATED_DIR}/SysYLexer.interp"
+  "${ANTLR4_GENERATED_DIR}/SysYLexer.tokens"
+  "${ANTLR4_GENERATED_DIR}/SysYParser.cpp"
+  "${ANTLR4_GENERATED_DIR}/SysYParser.h"
+  "${ANTLR4_GENERATED_DIR}/SysY.interp"
+  "${ANTLR4_GENERATED_DIR}/SysY.tokens"
+  "${ANTLR4_GENERATED_DIR}/SysYBaseVisitor.cpp"
+  "${ANTLR4_GENERATED_DIR}/SysYBaseVisitor.h"
+  "${ANTLR4_GENERATED_DIR}/SysYVisitor.cpp"
+  "${ANTLR4_GENERATED_DIR}/SysYVisitor.h"
+)
+
+# Regenerate the lexer/parser whenever the grammar or the ANTLR jar changes.
+add_custom_command(
+  OUTPUT ${ANTLR4_GENERATED_FILES}
+  COMMAND ${CMAKE_COMMAND} -E make_directory "${ANTLR4_GENERATED_DIR}"
+  COMMAND ${Java_JAVA_EXECUTABLE} -jar "${ANTLR4_JAR}"
+          -Dlanguage=Cpp
+          -visitor
+          -no-listener
+          -Xexact-output-dir
+          -o "${ANTLR4_GENERATED_DIR}"
+          "${ANTLR4_GRAMMAR}"
+  DEPENDS "${ANTLR4_GRAMMAR}" "${ANTLR4_JAR}"
+  COMMENT "Generating ANTLR4 parser sources from SysY.g4"
+  VERBATIM
+)
+
+# Named target so the generation step is ordered before frontend is built.
+add_custom_target(antlr4_generated DEPENDS ${ANTLR4_GENERATED_FILES})
+add_dependencies(frontend antlr4_generated)
+
+# Only the lexer and parser translation units are compiled into frontend.
+target_sources(frontend PRIVATE
+  "${ANTLR4_GENERATED_DIR}/SysYLexer.cpp"
+  "${ANTLR4_GENERATED_DIR}/SysYParser.cpp"
+)
+
 target_link_libraries(frontend PUBLIC
   build_options
   ${ANTLR4_RUNTIME_TARGET}
 )
-
-# 自动纳入构建目录中的 Lexer/Parser 生成源码(若存在)
-file(GLOB_RECURSE ANTLR4_GENERATED_SOURCES CONFIGURE_DEPENDS
-  "${ANTLR4_GENERATED_DIR}/*.cpp"
-)
-if(ANTLR4_GENERATED_SOURCES)
-  target_sources(frontend PRIVATE ${ANTLR4_GENERATED_SOURCES})
-endif()
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e7a11a9
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+    println!("Hello, world!");
+}