From 63953bd4ae2a09a4e79ad4cc68f480ca6ec5295b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=95=E6=81=A9=E5=87=AF?= <15609889+biankai001@user.noreply.gitee.com> Date: Mon, 23 Mar 2026 20:47:10 +0800 Subject: [PATCH 1/2] committed --- src/antlr4/SysY.g4 | 185 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 157 insertions(+), 28 deletions(-) diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4 index 263aeef..d3633df 100644 --- a/src/antlr4/SysY.g4 +++ b/src/antlr4/SysY.g4 @@ -1,67 +1,145 @@ -// SysY 子集语法:支持形如 -// int main() { int a = 1; int b = 2; return a + b; } -// 的最小返回表达式编译。 - -// 后续需要自行添加 - grammar SysY; /*===-------------------------------------------===*/ /* Lexer rules */ /*===-------------------------------------------===*/ +CONST: 'const'; INT: 'int'; +FLOAT: 'float'; +VOID: 'void'; +IF: 'if'; +ELSE: 'else'; +WHILE: 'while'; +BREAK: 'break'; +CONTINUE: 'continue'; RETURN: 'return'; ASSIGN: '='; +EQ: '=='; +NE: '!='; +LT: '<'; +GT: '>'; +LE: '<='; +GE: '>='; + ADD: '+'; +SUB: '-'; +MUL: '*'; +DIV: '/'; +MOD: '%'; +NOT: '!'; +LAND: '&&'; +LOR: '||'; LPAREN: '('; RPAREN: ')'; LBRACE: '{'; RBRACE: '}'; +LBRACK: '['; +RBRACK: ']'; +COMMA: ','; SEMICOLON: ';'; +FLOAT_CONST + : DEC_FLOAT_CONST + | HEX_FLOAT_CONST + ; + +INT_CONST + : HEX_PREFIX HEX_DIGIT+ + | '0' [0-7]+ + | '0' + | [1-9] DIGIT* + ; + ID: [a-zA-Z_][a-zA-Z_0-9]*; -ILITERAL: [0-9]+; WS: [ \t\r\n] -> skip; LINECOMMENT: '//' ~[\r\n]* -> skip; BLOCKCOMMENT: '/*' .*? '*/' -> skip; +fragment DEC_FLOAT_CONST + : DIGIT+ '.' DIGIT* EXP_PART? + | '.' DIGIT+ EXP_PART? + | DIGIT+ EXP_PART + ; + +fragment HEX_FLOAT_CONST + : HEX_PREFIX HEX_DIGIT+ '.' HEX_DIGIT* BIN_EXP_PART + | HEX_PREFIX '.' HEX_DIGIT+ BIN_EXP_PART + | HEX_PREFIX HEX_DIGIT+ BIN_EXP_PART + ; + +fragment EXP_PART: [eE] [+-]? DIGIT+; +fragment BIN_EXP_PART: [pP] [+-]? DIGIT+; +fragment HEX_PREFIX: '0' [xX]; +fragment HEX_DIGIT: [0-9a-fA-F]; +fragment DIGIT: [0-9]; + /*===-------------------------------------------===*/ /* Syntax rules */ /*===-------------------------------------------===*/ compUnit - : funcDef EOF + : (decl | funcDef)+ EOF ; decl - : btype varDef SEMICOLON + : constDecl + | varDecl ; -btype +constDecl + : CONST bType constDef (COMMA constDef)* SEMICOLON + ; + +varDecl + : bType varDef (COMMA varDef)* SEMICOLON + ; + +bType : INT + | FLOAT + ; + +constDef + : ID (LBRACK constExp RBRACK)* ASSIGN constInitVal + ; + +constInitVal + : constExp + | LBRACE (constInitVal (COMMA constInitVal)*)? RBRACE ; varDef - : lValue (ASSIGN initValue)? + : ID (LBRACK constExp RBRACK)* (ASSIGN initVal)? ; -initValue +initVal : exp + | LBRACE (initVal (COMMA initVal)*)? RBRACE ; funcDef - : funcType ID LPAREN RPAREN blockStmt + : funcType ID LPAREN funcFParams? RPAREN block ; funcType - : INT + : VOID + | INT + | FLOAT ; -blockStmt +funcFParams + : funcFParam (COMMA funcFParam)* + ; + +funcFParam + : bType ID (LBRACK RBRACK (LBRACK exp RBRACK)*)? + ; + +block : LBRACE blockItem* RBRACE ; @@ -71,28 +149,79 @@ blockItem ; stmt - : returnStmt + : lVal ASSIGN exp SEMICOLON + | exp? SEMICOLON + | block + | IF LPAREN cond RPAREN stmt (ELSE stmt)? + | WHILE LPAREN cond RPAREN stmt + | BREAK SEMICOLON + | CONTINUE SEMICOLON + | RETURN exp? SEMICOLON ; -returnStmt - : RETURN exp SEMICOLON +exp + : addExp ; -exp - : LPAREN exp RPAREN # parenExp - | var # varExp - | number # numberExp - | exp ADD exp # additiveExp +cond + : lOrExp ; -var - : ID +lVal + : ID (LBRACK exp RBRACK)* ; -lValue - : ID +primaryExp + : LPAREN exp RPAREN + | lVal + | number ; number - : ILITERAL + : INT_CONST + | FLOAT_CONST + ; + +unaryExp + : primaryExp + | ID LPAREN funcRParams? RPAREN + | unaryOp unaryExp + ; + +unaryOp + : ADD + | SUB + | NOT + ; + +funcRParams + : exp (COMMA exp)* + ; + +mulExp + : unaryExp ((MUL | DIV | MOD) unaryExp)* + ; + +addExp + : mulExp ((ADD | SUB) mulExp)* + ; + +relExp + : addExp ((LT | GT | LE | GE) addExp)* + ; + +eqExp + : relExp ((EQ | NE) relExp)* + ; + +lAndExp + : eqExp (LAND eqExp)* + ; + +lOrExp + : lAndExp (LOR lAndExp)* + ; + +constExp + : addExp ; From 86ce9bcf50e5258c6dd831cfb6a64d853643398d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=95=E6=81=A9=E5=87=AF?= <15609889+biankai001@user.noreply.gitee.com> Date: Mon, 23 Mar 2026 21:10:21 +0800 Subject: [PATCH 2/2] ... --- .gitignore | 5 +++ CMakeLists.txt | 7 ++++ Cargo.lock | 7 ++++ Cargo.toml | 6 +++ src/antlr4/SysY.g4 | 1 + src/frontend/CMakeLists.txt | 45 +++++++++++++++++---- src/main.rs | 3 ++ test_parse.sh | 78 +++++++++++++++++++++++++++++++++++++ 8 files changed, 144 insertions(+), 8 deletions(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/main.rs create mode 100755 test_parse.sh diff --git a/.gitignore b/.gitignore index 1ee33a1..5d98489 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,8 @@ Thumbs.db # Project outputs # ========================= test/test_result/ + + +# Added by cargo + +/target diff --git a/CMakeLists.txt b/CMakeLists.txt index 74dcb27..3ac5b22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.20) project(compiler LANGUAGES C CXX) +find_package(Java REQUIRED COMPONENTS Runtime) + # 统一 C++ 标准(按实验环境可调整) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -39,6 +41,11 @@ endif() option(COMPILER_PARSE_ONLY "Build only the frontend parser pipeline" OFF) +set(ANTLR4_JAR "${PROJECT_SOURCE_DIR}/third_party/antlr-4.13.2-complete.jar") +if(NOT EXISTS "${ANTLR4_JAR}") + message(FATAL_ERROR "ANTLR jar not found: ${ANTLR4_JAR}") +endif() + # 使用仓库内 third_party 提供的 ANTLR4 C++ runtime(较新版本) # third_party 目录结构以仓库为准:runtime 源码位于 third_party/antlr4-runtime-4.13.2/runtime/src set(ANTLR4_RUNTIME_SRC_DIR "${PROJECT_SOURCE_DIR}/third_party/antlr4-runtime-4.13.2/runtime/src") diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..bc78ab8 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "nudt-compiler-cpp" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..174e3dc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "nudt-compiler-cpp" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/src/antlr4/SysY.g4 b/src/antlr4/SysY.g4 index d3633df..3027924 100644 --- a/src/antlr4/SysY.g4 +++ b/src/antlr4/SysY.g4 @@ -225,3 +225,4 @@ lOrExp constExp : addExp ; + diff --git a/src/frontend/CMakeLists.txt b/src/frontend/CMakeLists.txt index 524fcd6..de5b7f2 100644 --- a/src/frontend/CMakeLists.txt +++ b/src/frontend/CMakeLists.txt @@ -3,15 +3,44 @@ add_library(frontend STATIC SyntaxTreePrinter.cpp ) +set(ANTLR4_GRAMMAR "${PROJECT_SOURCE_DIR}/src/antlr4/SysY.g4") +set(ANTLR4_GENERATED_FILES + "${ANTLR4_GENERATED_DIR}/SysYLexer.cpp" + "${ANTLR4_GENERATED_DIR}/SysYLexer.h" + "${ANTLR4_GENERATED_DIR}/SysYLexer.interp" + "${ANTLR4_GENERATED_DIR}/SysYLexer.tokens" + "${ANTLR4_GENERATED_DIR}/SysYParser.cpp" + "${ANTLR4_GENERATED_DIR}/SysYParser.h" + "${ANTLR4_GENERATED_DIR}/SysY.interp" + "${ANTLR4_GENERATED_DIR}/SysY.tokens" + "${ANTLR4_GENERATED_DIR}/SysYBaseVisitor.h" + "${ANTLR4_GENERATED_DIR}/SysYVisitor.h" +) + +add_custom_command( + OUTPUT ${ANTLR4_GENERATED_FILES} + COMMAND ${CMAKE_COMMAND} -E make_directory "${ANTLR4_GENERATED_DIR}" + COMMAND ${Java_JAVA_EXECUTABLE} -jar "${ANTLR4_JAR}" + -Dlanguage=Cpp + -visitor + -no-listener + -Xexact-output-dir + -o "${ANTLR4_GENERATED_DIR}" + "${ANTLR4_GRAMMAR}" + DEPENDS "${ANTLR4_GRAMMAR}" "${ANTLR4_JAR}" + COMMENT "Generating ANTLR4 parser sources from SysY.g4" + VERBATIM +) + +add_custom_target(antlr4_generated DEPENDS ${ANTLR4_GENERATED_FILES}) +add_dependencies(frontend antlr4_generated) + +target_sources(frontend PRIVATE + "${ANTLR4_GENERATED_DIR}/SysYLexer.cpp" + "${ANTLR4_GENERATED_DIR}/SysYParser.cpp" +) + target_link_libraries(frontend PUBLIC build_options ${ANTLR4_RUNTIME_TARGET} ) - -# 自动纳入构建目录中的 Lexer/Parser 生成源码(若存在) -file(GLOB_RECURSE ANTLR4_GENERATED_SOURCES CONFIGURE_DEPENDS - "${ANTLR4_GENERATED_DIR}/*.cpp" -) -if(ANTLR4_GENERATED_SOURCES) - target_sources(frontend PRIVATE ${ANTLR4_GENERATED_SOURCES}) -endif() diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/test_parse.sh b/test_parse.sh new file mode 100755 index 0000000..0b5a077 --- /dev/null +++ b/test_parse.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# ================================================ +# SysY 编译器 Lab1 批量解析测试脚本 +# 文件名:scripts/test_parse.sh +# 适用环境:Arch Linux(bash 原生支持,无需额外安装) +# 功能: +# - 遍历 test/test_case 下所有 .sy 文件(functional + performance) +# - 执行 --emit-parse-tree 检查是否能成功解析 +# - 输出简洁的 PASS/FAIL 结果 + 统计 +# - 错误文件会自动打印最后 10 行报错信息(方便调试) +# - 所有结果保存到 test/test_result/parse_test.log +# ================================================ + +set -u # 遇到未定义变量直接报错 + +# ================== 配置 ================== +COMPILER="./build/bin/compiler" +TEST_DIR="test/test_case" +LOG_FILE="test/test_result/parse_test.log" +MAX_ERROR_LINES=10 + +# 检查编译器是否存在 +if [[ ! -x "$COMPILER" ]]; then + echo "❌ 错误:找不到编译器 $COMPILER" + echo " 请先执行 Lab1 构建命令:" + echo " cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DCOMPILER_PARSE_ONLY=ON" + echo " cmake --build build -j \"\$(nproc)\"" + exit 1 +fi + +# 创建日志目录(如果不存在) +mkdir -p "$(dirname "$LOG_FILE")" +> "$LOG_FILE" # 清空日志 + +echo "开始 Lab1 批量语法树测试..." | tee -a "$LOG_FILE" +echo "测试目录:$TEST_DIR" | tee -a "$LOG_FILE" +echo "编译器:$COMPILER" | tee -a "$LOG_FILE" +echo "========================================" | tee -a "$LOG_FILE" + +pass=0 +fail=0 +total=0 + +# 遍历所有 .sy 文件(支持子目录) +while IFS= read -r -d '' sy_file; do + ((total++)) + echo -n "[$total] 测试: $sy_file ... " | tee -a "$LOG_FILE" + + # 执行解析(把输出丢到 /dev/null,防止刷屏) + if "$COMPILER" --emit-parse-tree "$sy_file" > /dev/null 2>&1; then + echo "✅PASS" | tee -a "$LOG_FILE" + ((pass++)) + else + echo "FAIL" | tee -a "$LOG_FILE" + ((fail++)) + + # 打印错误信息到日志(最后几行) + echo " └── 错误详情(最后 $MAX_ERROR_LINES 行):" >> "$LOG_FILE" + "$COMPILER" --emit-parse-tree "$sy_file" 2>&1 | tail -n "$MAX_ERROR_LINES" >> "$LOG_FILE" + echo "" >> "$LOG_FILE" + fi +done < <(find "$TEST_DIR" -name "*.sy" -print0 | sort -z) + +# ================== 总结 ================== +echo "========================================" | tee -a "$LOG_FILE" +echo "测试完成!" | tee -a "$LOG_FILE" +echo "总文件数 : $total" | tee -a "$LOG_FILE" +echo "通过 : $pass" | tee -a "$LOG_FILE" +echo "失败 : $fail" | tee -a "$LOG_FILE" + +if [[ $fail -eq 0 ]]; then + echo "恭喜!Lab1 语法树构建全部通过!可以进入 Lab2 啦~" | tee -a "$LOG_FILE" +else + echo "有 $fail 个文件解析失败,请检查 SysY.g4 或报错日志" | tee -a "$LOG_FILE" + echo " 日志文件:$LOG_FILE" | tee -a "$LOG_FILE" +fi + +echo "========================================" | tee -a "$LOG_FILE" \ No newline at end of file