From 2f09b39f56dbdd6d7f9c0701861dd8acd867b301 Mon Sep 17 00:00:00 2001 From: David Pichardie Date: Tue, 17 Nov 2020 08:05:38 -0800 Subject: [PATCH] New java source parser for declaration locations (experimental) Summary: The current source parser is based on ocamllex only. In order to track field declaration locations, we propose a new parser using ocamllex/menhir. This is a more ambitious project that closely follows the official Java syntax. Reviewed By: jvillard Differential Revision: D24858280 fbshipit-source-id: 22d6766e5 --- infer/man/man1/infer-full.txt | 4 + infer/src/base/Config.ml | 7 + infer/src/base/Config.mli | 2 + infer/src/infer.ml | 4 +- infer/src/java/JavaFrontendStubs.ml | 4 + infer/src/java/JavaFrontendStubs.mli | 4 + infer/src/java/dune.in | 5 +- infer/src/java/jFrontend.ml | 4 +- infer/src/java/jSourceAST.ml | 43 + infer/src/java/jSourceAST.mli | 20 + infer/src/java/jSourceLexer.mll | 244 ++++++ infer/src/java/jSourceLocations.ml | 53 ++ infer/src/java/jSourceLocations.mli | 13 + infer/src/java/jSourceParser.mly | 773 ++++++++++++++++++ .../build_systems/java_source_parser/Makefile | 13 +- 15 files changed, 1185 insertions(+), 8 deletions(-) create mode 100644 infer/src/java/jSourceAST.ml create mode 100644 infer/src/java/jSourceAST.mli create mode 100644 infer/src/java/jSourceLexer.mll create mode 100644 infer/src/java/jSourceLocations.ml create mode 100644 infer/src/java/jSourceLocations.mli create mode 100644 infer/src/java/jSourceParser.mly diff --git a/infer/man/man1/infer-full.txt b/infer/man/man1/infer-full.txt index 32f72b953..b34f7f8ea 100644 --- a/infer/man/man1/infer-full.txt +++ b/infer/man/man1/infer-full.txt @@ -1566,6 +1566,10 @@ INTERNAL OPTIONS --java-jar-compiler-reset Cancel the effect of --java-jar-compiler. + --java-source-parser-experimental + Activates: The experimental Java source parser for declaration + locations. 
(Conversely: --no-java-source-parser-experimental) + --java-version-reset Cancel the effect of --java-version. diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index f68581afc..38e817398 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -1460,6 +1460,11 @@ and java_jar_compiler = ~meta:"path" "Specify the Java compiler jar used to generate the bytecode" +and java_source_parser_experimental = + CLOpt.mk_bool ~long:"java-source-parser-experimental" + "The experimental Java source parser for declaration locations." + + and java_version = CLOpt.mk_int_opt ~long:"java-version" ?default:Version.java_version ~in_help:InferCommand.[(Capture, manual_java); (Analyze, manual_java)] @@ -2927,6 +2932,8 @@ and java_debug_source_file_info = !java_debug_source_file_info and java_jar_compiler = !java_jar_compiler +and java_source_parser_experimental = !java_source_parser_experimental + and java_version = !java_version and javac_classes_out = !javac_classes_out diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index 581acd88e..dbfb0ece8 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -344,6 +344,8 @@ val java_debug_source_file_info : string option val java_jar_compiler : string option +val java_source_parser_experimental : bool + val java_version : int option val javac_classes_out : string diff --git a/infer/src/infer.ml b/infer/src/infer.ml index 1f10780f8..2dc128202 100644 --- a/infer/src/infer.ml +++ b/infer/src/infer.ml @@ -157,7 +157,9 @@ let () = | _ when Config.test_determinator && not Config.process_clang_ast -> TestDeterminator.compute_and_emit_test_to_run () | _ when Option.is_some Config.java_debug_source_file_info -> - JSourceFileInfo.debug_on_file (Option.value_exn Config.java_debug_source_file_info) + if Config.java_source_parser_experimental then + JSourceLocations.debug_on_file (Option.value_exn Config.java_debug_source_file_info) + else JSourceFileInfo.debug_on_file (Option.value_exn 
Config.java_debug_source_file_info) | Analyze -> run Driver.Analyze | Capture | Compile | Run -> diff --git a/infer/src/java/JavaFrontendStubs.ml b/infer/src/java/JavaFrontendStubs.ml index 3871b9c35..dcf03900e 100644 --- a/infer/src/java/JavaFrontendStubs.ml +++ b/infer/src/java/JavaFrontendStubs.ml @@ -16,3 +16,7 @@ end module JSourceFileInfo = struct let debug_on_file _ = () end + +module JSourceLocations = struct + let debug_on_file _ = () +end diff --git a/infer/src/java/JavaFrontendStubs.mli b/infer/src/java/JavaFrontendStubs.mli index 83fcde890..b0efd2185 100644 --- a/infer/src/java/JavaFrontendStubs.mli +++ b/infer/src/java/JavaFrontendStubs.mli @@ -22,3 +22,7 @@ end module JSourceFileInfo : sig val debug_on_file : string -> unit end + +module JSourceLocations : sig + val debug_on_file : string -> unit +end diff --git a/infer/src/java/dune.in b/infer/src/java/dune.in index 0f3a5e5c8..d6b793328 100644 --- a/infer/src/java/dune.in +++ b/infer/src/java/dune.in @@ -7,7 +7,8 @@ *) (* NOTE: prepend dune.common to this file! 
*) -let lexer = "(ocamllex jSourceFileInfo)" +let lexer = "(ocamllex jSourceLexer jSourceFileInfo)" +let jparser = "(menhir (modules jSourceParser))" let java = Printf.sprintf @@ -30,4 +31,4 @@ let java = let documentation = "(documentation (package infer) (mld_files JavaFrontend))" ;; -Jbuild_plugin.V1.send (String.concat "\n" [lexer; java; documentation]) +Jbuild_plugin.V1.send (String.concat "\n" [lexer; jparser; java; documentation]) diff --git a/infer/src/java/jFrontend.ml b/infer/src/java/jFrontend.ml index a07613fee..6c5d4e167 100644 --- a/infer/src/java/jFrontend.ml +++ b/infer/src/java/jFrontend.ml @@ -217,7 +217,9 @@ let compute_source_icfg program tenv source_basename package_opt source_file = if test node then try procedure cn node with Bir.Subroutine -> () in (* we must set the java location for all classes in the source file before translation *) - JSourceFileInfo.collect_class_location program source_file ; + if Config.java_source_parser_experimental then + JSourceLocations.collect_class_location program source_file + else JSourceFileInfo.collect_class_location program source_file ; let () = JBasics.ClassMap.iter (select diff --git a/infer/src/java/jSourceAST.ml b/infer/src/java/jSourceAST.ml new file mode 100644 index 000000000..af17fc28f --- /dev/null +++ b/infer/src/java/jSourceAST.ml @@ -0,0 +1,43 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! 
IStd
+
+type location = {line: int; col: int}
+
+type class_kind = Class of string | Interface of string | AnonymousClass | Enum of string
+
+type class_or_interface =
+  {location: location; kind: class_kind; inner_elements: class_or_interface list}
+
+type file_content = {package: string option; classes: class_or_interface list}
+
+type context = {prefix: string; mutable counter: int}
+
+(** simple name of a declaration; an anonymous class takes the next value of [context.counter] *)
+let name_of_kind context kind =
+  match kind with
+  | AnonymousClass ->
+      context.counter <- context.counter + 1 ;
+      string_of_int context.counter
+  | Class id | Interface id | Enum id ->
+      id
+
+
+(** report one declaration, then its inner elements under the prefix [<prefix><name>$] *)
+let rec iter ~action_on_class_location context {location= {line; col}; kind; inner_elements} =
+  let name = name_of_kind context kind in
+  action_on_class_location ~classname:(context.prefix ^ name) ~col ~line ;
+  let inner_context = {prefix= Printf.sprintf "%s%s$" context.prefix name; counter= 0} in
+  List.iter inner_elements ~f:(iter ~action_on_class_location inner_context)
+
+
+(** visit every declaration of a parsed file; names are qualified by the package when present *)
+let iter_on_declarations ~action_on_class_location {package; classes} =
+  let prefix = match package with None -> "" | Some pkg -> pkg ^ "." in
+  let context = {prefix; counter= 0} in
+  List.iter classes ~f:(iter ~action_on_class_location context)
diff --git a/infer/src/java/jSourceAST.mli b/infer/src/java/jSourceAST.mli
new file mode 100644
index 000000000..8fb284cb7
--- /dev/null
+++ b/infer/src/java/jSourceAST.mli
@@ -0,0 +1,20 @@
+(*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! 
IStd + +type location = {line: int; col: int} + +type class_kind = Class of string | Interface of string | AnonymousClass | Enum of string + +type class_or_interface = + {location: location; kind: class_kind; inner_elements: class_or_interface list} + +type file_content = {package: string option; classes: class_or_interface list} + +val iter_on_declarations : + action_on_class_location:(classname:string -> col:int -> line:int -> unit) -> file_content -> unit diff --git a/infer/src/java/jSourceLexer.mll b/infer/src/java/jSourceLexer.mll new file mode 100644 index 000000000..520c8ec5b --- /dev/null +++ b/infer/src/java/jSourceLexer.mll @@ -0,0 +1,244 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +{ + + (** classic Ocamllex function to update current lexbuf line at each end of + line *) + let incr_linenum lexbuf = + let pos = lexbuf.Lexing.lex_curr_p in + lexbuf.Lexing.lex_curr_p <- { pos with + Lexing.pos_lnum = pos.Lexing.pos_lnum + 1; + Lexing.pos_bol = pos.Lexing.pos_cnum; + } + + open JSourceParser + +} + +let whitespace = [' ' '\t'] +let eol = whitespace*("\r")?"\n" (* end of line *) +let eol_comment = "//" [^'\n']* +let id = ['a'-'z' 'A'-'Z' '_' '$'] ['a'-'z' 'A'-'Z' '0'-'9' '_' '$']* +let char = "'\\''" | "'\"'" | "'" [ ^'\'' ]+ "'" +let modifier = "public"|"protected"|"private"|"abstract"|"static"| + "final"|"strictfp"|"transient"|"volatile" +let numeric_type = "byte"|"short"|"int"|"long"|"char"|"float"|"double" +let primitive_type = "boolean"|numeric_type +let assignment_operator = "*="|"/="|"%="|"+="|"-="|"<<="|">>="|">>>="|"&="|"^="|"|=" +let binop = "||"|"&&"|"&"|"^"|"=="|"!="|"<="|">="|"<<"|">>"|">>>"|"+"|"-"|"*"|"/"|"%" + +let binary_numeral_prefix = "0" ("b"|"B") +let hex_numeral_prefix = "0" ("x"|"X") +let numeral_prefix = ['0'-'9'] | binary_numeral_prefix | hex_numeral_prefix +let numeral_digit = 
['0'-'9' 'a'-'f' 'A'-'F' '_'] +let integer_literal = numeral_prefix numeral_digit* ['l' 'L']? +let hexadecimal_floating_point_literal = hex_numeral_prefix (numeral_digit | ".")+ ("p"|"P") +let decimal_floating_point_literal = hex_numeral_prefix (numeral_digit | ".")+ ("p"|"P") +let digits = ['0'-'9']+ +let float_type_suffix = ['f' 'F' 'd' 'D'] +let exponent_part = ['e' 'E'] ['-' '+']? digits +let floating_point_literal = + (digits "." digits? exponent_part? float_type_suffix?) +| ("." digits exponent_part? float_type_suffix?) +| (digits exponent_part float_type_suffix?) +| (digits exponent_part? float_type_suffix) + + +(* We follow an abstraction of the official grammar described here: + https://docs.oracle.com/javase/specs/jls/se14/html/jls-19.html *) +rule class_scan = parse + | whitespace+ + { class_scan lexbuf } + | eol_comment + { class_scan lexbuf } + | "/*" + { skip_comments (class_scan) lexbuf } + | eol + { incr_linenum lexbuf; + class_scan lexbuf } + | "package" + { PACKAGE } + | "import" + { IMPORT } + | "class" + { CLASS } + | "instanceof" + { INSTANCEOF } + | "interface" + { INTERFACE } + | "void" + { VOID } + | "throws" + { THROWS } + | "enum" + { ENUM } + | modifier + { class_scan lexbuf } + | primitive_type + { PRIMTYPE } + | "<" + { LANGLE } + | ">" + { RANGLE } + | "new" + { NEW } + | "var" + { VAR } + | "extends" + { EXTENDS } + | "super" + { SUPER } + | "implements" + { IMPLEMENTS } + | "assert" + { ASSERT } + | "do" + { DO } + | "while" + { WHILE } + | "if" + { IF } + | "else" + { ELSE } + | "try" + { TRY } + | "catch" + { CATCH } + | "finally" + { FINALLY } + | "for" + { FOR } + | "break" + { BREAK } + | "continue" + { CONTINUE } + | "return" + { RETURN } + | "throw" + { THROW } + | "synchronized" + { SYNCHRONIZED } + | "yield" + { YIELD } + | "null" + { NULL } + | "true" + { TRUE } + | "false" + { FALSE } + + | (floating_point_literal as f) + { FLOATINGPOINT f } + | (integer_literal as i) + { INTEGER i } + + + | (id as name) + { IDENT name } + 
+  | "\"" (([^ '\"' '\\' '\n' '\r'] | '\\' [^ '\n' '\r'])* as s) "\""
+    { STRING s } (* a backslash and the character after it are consumed together, so an escaped quote does not end the literal *)
+  | (char as s)
+    { CHAR s }
+  | ";"
+    { SEMICOLON }
+  | ":"
+    { COLON }
+  | "."
+    { DOT }
+  | "{"
+    { LBRACKET }
+  | "}"
+    { RBRACKET }
+  | "["
+    { LSBRACKET }
+  | "]"
+    { RSBRACKET }
+  | "("
+    { LPAREN }
+  | ")"
+    { RPAREN }
+  | ","
+    { COMMA }
+  | "?"
+    { QMARK }
+  | ("++"|"--")
+    { INCR_DECR }
+  | "|"
+    { PIPE }
+  | "="
+    { EQ }
+  | "!"
+    { BANG }
+  | "~"
+    { TILDE }
+  | "..."
+    { THREEDOTS }
+  | assignment_operator
+    { ASSIGNOP}
+  | binop
+    { BINOP }
+
+  | "@" whitespace* id ("." id)* "(" (* annotation with arguments: skip up to the matching parenthesis *)
+    { skip_well_parenthesized_parentheses 1
+      (class_scan) lexbuf }
+  | "@" whitespace* id ("." id)* (* annotation without arguments: dropped *)
+    { class_scan lexbuf }
+
+  | _ (* any other character is silently ignored *)
+    { class_scan lexbuf }
+  | eof
+    { EOF }
+
+(* we skip type annotation arguments (...) because they may contain brackets;
+   [width] is the current parenthesis depth and [action] the rule resumed once it is closed *)
+and skip_well_parenthesized_parentheses width action = parse
+  | eol
+    { incr_linenum lexbuf;
+      skip_well_parenthesized_parentheses width action lexbuf }
+  | "("
+    { skip_well_parenthesized_parentheses (width+1) action lexbuf }
+  | ")"
+    { if width<=1 then action lexbuf
+      else skip_well_parenthesized_parentheses (width-1) action lexbuf }
+  | eol_comment
+    { skip_well_parenthesized_parentheses width action lexbuf }
+  | "/*"
+    { skip_comments
+        (skip_well_parenthesized_parentheses width action) lexbuf }
+  | "\""
+    { skip_string (skip_well_parenthesized_parentheses width action) lexbuf }
+  | char
+    { skip_well_parenthesized_parentheses width action lexbuf }
+  | _
+    { skip_well_parenthesized_parentheses width action lexbuf }
+
+and skip_comments action = parse (* skip to the end of a block comment, then resume with [action] *)
+  | "*/"
+    { action lexbuf }
+  | eol
+    { incr_linenum lexbuf;
+      skip_comments action lexbuf }
+  | _
+    { skip_comments action lexbuf }
+
+and skip_string action = parse (* skip to the closing quote of a string, then resume with [action] *)
+  | "\\\\"
+    { skip_string action lexbuf }
+  | "\\\""
+    { skip_string action lexbuf }
+  | "\""
+    { action lexbuf }
+  | _
+    { skip_string action lexbuf }
+
+
+{
+
+
+
+}
diff --git a/infer/src/java/jSourceLocations.ml b/infer/src/java/jSourceLocations.ml
new file 
mode 100644
index 000000000..7786eaf58
--- /dev/null
+++ b/infer/src/java/jSourceLocations.ml
@@ -0,0 +1,60 @@
+(*
+ * Copyright (c) 2009-2013, Monoidics ltd.
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+open Javalib_pack
+
+(** Record in [program] the source location of every declaration that the experimental parser
+    finds in the Java source [file]. Files without a [.java] suffix are ignored, and a file that
+    cannot be parsed is only reported in the debug log. *)
+let collect_class_location (program : JProgramDesc.t) (file : SourceFile.t) =
+  let path = SourceFile.to_abs_path file in
+  if String.is_suffix path ~suffix:".java" then (
+    let action_on_class_location ~classname ~col ~line =
+      let loc : Location.t = {line; col; file} in
+      let cn : JBasics.class_name = JBasics.make_cn classname in
+      Logging.debug Capture Verbose "set_java_location %s with location %a@." (JBasics.cn_name cn)
+        Location.pp_file_pos loc ;
+      JProgramDesc.set_java_location program cn loc
+    in
+    (* [with_file] closes the channel on every exit path, including unexpected exceptions *)
+    In_channel.with_file path ~f:(fun cin ->
+        let filebuf = Lexing.from_channel cin in
+        try
+          let cl = JSourceParser.main JSourceLexer.class_scan filebuf in
+          JSourceAST.iter_on_declarations ~action_on_class_location cl
+        with
+        | Failure s ->
+            Logging.debug Capture Verbose "Error parsing source file %s\n%s" path s
+        | JSourceParser.Error ->
+            Logging.debug Capture Verbose "JSourceParser error on file %s\n" path ) )
+
+
+(** Print on stdout the name and position of every declaration found in the Java source file at
+    [path]; parse failures are reported on stderr. Files without a [.java] suffix are ignored. *)
+let debug_on_file path =
+  if String.is_suffix path ~suffix:".java" then (
+    let action_on_class_location ~classname ~col ~line =
+      Printf.printf "class %s at line %d, column %d\n" classname line col
+    in
+    In_channel.with_file path ~f:(fun cin ->
+        let filebuf = Lexing.from_channel cin in
+        try
+          let cl = JSourceParser.main JSourceLexer.class_scan filebuf in
+          JSourceAST.iter_on_declarations ~action_on_class_location cl
+        with
+        | Failure s ->
+            (* same case as in [collect_class_location]: report it instead of aborting *)
+            Printf.eprintf "Error parsing source file %s\n%s\n%!" path s
+        | JSourceParser.Error ->
+            (* step back by the length of the current lexeme to report where it starts *)
+            let pos = filebuf.Lexing.lex_curr_p in
+            let buf_length = Lexing.lexeme_end filebuf - Lexing.lexeme_start filebuf in
+            let line = pos.Lexing.pos_lnum in
+            let col = pos.Lexing.pos_cnum - pos.Lexing.pos_bol - buf_length in
+            Printf.eprintf "Java source syntax error at line %d, column %d.\n%!" line col ) )
diff --git a/infer/src/java/jSourceLocations.mli b/infer/src/java/jSourceLocations.mli
new file mode 100644
index 000000000..bd7b7f44c
--- /dev/null
+++ b/infer/src/java/jSourceLocations.mli
@@ -0,0 +1,13 @@
+(*
+ * Copyright (c) 2009-2013, Monoidics ltd.
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+
+open! IStd
+
+val collect_class_location : JProgramDesc.t -> SourceFile.t -> unit
+
+val debug_on_file : string -> unit
diff --git a/infer/src/java/jSourceParser.mly b/infer/src/java/jSourceParser.mly
new file mode 100644
index 000000000..52212b5cf
--- /dev/null
+++ b/infer/src/java/jSourceParser.mly
@@ -0,0 +1,773 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ +%{ + open JSourceAST + + let location_of_pos pos = + let line = pos.Lexing.pos_lnum in + let col = pos.Lexing.pos_cnum - pos.Lexing.pos_bol in + { line; col } + +%} + +%token PACKAGE +%token IMPORT +%token CLASS +%token INTERFACE +%token IMPLEMENTS +%token EXTENDS +%token SUPER +%token ENUM +%token NEW +%token INSTANCEOF +%token VOID +%token THROWS +%token VAR +%token ASSERT +%token DO +%token WHILE +%token IF +%token ELSE +%token TRY +%token CATCH +%token FINALLY +%token FOR +%token BREAK +%token CONTINUE +%token RETURN +%token THROW +%token SYNCHRONIZED +%token YIELD +%token THIS +%token NULL +%token TRUE +%token FALSE + + +//non Java terminals +%token PRIMTYPE + +%token LBRACKET +%token RBRACKET +%token LSBRACKET +%token RSBRACKET +%token LPAREN +%token RPAREN +%token LANGLE +%token RANGLE + +%token EQ +%token ASSIGNOP +%token SEMICOLON +%token COLON +%token COMMA +%token DOT +%token QMARK +%token BANG +%token TILDE +%token PIPE + +%token INCR_DECR +%token THREEDOTS + +%token EOF + +%token IDENT +%token INTEGER +%token FLOATINGPOINT +%token STRING +%token CHAR + +%token BINOP + +%left BINOP LANGLE RANGLE +%nonassoc INSTANCEOF + +%start main + +%% + +main: + | package_declaration? import_declaration* app_list(type_declaration) EOF + { { package = $1; classes = $3 } } + +import_declaration: + | IMPORT import_name SEMICOLON + {} + +import_name: + | IDENT import_name_end + {} +import_name_end: + | + | DOT BINOP + | DOT IDENT import_name_end + {} + +type_arguments: + | LANGLE type_argument_list RANGLE + {} + +type_argument_list: + | separated_nonempty_list(COMMA, type_argument) + {} + +type_argument: + | unann_reference_type // If we decide to parse annotations, it will + // require change here + | QMARK wildcard_bounds? 
+ {} + +wildcard_bounds: + | EXTENDS unann_reference_type + | SUPER unann_reference_type + {} + +package_declaration: + | PACKAGE unann_class_or_interface_type SEMICOLON + { $2 } + +type_declaration: + | class_declaration + | interface_declaration + { [$1] } + | SEMICOLON + { [] } + +class_declaration: + | normal_class_declaration + | enum_declaration + { $1 } + +normal_class_declaration: + | CLASS id=identifier superclass? superinterfaces? inner=class_body + { { + location = location_of_pos $startpos(id); + kind = Class id; + inner_elements = inner; + } + } + +enum_declaration: + | ENUM id=identifier superinterfaces? inner=enum_body + { { + location = location_of_pos $startpos(id); + kind = Enum id; + inner_elements = inner; + } + } + +superclass: + | EXTENDS class_type + {} + +superinterfaces: + | IMPLEMENTS separated_nonempty_list(COMMA, class_type) + {} + +enum_body: + | LBRACKET enum_constant_list RBRACKET // TODO add optional comma + { [] } + | LBRACKET enum_constant_list inner=enum_body_declarations RBRACKET + { inner } + +enum_constant_list: + | separated_nonempty_list(COMMA, enum_constant) + {} + +enum_constant: + | identifier class_body? + | identifier LPAREN argument_list? RPAREN class_body? + {} + +enum_body_declarations: + | SEMICOLON app_list(class_body_declaration) + { $2 } + +class_body: + | LBRACKET app_list(class_body_declaration) RBRACKET + { $2 } + +class_body_declaration: + | class_member_declaration + { $1 } + | constructor_declaration + { $1 } + +constructor_declaration: + | constructor_declarator throws? 
inner=constructor_body
+    { inner }
+
+constructor_declarator:
+  | identifier LPAREN formal_parameter_list RPAREN // TODO add receive_parameter
+    {}
+
+constructor_body:
+  | LBRACKET inner=loption(block_statements) RBRACKET
+    { inner }
+
+argument_list:
+  | app_separated_non_empty_list(COMMA, expression)
+    { $1 }
+
+class_member_declaration:
+  | class_declaration
+  | interface_declaration
+    { [$1] }
+  | field_declaration
+  | method_declaration
+    { $1 }
+  | SEMICOLON
+    { [] }
+
+method_declaration:
+  | method_header method_body
+    { $2 }
+
+interface_declaration:
+  | normal_interface_declaration
+    { $1 }
+
+normal_interface_declaration:
+  | INTERFACE id=identifier inner=interface_body
+    { {
+        location = location_of_pos $startpos(id);
+        kind = Interface id;
+        inner_elements = inner;
+      }
+    }
+
+interface_body:
+  | LBRACKET app_list(interface_member_declaration) RBRACKET
+    { $2 }
+
+interface_member_declaration:
+  | constant_declaration
+  | interface_method_declaration
+    { $1 } // keep the classes found in constant initializers and in method bodies
+  | class_declaration
+  | interface_declaration
+    { [$1] }
+
+interface_method_declaration:
+  | method_header method_body
+    { $2 } // same as method_declaration: the classes declared in the body
+
+method_header:
+  | result method_declarator throws?
+    {}
+
+%inline
+result:
+  | unann_type
+  | VOID
+    {}
+
+method_declarator:
+  | identifier LPAREN formal_parameter_list RPAREN dims? 
//TODO add receiver_parameter + {} + +formal_parameter_list: + | separated_list(COMMA, formal_parameter) + {} + +formal_parameter: + | unann_type variable_declarator_id + | variable_arity_parameter + {} + +variable_arity_parameter: + | unann_type THREEDOTS identifier + {} + +method_body: + | block + { $1 } + | SEMICOLON + { [] } + +block: + | LBRACKET loption(block_statements) RBRACKET + { $2 } + +block_statements: + | app_non_empty_list(block_statement) + { $1 } + +block_statement: + | class_declaration + { [$1] } + | local_variable_declaration_statement + | statement + { $1 } + +local_variable_declaration_statement: + | local_variable_declaration SEMICOLON + { $1 } + +local_variable_declaration: + | local_variable_type variable_declarator_list + { $2 } + +local_variable_type: + | unann_type + | VAR + {} + +statement: + | statement_without_trailing_substatement +// TODO: add labeled_statement + | if_then_statement + | if_then_else_statement + | while_statement + | for_statement + { $1 } + +for_statement: + | basic_for_statement + | enhanced_for_statement + { $1 } + +for_statement_no_short_if: + | basic_for_statement_no_short_if + | enhanced_for_statement_no_short_if + { $1 } + +basic_for_statement: + | FOR LPAREN loption(for_init) SEMICOLON loption(expression) SEMICOLON loption(for_update) RPAREN statement + { $3 @ $5 @ $7 @ $9 } + +enhanced_for_statement: + | FOR LPAREN local_variable_type variable_declarator_id COLON expression RPAREN statement + { $6 @ $8 } + +enhanced_for_statement_no_short_if: + | FOR LPAREN local_variable_type variable_declarator_id COLON expression RPAREN statement_no_short_if + { $6 @ $8 } + +basic_for_statement_no_short_if: + | FOR LPAREN loption(for_init) SEMICOLON loption(expression) SEMICOLON loption(for_update) RPAREN statement_no_short_if + { $3 @ $5 @ $7 @ $9 } + +for_init: + | statement_expression_list + | local_variable_declaration + { $1 } + +for_update: + | statement_expression_list + { $1 } + +statement_expression_list: + | 
app_separated_non_empty_list(COMMA, statement_expression) + { $1 } + +if_then_statement: + | IF LPAREN expression RPAREN statement + { $3 @ $5 } + +if_then_else_statement: + | IF LPAREN expression RPAREN statement_no_short_if ELSE statement + { $3 @ $5 @ $7 } + +if_then_else_statement_no_short_if: + | IF LPAREN expression RPAREN statement_no_short_if ELSE statement_no_short_if + { $3 @ $5 @ $7 } + +statement_no_short_if: + | statement_without_trailing_substatement + | if_then_else_statement_no_short_if + | while_statement_no_short_if + | for_statement_no_short_if + { $1 } + +while_statement: + | WHILE LPAREN expression RPAREN statement + { $3 @ $5 } + +while_statement_no_short_if: + | WHILE LPAREN expression RPAREN statement_no_short_if + { $3 @ $5 } + +statement_without_trailing_substatement: + | block + | empty_statement + | expression_statement + | assert_statement + | do_statement + | break_statement + | continue_statement + | return_statement + | synchronized_statement + | throw_statement + | try_statement + | yield_statement + { $1 } + +empty_statement: + | SEMICOLON + { [] } + +expression_statement: + | statement_expression SEMICOLON + { $1 } + +statement_expression: + | INCR_DECR unary_expression + { $2 } + | assignment + | postfix_expression INCR_DECR + | method_invocation + | class_instance_creation_expression + { $1 } + +assert_statement: + | ASSERT expression SEMICOLON + { $2 } + | ASSERT expression COLON expression SEMICOLON + { $2 @ $4 } + +do_statement: + | DO statement WHILE LPAREN expression RPAREN SEMICOLON + { $2 @ $5 } + +break_statement: + | BREAK identifier? SEMICOLON + { [] } + +continue_statement: + | CONTINUE identifier? 
SEMICOLON
+    { [] }
+
+return_statement:
+  | RETURN loption(expression) SEMICOLON
+    { $2 }
+
+synchronized_statement:
+  | SYNCHRONIZED LPAREN expression RPAREN block
+    { $3 @ $5 } // classes of the lock expression, then those declared in the guarded block
+
+try_statement:
+  | TRY block catches
+    { $2 @ $3 }
+  | TRY block loption(catches) finally
+    { $2 @ $3 @ $4 }
+
+catches:
+  | app_non_empty_list(catch_clause)
+    { $1 }
+
+catch_clause:
+  | CATCH LPAREN catch_formal_parameter RPAREN block
+    { $5 }
+
+catch_formal_parameter:
+  | catch_type variable_declarator_id
+    {}
+
+catch_type:
+  | separated_nonempty_list(PIPE,unann_class_or_interface_type)
+    {}
+
+finally:
+  | FINALLY block
+    { $2 }
+
+yield_statement:
+  | YIELD expression SEMICOLON
+    { $2 }
+
+throw_statement:
+  | THROW expression SEMICOLON
+    { $2 }
+
+throws:
+  | THROWS exception_type_list
+    { $2 }
+
+exception_type_list:
+  | separated_nonempty_list(COMMA, exception_type)
+    { [] }
+
+exception_type:
+  | unann_class_or_interface_type // WE DROP ANNOTS
+    { [] }
+
+constant_declaration:
+  | unann_type variable_declarator_list SEMICOLON
+    { $2 }
+
+field_declaration:
+  | unann_type variable_declarator_list SEMICOLON
+    { $2 }
+
+variable_declarator_list:
+  | app_separated_non_empty_list(COMMA,variable_declarator)
+    { $1 }
+
+variable_declarator:
+  | variable_declarator_id
+    { [] }
+  | variable_declarator_id EQ variable_initializer
+    { $3 } // classes created inside the initializer expression
+
+variable_declarator_id:
+  | identifier dims?
+    {}
+
+variable_initializer:
+  | expression
+  | array_initializer
+    { $1 }
+
+array_initializer:
+  | LBRACKET RBRACKET
+  | LBRACKET COMMA RBRACKET
+    { [] }
+  | LBRACKET variable_initializer array_initializer_end
+    { $2 @ $3 }
+
+array_initializer_end:
+  | RBRACKET
+  | COMMA RBRACKET
+    { [] }
+  | COMMA variable_initializer array_initializer_end
+    { $2 @ $3 }
+
+unann_type:
+  | PRIMTYPE
+  | unann_reference_type
+    {}
+
+unann_reference_type:
+  | unann_class_or_interface_type
+  | unann_array_type
+    {}
+
+dotted_name:
+  | identifier
+    { $1 }
+  | identifier DOT dotted_name
+    { $1 ^ "." ^ $3 }
+// | package_opt? 
separated_nonempty_list(DOT, IDENT) +// don't know how to write Java programs like that + +%inline +unann_class_or_interface_type: + | dotted_name { $1 } +%inline +class_type: + | dotted_name { $1 } +%inline +class_or_interface_type_to_instantiate: + | dotted_name { $1 } +%inline +expression_name: + | dotted_name { $1 } + +unann_array_type: + | PRIMTYPE dims + | unann_class_or_interface_type dims + {} + +%inline dim: + | LSBRACKET RSBRACKET + {} +dims: + | dim+ + {} + +expression: + | assignment_expression + { $1 } + +assignment_expression: + | conditional_expression + | assignment + { $1 } + +assignment: + | left_hand_side assignment_operator expression + { $1 @ $3 } + +assignment_operator: + | EQ + | ASSIGNOP + {} + +left_hand_side: + | expression_name + { [] } + | field_access + | array_access + { $1 } + +field_access: + | primary DOT identifier + { $1 } + +array_access: + | expression_name LSBRACKET expression RSBRACKET + { $3 } + | primary_no_new_array LSBRACKET expression RSBRACKET + { $1 @ $3 } + +primary: + | primary_no_new_array + | array_creation_expression + { $1 } + +array_creation_expression: + | NEW PRIMTYPE dim_exprs + | NEW class_type dim_exprs + { $3 } + | NEW PRIMTYPE dims array_initializer + | NEW class_type dims array_initializer + { $4 } + +dim_exprs: + | app_non_empty_list(dim_expr) + { $1 } + +dim_expr: + | LSBRACKET expression RSBRACKET + { $2 } + +primary_no_new_array: + | LPAREN expression RPAREN + { $2 } + | literal + | class_literal + | THIS + { [] } + | class_instance_creation_expression + | field_access + | array_access + | method_invocation + { $1 } + +method_invocation: + | expression_name LPAREN loption(argument_list) RPAREN + { $3 } + | primary DOT identifier LPAREN loption(argument_list) RPAREN + { $1 @ $5 } + +literal: + | INTEGER + | FLOATINGPOINT + | boolean + | CHAR + | STRING + | NULL + {} + +boolean: + | TRUE + | FALSE + {} + +class_literal: + | type_name DOT CLASS + {} + +%inline +identifier: + | id=IDENT { id } + +%inline 
+type_name:
+  | IDENT
+    {}
+
+class_instance_creation_expression:
+  | unqualified_class_instance_creation_expression
+    { $1 }
+  | identifier DOT unqualified_class_instance_creation_expression
+    { $3 }
+  | primary DOT unqualified_class_instance_creation_expression
+    { $1 @ $3 }
+
+%inline
+unqualified_class_instance_creation_expression:
+  | NEW class_or_interface_type_to_instantiate LPAREN loption(argument_list) RPAREN
+    { $4 }
+  | NEW class_or_interface_type_to_instantiate LPAREN args=loption(argument_list) RPAREN inner=class_body
+    { args @
+      [{
+        location = location_of_pos $startpos(inner);
+        kind = AnonymousClass;
+        inner_elements = inner;
+      }]
+    }
+
+conditional_expression:
+  | conditional_or_expression
+    { $1 }
+  | conditional_or_expression QMARK expression COLON conditional_expression
+    { $1 @ $3 @ $5 }
+
+// we simplify the official spec and merge many rules here
+conditional_or_expression:
+  | conditional_or_expression binop conditional_or_expression
+    { $1 @ $3 }
+  | conditional_or_expression INSTANCEOF unann_reference_type // WE DROP ANNOTS
+  | unary_expression
+    { $1 }
+
+%inline
+binop:
+  | BINOP | RANGLE | LANGLE
+    {}
+
+unary_expression:
+  | INCR_DECR unary_expression
+  | BINOP unary_expression
+    { $2 }
+  | unary_expression_not_plus_minus
+    { $1 }
+
+
+unary_expression_not_plus_minus:
+  | postfix_expression
+  | cast_expression
+    { $1 }
+  | BANG unary_expression
+  | TILDE unary_expression
+    { $2 }
+
+%inline
+cast_expression:
+  | LPAREN PRIMTYPE RPAREN unary_expression
+    { $4 }
+
+postfix_expression:
+  | primary
+    { $1 }
+  | expression_name
+    { [] }
+  | postfix_expression INCR_DECR
+    { $1 }
+
+// specialized versions of Menhir list macros: each X yields a list and the results are concatenated
+app_list(X):
+    { [] }
+  | x = X; xs = app_list(X)
+    { x@xs }
+
+app_non_empty_list(X):
+  | x = X; xs = app_list(X)
+    { x@xs }
+
+app_separated_list(SEP,X):
+  xs = loption(app_separated_non_empty_list(SEP,X))
+    { xs }
+
+app_separated_non_empty_list(SEP,X):
+  x = X
+    { x }
+  | x = X; SEP; xs = 
app_separated_non_empty_list(SEP,X) + { x@xs } diff --git a/infer/tests/build_systems/java_source_parser/Makefile b/infer/tests/build_systems/java_source_parser/Makefile index c49f1d7de..8fbe800bf 100644 --- a/infer/tests/build_systems/java_source_parser/Makefile +++ b/infer/tests/build_systems/java_source_parser/Makefile @@ -6,23 +6,28 @@ TESTS_DIR = ../.. INFER_OPTIONS = --java-debug-source-file-info +INFER_OPTIONS_EXPERIMENTAL = --java-source-parser-experimental --java-debug-source-file-info SOURCES = Main.java -test: parser.output.test +test: parser.output.test experimental_parser.output.test $(call check_no_diff,parser.output,parser.output.test) + $(call check_no_diff,parser.output,experimental_parser.output.test) replace: parser.output.test cp $< parser.output clean: - rm -fr infer-out parser.output.test *.class + $(REMOVE_DIR) infer-out* *.test *.class # we check if the java source file is valid for javac compile: javac *.java -.PHONY: parser.output.test +.PHONY: parser.output.test experimental_parser.output.test parser.output.test: $(SOURCES) $(INFER_BIN) - $(INFER_BIN) $(INFER_OPTIONS) $(SOURCES) > parser.output.test + $(INFER_BIN) $(INFER_OPTIONS) $(SOURCES) > $@ + +experimental_parser.output.test: $(SOURCES) $(INFER_BIN) + $(INFER_BIN) -o infer-out-experimental $(INFER_OPTIONS_EXPERIMENTAL) $(SOURCES) > $@ include $(TESTS_DIR)/base.make