New java source parser for declaration locations (experimental)

Summary:
The current source parser is based on ocamllex only.
In order to track field declaration locations, we propose a
new parser using ocamllex/menhir. This is a more ambitious
project that closely follows the official Java syntax.

Reviewed By: jvillard

Differential Revision: D24858280

fbshipit-source-id: 22d6766e5
master
David Pichardie 4 years ago committed by Facebook GitHub Bot
parent 02625ac1ce
commit 2f09b39f56

@ -1566,6 +1566,10 @@ INTERNAL OPTIONS
--java-jar-compiler-reset
Cancel the effect of --java-jar-compiler.
--java-source-parser-experimental
Activates: The experimental Java source parser for declaration
locations. (Conversely: --no-java-source-parser-experimental)
--java-version-reset
Cancel the effect of --java-version.

@ -1460,6 +1460,11 @@ and java_jar_compiler =
~meta:"path" "Specify the Java compiler jar used to generate the bytecode"
and java_source_parser_experimental =
CLOpt.mk_bool ~long:"java-source-parser-experimental"
"The experimental Java source parser for declaration locations."
and java_version =
CLOpt.mk_int_opt ~long:"java-version" ?default:Version.java_version
~in_help:InferCommand.[(Capture, manual_java); (Analyze, manual_java)]
@ -2927,6 +2932,8 @@ and java_debug_source_file_info = !java_debug_source_file_info
and java_jar_compiler = !java_jar_compiler
and java_source_parser_experimental = !java_source_parser_experimental
and java_version = !java_version
and javac_classes_out = !javac_classes_out

@ -344,6 +344,8 @@ val java_debug_source_file_info : string option
val java_jar_compiler : string option
val java_source_parser_experimental : bool
val java_version : int option
val javac_classes_out : string

@ -157,7 +157,9 @@ let () =
| _ when Config.test_determinator && not Config.process_clang_ast ->
TestDeterminator.compute_and_emit_test_to_run ()
| _ when Option.is_some Config.java_debug_source_file_info ->
JSourceFileInfo.debug_on_file (Option.value_exn Config.java_debug_source_file_info)
if Config.java_source_parser_experimental then
JSourceLocations.debug_on_file (Option.value_exn Config.java_debug_source_file_info)
else JSourceFileInfo.debug_on_file (Option.value_exn Config.java_debug_source_file_info)
| Analyze ->
run Driver.Analyze
| Capture | Compile | Run ->

@ -16,3 +16,7 @@ end
module JSourceFileInfo = struct
let debug_on_file _ = ()
end
(** No-op stand-in for [JSourceLocations]: [debug_on_file] ignores its argument and does nothing.
    NOTE(review): presumably selected when the Java frontend is not built -- confirm. *)
module JSourceLocations = struct
  let debug_on_file _ = ()
end

@ -22,3 +22,7 @@ end
module JSourceFileInfo : sig
val debug_on_file : string -> unit
end
(** Signature of the [JSourceLocations] stand-in. *)
module JSourceLocations : sig
  val debug_on_file : string -> unit
  (** Takes a file path; returns unit. *)
end

@ -7,7 +7,8 @@
*)
(* NOTE: prepend dune.common to this file! *)
let lexer = "(ocamllex jSourceFileInfo)"
let lexer = "(ocamllex jSourceLexer jSourceFileInfo)"
let jparser = "(menhir (modules jSourceParser))"
let java =
Printf.sprintf
@ -30,4 +31,4 @@ let java =
let documentation = "(documentation (package infer) (mld_files JavaFrontend))"
;;
Jbuild_plugin.V1.send (String.concat "\n" [lexer; java; documentation])
Jbuild_plugin.V1.send (String.concat "\n" [lexer; jparser; java; documentation])

@ -217,7 +217,9 @@ let compute_source_icfg program tenv source_basename package_opt source_file =
if test node then try procedure cn node with Bir.Subroutine -> ()
in
(* we must set the java location for all classes in the source file before translation *)
JSourceFileInfo.collect_class_location program source_file ;
if Config.java_source_parser_experimental then
JSourceLocations.collect_class_location program source_file
else JSourceFileInfo.collect_class_location program source_file ;
let () =
JBasics.ClassMap.iter
(select

@ -0,0 +1,43 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
(** A position in a source file, as a line number and a column number. *)
type location = {line: int; col: int}

(** The kind of a declared type. Named kinds carry the declared identifier; an anonymous class
    carries no name. *)
type class_kind = Class of string | Interface of string | AnonymousClass | Enum of string

(** A class-like declaration: where it is, what kind it is, and the declarations nested in it. *)
type class_or_interface =
  {location: location; kind: class_kind; inner_elements: class_or_interface list}

(** The content of one source file: an optional package name and its top-level declarations. *)
type file_content = {package: string option; classes: class_or_interface list}

(** Naming state used while traversing declarations: [prefix] is prepended to a declaration's
    name, and [counter] is incremented each time an anonymous class is named. *)
type context = {prefix: string; mutable counter: int}
(** Simple name of a declaration of the given kind. Named kinds yield their identifier. An
    anonymous class increments [context.counter] and yields the new value in decimal. *)
let name_of_kind context kind =
  match kind with
  | AnonymousClass ->
      let next = context.counter + 1 in
      context.counter <- next ;
      string_of_int next
  | Class id | Interface id | Enum id ->
      id
(** [iter ~action_on_class_location context decl] calls [action_on_class_location] on [decl] and
    then, recursively, on every declaration nested inside it.

    The class name passed to the callback is [context.prefix] followed by the declaration's own
    name. Nested declarations are named with that class name followed by ["$"] as prefix. *)
let rec iter ~action_on_class_location context {location= {line; col}; kind; inner_elements} =
  (* For an anonymous class this increments the counter of the enclosing [context], so sibling
     anonymous classes get consecutive numbers. *)
  let name = name_of_kind context kind in
  let classname = context.prefix ^ name in
  (* Sequenced as a unit expression rather than [let _ = ...], so that a partially applied
     callback is rejected by the type checker instead of being silently dropped. *)
  action_on_class_location ~classname ~col ~line ;
  (* Each declaration starts a fresh anonymous-class counter for its own nested declarations. *)
  let inner_context = {prefix= classname ^ "$"; counter= 0} in
  List.iter inner_elements ~f:(iter ~action_on_class_location inner_context)
(** Calls [action_on_class_location] on every declaration of the file, nested ones included.
    Top-level class names are prefixed with the package name and a dot when the file declares a
    package, and are left unprefixed otherwise. *)
let iter_on_declarations ~action_on_class_location {package; classes} =
  let prefix = match package with None -> "" | Some name -> name ^ "." in
  let toplevel = {prefix; counter= 0} in
  List.iter classes ~f:(fun decl -> iter ~action_on_class_location toplevel decl)

@ -0,0 +1,20 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
(** A position in a source file, as a line number and a column number. *)
type location = {line: int; col: int}

(** The kind of a declared type; named kinds carry the declared identifier. *)
type class_kind = Class of string | Interface of string | AnonymousClass | Enum of string

(** A class-like declaration together with the declarations nested inside it. *)
type class_or_interface =
  {location: location; kind: class_kind; inner_elements: class_or_interface list}

(** The content of one source file: an optional package name and its top-level declarations. *)
type file_content = {package: string option; classes: class_or_interface list}

val iter_on_declarations :
  action_on_class_location:(classname:string -> col:int -> line:int -> unit) -> file_content -> unit
(** [iter_on_declarations ~action_on_class_location content] calls [action_on_class_location]
    with a class name and a line/column position for each declaration in [content]. *)

@ -0,0 +1,244 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
{

(** Records a line break in [lexbuf]: bumps the line number of the current position and sets the
    beginning-of-line offset to the current character offset. The rules below call it each time
    they consume an end of line. *)
let incr_linenum lexbuf =
  let pos = lexbuf.Lexing.lex_curr_p in
  lexbuf.Lexing.lex_curr_p <- { pos with
    Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
    Lexing.pos_bol = pos.Lexing.pos_cnum;
  }

(* brings the token constructors returned by the rules into scope *)
open JSourceParser

}
(* horizontal blanks only; line breaks are matched by [eol] so that lines can be counted *)
let whitespace = [' ' '\t']
let eol = whitespace*("\r")?"\n" (* end of line *)
(* a // comment, up to but excluding the line break *)
let eol_comment = "//" [^'\n']*
let id = ['a'-'z' 'A'-'Z' '_' '$'] ['a'-'z' 'A'-'Z' '0'-'9' '_' '$']*
(* character literal: the two quote characters are listed explicitly, anything else is a
   non-empty run of non-quote characters between single quotes *)
let char = "'\\''" | "'\"'" | "'" [ ^'\'' ]+ "'"
(* modifiers are matched only so that [class_scan] can drop them *)
let modifier = "public"|"protected"|"private"|"abstract"|"static"|
               "final"|"strictfp"|"transient"|"volatile"
let numeric_type = "byte"|"short"|"int"|"long"|"char"|"float"|"double"
let primitive_type = "boolean"|numeric_type
let assignment_operator = "*="|"/="|"%="|"+="|"-="|"<<="|">>="|">>>="|"&="|"^="|"|="
let binop = "||"|"&&"|"&"|"^"|"=="|"!="|"<="|">="|"<<"|">>"|">>>"|"+"|"-"|"*"|"/"|"%"
let binary_numeral_prefix = "0" ("b"|"B")
let hex_numeral_prefix = "0" ("x"|"X")
let numeral_prefix = ['0'-'9'] | binary_numeral_prefix | hex_numeral_prefix
(* deliberately loose: hexadecimal digits and underscores are accepted after any prefix *)
let numeral_digit = ['0'-'9' 'a'-'f' 'A'-'F' '_']
let integer_literal = numeral_prefix numeral_digit* ['l' 'L']?
(* NOTE(review): the next two definitions are textually identical (both start with the
   hexadecimal prefix) and neither is referenced by the rules in this file; the second looks
   like a copy-paste leftover -- confirm before relying on either. *)
let hexadecimal_floating_point_literal = hex_numeral_prefix (numeral_digit | ".")+ ("p"|"P")
let decimal_floating_point_literal = hex_numeral_prefix (numeral_digit | ".")+ ("p"|"P")
let digits = ['0'-'9']+
let float_type_suffix = ['f' 'F' 'd' 'D']
let exponent_part = ['e' 'E'] ['-' '+']? digits
(* decimal floating-point literal; this is the definition used by [class_scan] *)
let floating_point_literal =
  (digits "." digits? exponent_part? float_type_suffix?)
  | ("." digits exponent_part? float_type_suffix?)
  | (digits exponent_part float_type_suffix?)
  | (digits exponent_part? float_type_suffix)
(* We follow an abstraction of the official grammar described here:
https://docs.oracle.com/javase/specs/jls/se14/html/jls-19.html *)
(* Main entry point: returns the next token for the parser.
   Blanks, comments, modifiers, annotations (with their parenthesized arguments) and characters
   matched by no other case are skipped by re-entering the rule. Every consumed end of line
   updates the line counter of [lexbuf]. *)
rule class_scan = parse
  | whitespace+
    { class_scan lexbuf }
  | eol_comment
    { class_scan lexbuf }
  | "/*"
    { skip_comments (class_scan) lexbuf }
  | eol
    { incr_linenum lexbuf;
      class_scan lexbuf }
  | "package"
    { PACKAGE }
  | "import"
    { IMPORT }
  | "class"
    { CLASS }
  | "instanceof"
    { INSTANCEOF }
  | "interface"
    { INTERFACE }
  | "void"
    { VOID }
  | "throws"
    { THROWS }
  | "enum"
    { ENUM }
  | modifier
    { class_scan lexbuf }
  | primitive_type
    { PRIMTYPE }
  | "<"
    { LANGLE }
  | ">"
    { RANGLE }
  | "new"
    { NEW }
  | "var"
    { VAR }
  | "extends"
    { EXTENDS }
  | "super"
    { SUPER }
  | "implements"
    { IMPLEMENTS }
  | "assert"
    { ASSERT }
  | "do"
    { DO }
  | "while"
    { WHILE }
  | "if"
    { IF }
  | "else"
    { ELSE }
  | "try"
    { TRY }
  | "catch"
    { CATCH }
  | "finally"
    { FINALLY }
  | "for"
    { FOR }
  | "break"
    { BREAK }
  | "continue"
    { CONTINUE }
  | "return"
    { RETURN }
  | "throw"
    { THROW }
  | "synchronized"
    { SYNCHRONIZED }
  | "yield"
    { YIELD }
  | "null"
    { NULL }
  | "true"
    { TRUE }
  | "false"
    { FALSE }
  | (floating_point_literal as f)
    { FLOATINGPOINT f }
  | (integer_literal as i)
    { INTEGER i }
  | (id as name)
    { IDENT name }
  (* A backslash escapes the character that follows it, so an escaped double quote does not
     terminate the literal (same convention as [skip_string]). *)
  | "\"" (([^ '\"' '\\'] | '\\' _)* as s) "\""
    { STRING s }
  | (char as s)
    { CHAR s }
  | ";"
    { SEMICOLON }
  | ":"
    { COLON }
  | "."
    { DOT }
  | "{"
    { LBRACKET }
  | "}"
    { RBRACKET }
  | "["
    { LSBRACKET }
  | "]"
    { RSBRACKET }
  | "("
    { LPAREN }
  | ")"
    { RPAREN }
  | ","
    { COMMA }
  | "?"
    { QMARK }
  | ("++"|"--")
    { INCR_DECR }
  | "|"
    { PIPE }
  | "="
    { EQ }
  | "!"
    { BANG }
  | "~"
    { TILDE }
  | "..."
    { THREEDOTS }
  | assignment_operator
    { ASSIGNOP}
  | binop
    { BINOP }
  (* annotation with arguments: skip up to the matching closing parenthesis *)
  | "@" whitespace* id ("." id)* "("
    { skip_well_parenthesized_parentheses 1
        (class_scan) lexbuf }
  (* annotation without arguments *)
  | "@" whitespace* id ("." id)*
    { class_scan lexbuf }
  (* any other single character is dropped *)
  | _
    { class_scan lexbuf }
  | eof
    { EOF }
(* we skip type annotation arguments (...) because they may contain brackets *)
(* [width] is the number of currently open parentheses; when the parenthesis that brings it
   down to zero is read, scanning resumes with [action]. Comments, string literals and character
   literals are skipped as units, so parentheses inside them are not counted.
   NOTE(review): there is no [eof] case; presumably ocamllex raises [Failure] if the input ends
   inside the parentheses -- confirm. *)
and skip_well_parenthesized_parentheses width action = parse
  | eol
    { incr_linenum lexbuf;
      skip_well_parenthesized_parentheses width action lexbuf }
  | "("
    { skip_well_parenthesized_parentheses (width+1) action lexbuf }
  | ")"
    { if width<=1 then action lexbuf
      else skip_well_parenthesized_parentheses (width-1) action lexbuf }
  | eol_comment
    { skip_well_parenthesized_parentheses width action lexbuf }
  | "/*"
    { skip_comments
        (skip_well_parenthesized_parentheses width action) lexbuf }
  | "\""
    { skip_string (skip_well_parenthesized_parentheses width action) lexbuf }
  | char
    { skip_well_parenthesized_parentheses width action lexbuf }
  | _
    { skip_well_parenthesized_parentheses width action lexbuf }
(* Consumes the body of a block comment, counting the line breaks it contains, and resumes
   scanning with [action] right after the closing delimiter.
   NOTE(review): there is no [eof] case; presumably ocamllex raises [Failure] on an unterminated
   comment -- confirm. *)
and skip_comments action = parse
  | "*/"
    { action lexbuf }
  | eol
    { incr_linenum lexbuf;
      skip_comments action lexbuf }
  | _
    { skip_comments action lexbuf }
(* Consumes the rest of a string literal whose opening quote has already been read, and resumes
   scanning with [action] after the closing quote. An escaped backslash and an escaped double
   quote are each consumed as a pair, so neither can close the literal. *)
and skip_string action = parse
  | "\\\\"
    { skip_string action lexbuf }
  | "\\\""
    { skip_string action lexbuf }
  | "\""
    { action lexbuf }
  | _
    { skip_string action lexbuf }
{
}

@ -0,0 +1,53 @@
(*
* Copyright (c) 2009-2013, Monoidics ltd.
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
open Javalib_pack
(** [collect_class_location program file] parses [file] and records in [program] the source
    location of every class-like declaration found in it.

    Files whose absolute path does not end in [.java] are ignored. Lexing and parsing errors are
    logged at debug level and otherwise ignored. The input channel is closed in every case,
    including when an unexpected exception escapes. *)
let collect_class_location (program : JProgramDesc.t) (file : SourceFile.t) =
  let path = SourceFile.to_abs_path file in
  if String.is_suffix path ~suffix:".java" then (
    let action_on_class_location ~classname ~col ~line =
      let loc : Location.t = {line; col; file} in
      let cn : JBasics.class_name = JBasics.make_cn classname in
      Logging.debug Capture Verbose "set_java_location %s with location %a@." (JBasics.cn_name cn)
        Location.pp_file_pos loc ;
      JProgramDesc.set_java_location program cn loc
    in
    (* [with_file] closes the channel even if the callback raises; the previous explicit
       [close] was skipped whenever an exception other than the two handled below escaped. *)
    In_channel.with_file path ~f:(fun cin ->
        let filebuf = Lexing.from_channel cin in
        try
          let cl = JSourceParser.main JSourceLexer.class_scan filebuf in
          JSourceAST.iter_on_declarations ~action_on_class_location cl
        with
        | Failure s ->
            Logging.debug Capture Verbose "Error parsing source file %s\n%s"
              (SourceFile.to_abs_path file) s
        | JSourceParser.Error ->
            Logging.debug Capture Verbose "JSourceParser error on file %s\n"
              (SourceFile.to_abs_path file) ) )
(** [debug_on_file path] parses the Java source file at [path] and prints one line per class-like
    declaration on standard output, giving its name, line and column.

    Paths that do not end in [.java] are ignored. A syntax error is reported on standard error
    with the position of the offending token. A [Failure] raised while lexing or parsing is
    reported on standard error as well, in line with [collect_class_location], which also
    handles it. The input channel is closed in every case. *)
let debug_on_file path =
  if String.is_suffix path ~suffix:".java" then
    (* [with_file] closes the channel even if the callback raises *)
    In_channel.with_file path ~f:(fun cin ->
        let filebuf = Lexing.from_channel cin in
        let action_on_class_location ~classname ~col ~line =
          Printf.printf "class %s at line %d, column %d\n" classname line col
        in
        try
          let cl = JSourceParser.main JSourceLexer.class_scan filebuf in
          JSourceAST.iter_on_declarations ~action_on_class_location cl
        with
        | JSourceParser.Error ->
            let pos = filebuf.Lexing.lex_curr_p in
            (* the current position is the end of the last token read; subtract the token
               length to report the column where that token starts *)
            let buf_length = Lexing.lexeme_end filebuf - Lexing.lexeme_start filebuf in
            let line = pos.Lexing.pos_lnum in
            let col = pos.Lexing.pos_cnum - pos.Lexing.pos_bol - buf_length in
            Printf.eprintf "Java source syntax error at line %d, column %d.\n%!" line col
        | Failure msg ->
            Printf.eprintf "Error parsing source file %s\n%s\n%!" path msg )

@ -0,0 +1,13 @@
(*
* Copyright (c) 2009-2013, Monoidics ltd.
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
val collect_class_location : JProgramDesc.t -> SourceFile.t -> unit
(** Parses the given source file and sets, in the given program, the Java location of each
    class-like declaration it contains. Files that are not [.java] files are ignored. *)

val debug_on_file : string -> unit
(** Parses the [.java] file at the given path and prints the name, line and column of each
    class-like declaration it contains. *)

@ -0,0 +1,773 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
%{
  open JSourceAST

  (* Converts a lexer position into a [JSourceAST.location]: the line is the position's line
     number, the column is the offset of the position from the beginning of its line. *)
  let location_of_pos {Lexing.pos_lnum; pos_cnum; pos_bol; _} =
    { line = pos_lnum; col = pos_cnum - pos_bol }
%}
%token PACKAGE
%token IMPORT
%token CLASS
%token INTERFACE
%token IMPLEMENTS
%token EXTENDS
%token SUPER
%token ENUM
%token NEW
%token INSTANCEOF
%token VOID
%token THROWS
%token VAR
%token ASSERT
%token DO
%token WHILE
%token IF
%token ELSE
%token TRY
%token CATCH
%token FINALLY
%token FOR
%token BREAK
%token CONTINUE
%token RETURN
%token THROW
%token SYNCHRONIZED
%token YIELD
%token THIS
%token NULL
%token TRUE
%token FALSE
//non Java terminals
%token PRIMTYPE
%token LBRACKET
%token RBRACKET
%token LSBRACKET
%token RSBRACKET
%token LPAREN
%token RPAREN
%token LANGLE
%token RANGLE
%token EQ
%token ASSIGNOP
%token SEMICOLON
%token COLON
%token COMMA
%token DOT
%token QMARK
%token BANG
%token TILDE
%token PIPE
%token INCR_DECR
%token THREEDOTS
%token EOF
%token <string> IDENT
%token <string> INTEGER
%token <string> FLOATINGPOINT
%token <string> STRING
%token <string> CHAR
%token BINOP
%left BINOP LANGLE RANGLE
%nonassoc INSTANCEOF
%start <JSourceAST.file_content> main
%%
main:
| package_declaration? import_declaration* app_list(type_declaration) EOF
{ { package = $1; classes = $3 } }
import_declaration:
| IMPORT import_name SEMICOLON
{}
import_name:
| IDENT import_name_end
{}
import_name_end:
|
| DOT BINOP
| DOT IDENT import_name_end
{}
type_arguments:
| LANGLE type_argument_list RANGLE
{}
type_argument_list:
| separated_nonempty_list(COMMA, type_argument)
{}
type_argument:
| unann_reference_type // If we decide to parse annotations, it will
// require change here
| QMARK wildcard_bounds?
{}
wildcard_bounds:
| EXTENDS unann_reference_type
| SUPER unann_reference_type
{}
package_declaration:
| PACKAGE unann_class_or_interface_type SEMICOLON
{ $2 }
type_declaration:
| class_declaration
| interface_declaration
{ [$1] }
| SEMICOLON
{ [] }
class_declaration:
| normal_class_declaration
| enum_declaration
{ $1 }
// A named class. The recorded location is the start of the class identifier (not of the
// `class` keyword); the nested declarations are those collected from the class body.
normal_class_declaration:
  | CLASS id=identifier superclass? superinterfaces? inner=class_body
    { {
        location = location_of_pos $startpos(id);
        kind = Class id;
        inner_elements = inner;
      }
    }
enum_declaration:
| ENUM id=identifier superinterfaces? inner=enum_body
{ {
location = location_of_pos $startpos(id);
kind = Enum id;
inner_elements = inner;
}
}
superclass:
| EXTENDS class_type
{}
superinterfaces:
| IMPLEMENTS separated_nonempty_list(COMMA, class_type)
{}
enum_body:
| LBRACKET enum_constant_list RBRACKET // TODO add optional comma
{ [] }
| LBRACKET enum_constant_list inner=enum_body_declarations RBRACKET
{ inner }
enum_constant_list:
| separated_nonempty_list(COMMA, enum_constant)
{}
enum_constant:
| identifier class_body?
| identifier LPAREN argument_list? RPAREN class_body?
{}
enum_body_declarations:
| SEMICOLON app_list(class_body_declaration)
{ $2 }
class_body:
| LBRACKET app_list(class_body_declaration) RBRACKET
{ $2 }
class_body_declaration:
| class_member_declaration
{ $1 }
| constructor_declaration
{ $1 }
constructor_declaration:
| constructor_declarator throws? inner=constructor_body
{ inner }
// Constructor name and parameter list; carries no declarations.
constructor_declarator:
  | identifier LPAREN formal_parameter_list RPAREN // TODO add receiver_parameter
    {}
constructor_body:
| LBRACKET inner=loption(block_statements) RBRACKET
{ inner }
argument_list:
| app_separated_non_empty_list(COMMA, expression)
{ $1 }
class_member_declaration:
| class_declaration
| interface_declaration
{ [$1] }
| field_declaration
| method_declaration
{ $1 }
| SEMICOLON
{ [] }
method_declaration:
| method_header method_body
{ $2 }
interface_declaration:
| normal_interface_declaration
{ $1 }
normal_interface_declaration:
| INTERFACE id=identifier inner=interface_body
{ {
location = location_of_pos $startpos(id);
kind = Interface id;
inner_elements = inner;
}
}
interface_body:
| LBRACKET app_list(interface_member_declaration) RBRACKET
{ $2 }
// Declarations contributed by one interface member.
// Constants and methods contribute the classes declared inside their initializers and bodies
// (for example anonymous classes), as fields and methods do in `class_member_declaration`;
// these were previously discarded. Nested classes and interfaces contribute themselves.
interface_member_declaration:
  | constant_declaration
  | interface_method_declaration
    { $1 }
  | class_declaration
  | interface_declaration
    { [$1] }

// An interface method: yields the declarations found in its body (none for an abstract method,
// whose body is a lone semicolon).
interface_method_declaration:
  | method_header method_body
    { $2 }
method_header:
| result method_declarator throws?
{}
%inline
result:
| unann_type
| VOID
{}
method_declarator:
| identifier LPAREN formal_parameter_list RPAREN dims? //TODO add receiver_parameter
{}
formal_parameter_list:
| separated_list(COMMA, formal_parameter)
{}
formal_parameter:
| unann_type variable_declarator_id
| variable_arity_parameter
{}
variable_arity_parameter:
| unann_type THREEDOTS identifier
{}
method_body:
| block
{ $1 }
| SEMICOLON
{ [] }
block:
| LBRACKET loption(block_statements) RBRACKET
{ $2 }
block_statements:
| app_non_empty_list(block_statement)
{ $1 }
block_statement:
| class_declaration
{ [$1] }
| local_variable_declaration_statement
| statement
{ $1 }
local_variable_declaration_statement:
| local_variable_declaration SEMICOLON
{ $1 }
local_variable_declaration:
| local_variable_type variable_declarator_list
{ $2 }
local_variable_type:
| unann_type
| VAR
{}
statement:
| statement_without_trailing_substatement
// TODO: add labeled_statement
| if_then_statement
| if_then_else_statement
| while_statement
| for_statement
{ $1 }
for_statement:
| basic_for_statement
| enhanced_for_statement
{ $1 }
for_statement_no_short_if:
| basic_for_statement_no_short_if
| enhanced_for_statement_no_short_if
{ $1 }
basic_for_statement:
| FOR LPAREN loption(for_init) SEMICOLON loption(expression) SEMICOLON loption(for_update) RPAREN statement
{ $3 @ $5 @ $7 @ $9 }
enhanced_for_statement:
| FOR LPAREN local_variable_type variable_declarator_id COLON expression RPAREN statement
{ $6 @ $8 }
enhanced_for_statement_no_short_if:
| FOR LPAREN local_variable_type variable_declarator_id COLON expression RPAREN statement_no_short_if
{ $6 @ $8 }
basic_for_statement_no_short_if:
| FOR LPAREN loption(for_init) SEMICOLON loption(expression) SEMICOLON loption(for_update) RPAREN statement_no_short_if
{ $3 @ $5 @ $7 @ $9 }
for_init:
| statement_expression_list
| local_variable_declaration
{ $1 }
for_update:
| statement_expression_list
{ $1 }
statement_expression_list:
| app_separated_non_empty_list(COMMA, statement_expression)
{ $1 }
if_then_statement:
| IF LPAREN expression RPAREN statement
{ $3 @ $5 }
if_then_else_statement:
| IF LPAREN expression RPAREN statement_no_short_if ELSE statement
{ $3 @ $5 @ $7 }
if_then_else_statement_no_short_if:
| IF LPAREN expression RPAREN statement_no_short_if ELSE statement_no_short_if
{ $3 @ $5 @ $7 }
statement_no_short_if:
| statement_without_trailing_substatement
| if_then_else_statement_no_short_if
| while_statement_no_short_if
| for_statement_no_short_if
{ $1 }
while_statement:
| WHILE LPAREN expression RPAREN statement
{ $3 @ $5 }
while_statement_no_short_if:
| WHILE LPAREN expression RPAREN statement_no_short_if
{ $3 @ $5 }
statement_without_trailing_substatement:
| block
| empty_statement
| expression_statement
| assert_statement
| do_statement
| break_statement
| continue_statement
| return_statement
| synchronized_statement
| throw_statement
| try_statement
| yield_statement
{ $1 }
empty_statement:
| SEMICOLON
{ [] }
expression_statement:
| statement_expression SEMICOLON
{ $1 }
statement_expression:
| INCR_DECR unary_expression
{ $2 }
| assignment
| postfix_expression INCR_DECR
| method_invocation
| class_instance_creation_expression
{ $1 }
assert_statement:
| ASSERT expression SEMICOLON
{ $2 }
| ASSERT expression COLON expression SEMICOLON
{ $2 @ $4 }
do_statement:
| DO statement WHILE LPAREN expression RPAREN SEMICOLON
{ $2 @ $5 }
break_statement:
| BREAK identifier? SEMICOLON
{ [] }
continue_statement:
| CONTINUE identifier? SEMICOLON
{ [] }
return_statement:
| RETURN loption(expression) SEMICOLON
{ $2 }
// Yields the declarations found in the lock expression followed by those found in the guarded
// block. The block's declarations were previously dropped, so classes declared inside a
// synchronized block received no location.
synchronized_statement:
  | SYNCHRONIZED LPAREN expression RPAREN block
    { $3 @ $5 }
try_statement:
| TRY block catches
{ $2 @ $3 }
| TRY block loption(catches) finally
{ $2 @ $3 @ $4 }
catches:
| app_non_empty_list(catch_clause)
{ $1 }
catch_clause:
| CATCH LPAREN catch_formal_parameter RPAREN block
{ $5 }
catch_formal_parameter:
| catch_type variable_declarator_id
{}
catch_type:
| separated_nonempty_list(PIPE,unann_class_or_interface_type)
{}
finally:
| FINALLY block
{ $2 }
yield_statement:
| YIELD expression SEMICOLON
{ $2 }
throw_statement:
| THROW expression SEMICOLON
{ $2 }
throws:
| THROWS exception_type_list
{ $2 }
exception_type_list:
| separated_nonempty_list(COMMA, exception_type)
{ [] }
exception_type:
| unann_class_or_interface_type // WE DROP ANNOTS
{ [] }
constant_declaration:
| unann_type variable_declarator_list SEMICOLON
{ $2 }
field_declaration:
| unann_type variable_declarator_list SEMICOLON
{ $2 }
variable_declarator_list:
| app_separated_non_empty_list(COMMA,variable_declarator)
{ $1 }
variable_declarator:
| variable_declarator_id
{ [] }
| variable_declarator_id EQ variable_initializer
{ $3 }
variable_declarator_id:
| identifier dims?
{}
variable_initializer:
| expression
| array_initializer
{ $1 }
array_initializer:
| LBRACKET RBRACKET
| LBRACKET COMMA RBRACKET
{ [] }
| LBRACKET variable_initializer array_initializer_end
{ $2 @ $3 }
array_initializer_end:
| RBRACKET
| COMMA RBRACKET
{ [] }
| COMMA variable_initializer array_initializer_end
{ $2 @ $3 }
unann_type:
| PRIMTYPE
| unann_reference_type
{}
unann_reference_type:
| unann_class_or_interface_type
| unann_array_type
{}
dotted_name:
| identifier
{ $1 }
| identifier DOT dotted_name
{ $1 ^ "." ^ $3 }
// | package_opt? separated_nonempty_list(DOT, IDENT)
// don't know how to write Java programs like that
%inline
unann_class_or_interface_type:
| dotted_name { $1 }
%inline
class_type:
| dotted_name { $1 }
%inline
class_or_interface_type_to_instantiate:
| dotted_name { $1 }
%inline
expression_name:
| dotted_name { $1 }
unann_array_type:
| PRIMTYPE dims
| unann_class_or_interface_type dims
{}
%inline dim:
| LSBRACKET RSBRACKET
{}
dims:
| dim+
{}
expression:
| assignment_expression
{ $1 }
assignment_expression:
| conditional_expression
| assignment
{ $1 }
assignment:
| left_hand_side assignment_operator expression
{ $1 @ $3 }
assignment_operator:
| EQ
| ASSIGNOP
{}
left_hand_side:
| expression_name
{ [] }
| field_access
| array_access
{ $1 }
field_access:
| primary DOT identifier
{ $1 }
array_access:
| expression_name LSBRACKET expression RSBRACKET
{ $3 }
| primary_no_new_array LSBRACKET expression RSBRACKET
{ $1 @ $3 }
primary:
| primary_no_new_array
| array_creation_expression
{ $1 }
array_creation_expression:
| NEW PRIMTYPE dim_exprs
| NEW class_type dim_exprs
{ $3 }
| NEW PRIMTYPE dims array_initializer
| NEW class_type dims array_initializer
{ $4 }
dim_exprs:
| app_non_empty_list(dim_expr)
{ $1 }
dim_expr:
| LSBRACKET expression RSBRACKET
{ $2 }
// Primary expressions other than array creation. Each alternative yields the class
// declarations found inside it; literals, class literals and THIS contain none.
// NOTE(review): the lexer added in this change has no rule producing THIS, so the keyword
// `this` reaches the parser as an IDENT and the THIS alternative is never selected -- confirm
// whether that is intended.
primary_no_new_array:
  | LPAREN expression RPAREN
    { $2 }
  | literal
  | class_literal
  | THIS
    { [] }
  | class_instance_creation_expression
  | field_access
  | array_access
  | method_invocation
    { $1 }
method_invocation:
| expression_name LPAREN loption(argument_list) RPAREN
{ $3 }
| primary DOT identifier LPAREN loption(argument_list) RPAREN
{ $1 @ $5 }
literal:
| INTEGER
| FLOATINGPOINT
| boolean
| CHAR
| STRING
| NULL
{}
boolean:
| TRUE
| FALSE
{}
class_literal:
| type_name DOT CLASS
{}
%inline
identifier:
| id=IDENT { id }
%inline
type_name:
| IDENT
{}
class_instance_creation_expression:
| unqualified_class_instance_creation_expression
{ $1 }
| identifier DOT unqualified_class_instance_creation_expression
{ $3 }
| primary DOT unqualified_class_instance_creation_expression
{ $1 @ $3 }
// `new T(args)`, optionally followed by a class body.
// Without a body, only the declarations found in the arguments are yielded. With a body, an
// AnonymousClass declaration is appended after them; its location is the start of the class
// body (the opening bracket) and its nested declarations are those of the body.
%inline
unqualified_class_instance_creation_expression:
  | NEW class_or_interface_type_to_instantiate LPAREN loption(argument_list) RPAREN
    { $4 }
  | NEW class_or_interface_type_to_instantiate LPAREN args=loption(argument_list) RPAREN inner=class_body
    { args @
      [{
        location = location_of_pos $startpos(inner);
        kind = AnonymousClass;
        inner_elements = inner;
      }]
    }
conditional_expression:
| conditional_or_expression
{ $1 }
| conditional_or_expression QMARK expression COLON conditional_expression
{ $1 @ $3 @ $5 }
// we simplify the official spec and merge many rules here
conditional_or_expression:
| conditional_or_expression binop conditional_or_expression
{ $1 @ $3 }
| conditional_or_expression INSTANCEOF unann_reference_type // WE DROP ANNOTS
| unary_expression
{ $1 }
%inline
binop:
| BINOP | RANGLE | LANGLE
{}
unary_expression:
| INCR_DECR unary_expression
| BINOP unary_expression
{ $2 }
| unary_expression_not_plus_minus
{ $1 }
unary_expression_not_plus_minus:
| postfix_expression
| cast_expression
{ $1 }
| BANG unary_expression
| TILDE unary_expression
{ $2 }
%inline
cast_expression:
| LPAREN PRIMTYPE RPAREN unary_expression
{ $4 }
postfix_expression:
| primary
{ $1 }
| expression_name
{ [] }
| postfix_expression INCR_DECR
{ $1 }
// Specialized versions of the Menhir list macros: every element X is itself a list, and the
// results are concatenated with `@` instead of being consed together.

// possibly empty sequence of X
app_list(X):
    { [] }
  | x = X; xs = app_list(X)
    { x@xs }

// sequence of at least one X
app_non_empty_list(X):
  | x = X; xs = app_list(X)
    { x@xs }

// possibly empty sequence of X separated by SEP
app_separated_list(SEP,X):
  xs = loption(app_separated_non_empty_list(SEP,X))
    { xs }

// sequence of at least one X separated by SEP
app_separated_non_empty_list(SEP,X):
  x = X
    { x }
  | x = X; SEP; xs = app_separated_non_empty_list(SEP,X)
    { x@xs }

@ -6,23 +6,28 @@
TESTS_DIR = ../..
INFER_OPTIONS = --java-debug-source-file-info
INFER_OPTIONS_EXPERIMENTAL = --java-source-parser-experimental --java-debug-source-file-info
SOURCES = Main.java
test: parser.output.test
test: parser.output.test experimental_parser.output.test
$(call check_no_diff,parser.output,parser.output.test)
$(call check_no_diff,parser.output,experimental_parser.output.test)
replace: parser.output.test
cp $< parser.output
clean:
rm -fr infer-out parser.output.test *.class
$(REMOVE_DIR) infer-out* *.test *.class
# we check if the java source file is valid for javac
compile:
javac *.java
.PHONY: parser.output.test
.PHONY: parser.output.test experimental_parser.output.test
parser.output.test: $(SOURCES) $(INFER_BIN)
$(INFER_BIN) $(INFER_OPTIONS) $(SOURCES) > parser.output.test
$(INFER_BIN) $(INFER_OPTIONS) $(SOURCES) > $@
experimental_parser.output.test: $(SOURCES) $(INFER_BIN)
$(INFER_BIN) -o infer-out-experimental $(INFER_OPTIONS_EXPERIMENTAL) $(SOURCES) > $@
include $(TESTS_DIR)/base.make

Loading…
Cancel
Save