diff --git a/infer/src/dune.in b/infer/src/dune.in index 76bad75c3..e60af4a80 100644 --- a/infer/src/dune.in +++ b/infer/src/dune.in @@ -129,6 +129,8 @@ let topl_stanzas = ["(ocamllex ToplLexer)"; "(menhir (flags --unused-token INDENT --explain) (modules ToplParser))"] +let java_sources_lexer = if java then ["(ocamllex jSourceFileInfo)"] else [] + let flatten_sources_stanzas = List.map (fun source_dir -> @@ -143,7 +145,7 @@ let flatten_sources_stanzas = (** The build stanzas to be passed to dune *) let stanzas = (env_stanza :: main_lib_stanza :: infer_exe_stanza :: infertop_stanza :: clang_lexer_stanzas) - @ topl_stanzas @ flatten_sources_stanzas + @ java_sources_lexer @ topl_stanzas @ flatten_sources_stanzas ;; diff --git a/infer/src/java/jFrontend.ml b/infer/src/java/jFrontend.ml index 8553f2246..894859f34 100644 --- a/infer/src/java/jFrontend.ml +++ b/infer/src/java/jFrontend.ml @@ -132,12 +132,31 @@ let cache_classname cn = let is_classname_cached cn = Sys.file_exists (path_of_cached_classname cn) = `Yes +let test_source_file_location source_file program cn node = + let is_synthetic = function + | Javalib.JInterface _ -> + false + | Javalib.JClass jc -> + jc.Javalib.c_synthetic + in + if not (is_synthetic node) then + match JClasspath.get_java_location program cn with + | None -> + L.(debug Capture Verbose) + "WARNING SOURCE FILE PARSER: location not found for class %s in source file %s \n" + (JBasics.cn_name cn) + (SourceFile.to_abs_path source_file) + | Some _ -> + () + + (* Given a source file and a class, translates the code of this class. In init - mode, finds out whether this class contains initializers at all, in this case translates it. In standard mode, all methods are translated *) let create_icfg source_file program tenv icfg cn node = L.(debug Capture Verbose) "\tclassname: %s@." (JBasics.cn_name cn) ; if Config.dependency_mode && not (is_classname_cached cn) then cache_classname cn ; + test_source_file_location source_file program cn node ; let translate m = let proc_name = JTransType.translate_method_name program tenv m in JClasspath.set_callee_translated program proc_name ; @@ -194,19 +213,8 @@ let compute_source_icfg program tenv source_basename package_opt source_file = let select test procedure cn node = if test node then try procedure cn node with Bir.Subroutine -> () in - let set_java_location cn _node = - let cn_name = JBasics.cn_name cn in - let loc = JSourceFileInfo.class_name_location source_file cn_name in - L.debug Capture Verbose "set_java_location %s with location %a@." cn_name Location.pp_file_pos - loc ; - JClasspath.set_java_location program cn loc - in (* we must set the java location for all classes in the source file before translation *) - let () = - JBasics.ClassMap.iter - (select (should_capture program package_opt source_basename) set_java_location) - (JClasspath.get_classmap program) - in + JSourceFileInfo.collect_class_location program source_file ; let () = JBasics.ClassMap.iter (select diff --git a/infer/src/java/jSourceFileInfo.ml b/infer/src/java/jSourceFileInfo.ml deleted file mode 100644 index 56a5a1c35..000000000 --- a/infer/src/java/jSourceFileInfo.ml +++ /dev/null @@ -1,13 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) - -open! IStd - -(** this is a naive/temporary implementation in a near diff, we will 1) parse the source file to - collect location datas for all class names 2) cache the result for later uses but we may have to - adapt a litle some signatures *) -let class_name_location file _cn : Location.t = {line= 0; col= 0; file} diff --git a/infer/src/java/jSourceFileInfo.mli b/infer/src/java/jSourceFileInfo.mli deleted file mode 100644 index b05a009ae..000000000 --- a/infer/src/java/jSourceFileInfo.mli +++ /dev/null @@ -1,12 +0,0 @@ -(* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - *) - -open! IStd - -val class_name_location : SourceFile.t -> string -> Location.t -(** [class_name_location source class_name] searches in file [source] the declaration location for - class name [class_name] *) diff --git a/infer/src/java/jSourceFileInfo.mll b/infer/src/java/jSourceFileInfo.mll new file mode 100644 index 000000000..16f1146e6 --- /dev/null +++ b/infer/src/java/jSourceFileInfo.mll @@ -0,0 +1,401 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +{ + open! IStd + + open Lexing + + module Array = struct + include Array + let make len = create ~len + end + + (** classic Ocamllex function to update current lexbuf line at each end of + line *) + let incr_linenum lexbuf = + let pos = lexbuf.Lexing.lex_curr_p in + lexbuf.Lexing.lex_curr_p <- { pos with + Lexing.pos_lnum = pos.Lexing.pos_lnum + 1; + Lexing.pos_bol = pos.Lexing.pos_cnum; + } + + (** position of the char just after lexbuf *) + let end_pos lexbuf = + lexbuf.lex_curr_p.pos_lnum, + lexbuf.lex_curr_p.pos_cnum - lexbuf.lex_curr_p.pos_bol + + (** return the exact position start of the suffix [classname] in [lexbuf] *) + let location_suffix suffix lexbuf = + let length_suffix = String.length suffix in + let l, c = end_pos lexbuf in + l, c - length_suffix + + (** return the position start of [lexbuf] *) + let location_start lexbuf = + let l, c = end_pos lexbuf in + let lexbuf_length = lexeme_end lexbuf - lexeme_start lexbuf in + l, c - lexbuf_length + + (** We traverse the structure of the source file by recording the + encompassing blocks in a stack of frame. + We use the stack to recover the full inner class name at bytecode level *) + type expr = | AllocExpr | OtherExpr + type frame = + { short_class_name: string + ; is_enum : bool + ; next_anonymous_class: int + ; opened_blocks: int + ; exprs: expr list } + type state = + { stack: frame list + ; record_location: classname:string -> col:int -> line:int -> unit } + + let push frame state = { state with stack = frame :: state.stack; } + + exception Missing_opening_bracket + + exception Missing_opening_parenthesis + + let add_package package state = + let record_location ~classname = + let classname = package^"."^classname in + state.record_location ~classname in + { state with record_location; } + + let pop_class state = + match state.stack with + | [] -> raise Missing_opening_bracket + | _ :: stack -> { state with stack; } + + let incr_next_anonymous state = + match state.stack with + | [] -> { state with stack = []; } + | fr :: stack -> + let stack = + {fr with next_anonymous_class = fr.next_anonymous_class+1; } :: stack in + { state with stack; } + + let add_expr e (state:state) : state = + match state.stack with + | [] -> state + | fr :: stack -> + let stack = {fr with exprs = e :: fr.exprs; } :: stack in + { state with stack; } + + let pop_exprs state = + match state.stack with + | [] -> raise Missing_opening_parenthesis + | fr :: stack -> ( + match fr.exprs with + | [] -> raise Missing_opening_parenthesis + | e :: exprs -> + let stack = {fr with exprs; } :: stack in + (e, { state with stack; })) + + let in_field_declaration_area state = + match state.stack with + | [] -> false + | fr :: _ -> Int.equal fr.opened_blocks 0 + + let get_opened_blocks state = + match state.stack with + | [] -> raise Missing_opening_bracket + | fr :: _ -> fr.opened_blocks + + let is_enum state = + match state.stack with + | [] -> false + | fr :: _ -> fr.is_enum && Int.equal fr.opened_blocks 0 + + let get_next_anonymous_class state = + match state.stack with + | [] -> raise Missing_opening_bracket + | fr :: _ -> string_of_int fr.next_anonymous_class + + let decr_opened_blocks state = + let stack = + match state.stack with + | [] -> [] + | fr :: stack -> + {fr with opened_blocks = fr.opened_blocks-1; } :: stack in + { state with stack; } + + let incr_opened_blocks state = + let stack = + match state.stack with + | [] -> [] + | fr :: stack -> + {fr with opened_blocks = fr.opened_blocks+1; } :: stack in + { state with stack; } + + let long_class_name name state = + let f name frame = Printf.sprintf "%s$%s" frame.short_class_name name in + List.fold ~f ~init:name state.stack + +} + +let whitespace = [' ' '\t'] +let eol = whitespace*("\r")?"\n" (* end of line *) +let eol_comment = "//" [^'\n']* +let id = ['a'-'z' 'A'-'Z' '_' '$'] ['a'-'z' 'A'-'Z' '0'-'9' '_' '$']* +let char = "'\\''" | "'\"'" | "'" [ ^'\'' ]+ "'" +let class_keyword = "class"|"interface"|"enum"|"@" whitespace+ "interface" + +(* We follow an abstraction of the official grammar described here: + https://docs.oracle.com/javase/specs/jls/se14/html/jls-19.html *) +rule class_scan state = parse + | whitespace+ + { class_scan state lexbuf } + | eol_comment + { class_scan state lexbuf } + | "/*" + { skip_comments (class_scan state) lexbuf } + | eol + { incr_linenum lexbuf; + class_scan state lexbuf } + | "package" whitespace+ (id ("." id)* as package) whitespace* ";" + { class_scan (add_package package state) lexbuf } + | id + { class_scan state lexbuf } + | class_keyword whitespace+ (id as name) + { + let line, col = location_suffix name lexbuf in + let classname = long_class_name name state in + state.record_location ~classname ~col ~line ; + let frame : frame = + { short_class_name = name; + is_enum = false; + next_anonymous_class = 1; + exprs = []; + opened_blocks = 0 } in + (* we jump to the next left bracket, skipping annotations and + generics <...> contents *) + do_at_next_left_bracket + (fun lexbuf -> + class_scan (push frame state) lexbuf) lexbuf + } + | "new" whitespace+ + { (* may be a declaration of an anonymous class + ```new [TypeArguments] ClassOrInterfaceTypeToInstantiate + ( [ArgumentList] ) [ClassBody] ``` + ^ + so we jump this position | *) + search_anonymous_class_body state lexbuf + } + | id + { if is_enum state + then found_entrance_of_anonymous_class state lexbuf + else class_scan state lexbuf } + | (id as _field) whitespace* ";" + { if in_field_declaration_area state then + (* we only reach this situation in class/interface bodies, and + never inside method bodies *) + () ; (* TODO : record field location *) + class_scan state lexbuf + } + | "\"" + { skip_string (class_scan state) lexbuf } + | char + { class_scan state lexbuf } + | "{" + { class_scan (incr_opened_blocks state) lexbuf } + | "(" + { class_scan (add_expr OtherExpr state) lexbuf } + | ")" + { match pop_exprs state with + | AllocExpr, state -> found_entrance_of_anonymous_class state lexbuf + | OtherExpr, state -> class_scan state lexbuf } + | "@" whitespace* id ("." id)* "(" + { skip_well_parenthesized_parentheses 1 + (class_scan state) lexbuf } + | "}" + { + if Int.equal (get_opened_blocks state) 0 + then class_scan (pop_class state) lexbuf + else class_scan (decr_opened_blocks state) lexbuf + } + | _ + { class_scan state lexbuf } + | eof + { () } + +(* we search for the next left bracket *) +and do_at_next_left_bracket action = parse + | eol + { incr_linenum lexbuf; + do_at_next_left_bracket action lexbuf } + | eol_comment + { do_at_next_left_bracket action lexbuf } + | "/*" + { skip_comments (do_at_next_left_bracket action) lexbuf } + | "{" + { action lexbuf } + | "<" + { skip_well_parenthesized_angles 1 + (do_at_next_left_bracket action) lexbuf } + | "@" whitespace* id "(" + { skip_well_parenthesized_parentheses 1 + (do_at_next_left_bracket action) lexbuf } + | "\"" + { skip_string (do_at_next_left_bracket action) lexbuf } + | _ + { do_at_next_left_bracket action lexbuf } + +(* we search for (...) parentheses *) +and search_anonymous_class_body state = parse + | eol + { incr_linenum lexbuf; + search_anonymous_class_body state lexbuf } + | eol_comment + { search_anonymous_class_body state lexbuf } + | "/*" + { skip_comments + (search_anonymous_class_body state) lexbuf } + | "(" + { class_scan (add_expr AllocExpr state) lexbuf } + | "<" + { skip_well_parenthesized_angles 1 + (search_anonymous_class_body state) lexbuf } + | "@" whitespace* id "(" + { skip_well_parenthesized_parentheses 1 + (search_anonymous_class_body state) lexbuf } + | "\"" + { skip_string + (search_anonymous_class_body state) lexbuf } + | "[" + { (* this is an array allocation, not an anonymous class *) + class_scan state lexbuf + } + | _ + { search_anonymous_class_body state lexbuf } + +(* we test if there is an opening anonymous class body here *) +and found_entrance_of_anonymous_class state = parse + | eol + { incr_linenum lexbuf; + found_entrance_of_anonymous_class state lexbuf } + | eol_comment + { found_entrance_of_anonymous_class state lexbuf } + | "/*" + { skip_comments + (found_entrance_of_anonymous_class state) lexbuf } + | whitespace+ + { found_entrance_of_anonymous_class state lexbuf } + | "{" + { (* this is an anonymous class *) + let line, col = location_start lexbuf in + let name = get_next_anonymous_class state in + let classname = long_class_name name state in + state.record_location ~classname ~col ~line ; + let frame : frame = + { short_class_name = name; + is_enum = false; + next_anonymous_class = 1; + exprs = []; + opened_blocks = 0 } in + class_scan (push frame (incr_next_anonymous state)) lexbuf + } + | _ + { (* this is not an anonymous class *) + class_scan state lexbuf + } + +(* we skip type arguments <...> because they may contain brackets *) +and skip_well_parenthesized_angles width action = parse + | eol + { incr_linenum lexbuf; + skip_well_parenthesized_angles width action lexbuf } + | "<" + { skip_well_parenthesized_angles (width+1) action lexbuf } + | ">" + { if width <= 1 then action lexbuf + else skip_well_parenthesized_angles (width-1) action lexbuf } + | eol_comment + { skip_well_parenthesized_angles width action lexbuf } + | "/*" + { skip_comments + (skip_well_parenthesized_angles width action) lexbuf } + | "\"" + { skip_string (skip_well_parenthesized_angles width action) lexbuf } + | _ + { skip_well_parenthesized_angles width action lexbuf } + +(* we skip type annotation arguments (...) because they may contain brackets *) +and skip_well_parenthesized_parentheses width action = parse + | eol + { incr_linenum lexbuf; + skip_well_parenthesized_parentheses width action lexbuf } + | "(" + { skip_well_parenthesized_parentheses (width+1) action lexbuf } + | ")" + { if width <= 1 then action lexbuf + else skip_well_parenthesized_parentheses (width-1) action lexbuf } + | eol_comment + { skip_well_parenthesized_parentheses width action lexbuf } + | "/*" + { skip_comments + (skip_well_parenthesized_parentheses width action) lexbuf } + | "\"" + { skip_string (skip_well_parenthesized_parentheses width action) lexbuf } + | char + { skip_well_parenthesized_parentheses width action lexbuf } + | _ + { skip_well_parenthesized_parentheses width action lexbuf } + +and skip_string action = parse + | "\\\\" + { skip_string action lexbuf } + | "\\\"" + { skip_string action lexbuf } + | "\"" + { action lexbuf } + | _ + { skip_string action lexbuf } + +and skip_comments action = parse + | "*/" + { action lexbuf } + | eol + { incr_linenum lexbuf; + skip_comments action lexbuf } + | _ + { skip_comments action lexbuf } + +{ + + open Javalib_pack + + (** We scan source file [file] and record location of each class declaration *) + let collect_class_location (program:JClasspath.program) (file:SourceFile.t) = + let cin = In_channel.create (SourceFile.to_abs_path file) in + let stack = [] in + let record_location ~classname ~col ~line = + let loc : Location.t = { line; col; file } in + let cn : JBasics.class_name = JBasics.make_cn classname in + Logging.debug Capture Verbose "set_java_location %s with location %a@." + (JBasics.cn_name cn) Location.pp_file_pos loc; + JClasspath.set_java_location program cn loc in + try ( + class_scan { record_location; stack; } (from_channel cin) ; + In_channel.close cin ) + with + | Failure s -> + raise + (Failure + (Printf.sprintf "Error parsing source file %s\n%s" (SourceFile.to_abs_path file) s)) + | Missing_opening_bracket -> + raise + (Failure (Printf.sprintf "Missing opening bracket error while parsing source file %s\n" + (SourceFile.to_abs_path file))) + | Missing_opening_parenthesis -> + raise + (Failure + (Printf.sprintf "Missing opening parenthesis error while parsing source file %s\n" + (SourceFile.to_abs_path file))) + +}