Java source baby parser for class declaration locations

Reviewed By: mityal

Differential Revision: D20919670

fbshipit-source-id: c7ceb517c
master
David Pichardie 5 years ago committed by Facebook GitHub Bot
parent db965085b9
commit b22f7c83d5

@ -129,6 +129,8 @@ let topl_stanzas =
["(ocamllex ToplLexer)"; "(menhir (flags --unused-token INDENT --explain) (modules ToplParser))"] ["(ocamllex ToplLexer)"; "(menhir (flags --unused-token INDENT --explain) (modules ToplParser))"]
let java_sources_lexer = if java then ["(ocamllex jSourceFileInfo)"] else []
let flatten_sources_stanzas = let flatten_sources_stanzas =
List.map List.map
(fun source_dir -> (fun source_dir ->
@ -143,7 +145,7 @@ let flatten_sources_stanzas =
(** The build stanzas to be passed to dune *) (** The build stanzas to be passed to dune *)
let stanzas = let stanzas =
(env_stanza :: main_lib_stanza :: infer_exe_stanza :: infertop_stanza :: clang_lexer_stanzas) (env_stanza :: main_lib_stanza :: infer_exe_stanza :: infertop_stanza :: clang_lexer_stanzas)
@ topl_stanzas @ flatten_sources_stanzas @ java_sources_lexer @ topl_stanzas @ flatten_sources_stanzas
;; ;;

@ -132,12 +132,31 @@ let cache_classname cn =
let is_classname_cached cn = Sys.file_exists (path_of_cached_classname cn) = `Yes let is_classname_cached cn = Sys.file_exists (path_of_cached_classname cn) = `Yes
let test_source_file_location source_file program cn node =
let is_synthetic = function
| Javalib.JInterface _ ->
false
| Javalib.JClass jc ->
jc.Javalib.c_synthetic
in
if not (is_synthetic node) then
match JClasspath.get_java_location program cn with
| None ->
L.(debug Capture Verbose)
"WARNING SOURCE FILE PARSER: location not found for class %s in source file %s \n"
(JBasics.cn_name cn)
(SourceFile.to_abs_path source_file)
| Some _ ->
()
(* Given a source file and a class, translates the code of this class. (* Given a source file and a class, translates the code of this class.
In init - mode, finds out whether this class contains initializers at all, In init - mode, finds out whether this class contains initializers at all,
in this case translates it. In standard mode, all methods are translated *) in this case translates it. In standard mode, all methods are translated *)
let create_icfg source_file program tenv icfg cn node = let create_icfg source_file program tenv icfg cn node =
L.(debug Capture Verbose) "\tclassname: %s@." (JBasics.cn_name cn) ; L.(debug Capture Verbose) "\tclassname: %s@." (JBasics.cn_name cn) ;
if Config.dependency_mode && not (is_classname_cached cn) then cache_classname cn ; if Config.dependency_mode && not (is_classname_cached cn) then cache_classname cn ;
test_source_file_location source_file program cn node ;
let translate m = let translate m =
let proc_name = JTransType.translate_method_name program tenv m in let proc_name = JTransType.translate_method_name program tenv m in
JClasspath.set_callee_translated program proc_name ; JClasspath.set_callee_translated program proc_name ;
@ -194,19 +213,8 @@ let compute_source_icfg program tenv source_basename package_opt source_file =
let select test procedure cn node = let select test procedure cn node =
if test node then try procedure cn node with Bir.Subroutine -> () if test node then try procedure cn node with Bir.Subroutine -> ()
in in
let set_java_location cn _node =
let cn_name = JBasics.cn_name cn in
let loc = JSourceFileInfo.class_name_location source_file cn_name in
L.debug Capture Verbose "set_java_location %s with location %a@." cn_name Location.pp_file_pos
loc ;
JClasspath.set_java_location program cn loc
in
(* we must set the java location for all classes in the source file before translation *) (* we must set the java location for all classes in the source file before translation *)
let () = JSourceFileInfo.collect_class_location program source_file ;
JBasics.ClassMap.iter
(select (should_capture program package_opt source_basename) set_java_location)
(JClasspath.get_classmap program)
in
let () = let () =
JBasics.ClassMap.iter JBasics.ClassMap.iter
(select (select

@ -1,13 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
(** this is a naive/temporary implementation in a near diff, we will 1) parse the source file to
collect location datas for all class names 2) cache the result for later uses but we may have to
adapt a litle some signatures *)
let class_name_location file _cn : Location.t = {line= 0; col= 0; file}

@ -1,12 +0,0 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
val class_name_location : SourceFile.t -> string -> Location.t
(** [class_name_location source class_name] searches in file [source] the declaration location for
class name [class_name] *)

@ -0,0 +1,401 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
{
open! IStd
open Lexing
module Array = struct
include Array
let make len = create ~len
end
(** classic Ocamllex function to update current lexbuf line at each end of
line *)
let incr_linenum lexbuf =
let pos = lexbuf.Lexing.lex_curr_p in
lexbuf.Lexing.lex_curr_p <- { pos with
Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
Lexing.pos_bol = pos.Lexing.pos_cnum;
}
(** position of the char just after lexbuf *)
let end_pos lexbuf =
lexbuf.lex_curr_p.pos_lnum,
lexbuf.lex_curr_p.pos_cnum - lexbuf.lex_curr_p.pos_bol
(** return the exact position start of the suffix [classname] in [lexbuf] *)
let location_suffix suffix lexbuf =
let length_suffix = String.length suffix in
let l, c = end_pos lexbuf in
l, c - length_suffix
(** return the position start of [lexbuf] *)
let location_start lexbuf =
let l, c = end_pos lexbuf in
let lexbuf_length = lexeme_end lexbuf - lexeme_start lexbuf in
l, c - lexbuf_length
(** We traverse the structure of the source file by recording the
encompassing blocks in a stack of frame.
We use the stack to recover the full inner class name at bytecode level *)
type expr = | AllocExpr | OtherExpr
type frame =
{ short_class_name: string
; is_enum : bool
; next_anonymous_class: int
; opened_blocks: int
; exprs: expr list }
type state =
{ stack: frame list
; record_location: classname:string -> col:int -> line:int -> unit }
let push frame state = { state with stack = frame :: state.stack; }
exception Missing_opening_bracket
exception Missing_opening_parenthesis
let add_package package state =
let record_location ~classname =
let classname = package^"."^classname in
state.record_location ~classname in
{ state with record_location; }
let pop_class state =
match state.stack with
| [] -> raise Missing_opening_bracket
| _ :: stack -> { state with stack; }
let incr_next_anonymous state =
match state.stack with
| [] -> { state with stack = []; }
| fr :: stack ->
let stack =
{fr with next_anonymous_class = fr.next_anonymous_class+1; } :: stack in
{ state with stack; }
let add_expr e (state:state) : state =
match state.stack with
| [] -> state
| fr :: stack ->
let stack = {fr with exprs = e :: fr.exprs; } :: stack in
{ state with stack; }
let pop_exprs state =
match state.stack with
| [] -> raise Missing_opening_parenthesis
| fr :: stack -> (
match fr.exprs with
| [] -> raise Missing_opening_parenthesis
| e :: exprs ->
let stack = {fr with exprs; } :: stack in
(e, { state with stack; }))
let in_field_declaration_area state =
match state.stack with
| [] -> false
| fr :: _ -> Int.equal fr.opened_blocks 0
let get_opened_blocks state =
match state.stack with
| [] -> raise Missing_opening_bracket
| fr :: _ -> fr.opened_blocks
let is_enum state =
match state.stack with
| [] -> false
| fr :: _ -> fr.is_enum && Int.equal fr.opened_blocks 0
let get_next_anonymous_class state =
match state.stack with
| [] -> raise Missing_opening_bracket
| fr :: _ -> string_of_int fr.next_anonymous_class
let decr_opened_blocks state =
let stack =
match state.stack with
| [] -> []
| fr :: stack ->
{fr with opened_blocks = fr.opened_blocks-1; } :: stack in
{ state with stack; }
let incr_opened_blocks state =
let stack =
match state.stack with
| [] -> []
| fr :: stack ->
{fr with opened_blocks = fr.opened_blocks+1; } :: stack in
{ state with stack; }
let long_class_name name state =
let f name frame = Printf.sprintf "%s$%s" frame.short_class_name name in
List.fold ~f ~init:name state.stack
}
let whitespace = [' ' '\t']
let eol = whitespace*("\r")?"\n" (* end of line *)
let eol_comment = "//" [^'\n']*
let id = ['a'-'z' 'A'-'Z' '_' '$'] ['a'-'z' 'A'-'Z' '0'-'9' '_' '$']*
let char = "'\\''" | "'\"'" | "'" [ ^'\'' ]+ "'"
let class_keyword = "class"|"interface"|"enum"|"@" whitespace+ "interface"
(* We follow an abstraction of the official grammar described here:
https://docs.oracle.com/javase/specs/jls/se14/html/jls-19.html *)
rule class_scan state = parse
| whitespace+
{ class_scan state lexbuf }
| eol_comment
{ class_scan state lexbuf }
| "/*"
{ skip_comments (class_scan state) lexbuf }
| eol
{ incr_linenum lexbuf;
class_scan state lexbuf }
| "package" whitespace+ (id ("." id)* as package) whitespace* ";"
{ class_scan (add_package package state) lexbuf }
| id
{ class_scan state lexbuf }
| class_keyword whitespace+ (id as name)
{
let line, col = location_suffix name lexbuf in
let classname = long_class_name name state in
state.record_location ~classname ~col ~line ;
let frame : frame =
{ short_class_name = name;
is_enum = false;
next_anonymous_class = 1;
exprs = [];
opened_blocks = 0 } in
(* we jump to the next left bracket, skipping annotations and
generics <...> contents *)
do_at_next_left_bracket
(fun lexbuf ->
class_scan (push frame state) lexbuf) lexbuf
}
| "new" whitespace+
{ (* may be a declaration of an anonymous class
```new [TypeArguments] ClassOrInterfaceTypeToInstantiate
( [ArgumentList] ) [ClassBody] ```
^
so we jump this position | *)
search_anonymous_class_body state lexbuf
}
| id
{ if is_enum state
then found_entrance_of_anonymous_class state lexbuf
else class_scan state lexbuf }
| (id as _field) whitespace* ";"
{ if in_field_declaration_area state then
(* we only reach this situation in class/interface bodies, and
never inside method bodies *)
() ; (* TODO : record field location *)
class_scan state lexbuf
}
| "\""
{ skip_string (class_scan state) lexbuf }
| char
{ class_scan state lexbuf }
| "{"
{ class_scan (incr_opened_blocks state) lexbuf }
| "("
{ class_scan (add_expr OtherExpr state) lexbuf }
| ")"
{ match pop_exprs state with
| AllocExpr, state -> found_entrance_of_anonymous_class state lexbuf
| OtherExpr, state -> class_scan state lexbuf }
| "@" whitespace* id ("." id)* "("
{ skip_well_parenthesized_parentheses 1
(class_scan state) lexbuf }
| "}"
{
if Int.equal (get_opened_blocks state) 0
then class_scan (pop_class state) lexbuf
else class_scan (decr_opened_blocks state) lexbuf
}
| _
{ class_scan state lexbuf }
| eof
{ () }
(* we search for the next left bracket *)
and do_at_next_left_bracket action = parse
| eol
{ incr_linenum lexbuf;
do_at_next_left_bracket action lexbuf }
| eol_comment
{ do_at_next_left_bracket action lexbuf }
| "/*"
{ skip_comments (do_at_next_left_bracket action) lexbuf }
| "{"
{ action lexbuf }
| "<"
{ skip_well_parenthesized_angles 1
(do_at_next_left_bracket action) lexbuf }
| "@" whitespace* id "("
{ skip_well_parenthesized_parentheses 1
(do_at_next_left_bracket action) lexbuf }
| "\""
{ skip_string (do_at_next_left_bracket action) lexbuf }
| _
{ do_at_next_left_bracket action lexbuf }
(* we search for (...) parentheses *)
and search_anonymous_class_body state = parse
| eol
{ incr_linenum lexbuf;
search_anonymous_class_body state lexbuf }
| eol_comment
{ search_anonymous_class_body state lexbuf }
| "/*"
{ skip_comments
(search_anonymous_class_body state) lexbuf }
| "("
{ class_scan (add_expr AllocExpr state) lexbuf }
| "<"
{ skip_well_parenthesized_angles 1
(search_anonymous_class_body state) lexbuf }
| "@" whitespace* id "("
{ skip_well_parenthesized_parentheses 1
(search_anonymous_class_body state) lexbuf }
| "\""
{ skip_string
(search_anonymous_class_body state) lexbuf }
| "["
{ (* this is an array allocation, not an anonymous class *)
class_scan state lexbuf
}
| _
{ search_anonymous_class_body state lexbuf }
(* we test if there is an opening anonymous class body here *)
and found_entrance_of_anonymous_class state = parse
| eol
{ incr_linenum lexbuf;
found_entrance_of_anonymous_class state lexbuf }
| eol_comment
{ found_entrance_of_anonymous_class state lexbuf }
| "/*"
{ skip_comments
(found_entrance_of_anonymous_class state) lexbuf }
| whitespace+
{ found_entrance_of_anonymous_class state lexbuf }
| "{"
{ (* this is an anonymous class *)
let line, col = location_start lexbuf in
let name = get_next_anonymous_class state in
let classname = long_class_name name state in
state.record_location ~classname ~col ~line ;
let frame : frame =
{ short_class_name = name;
is_enum = false;
next_anonymous_class = 1;
exprs = [];
opened_blocks = 0 } in
class_scan (push frame (incr_next_anonymous state)) lexbuf
}
| _
{ (* this is not an anonymous class *)
class_scan state lexbuf
}
(* we skip type arguments <...> because they may contain brackets *)
and skip_well_parenthesized_angles width action = parse
| eol
{ incr_linenum lexbuf;
skip_well_parenthesized_angles width action lexbuf }
| "<"
{ skip_well_parenthesized_angles (width+1) action lexbuf }
| ">"
{ if width <= 1 then action lexbuf
else skip_well_parenthesized_angles (width-1) action lexbuf }
| eol_comment
{ skip_well_parenthesized_angles width action lexbuf }
| "/*"
{ skip_comments
(skip_well_parenthesized_angles width action) lexbuf }
| "\""
{ skip_string (skip_well_parenthesized_angles width action) lexbuf }
| _
{ skip_well_parenthesized_angles width action lexbuf }
(* we skip type annotation arguments (...) because they may contain brackets *)
and skip_well_parenthesized_parentheses width action = parse
| eol
{ incr_linenum lexbuf;
skip_well_parenthesized_parentheses width action lexbuf }
| "("
{ skip_well_parenthesized_parentheses (width+1) action lexbuf }
| ")"
{ if width <= 1 then action lexbuf
else skip_well_parenthesized_parentheses (width-1) action lexbuf }
| eol_comment
{ skip_well_parenthesized_parentheses width action lexbuf }
| "/*"
{ skip_comments
(skip_well_parenthesized_parentheses width action) lexbuf }
| "\""
{ skip_string (skip_well_parenthesized_parentheses width action) lexbuf }
| char
{ skip_well_parenthesized_parentheses width action lexbuf }
| _
{ skip_well_parenthesized_parentheses width action lexbuf }
and skip_string action = parse
| "\\\\"
{ skip_string action lexbuf }
| "\\\""
{ skip_string action lexbuf }
| "\""
{ action lexbuf }
| _
{ skip_string action lexbuf }
and skip_comments action = parse
| "*/"
{ action lexbuf }
| eol
{ incr_linenum lexbuf;
skip_comments action lexbuf }
| _
{ skip_comments action lexbuf }
{
open Javalib_pack
(** We scan source file [file] and record location of each class declaration *)
let collect_class_location (program:JClasspath.program) (file:SourceFile.t) =
let cin = In_channel.create (SourceFile.to_abs_path file) in
let stack = [] in
let record_location ~classname ~col ~line =
let loc : Location.t = { line; col; file } in
let cn : JBasics.class_name = JBasics.make_cn classname in
Logging.debug Capture Verbose "set_java_location %s with location %a@."
(JBasics.cn_name cn) Location.pp_file_pos loc;
JClasspath.set_java_location program cn loc in
try (
class_scan { record_location; stack; } (from_channel cin) ;
In_channel.close cin )
with
| Failure s ->
raise
(Failure
(Printf.sprintf "Error parsing source file %s\n%s" (SourceFile.to_abs_path file) s))
| Missing_opening_bracket ->
raise
(Failure (Printf.sprintf "Missing opening bracket error while parsing source file %s\n"
(SourceFile.to_abs_path file)))
| Missing_opening_parenthesis ->
raise
(Failure
(Printf.sprintf "Missing opening parenthesis error while parsing source file %s\n"
(SourceFile.to_abs_path file)))
}
Loading…
Cancel
Save