From ce39017611675e487b796bd3c2287cdeb73c452e Mon Sep 17 00:00:00 2001 From: Nikos Gorogiannis Date: Thu, 12 Dec 2019 01:50:19 -0800 Subject: [PATCH] [typ][fieldname] make java representation more sharing friendly and typesafe Summary: The `Typ.FIeldname` module has many issues. Among those: - It has 5 different string/printing functions and most of them do radically different things in Java and in Clang. - There is no type safety: creating a Clang field and calling a Java function on it will lead to a crash (`rindex_exn` etc, there are usually no dots in Clang fields). - It uses a single string for Java fields, containing the package, the class and the field, e.g., `java.lang.Object.field`. This is wasteful, because - there is no sharing of strings for packages/classes, and, - string operations need to be performed every time we need the field or the class or the package alone. This diff preserves the behaviour of the module's interface, so the API problems remain. However, by using a saner representation for Java fields we can get small performance and large memory gains (the type environment in Java is much smaller, about 30-40%). In addition, many functions on clang fields would previously do string manipulations (look for `.` and split on it) before returning the final field unchanged -- now they use the type of the field for that. Reviewed By: jvillard Differential Revision: D18908864 fbshipit-source-id: a72d847cc --- infer/src/IR/Typ.ml | 107 +++++++++++------- infer/src/checkers/costDomain.ml | 4 +- .../cpp/bufferoverrun/issues.exp | 2 +- 3 files changed, 66 insertions(+), 47 deletions(-) diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 35774835d..8188792d5 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -1420,9 +1420,10 @@ module Procname = struct end module Fieldname = struct - type t = Clang of {class_name: Name.t; field_name: string} | Java of string [@@deriving compare] - - let equal = [%compare.equal: t] + type t = + | Clang of {class_name: Name.t; field_name: string} + | Java of {class_name: string; field_name: string} + [@@deriving compare, equal] let is_java = function Java _ -> true | Clang _ -> false @@ -1435,34 +1436,43 @@ module Fieldname = struct module Set = Caml.Set.Make (T) module Map = Caml.Map.Make (T) - (** Convert a fieldname to a string. *) - let to_string = function Java fname -> fname | Clang {field_name} -> field_name + let dot_join s1 s2 = String.concat ~sep:"." [s1; s2] - (** Convert a fieldname to a simplified string with at most one-level path. *) - let to_simplified_string fn = - let s = to_string fn in - match String.rsplit2 s ~on:'.' with - | Some (s1, s2) -> ( - match String.rsplit2 s1 ~on:'.' with Some (_, s4) -> s4 ^ "." ^ s2 | _ -> s ) - | _ -> - s + let to_string = function + | Java {class_name; field_name} when String.is_empty class_name -> + field_name + | Java {class_name; field_name} -> + dot_join class_name field_name + | Clang {field_name} -> + field_name + + + let to_simplified_string = function + | Java {class_name; field_name} -> + String.rsplit2 class_name ~on:'.' + |> Option.value_map ~default:field_name ~f:(fun (_, class_only) -> + dot_join class_only field_name ) + | Clang {field_name} -> + field_name - let to_full_string fname = - match fname with + let to_full_string = function | Clang {class_name; field_name} -> Name.to_string class_name ^ "::" ^ field_name - | _ -> - to_string fname + | Java {class_name; field_name} -> + dot_join class_name field_name - (** Convert a fieldname to a flat string without path. *) - let to_flat_string fn = - let s = to_string fn in - match String.rsplit2 s ~on:'.' with Some (_, s2) -> s2 | _ -> s + let to_flat_string = function Java {field_name} -> field_name | Clang {field_name} -> field_name + let pp f = function + | Java {class_name; field_name} when String.is_empty class_name -> + Format.pp_print_string f field_name + | Java {class_name; field_name} -> + F.pp_print_string f (dot_join class_name field_name) + | Clang {field_name} -> + Format.pp_print_string f field_name - let pp f = function Java field_name | Clang {field_name} -> Format.pp_print_string f field_name let clang_get_qual_class = function | Clang {class_name} -> @@ -1476,37 +1486,46 @@ module Fieldname = struct end module Java = struct - let from_string n = Java n + let from_string full_fieldname = + let class_name, field_name = + match String.rsplit2 full_fieldname ~on:'.' with + | None -> + ("", full_fieldname) + | Some split -> + split + in + Java {class_name; field_name} - let is_captured_parameter field_name = - match field_name with - | Java _ -> - String.is_prefix ~prefix:"val$" (to_flat_string field_name) + + let is_captured_parameter = function + | Java {field_name} -> + String.is_prefix ~prefix:"val$" field_name | Clang _ -> false - let get_class fn = - let fn = to_string fn in - let ri = String.rindex_exn fn '.' in - String.slice fn 0 ri + let get_class = function + | Java {class_name} -> + class_name + | Clang _ as field -> + L.die InternalError "get_class: fieldname %a is not Java@\n" pp field - let get_field fn = - let fn = to_string fn in - let ri = 1 + String.rindex_exn fn '.' in - String.slice fn ri 0 + let get_field = function + | Java {field_name} -> + field_name + | Clang _ as field -> + L.die InternalError "get_field: fieldname %a is not Java@\n" pp field - let is_outer_instance fn = - let fn = to_string fn in - let fn_len = String.length fn in - fn_len <> 0 - && - let this = ".this$" in - let last_char = fn.[fn_len - 1] in - (last_char >= '0' && last_char <= '9') - && String.is_suffix fn ~suffix:(this ^ String.of_char last_char) + let is_outer_instance = function + | Java {field_name} -> + let this = "this$" in + let last_char = field_name.[String.length field_name - 1] in + (last_char >= '0' && last_char <= '9') + && String.is_suffix field_name ~suffix:(this ^ String.of_char last_char) + | Clang _ -> + false end end diff --git a/infer/src/checkers/costDomain.ml b/infer/src/checkers/costDomain.ml index 35d66181a..606553596 100644 --- a/infer/src/checkers/costDomain.ml +++ b/infer/src/checkers/costDomain.ml @@ -15,11 +15,11 @@ module BasicCost = struct (* NOTE: Increment the version number if you changed the [t] type. This is for avoiding demarshalling failure of cost analysis results in running infer-reportdiff. *) - let version = 1 + let version = 2 end (** - Module to simulate a record + Module to simulate a record {OperationCost:BasicCost.t; AllocationCost: BasicCost.t; IOCost:BasicCost.t} with a map {OperationCost, AllocationCost, IOCost} -> BasicCost.t *) diff --git a/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp b/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp index f328497c6..eb6d710a9 100644 --- a/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp +++ b/infer/tests/codetoanalyze/cpp/bufferoverrun/issues.exp @@ -110,7 +110,7 @@ codetoanalyze/cpp/bufferoverrun/trivial.cpp, trivial, 2, BUFFER_OVERRUN_L1, no_b codetoanalyze/cpp/bufferoverrun/vector.cpp, assert_Bad, 6, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,Array access: Offset: 6 Size: 5] codetoanalyze/cpp/bufferoverrun/vector.cpp, constructor_Bad, 2, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,Assignment,Array access: Offset: 3 Size: 1] codetoanalyze/cpp/bufferoverrun/vector.cpp, data_Bad, 4, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Assignment,,Array declaration,Assignment,Assignment,Array access: Offset: 10 Size: 5] -codetoanalyze/cpp/bufferoverrun/vector.cpp, out_of_bound_Bad, 2, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Parameter `*v->vector_elem`,Assignment,,Parameter `*v->vector_elem`,Array access: Offset: v->vector_elem.length Size: v->vector_elem.length] +codetoanalyze/cpp/bufferoverrun/vector.cpp, out_of_bound_Bad, 2, BUFFER_OVERRUN_L2, no_bucket, ERROR, [,Parameter `*v->cpp.vector_elem`,Assignment,,Parameter `*v->cpp.vector_elem`,Array access: Offset: v->cpp.vector_elem.length Size: v->cpp.vector_elem.length] codetoanalyze/cpp/bufferoverrun/vector.cpp, precise_subst_Bad, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [Array declaration,Call,Parameter `*init`,Assignment,Call,Parameter `*__param_0->a`,Assignment,Call,,Parameter `count`,Call,Parameter `idx`,Assignment,Array access: Offset: -1 Size: 10 by call to `access_minus_one` ] codetoanalyze/cpp/bufferoverrun/vector.cpp, precise_subst_Good_FP, 3, BUFFER_OVERRUN_L3, no_bucket, ERROR, [Array declaration,Call,Parameter `*init`,Assignment,Call,Parameter `*__param_0->a`,Assignment,Call,,Parameter `count`,Call,Parameter `idx`,Assignment,Array access: Offset: [-1, 0] Size: 10 by call to `access_minus_one` ] codetoanalyze/cpp/bufferoverrun/vector.cpp, push_back_Bad, 3, BUFFER_OVERRUN_L1, no_bucket, ERROR, [,Array declaration,Array access: Offset: 1 Size: 1]