[inefficient-keyset-iterator] New checker for finding inefficient keySet iterator

Summary: This is a simple checker that identifies inefficient uses of `keySet` iterator where (not only the key but also) the value is accessed via `get(key)`. It is more efficient to use `entrySet` iterator which already returns both key-value pairs. This optimization would get rid of many extra lookups which can be expensive. We simply traverse the CFG starting from the loop head upwards and pick up the map that is iterated over. Then, we check in the loop nodes if there is a call to `get(...)` over this map. If, so we report. Reviewed By: ngorogiannis Differential Revision: D15737779 fbshipit-source-id: 702465b4e
6 years ago · d2eb3c8cc6
parent 8e31b136d0
commit d2eb3c8cc6
19 changed files with 230 additions and 7 deletions
--- a/1
+++ b/1
@ -139,6 +139,7 @@ DIRECT_TESTS += \
  java_eradicate \
  java_hoisting \
  java_hoistingExpensive \
+  java_inefficientKeysetIterator \
  java_infer \
  java_performance \
  java_purity \
--- a/infer/man/man1/infer-analyze.txt
+++ b/infer/man/man1/infer-analyze.txt
@ -141,6 +141,15 @@ OPTIONS
           Activates: Enable --immutable-cast and disable all other checkers
           (Conversely: --no-immutable-cast-only)

+       --inefficient-keyset-iterator
+           Activates: Check for inefficient uses of keySet iterator instead
+           of entrySet iterator. (Conversely:
+           --no-inefficient-keyset-iterator)
+
+       --inefficient-keyset-iterator-only
+           Activates: Enable --inefficient-keyset-iterator and disable all
+           other checkers (Conversely: --no-inefficient-keyset-iterator-only)
+
       --jobs,-j int
           Run the specified number of analysis jobs simultaneously (default:
           <number of cores>)
--- a/infer/man/man1/infer-full.txt
+++ b/infer/man/man1/infer-full.txt
@ -389,6 +389,7 @@ OPTIONS
           (disabled by default),
             GRAPHQL_FIELD_ACCESS (enabled by default),
             GUARDEDBY_VIOLATION (enabled by default),
+             INEFFICIENT_KEYSET_ITERATOR (enabled by default),
             INFERBO_ALLOC_IS_BIG (enabled by default),
             INFERBO_ALLOC_IS_NEGATIVE (enabled by default),
             INFERBO_ALLOC_IS_ZERO (enabled by default),
@ -614,6 +615,16 @@ OPTIONS
           Activates: Enable --immutable-cast and disable all other checkers
           (Conversely: --no-immutable-cast-only)           See also infer-analyze(1).

+       --inefficient-keyset-iterator
+           Activates: Check for inefficient uses of keySet iterator instead
+           of entrySet iterator. (Conversely:
+           --no-inefficient-keyset-iterator)           See also infer-analyze(1).
+
+       --inefficient-keyset-iterator-only
+           Activates: Enable --inefficient-keyset-iterator and disable all
+           other checkers (Conversely: --no-inefficient-keyset-iterator-only)       
+    See also infer-analyze(1).
+
       --iphoneos-target-sdk-version string
           Specify the target SDK version to use for iphoneos           See also infer-capture(1).

--- a/infer/man/man1/infer-report.txt
+++ b/infer/man/man1/infer-report.txt
@ -129,6 +129,7 @@ OPTIONS
           (disabled by default),
             GRAPHQL_FIELD_ACCESS (enabled by default),
             GUARDEDBY_VIOLATION (enabled by default),
+             INEFFICIENT_KEYSET_ITERATOR (enabled by default),
             INFERBO_ALLOC_IS_BIG (enabled by default),
             INFERBO_ALLOC_IS_NEGATIVE (enabled by default),
             INFERBO_ALLOC_IS_ZERO (enabled by default),
--- a/infer/man/man1/infer.txt
+++ b/infer/man/man1/infer.txt
@ -389,6 +389,7 @@ OPTIONS
           (disabled by default),
             GRAPHQL_FIELD_ACCESS (enabled by default),
             GUARDEDBY_VIOLATION (enabled by default),
+             INEFFICIENT_KEYSET_ITERATOR (enabled by default),
             INFERBO_ALLOC_IS_BIG (enabled by default),
             INFERBO_ALLOC_IS_NEGATIVE (enabled by default),
             INFERBO_ALLOC_IS_ZERO (enabled by default),
@ -614,6 +615,16 @@ OPTIONS
           Activates: Enable --immutable-cast and disable all other checkers
           (Conversely: --no-immutable-cast-only)           See also infer-analyze(1).

+       --inefficient-keyset-iterator
+           Activates: Check for inefficient uses of keySet iterator instead
+           of entrySet iterator. (Conversely:
+           --no-inefficient-keyset-iterator)           See also infer-analyze(1).
+
+       --inefficient-keyset-iterator-only
+           Activates: Enable --inefficient-keyset-iterator and disable all
+           other checkers (Conversely: --no-inefficient-keyset-iterator-only)       
+    See also infer-analyze(1).
+
       --iphoneos-target-sdk-version string
           Specify the target SDK version to use for iphoneos           See also infer-capture(1).

--- a/infer/src/IR/Instrs.ml
+++ b/infer/src/IR/Instrs.ml
@ -56,6 +56,8 @@ type not_reversed_t = not_reversed t

 let of_array instrs = NotReversed instrs

+let get_underlying_not_reversed = function NotReversed instrs -> instrs
+
 let empty = of_array [||]

 let singleton instr = of_array [|instr|]
--- a/infer/src/IR/Instrs.mli
+++ b/infer/src/IR/Instrs.mli
@ -62,3 +62,5 @@ val pp : Pp.env -> Format.formatter -> _ t -> unit
 val fold : (_ t, Sil.instr, 'a) Container.fold

 val iter : (_ t, Sil.instr) Container.iter
+
+val get_underlying_not_reversed : not_reversed t -> Sil.instr array
--- a/infer/src/base/Config.ml
+++ b/infer/src/base/Config.ml
@ -608,6 +608,7 @@ and ( annotation_reachability
    , eradicate
    , fragment_retains_view
    , immutable_cast
+    , inefficient_keyset_iterator
    , linters
    , litho
    , liveness
@ -654,6 +655,9 @@ and ( annotation_reachability
    mk_checker ~long:"immutable-cast" ~default:false
      "the detection of object cast from immutable type to mutable type. For instance, it will \
       detect cast from ImmutableList to List, ImmutableMap to Map, and ImmutableSet to Set."
+  and inefficient_keyset_iterator =
+    mk_checker ~long:"inefficient-keyset-iterator" ~default:false
+      "Check for inefficient uses of keySet iterator instead of entrySet iterator."
  and linters = mk_checker ~long:"linters" ~default:true "syntactic linters"
  and litho = mk_checker ~long:"litho" "Experimental checkers supporting the Litho framework"
  and liveness =
@ -727,6 +731,7 @@ and ( annotation_reachability
  , eradicate
  , fragment_retains_view
  , immutable_cast
+  , inefficient_keyset_iterator
  , linters
  , litho
  , liveness
@ -2823,6 +2828,8 @@ and icfg_dotty_outfile = !icfg_dotty_outfile

 and immutable_cast = !immutable_cast

+and inefficient_keyset_iterator = !inefficient_keyset_iterator
+
 and iphoneos_target_sdk_version = !iphoneos_target_sdk_version

 and iphoneos_target_sdk_version_path_regex =
--- a/infer/src/base/Config.mli
+++ b/infer/src/base/Config.mli
@ -404,6 +404,8 @@ val icfg_dotty_outfile : string option

 val immutable_cast : bool

+val inefficient_keyset_iterator : bool
+
 val infer_is_clang : bool

 val infer_is_javac : bool
--- a/infer/src/base/IssueType.ml
+++ b/infer/src/base/IssueType.ml
@ -277,6 +277,8 @@ let graphql_field_access = from_string "GRAPHQL_FIELD_ACCESS"

 let guardedby_violation_racerd = from_string "GUARDEDBY_VIOLATION" ~hum:"GuardedBy Violation"

+let inefficient_keyset_iterator = from_string "INEFFICIENT_KEYSET_ITERATOR"
+
 let inferbo_alloc_is_big = from_string "INFERBO_ALLOC_IS_BIG"

 let inferbo_alloc_is_negative = from_string "INFERBO_ALLOC_IS_NEGATIVE"
--- a/infer/src/base/IssueType.mli
+++ b/infer/src/base/IssueType.mli
@ -181,6 +181,8 @@ val graphql_field_access : t

 val guardedby_violation_racerd : t

+val inefficient_keyset_iterator : t
+
 val inferbo_alloc_is_big : t

 val inferbo_alloc_is_negative : t
--- a/infer/src/checkers/cost.ml
+++ b/infer/src/checkers/cost.ml
@ -824,10 +824,7 @@ let checker {Callbacks.tenv; proc_desc; integer_type_widths; summary} : Summary.
  (* computes reaching defs: node -> (var -> node set) *)
  let reaching_defs_invariant_map = ReachingDefs.compute_invariant_map proc_desc tenv in
  (* collect all prune nodes that occur in loop guards, needed for ControlDepAnalyzer *)
-  let control_maps, loop_head_to_loop_nodes =
-    let loop_head_to_source_nodes_map = Loop_control.get_loop_head_to_source_nodes node_cfg in
-    Loop_control.get_control_maps loop_head_to_source_nodes_map
-  in
+  let control_maps, loop_head_to_loop_nodes = Loop_control.get_loop_control_maps node_cfg in
  (* computes the control dependencies: node -> var set *)
  let control_dep_invariant_map = Control.compute_invariant_map proc_desc tenv control_maps in
  (* compute loop invariant map for control var analysis *)
--- a/infer/src/checkers/inefficientKeysetIterator.ml
+++ b/infer/src/checkers/inefficientKeysetIterator.ml
@ -0,0 +1,95 @@
+(*
+ * Copyright (c) 2019-present, Facebook, Inc.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *)
+open! IStd
+module F = Format
+module CFG = ProcCfg.Normal
+module LoopNodes = AbstractDomain.FiniteSet (Procdesc.Node)
+
+let find_loaded_pvar id = function
+  | Sil.Load (lhs_id, Exp.Lvar rhs_pvar, _, _) when Ident.equal lhs_id id ->
+      Some rhs_pvar
+  | _ ->
+      None
+
+
+let find_first_arg_id ~fun_name ~lhs_f = function
+  | Sil.Call ((lhs_id, _), Exp.Const (Const.Cfun pname), (Exp.Var rhs_id, _) :: _, _, _)
+    when lhs_f lhs_id && String.equal fun_name (Typ.Procname.get_method pname) ->
+      Some rhs_id
+  | _ ->
+      None
+
+
+(** If given a node that has 4 instructions and calls fun_name, 
+    pickup bcvarY, i.e. variable for the first argument 
+     n$X      = *&$bcvarY
+       _      = *n$X 
+     n$X+1    = fun_name(n$X,....)
+    *&$irvarZ = n$X+1
+ *)
+let find_first_arg_pvar node ~fun_name =
+  let instrs = Procdesc.Node.get_instrs node in
+  if Int.equal (Instrs.count instrs) 4 then
+    let instr_arr = Instrs.get_underlying_not_reversed instrs in
+    match instr_arr.(3) with
+    | Sil.Store (Exp.Lvar _, _, Exp.Var rhs_id, _) ->
+        find_first_arg_id ~fun_name ~lhs_f:(Ident.equal rhs_id) instr_arr.(2)
+        |> Option.bind ~f:(fun arg_id -> find_loaded_pvar arg_id instr_arr.(0))
+    | _ ->
+        None
+  else None
+
+
+let report_matching_get summary pvar loop_nodes : unit =
+  LoopNodes.iter
+    (fun node ->
+      let instrs = Procdesc.Node.get_instrs node in
+      if Int.equal (Instrs.count instrs) 5 then
+        let instr_arr = Instrs.get_underlying_not_reversed instrs in
+        find_first_arg_id ~fun_name:"get" ~lhs_f:(fun _ -> true) instr_arr.(3)
+        |> Option.iter ~f:(fun arg_id ->
+               find_loaded_pvar arg_id instr_arr.(0)
+               |> Option.iter ~f:(fun arg_pvar ->
+                      if Pvar.equal arg_pvar pvar then
+                        let pp_m = MarkupFormatter.pp_monospaced in
+                        let exp_desc =
+                          F.asprintf
+                            "Accessing a value using a key that was retrieved from a %a iterator. \
+                             It is more efficient to use an iterator on the %a of the map, \
+                             avoiding the extra %a lookup."
+                            pp_m "keySet" pp_m "entrySet" pp_m "HashMap.get(key)"
+                        in
+                        let loc = Procdesc.Node.get_loc node in
+                        let ltr = [Errlog.make_trace_element 0 loc exp_desc []] in
+                        Reporting.log_error summary ~loc ~ltr IssueType.inefficient_keyset_iterator
+                          exp_desc ) ) )
+    loop_nodes
+
+
+let when_dominating_pred_satisfies idom my_node ~f =
+  let preds =
+    Procdesc.Node.get_preds my_node
+    |> List.filter ~f:(fun node -> Dominators.dominates idom node my_node)
+  in
+  match preds with [pred_node] -> f pred_node | _ -> ()
+
+
+let checker Callbacks.{summary; proc_desc} : Summary.t =
+  let cfg = CFG.from_pdesc proc_desc in
+  let _, loop_head_to_loop_nodes = Loop_control.get_loop_control_maps cfg in
+  let idom = Dominators.get_idoms proc_desc in
+  Procdesc.NodeMap.iter
+    (fun loop_head loop_nodes ->
+      if find_first_arg_pvar loop_head ~fun_name:"hasNext" |> Option.is_some then
+        when_dominating_pred_satisfies idom loop_head ~f:(fun itr_node ->
+            if Option.is_some (find_first_arg_pvar itr_node ~fun_name:"iterator") then
+              when_dominating_pred_satisfies idom itr_node ~f:(fun keySet_node ->
+                  find_first_arg_pvar keySet_node ~fun_name:"keySet"
+                  |> Option.iter ~f:(fun get_pvar ->
+                         report_matching_get summary get_pvar loop_nodes ) ) ) )
+    loop_head_to_loop_nodes ;
+  summary
--- a/infer/src/checkers/loop_control.ml
+++ b/infer/src/checkers/loop_control.ml
@ -184,3 +184,8 @@ let get_control_maps loop_head_to_source_nodes_map =
        { exit_map= Control.ExitNodeToLoopHeads.empty
        ; loop_head_to_guard_nodes= Control.LoopHeadToGuardNodes.empty }
    , LoopInvariant.LoopHeadToLoopNodes.empty )
+
+
+let get_loop_control_maps cfg =
+  let loop_head_to_source_nodes_map = get_loop_head_to_source_nodes cfg in
+  get_control_maps loop_head_to_source_nodes_map
--- a/infer/src/checkers/loop_control.mli
+++ b/infer/src/checkers/loop_control.mli
@ -20,9 +20,8 @@ val get_loop_head_to_source_nodes : Procdesc.t -> Procdesc.Node.t list Procdesc.
  loop_head (target of back-edges) --> source nodes
 *)

-val get_control_maps :
-     Procdesc.Node.t list Procdesc.NodeMap.t
-  -> Control.loop_control_maps * Control.GuardNodes.t Procdesc.NodeMap.t
+val get_loop_control_maps :
+  Procdesc.t -> Control.loop_control_maps * Control.GuardNodes.t Procdesc.NodeMap.t
 (**
  Get a pair of maps (exit_map, loop_head_to_guard_map) where
  exit_map : exit_node -> loop_head set (i.e. target of the back edges) 
--- a/infer/src/checkers/registerCheckers.ml
+++ b/infer/src/checkers/registerCheckers.ml
@ -67,6 +67,9 @@ let all_checkers =
  ; { name= "immutable cast"
    ; active= Config.immutable_cast
    ; callbacks= [(Procedure ImmutableChecker.callback_check_immutable_cast, Language.Java)] }
+  ; { name= "inefficient keyset iterator"
+    ; active= Config.inefficient_keyset_iterator
+    ; callbacks= [(Procedure InefficientKeysetIterator.checker, Language.Java)] }
  ; { name= "liveness"
    ; active= Config.liveness
    ; callbacks= [(Procedure Liveness.checker, Language.Clang)] }
--- a/infer/tests/codetoanalyze/java/inefficientKeysetIterator/Makefile
+++ b/infer/tests/codetoanalyze/java/inefficientKeysetIterator/Makefile
@ -0,0 +1,12 @@
+# Copyright (c) 2019-present, Facebook, Inc.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+TESTS_DIR = ../../..
+
+INFER_OPTIONS = --inefficient-keyset-iterator-only  --debug-exceptions
+INFERPRINT_OPTIONS = --issues-tests
+SOURCES = $(wildcard *.java)
+
+include $(TESTS_DIR)/javac.make
--- a/infer/tests/codetoanalyze/java/inefficientKeysetIterator/Test.java
+++ b/infer/tests/codetoanalyze/java/inefficientKeysetIterator/Test.java
@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019-present, Facebook, Inc.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+class Test {
+
+  void inefficient_loop_bad(HashMap<String, Integer> testMap) {
+    for (String key : testMap.keySet()) {
+      testMap.get(key);
+    }
+  }
+
+  void inefficient_loop_itr_bad(HashMap<String, Integer> testMap) {
+
+    Iterator itr2 = testMap.keySet().iterator();
+    while (itr2.hasNext()) {
+      String key = (String) itr2.next();
+      testMap.get(key);
+    }
+  }
+
+  void inefficient_loop_itr_bad_FN(HashMap<String, Integer> testMap) {
+
+    Iterator itr2 = testMap.keySet().iterator();
+    int i = 0;
+    while (itr2.hasNext()) {
+      String key = (String) itr2.next();
+      testMap.get(key);
+    }
+  }
+
+  void efficient_loop_itr_ok(HashMap<String, Integer> testMap) {
+
+    Iterator<Map.Entry<String, Integer>> itr1 = testMap.entrySet().iterator();
+    while (itr1.hasNext()) {
+      Map.Entry<String, Integer> entry = itr1.next();
+      entry.getKey();
+      entry.getValue();
+    }
+  }
+
+  void efficient_loop_ok(HashMap<String, Integer> testMap) {
+    for (Map.Entry<String, Integer> entry : testMap.entrySet()) {
+      entry.getKey();
+      entry.getValue();
+    }
+  }
+
+  void negative_loop_ok(HashMap<String, Integer> testMap1, HashMap<String, Integer> testMap2) {
+    for (String key : testMap1.keySet()) {
+      testMap2.get(key);
+    }
+  }
+}
--- a/infer/tests/codetoanalyze/java/inefficientKeysetIterator/issues.exp
+++ b/infer/tests/codetoanalyze/java/inefficientKeysetIterator/issues.exp
@ -0,0 +1,2 @@
+codetoanalyze/java/inefficientKeysetIterator/Test.java, Test.inefficient_loop_bad(java.util.HashMap):void, 2, INEFFICIENT_KEYSET_ITERATOR, no_bucket, ERROR, [Accessing a value using a key that was retrieved from a `keySet` iterator. It is more efficient to use an iterator on the `entrySet` of the map, avoiding the extra `HashMap.get(key)` lookup.]
+codetoanalyze/java/inefficientKeysetIterator/Test.java, Test.inefficient_loop_itr_bad(java.util.HashMap):void, 5, INEFFICIENT_KEYSET_ITERATOR, no_bucket, ERROR, [Accessing a value using a key that was retrieved from a `keySet` iterator. It is more efficient to use an iterator on the `entrySet` of the map, avoiding the extra `HashMap.get(key)` lookup.]