[Infer][incremental] Preventing --changed-only incremental mode from corrupting the future

Summary: When someone runs --changed-only mode, there is a risk of corrupting the results for future analyses. The problem is that changed-only mode does not analyze the callers of changed procedures. If a subsequent analysis relies on the specs of one of these callers, they will be stale and may give the wrong results. To be concrete, let's say we know `Parent.foo()` calls `Child.bar()` and we do the following rounds of analysis: Analysis round 1: Analyze all files, including `Parent` and `Child` Analysis round 2: Analyze `Child.bar()` only with `--changed-only flag`. `Parent.foo()` is now stale. Analysis round 3: Add procedure `Parent.baz()` that calls `Parent.foo()`, analyze in (any) incremental mode. The analysis will only analyze `Parent.baz()`. However, the specs for `Parent.foo()` are stale and may give us bad results for `Parent.baz()`. We want the analysis to re-analyze `Parent.baz()`, but before this diff it will not. This diff fixes this problem by adding a `STALE` status bit to procedure summaries. In `--changed-only` mode, the callers of a changed procedures are not re-analyzed, but their summaries are marked as stale. For both `--changed-only` and regular incremental mode, callees of changed procedures that are marked as stale are re-analyzed even if they have not changed. This is better than a more obvious solution like deleting stale procedure summaries, since that would force the next analysis to re-analyze all stale procedures even if it does not need the results for whatever analysis it is doing. This scheme implemented in this diff ensures that each analysis only does the work that it needs to compute reliable results for its changed procedures.
10 years ago · cc2fda8165
parent bdbc524f53
commit cc2fda8165
11 changed files with 254 additions and 32 deletions
--- a/infer/src/backend/cg.mli
+++ b/infer/src/backend/cg.mli
@ -33,6 +33,7 @@ val create : unit -> t (** Create an empty call graph *)
 val extend : t -> t -> unit (** [extend cg1 gc2] extends [cg1] in place with nodes and edges from [gc2]; undefined nodes become defined if at least one side is. *)
 val get_all_children : t -> Procname.t -> Procname.Set.t (** Return all the children of [n], whether defined or not *)
 val get_ancestors : t -> Procname.t -> Procname.Set.t (** Compute the ancestors of the node, if not pre-computed already *)
+val get_heirs : t -> Procname.t -> Procname.Set.t (** Compute the heirs of the node, if not pre-computed already *)
 val get_calls : t -> Procname.t -> in_out_calls (** Return the in/out calls of the node *)
 val get_defined_nodes : t -> Procname.t list (** Return the list of nodes which are defined *)
 val get_defined_children: t -> Procname.t -> Procname.Set.t (** Return the children of [n] which are defined *)
--- a/infer/src/backend/inferanalyze.ml
+++ b/infer/src/backend/inferanalyze.ml
@ -586,6 +586,56 @@ end
 let proc_list_to_set proc_list =
  list_fold_left (fun s p -> Procname.Set.add p s) Procname.Set.empty proc_list

+(** compute the files to analyze map for incremental mode *)
+let compute_to_analyze_map_incremental files_changed_map global_cg exe_env =
+  let changed_fold_f files_changed_map f =
+    let apply_f _ procs acc =
+      Procname.Set.fold (fun proc acc -> Procname.Set.union acc (f proc)) procs acc in
+    Procname.Map.fold apply_f files_changed_map Procname.Set.empty in
+  (* all callers of changed procedures *)
+  let callers_of_changed =
+    changed_fold_f files_changed_map (fun p -> Cg.get_ancestors global_cg p) in
+  (* all callees of changed procedures *)
+  let callees_of_changed =
+    changed_fold_f files_changed_map (fun p -> Cg.get_heirs global_cg p) in
+  (* add a procedure to the file -> (procedures to analyze) map *)
+  let add_proc_to_map proc map =
+    let proc_file_pname = source_file_to_pname (Exe_env.get_source exe_env proc) in
+    let procs_to_analyze =
+      try Procname.Map.find proc_file_pname map
+      with Not_found -> Procname.Set.empty in
+    let procs_to_analyze' = Procname.Set.add proc procs_to_analyze in
+    Procname.Map.add proc_file_pname procs_to_analyze' map in
+  let get_specs_filename pname =
+    Specs.res_dir_specs_filename pname in
+  let is_stale pname =
+    let specs_file = get_specs_filename pname in
+    match Specs.load_summary specs_file with
+    | Some summary -> summary.Specs.status = Specs.STALE
+    | None -> false in
+  (* add stale callees to the map *)
+  let add_stale_callee_to_map callee map =
+    if is_stale callee then add_proc_to_map callee map
+    else map in
+  let files_changed_map' =
+    Procname.Set.fold add_stale_callee_to_map callees_of_changed files_changed_map in
+  if !incremental_mode = ANALYZE_CHANGED_ONLY then
+    (* mark all caller specs stale. this ensures future runs of the analysis that need to use specs
+       from callers will re-compute them first. we do this instead of deleting the specs because
+       that would force the next analysis run to re-compute them even if it doesn't need them. *)
+    let mark_spec_as_stale pname =
+      let specs_file = get_specs_filename pname in
+      match Specs.load_summary specs_file with
+      | Some summary ->
+          let summary' = { summary with Specs.status = Specs.STALE } in
+          Specs.store_summary pname summary'
+      | None -> ()  in
+    Procname.Set.iter (fun caller -> mark_spec_as_stale caller) callers_of_changed;
+    files_changed_map'
+  else (* !incremental_mode = ANALYZE_CHANGED_AND_DEPENDENTS *)
+    (* add callers of changed procedures to the map of stuff to analyze *)
+    Procname.Set.fold add_proc_to_map callers_of_changed files_changed_map'
+
 (** compute the clusters *)
 let compute_clusters exe_env files_changed : cluster list =
  if !trace_clusters then
@ -613,36 +663,17 @@ let compute_clusters exe_env files_changed : cluster list =
  let files = Cg.get_defined_nodes file_cg in
  let num_files = list_length files in
  L.err "@.Found %d defined procedures in %d files.@." (list_length defined_procs) num_files;
-  let to_analyze_map = match !incremental_mode with
-    | ANALYZE_ALL ->
-        (* get procedures defined in a file *)
-        let get_defined_procs file_pname = match file_pname_to_cg file_pname with
-          | None -> Procname.Set.empty
-          | Some cg -> proc_list_to_set (Cg.get_defined_nodes cg) in
-        list_fold_left
-          (fun m file_pname -> Procname.Map.add file_pname (get_defined_procs file_pname) m)
-          Procname.Map.empty
-          files
-    | ANALYZE_CHANGED_ONLY -> files_changed
-    | ANALYZE_CHANGED_AND_DEPENDENCIES ->
-        (* get all callers of changed procedures *)
-        let callers_of_changed =
-          let collect_callers_for_procs _ procs callers =
-            Procname.Set.fold
-              (fun proc callers -> Procname.Set.union callers (Cg.get_ancestors global_cg proc))
-              procs
-              callers in
-          Procname.Map.fold collect_callers_for_procs files_changed Procname.Set.empty in
-        (* add a caller to the file -> (procedures to analyze) map *)
-        let add_caller_to_map caller map =
-          let caller_file_pname = source_file_to_pname (Exe_env.get_source exe_env caller) in
-          let procs_to_analyze =
-            try Procname.Map.find caller_file_pname map
-            with Not_found -> Procname.Set.empty in
-          let procs_to_analyze' = Procname.Set.add caller procs_to_analyze in
-          Procname.Map.add caller_file_pname procs_to_analyze' map in
-        (* add the (transitive) callers of all changed procedures to the [files_changed] map *)
-        Procname.Set.fold add_caller_to_map callers_of_changed files_changed in
+  let to_analyze_map =
+    if !incremental_mode = ANALYZE_ALL then
+      (* get all procedures defined in a file *)
+      let get_defined_procs file_pname = match file_pname_to_cg file_pname with
+        | None -> Procname.Set.empty
+        | Some cg -> proc_list_to_set (Cg.get_defined_nodes cg) in
+      list_fold_left
+        (fun m file_pname -> Procname.Map.add file_pname (get_defined_procs file_pname) m)
+        Procname.Map.empty
+        files
+    else compute_to_analyze_map_incremental files_changed global_cg exe_env in
  L.err "Analyzing %d files.@.@." (Procname.Map.cardinal to_analyze_map);
  let clusters = create_minimal_clusters file_cg exe_env to_analyze_map in
  L.err "Minimal clusters:@.";
--- a/infer/src/backend/specs.ml
+++ b/infer/src/backend/specs.ml
@ -297,7 +297,7 @@ type stats =
    cyclomatic : int;
  }

-type status = ACTIVE | INACTIVE
+type status = ACTIVE | INACTIVE | STALE

 type phase = FOOTPRINT | RE_EXECUTION

--- a/infer/src/backend/specs.mli
+++ b/infer/src/backend/specs.mli
@ -112,7 +112,7 @@ type stats =
    cyclomatic : int;
  }

-type status = ACTIVE | INACTIVE
+type status = ACTIVE | INACTIVE | STALE

 type phase = FOOTPRINT | RE_EXECUTION

--- a/infer/tests/codetoanalyze/java/incremental/changed_only_future/BUCK
+++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/BUCK
@ -0,0 +1,41 @@
+v1_files = glob([ '**/*.java.v1'])
+v2_files = glob(['**/*.java.v2'])
+v3_files = glob(['**/*.java.v3'])
+sources = v1_files + v2_files + v3_files
+
+def copy_files_strip_suffix_cmd(sfx, files):
+    return ' && '.join([' '.join(['cp', f, f.replace(sfx, '')]) for f in files])
+
+out = 'out'
+clean_out = ' '.join(['rm', '-rf', out])
+clean_java = ' '.join(['rm', '-rf', '*.java'])
+clean_cmd = ' && '.join([clean_out, clean_java])
+stripped_suffix_files = map(lambda f: f.replace('.v1', ''), v1_files)
+to_compile = ' '.join(stripped_suffix_files)
+infer_cmd = ' '.join([
+    'infer',
+    '-i',
+    '--changed-only',
+    '--absolute-paths',
+    '-o', out,
+    '-a', 'infer',
+    '--',
+    'javac',
+    to_compile
+])
+v1_copy_cmd = copy_files_strip_suffix_cmd('.v1', v1_files)
+v2_copy_cmd = copy_files_strip_suffix_cmd('.v2', v2_files)
+v3_copy_cmd = copy_files_strip_suffix_cmd('.v3', v3_files)
+stats_copy_cmd = ' '.join(['cp', out + '/stats.json', '$OUT'])
+command = ' && '.join([clean_cmd, v1_copy_cmd, infer_cmd, v2_copy_cmd, infer_cmd, v3_copy_cmd, infer_cmd, stats_copy_cmd])
+
+genrule(
+    name = 'analyze',
+    srcs = sources,
+    out = 'stats.json',
+    cmd = command,
+    deps = [],
+    visibility = [
+        'PUBLIC',
+    ]
+)
--- a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v1
+++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v1
@ -0,0 +1,18 @@
+/*
+* Copyright (c) 2015 - present Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD style license found in the
+* LICENSE file in the root directory of this source tree. An additional grant
+* of patent rights can be found in the PATENTS file in the same directory.
+*/
+
+package codetoanalyze.java.incremental.changed_only_future;
+
+class Child {
+
+  Object bar() {
+    return new Object();
+  }
+
+}
--- a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v2
+++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v2
@ -0,0 +1,18 @@
+/*
+* Copyright (c) 2015 - present Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD style license found in the
+* LICENSE file in the root directory of this source tree. An additional grant
+* of patent rights can be found in the PATENTS file in the same directory.
+*/
+
+package codetoanalyze.java.incremental.changed_only_future;
+
+class Child {
+
+  Object bar() {
+    return null;
+  }
+
+}
--- a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v1
+++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v1
@ -0,0 +1,23 @@
+/*
+* Copyright (c) 2015 - present Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD style license found in the
+* LICENSE file in the root directory of this source tree. An additional grant
+* of patent rights can be found in the PATENTS file in the same directory.
+*/
+
+package codetoanalyze.java.incremental.changed_only_future;
+
+class Parent {
+
+  Object foo() {
+    Child c = new Child();
+    return c.bar();
+  }
+
+  void bar() {
+    Object o1 = new Object();
+  }
+  
+}
--- a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v3
+++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v3
@ -0,0 +1,27 @@
+/*
+* Copyright (c) 2015 - present Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD style license found in the
+* LICENSE file in the root directory of this source tree. An additional grant
+* of patent rights can be found in the PATENTS file in the same directory.
+*/
+
+package codetoanalyze.java.incremental.changed_only_future;
+
+class Parent {
+
+  Object foo() {
+    Child c = new Child();
+    return c.bar();
+  }
+
+  void bar() {
+    Object o1 = new Object();
+  }
+
+  void baz() {
+    foo().toString(); // should be an NPE with Child.v2
+  }
+  
+}
--- a/infer/tests/endtoend/java/incremental/BUCK
+++ b/infer/tests/endtoend/java/incremental/BUCK
@ -12,6 +12,7 @@ java_test(
        '//infer/tests/codetoanalyze/java/incremental/parent_changed:analyze',
        '//infer/tests/codetoanalyze/java/incremental/child_changed:analyze',
        '//infer/tests/codetoanalyze/java/incremental/changed_only_mode:analyze',
+        '//infer/tests/codetoanalyze/java/incremental/changed_only_future:analyze',
    ],
    visibility=[
        'PUBLIC',
--- a/infer/tests/endtoend/java/incremental/ChangedOnlyFuture.java
+++ b/infer/tests/endtoend/java/incremental/ChangedOnlyFuture.java
@ -0,0 +1,62 @@
+/*
+* Copyright (c) 2015 - present Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD style license found in the
+* LICENSE file in the root directory of this source tree. An additional grant
+* of patent rights can be found in the PATENTS file in the same directory.
+*/
+
+package endtoend.java.incremental;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static utils.matchers.NumberOfFilesAnalyzed.numberOfFilesAnalyzed;
+import static utils.matchers.NumberOfProceduresAnalyzed.numberOfProceduresAnalyzed;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.nio.file.Path;
+import java.io.*;
+
+import utils.InferException;
+import utils.InferStats;
+
+/**
+ * Make sure the incremental --changed-only mode doesn't corrupt the results for future analyses by
+ * refusing to analyze callers of changed procedures (and thereby creating stale specs for some poor
+ * future analysis).
+ */
+public class ChangedOnlyFuture {
+
+  public static final String SOURCE_DIR =
+      "/infer/tests/codetoanalyze/java/incremental/changed_only_future/";
+
+  private static InferStats inferStats;
+
+  @BeforeClass
+  public static void loadResults() throws InterruptedException, IOException {
+    inferStats = InferStats.loadInferStats(ChangedOnlyModeTest.class, SOURCE_DIR);
+  }
+
+  @Test
+  public void onlyChangedFileReanalyzedInChangedOnlyMode()
+      throws IOException, InterruptedException, InferException {
+    assertThat(
+        "Only the changed file should be re-analyzed",
+        inferStats,
+        numberOfFilesAnalyzed(1));
+  }
+
+  @Test
+  public void changedAndStaleProcsReanalyze()
+      throws IOException, InterruptedException, InferException {
+    assertThat(
+        "Both the new (changed) procedure and its stale (unchanged) callee should be re-analyzed",
+        inferStats,
+        numberOfProceduresAnalyzed(2));
+  }
+
+}