From cc2fda8165ee6780f358aace6a7a8a145945a447 Mon Sep 17 00:00:00 2001 From: Sam Blackshear Date: Thu, 27 Aug 2015 11:54:18 -0600 Subject: [PATCH] [Infer][incremental] Preventing --changed-only incremental mode from corrupting the future Summary: When someone runs --changed-only mode, there is a risk of corrupting the results for future analyses. The problem is that changed-only mode does not analyze the callers of changed procedures. If a subsequent analysis relies on the specs of one of these callers, they will be stale and may give the wrong results. To be concrete, let's say we know `Parent.foo()` calls `Child.bar()` and we do the following rounds of analysis: Analysis round 1: Analyze all files, including `Parent` and `Child` Analysis round 2: Analyze `Child.bar()` only with `--changed-only flag`. `Parent.foo()` is now stale. Analysis round 3: Add procedure `Parent.baz()` that calls `Parent.foo()`, analyze in (any) incremental mode. The analysis will only analyze `Parent.baz()`. However, the specs for `Parent.foo()` are stale and may give us bad results for `Parent.baz()`. We want the analysis to re-analyze `Parent.baz()`, but before this diff it will not. This diff fixes this problem by adding a `STALE` status bit to procedure summaries. In `--changed-only` mode, the callers of a changed procedures are not re-analyzed, but their summaries are marked as stale. For both `--changed-only` and regular incremental mode, callees of changed procedures that are marked as stale are re-analyzed even if they have not changed. This is better than a more obvious solution like deleting stale procedure summaries, since that would force the next analysis to re-analyze all stale procedures even if it does not need the results for whatever analysis it is doing. This scheme implemented in this diff ensures that each analysis only does the work that it needs to compute reliable results for its changed procedures. --- infer/src/backend/cg.mli | 1 + infer/src/backend/inferanalyze.ml | 91 +++++++++++++------ infer/src/backend/specs.ml | 2 +- infer/src/backend/specs.mli | 2 +- .../java/incremental/changed_only_future/BUCK | 41 +++++++++ .../changed_only_future/Child.java.v1 | 18 ++++ .../changed_only_future/Child.java.v2 | 18 ++++ .../changed_only_future/Parent.java.v1 | 23 +++++ .../changed_only_future/Parent.java.v3 | 27 ++++++ infer/tests/endtoend/java/incremental/BUCK | 1 + .../java/incremental/ChangedOnlyFuture.java | 62 +++++++++++++ 11 files changed, 254 insertions(+), 32 deletions(-) create mode 100644 infer/tests/codetoanalyze/java/incremental/changed_only_future/BUCK create mode 100644 infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v1 create mode 100644 infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v2 create mode 100644 infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v1 create mode 100644 infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v3 create mode 100644 infer/tests/endtoend/java/incremental/ChangedOnlyFuture.java diff --git a/infer/src/backend/cg.mli b/infer/src/backend/cg.mli index 15c9b011d..34837e495 100644 --- a/infer/src/backend/cg.mli +++ b/infer/src/backend/cg.mli @@ -33,6 +33,7 @@ val create : unit -> t (** Create an empty call graph *) val extend : t -> t -> unit (** [extend cg1 gc2] extends [cg1] in place with nodes and edges from [gc2]; undefined nodes become defined if at least one side is. *) val get_all_children : t -> Procname.t -> Procname.Set.t (** Return all the children of [n], whether defined or not *) val get_ancestors : t -> Procname.t -> Procname.Set.t (** Compute the ancestors of the node, if not pre-computed already *) +val get_heirs : t -> Procname.t -> Procname.Set.t (** Compute the heirs of the node, if not pre-computed already *) val get_calls : t -> Procname.t -> in_out_calls (** Return the in/out calls of the node *) val get_defined_nodes : t -> Procname.t list (** Return the list of nodes which are defined *) val get_defined_children: t -> Procname.t -> Procname.Set.t (** Return the children of [n] which are defined *) diff --git a/infer/src/backend/inferanalyze.ml b/infer/src/backend/inferanalyze.ml index 5cfcd5d7e..87228d0e2 100644 --- a/infer/src/backend/inferanalyze.ml +++ b/infer/src/backend/inferanalyze.ml @@ -586,6 +586,56 @@ end let proc_list_to_set proc_list = list_fold_left (fun s p -> Procname.Set.add p s) Procname.Set.empty proc_list +(** compute the files to analyze map for incremental mode *) +let compute_to_analyze_map_incremental files_changed_map global_cg exe_env = + let changed_fold_f files_changed_map f = + let apply_f _ procs acc = + Procname.Set.fold (fun proc acc -> Procname.Set.union acc (f proc)) procs acc in + Procname.Map.fold apply_f files_changed_map Procname.Set.empty in + (* all callers of changed procedures *) + let callers_of_changed = + changed_fold_f files_changed_map (fun p -> Cg.get_ancestors global_cg p) in + (* all callees of changed procedures *) + let callees_of_changed = + changed_fold_f files_changed_map (fun p -> Cg.get_heirs global_cg p) in + (* add a procedure to the file -> (procedures to analyze) map *) + let add_proc_to_map proc map = + let proc_file_pname = source_file_to_pname (Exe_env.get_source exe_env proc) in + let procs_to_analyze = + try Procname.Map.find proc_file_pname map + with Not_found -> Procname.Set.empty in + let procs_to_analyze' = Procname.Set.add proc procs_to_analyze in + Procname.Map.add proc_file_pname procs_to_analyze' map in + let get_specs_filename pname = + Specs.res_dir_specs_filename pname in + let is_stale pname = + let specs_file = get_specs_filename pname in + match Specs.load_summary specs_file with + | Some summary -> summary.Specs.status = Specs.STALE + | None -> false in + (* add stale callees to the map *) + let add_stale_callee_to_map callee map = + if is_stale callee then add_proc_to_map callee map + else map in + let files_changed_map' = + Procname.Set.fold add_stale_callee_to_map callees_of_changed files_changed_map in + if !incremental_mode = ANALYZE_CHANGED_ONLY then + (* mark all caller specs stale. this ensures future runs of the analysis that need to use specs + from callers will re-compute them first. we do this instead of deleting the specs because + that would force the next analysis run to re-compute them even if it doesn't need them. *) + let mark_spec_as_stale pname = + let specs_file = get_specs_filename pname in + match Specs.load_summary specs_file with + | Some summary -> + let summary' = { summary with Specs.status = Specs.STALE } in + Specs.store_summary pname summary' + | None -> () in + Procname.Set.iter (fun caller -> mark_spec_as_stale caller) callers_of_changed; + files_changed_map' + else (* !incremental_mode = ANALYZE_CHANGED_AND_DEPENDENTS *) + (* add callers of changed procedures to the map of stuff to analyze *) + Procname.Set.fold add_proc_to_map callers_of_changed files_changed_map' + (** compute the clusters *) let compute_clusters exe_env files_changed : cluster list = if !trace_clusters then @@ -613,36 +663,17 @@ let compute_clusters exe_env files_changed : cluster list = let files = Cg.get_defined_nodes file_cg in let num_files = list_length files in L.err "@.Found %d defined procedures in %d files.@." (list_length defined_procs) num_files; - let to_analyze_map = match !incremental_mode with - | ANALYZE_ALL -> - (* get procedures defined in a file *) - let get_defined_procs file_pname = match file_pname_to_cg file_pname with - | None -> Procname.Set.empty - | Some cg -> proc_list_to_set (Cg.get_defined_nodes cg) in - list_fold_left - (fun m file_pname -> Procname.Map.add file_pname (get_defined_procs file_pname) m) - Procname.Map.empty - files - | ANALYZE_CHANGED_ONLY -> files_changed - | ANALYZE_CHANGED_AND_DEPENDENCIES -> - (* get all callers of changed procedures *) - let callers_of_changed = - let collect_callers_for_procs _ procs callers = - Procname.Set.fold - (fun proc callers -> Procname.Set.union callers (Cg.get_ancestors global_cg proc)) - procs - callers in - Procname.Map.fold collect_callers_for_procs files_changed Procname.Set.empty in - (* add a caller to the file -> (procedures to analyze) map *) - let add_caller_to_map caller map = - let caller_file_pname = source_file_to_pname (Exe_env.get_source exe_env caller) in - let procs_to_analyze = - try Procname.Map.find caller_file_pname map - with Not_found -> Procname.Set.empty in - let procs_to_analyze' = Procname.Set.add caller procs_to_analyze in - Procname.Map.add caller_file_pname procs_to_analyze' map in - (* add the (transitive) callers of all changed procedures to the [files_changed] map *) - Procname.Set.fold add_caller_to_map callers_of_changed files_changed in + let to_analyze_map = + if !incremental_mode = ANALYZE_ALL then + (* get all procedures defined in a file *) + let get_defined_procs file_pname = match file_pname_to_cg file_pname with + | None -> Procname.Set.empty + | Some cg -> proc_list_to_set (Cg.get_defined_nodes cg) in + list_fold_left + (fun m file_pname -> Procname.Map.add file_pname (get_defined_procs file_pname) m) + Procname.Map.empty + files + else compute_to_analyze_map_incremental files_changed global_cg exe_env in L.err "Analyzing %d files.@.@." (Procname.Map.cardinal to_analyze_map); let clusters = create_minimal_clusters file_cg exe_env to_analyze_map in L.err "Minimal clusters:@."; diff --git a/infer/src/backend/specs.ml b/infer/src/backend/specs.ml index fb93a67f0..3c1b0a782 100644 --- a/infer/src/backend/specs.ml +++ b/infer/src/backend/specs.ml @@ -297,7 +297,7 @@ type stats = cyclomatic : int; } -type status = ACTIVE | INACTIVE +type status = ACTIVE | INACTIVE | STALE type phase = FOOTPRINT | RE_EXECUTION diff --git a/infer/src/backend/specs.mli b/infer/src/backend/specs.mli index e33ab8900..bd33f11d2 100644 --- a/infer/src/backend/specs.mli +++ b/infer/src/backend/specs.mli @@ -112,7 +112,7 @@ type stats = cyclomatic : int; } -type status = ACTIVE | INACTIVE +type status = ACTIVE | INACTIVE | STALE type phase = FOOTPRINT | RE_EXECUTION diff --git a/infer/tests/codetoanalyze/java/incremental/changed_only_future/BUCK b/infer/tests/codetoanalyze/java/incremental/changed_only_future/BUCK new file mode 100644 index 000000000..b28914a4d --- /dev/null +++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/BUCK @@ -0,0 +1,41 @@ +v1_files = glob([ '**/*.java.v1']) +v2_files = glob(['**/*.java.v2']) +v3_files = glob(['**/*.java.v3']) +sources = v1_files + v2_files + v3_files + +def copy_files_strip_suffix_cmd(sfx, files): + return ' && '.join([' '.join(['cp', f, f.replace(sfx, '')]) for f in files]) + +out = 'out' +clean_out = ' '.join(['rm', '-rf', out]) +clean_java = ' '.join(['rm', '-rf', '*.java']) +clean_cmd = ' && '.join([clean_out, clean_java]) +stripped_suffix_files = map(lambda f: f.replace('.v1', ''), v1_files) +to_compile = ' '.join(stripped_suffix_files) +infer_cmd = ' '.join([ + 'infer', + '-i', + '--changed-only', + '--absolute-paths', + '-o', out, + '-a', 'infer', + '--', + 'javac', + to_compile +]) +v1_copy_cmd = copy_files_strip_suffix_cmd('.v1', v1_files) +v2_copy_cmd = copy_files_strip_suffix_cmd('.v2', v2_files) +v3_copy_cmd = copy_files_strip_suffix_cmd('.v3', v3_files) +stats_copy_cmd = ' '.join(['cp', out + '/stats.json', '$OUT']) +command = ' && '.join([clean_cmd, v1_copy_cmd, infer_cmd, v2_copy_cmd, infer_cmd, v3_copy_cmd, infer_cmd, stats_copy_cmd]) + +genrule( + name = 'analyze', + srcs = sources, + out = 'stats.json', + cmd = command, + deps = [], + visibility = [ + 'PUBLIC', + ] +) diff --git a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v1 b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v1 new file mode 100644 index 000000000..e1f85f2f8 --- /dev/null +++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v1 @@ -0,0 +1,18 @@ +/* +* Copyright (c) 2015 - present Facebook, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD style license found in the +* LICENSE file in the root directory of this source tree. An additional grant +* of patent rights can be found in the PATENTS file in the same directory. +*/ + +package codetoanalyze.java.incremental.changed_only_future; + +class Child { + + Object bar() { + return new Object(); + } + +} diff --git a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v2 b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v2 new file mode 100644 index 000000000..39bcc0517 --- /dev/null +++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Child.java.v2 @@ -0,0 +1,18 @@ +/* +* Copyright (c) 2015 - present Facebook, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD style license found in the +* LICENSE file in the root directory of this source tree. An additional grant +* of patent rights can be found in the PATENTS file in the same directory. +*/ + +package codetoanalyze.java.incremental.changed_only_future; + +class Child { + + Object bar() { + return null; + } + +} diff --git a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v1 b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v1 new file mode 100644 index 000000000..24137ebf4 --- /dev/null +++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v1 @@ -0,0 +1,23 @@ +/* +* Copyright (c) 2015 - present Facebook, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD style license found in the +* LICENSE file in the root directory of this source tree. An additional grant +* of patent rights can be found in the PATENTS file in the same directory. +*/ + +package codetoanalyze.java.incremental.changed_only_future; + +class Parent { + + Object foo() { + Child c = new Child(); + return c.bar(); + } + + void bar() { + Object o1 = new Object(); + } + +} diff --git a/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v3 b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v3 new file mode 100644 index 000000000..5f943250d --- /dev/null +++ b/infer/tests/codetoanalyze/java/incremental/changed_only_future/Parent.java.v3 @@ -0,0 +1,27 @@ +/* +* Copyright (c) 2015 - present Facebook, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD style license found in the +* LICENSE file in the root directory of this source tree. An additional grant +* of patent rights can be found in the PATENTS file in the same directory. +*/ + +package codetoanalyze.java.incremental.changed_only_future; + +class Parent { + + Object foo() { + Child c = new Child(); + return c.bar(); + } + + void bar() { + Object o1 = new Object(); + } + + void baz() { + foo().toString(); // should be an NPE with Child.v2 + } + +} diff --git a/infer/tests/endtoend/java/incremental/BUCK b/infer/tests/endtoend/java/incremental/BUCK index 0bae701ce..950b55ff8 100644 --- a/infer/tests/endtoend/java/incremental/BUCK +++ b/infer/tests/endtoend/java/incremental/BUCK @@ -12,6 +12,7 @@ java_test( '//infer/tests/codetoanalyze/java/incremental/parent_changed:analyze', '//infer/tests/codetoanalyze/java/incremental/child_changed:analyze', '//infer/tests/codetoanalyze/java/incremental/changed_only_mode:analyze', + '//infer/tests/codetoanalyze/java/incremental/changed_only_future:analyze', ], visibility=[ 'PUBLIC', diff --git a/infer/tests/endtoend/java/incremental/ChangedOnlyFuture.java b/infer/tests/endtoend/java/incremental/ChangedOnlyFuture.java new file mode 100644 index 000000000..c6e4c1837 --- /dev/null +++ b/infer/tests/endtoend/java/incremental/ChangedOnlyFuture.java @@ -0,0 +1,62 @@ +/* +* Copyright (c) 2015 - present Facebook, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD style license found in the +* LICENSE file in the root directory of this source tree. An additional grant +* of patent rights can be found in the PATENTS file in the same directory. +*/ + +package endtoend.java.incremental; + +import static org.hamcrest.MatcherAssert.assertThat; +import static utils.matchers.NumberOfFilesAnalyzed.numberOfFilesAnalyzed; +import static utils.matchers.NumberOfProceduresAnalyzed.numberOfProceduresAnalyzed; + +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Paths; +import java.nio.file.Path; +import java.io.*; + +import utils.InferException; +import utils.InferStats; + +/** + * Make sure the incremental --changed-only mode doesn't corrupt the results for future analyses by + * refusing to analyze callers of changed procedures (and thereby creating stale specs for some poor + * future analysis). + */ +public class ChangedOnlyFuture { + + public static final String SOURCE_DIR = + "/infer/tests/codetoanalyze/java/incremental/changed_only_future/"; + + private static InferStats inferStats; + + @BeforeClass + public static void loadResults() throws InterruptedException, IOException { + inferStats = InferStats.loadInferStats(ChangedOnlyModeTest.class, SOURCE_DIR); + } + + @Test + public void onlyChangedFileReanalyzedInChangedOnlyMode() + throws IOException, InterruptedException, InferException { + assertThat( + "Only the changed file should be re-analyzed", + inferStats, + numberOfFilesAnalyzed(1)); + } + + @Test + public void changedAndStaleProcsReanalyze() + throws IOException, InterruptedException, InferException { + assertThat( + "Both the new (changed) procedure and its stale (unchanged) callee should be re-analyzed", + inferStats, + numberOfProceduresAnalyzed(2)); + } + +}