[Infer][incremental] Preventing --changed-only incremental mode from corrupting the future

Summary:
When someone runs --changed-only mode, there is a risk of corrupting the results
for future analyses. The problem is that changed-only mode does not analyze the callers of changed
procedures. If a subsequent analysis relies on the specs of one of these callers, they will be stale
and may give the wrong results. To be concrete, let's say we know `Parent.foo()` calls `Child.bar()` and we do the following rounds of analysis:

Analysis round 1: Analyze all files, including `Parent` and `Child`
Analysis round 2: Analyze `Child.bar()` only with `--changed-only flag`. `Parent.foo()` is now stale.
Analysis round 3: Add procedure `Parent.baz()` that calls `Parent.foo()`, analyze in (any) incremental mode.

The analysis will only analyze `Parent.baz()`. However, the specs for `Parent.foo()` are stale and may give us bad results for `Parent.baz()`. We want the analysis to re-analyze `Parent.baz()`, but before this diff it will not.

This diff fixes this problem by adding a `STALE` status bit to procedure summaries. In `--changed-only` mode,
the callers of a changed procedures are not re-analyzed, but their summaries are marked as stale. For both
`--changed-only` and regular incremental mode, callees of changed procedures that are marked as stale are
re-analyzed even if they have not changed. This is better than a more obvious solution like deleting stale
procedure summaries, since that would force the next analysis to re-analyze all stale procedures even if it
does not need the results for whatever analysis it is doing. This scheme implemented in this diff ensures
that each analysis only does the work that it needs to compute reliable results for its changed procedures.
master
Sam Blackshear 9 years ago
parent bdbc524f53
commit cc2fda8165

@ -33,6 +33,7 @@ val create : unit -> t (** Create an empty call graph *)
val extend : t -> t -> unit (** [extend cg1 gc2] extends [cg1] in place with nodes and edges from [gc2]; undefined nodes become defined if at least one side is. *) val extend : t -> t -> unit (** [extend cg1 gc2] extends [cg1] in place with nodes and edges from [gc2]; undefined nodes become defined if at least one side is. *)
val get_all_children : t -> Procname.t -> Procname.Set.t (** Return all the children of [n], whether defined or not *) val get_all_children : t -> Procname.t -> Procname.Set.t (** Return all the children of [n], whether defined or not *)
val get_ancestors : t -> Procname.t -> Procname.Set.t (** Compute the ancestors of the node, if not pre-computed already *) val get_ancestors : t -> Procname.t -> Procname.Set.t (** Compute the ancestors of the node, if not pre-computed already *)
val get_heirs : t -> Procname.t -> Procname.Set.t (** Compute the heirs of the node, if not pre-computed already *)
val get_calls : t -> Procname.t -> in_out_calls (** Return the in/out calls of the node *) val get_calls : t -> Procname.t -> in_out_calls (** Return the in/out calls of the node *)
val get_defined_nodes : t -> Procname.t list (** Return the list of nodes which are defined *) val get_defined_nodes : t -> Procname.t list (** Return the list of nodes which are defined *)
val get_defined_children: t -> Procname.t -> Procname.Set.t (** Return the children of [n] which are defined *) val get_defined_children: t -> Procname.t -> Procname.Set.t (** Return the children of [n] which are defined *)

@ -586,6 +586,56 @@ end
let proc_list_to_set proc_list = let proc_list_to_set proc_list =
list_fold_left (fun s p -> Procname.Set.add p s) Procname.Set.empty proc_list list_fold_left (fun s p -> Procname.Set.add p s) Procname.Set.empty proc_list
(** compute the files to analyze map for incremental mode *)
let compute_to_analyze_map_incremental files_changed_map global_cg exe_env =
let changed_fold_f files_changed_map f =
let apply_f _ procs acc =
Procname.Set.fold (fun proc acc -> Procname.Set.union acc (f proc)) procs acc in
Procname.Map.fold apply_f files_changed_map Procname.Set.empty in
(* all callers of changed procedures *)
let callers_of_changed =
changed_fold_f files_changed_map (fun p -> Cg.get_ancestors global_cg p) in
(* all callees of changed procedures *)
let callees_of_changed =
changed_fold_f files_changed_map (fun p -> Cg.get_heirs global_cg p) in
(* add a procedure to the file -> (procedures to analyze) map *)
let add_proc_to_map proc map =
let proc_file_pname = source_file_to_pname (Exe_env.get_source exe_env proc) in
let procs_to_analyze =
try Procname.Map.find proc_file_pname map
with Not_found -> Procname.Set.empty in
let procs_to_analyze' = Procname.Set.add proc procs_to_analyze in
Procname.Map.add proc_file_pname procs_to_analyze' map in
let get_specs_filename pname =
Specs.res_dir_specs_filename pname in
let is_stale pname =
let specs_file = get_specs_filename pname in
match Specs.load_summary specs_file with
| Some summary -> summary.Specs.status = Specs.STALE
| None -> false in
(* add stale callees to the map *)
let add_stale_callee_to_map callee map =
if is_stale callee then add_proc_to_map callee map
else map in
let files_changed_map' =
Procname.Set.fold add_stale_callee_to_map callees_of_changed files_changed_map in
if !incremental_mode = ANALYZE_CHANGED_ONLY then
(* mark all caller specs stale. this ensures future runs of the analysis that need to use specs
from callers will re-compute them first. we do this instead of deleting the specs because
that would force the next analysis run to re-compute them even if it doesn't need them. *)
let mark_spec_as_stale pname =
let specs_file = get_specs_filename pname in
match Specs.load_summary specs_file with
| Some summary ->
let summary' = { summary with Specs.status = Specs.STALE } in
Specs.store_summary pname summary'
| None -> () in
Procname.Set.iter (fun caller -> mark_spec_as_stale caller) callers_of_changed;
files_changed_map'
else (* !incremental_mode = ANALYZE_CHANGED_AND_DEPENDENTS *)
(* add callers of changed procedures to the map of stuff to analyze *)
Procname.Set.fold add_proc_to_map callers_of_changed files_changed_map'
(** compute the clusters *) (** compute the clusters *)
let compute_clusters exe_env files_changed : cluster list = let compute_clusters exe_env files_changed : cluster list =
if !trace_clusters then if !trace_clusters then
@ -613,9 +663,9 @@ let compute_clusters exe_env files_changed : cluster list =
let files = Cg.get_defined_nodes file_cg in let files = Cg.get_defined_nodes file_cg in
let num_files = list_length files in let num_files = list_length files in
L.err "@.Found %d defined procedures in %d files.@." (list_length defined_procs) num_files; L.err "@.Found %d defined procedures in %d files.@." (list_length defined_procs) num_files;
let to_analyze_map = match !incremental_mode with let to_analyze_map =
| ANALYZE_ALL -> if !incremental_mode = ANALYZE_ALL then
(* get procedures defined in a file *) (* get all procedures defined in a file *)
let get_defined_procs file_pname = match file_pname_to_cg file_pname with let get_defined_procs file_pname = match file_pname_to_cg file_pname with
| None -> Procname.Set.empty | None -> Procname.Set.empty
| Some cg -> proc_list_to_set (Cg.get_defined_nodes cg) in | Some cg -> proc_list_to_set (Cg.get_defined_nodes cg) in
@ -623,26 +673,7 @@ let compute_clusters exe_env files_changed : cluster list =
(fun m file_pname -> Procname.Map.add file_pname (get_defined_procs file_pname) m) (fun m file_pname -> Procname.Map.add file_pname (get_defined_procs file_pname) m)
Procname.Map.empty Procname.Map.empty
files files
| ANALYZE_CHANGED_ONLY -> files_changed else compute_to_analyze_map_incremental files_changed global_cg exe_env in
| ANALYZE_CHANGED_AND_DEPENDENCIES ->
(* get all callers of changed procedures *)
let callers_of_changed =
let collect_callers_for_procs _ procs callers =
Procname.Set.fold
(fun proc callers -> Procname.Set.union callers (Cg.get_ancestors global_cg proc))
procs
callers in
Procname.Map.fold collect_callers_for_procs files_changed Procname.Set.empty in
(* add a caller to the file -> (procedures to analyze) map *)
let add_caller_to_map caller map =
let caller_file_pname = source_file_to_pname (Exe_env.get_source exe_env caller) in
let procs_to_analyze =
try Procname.Map.find caller_file_pname map
with Not_found -> Procname.Set.empty in
let procs_to_analyze' = Procname.Set.add caller procs_to_analyze in
Procname.Map.add caller_file_pname procs_to_analyze' map in
(* add the (transitive) callers of all changed procedures to the [files_changed] map *)
Procname.Set.fold add_caller_to_map callers_of_changed files_changed in
L.err "Analyzing %d files.@.@." (Procname.Map.cardinal to_analyze_map); L.err "Analyzing %d files.@.@." (Procname.Map.cardinal to_analyze_map);
let clusters = create_minimal_clusters file_cg exe_env to_analyze_map in let clusters = create_minimal_clusters file_cg exe_env to_analyze_map in
L.err "Minimal clusters:@."; L.err "Minimal clusters:@.";

@ -297,7 +297,7 @@ type stats =
cyclomatic : int; cyclomatic : int;
} }
type status = ACTIVE | INACTIVE type status = ACTIVE | INACTIVE | STALE
type phase = FOOTPRINT | RE_EXECUTION type phase = FOOTPRINT | RE_EXECUTION

@ -112,7 +112,7 @@ type stats =
cyclomatic : int; cyclomatic : int;
} }
type status = ACTIVE | INACTIVE type status = ACTIVE | INACTIVE | STALE
type phase = FOOTPRINT | RE_EXECUTION type phase = FOOTPRINT | RE_EXECUTION

@ -0,0 +1,41 @@
v1_files = glob([ '**/*.java.v1'])
v2_files = glob(['**/*.java.v2'])
v3_files = glob(['**/*.java.v3'])
sources = v1_files + v2_files + v3_files
def copy_files_strip_suffix_cmd(sfx, files):
return ' && '.join([' '.join(['cp', f, f.replace(sfx, '')]) for f in files])
out = 'out'
clean_out = ' '.join(['rm', '-rf', out])
clean_java = ' '.join(['rm', '-rf', '*.java'])
clean_cmd = ' && '.join([clean_out, clean_java])
stripped_suffix_files = map(lambda f: f.replace('.v1', ''), v1_files)
to_compile = ' '.join(stripped_suffix_files)
infer_cmd = ' '.join([
'infer',
'-i',
'--changed-only',
'--absolute-paths',
'-o', out,
'-a', 'infer',
'--',
'javac',
to_compile
])
v1_copy_cmd = copy_files_strip_suffix_cmd('.v1', v1_files)
v2_copy_cmd = copy_files_strip_suffix_cmd('.v2', v2_files)
v3_copy_cmd = copy_files_strip_suffix_cmd('.v3', v3_files)
stats_copy_cmd = ' '.join(['cp', out + '/stats.json', '$OUT'])
command = ' && '.join([clean_cmd, v1_copy_cmd, infer_cmd, v2_copy_cmd, infer_cmd, v3_copy_cmd, infer_cmd, stats_copy_cmd])
genrule(
name = 'analyze',
srcs = sources,
out = 'stats.json',
cmd = command,
deps = [],
visibility = [
'PUBLIC',
]
)

@ -0,0 +1,18 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
package codetoanalyze.java.incremental.changed_only_future;
class Child {
Object bar() {
return new Object();
}
}

@ -0,0 +1,18 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
package codetoanalyze.java.incremental.changed_only_future;
class Child {
Object bar() {
return null;
}
}

@ -0,0 +1,23 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
package codetoanalyze.java.incremental.changed_only_future;
class Parent {
Object foo() {
Child c = new Child();
return c.bar();
}
void bar() {
Object o1 = new Object();
}
}

@ -0,0 +1,27 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
package codetoanalyze.java.incremental.changed_only_future;
class Parent {
Object foo() {
Child c = new Child();
return c.bar();
}
void bar() {
Object o1 = new Object();
}
void baz() {
foo().toString(); // should be an NPE with Child.v2
}
}

@ -12,6 +12,7 @@ java_test(
'//infer/tests/codetoanalyze/java/incremental/parent_changed:analyze', '//infer/tests/codetoanalyze/java/incremental/parent_changed:analyze',
'//infer/tests/codetoanalyze/java/incremental/child_changed:analyze', '//infer/tests/codetoanalyze/java/incremental/child_changed:analyze',
'//infer/tests/codetoanalyze/java/incremental/changed_only_mode:analyze', '//infer/tests/codetoanalyze/java/incremental/changed_only_mode:analyze',
'//infer/tests/codetoanalyze/java/incremental/changed_only_future:analyze',
], ],
visibility=[ visibility=[
'PUBLIC', 'PUBLIC',

@ -0,0 +1,62 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
package endtoend.java.incremental;
import static org.hamcrest.MatcherAssert.assertThat;
import static utils.matchers.NumberOfFilesAnalyzed.numberOfFilesAnalyzed;
import static utils.matchers.NumberOfProceduresAnalyzed.numberOfProceduresAnalyzed;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.nio.file.Paths;
import java.nio.file.Path;
import java.io.*;
import utils.InferException;
import utils.InferStats;
/**
* Make sure the incremental --changed-only mode doesn't corrupt the results for future analyses by
* refusing to analyze callers of changed procedures (and thereby creating stale specs for some poor
* future analysis).
*/
public class ChangedOnlyFuture {
public static final String SOURCE_DIR =
"/infer/tests/codetoanalyze/java/incremental/changed_only_future/";
private static InferStats inferStats;
@BeforeClass
public static void loadResults() throws InterruptedException, IOException {
inferStats = InferStats.loadInferStats(ChangedOnlyModeTest.class, SOURCE_DIR);
}
@Test
public void onlyChangedFileReanalyzedInChangedOnlyMode()
throws IOException, InterruptedException, InferException {
assertThat(
"Only the changed file should be re-analyzed",
inferStats,
numberOfFilesAnalyzed(1));
}
@Test
public void changedAndStaleProcsReanalyze()
throws IOException, InterruptedException, InferException {
assertThat(
"Both the new (changed) procedure and its stale (unchanged) callee should be re-analyzed",
inferStats,
numberOfProceduresAnalyzed(2));
}
}
Loading…
Cancel
Save