[Infer][stats] Getting reliable file count/LOC count by moving computation inside inferanalyze

master
Sam Blackshear 10 years ago
parent d5fc25f28a
commit 1a51254b8c

@ -391,7 +391,7 @@ class Infer:
if not os.path.isdir(self.args.infer_out):
raise e
self.stats = {'int': {}, 'float': {}}
self.stats = {'int': {}}
self.timing = {}
if self.args.specs_dirs:
@ -561,43 +561,9 @@ class Infer:
if self.args.buck and exit_status == os.EX_OK:
clean_infer_out(self.args.infer_out)
cfgs = os.path.join(self.args.infer_out, 'captured', '*', '')
captured_total = len(glob.glob(cfgs))
captured_plural = '' if captured_total <= 1 else 's'
print('\n%d file%s analyzed' % (captured_total, captured_plural))
logging.info('Analyzed file count: %d', captured_total)
logging.info('Analysis status: %d', exit_status)
return exit_status
def file_stats(self, file, stats):
    """Accumulate per-file counters into stats.

    file: path to a source file, or None (treated as a no-op).
    stats: dict with int counters 'files' and 'lines', updated in place.

    Note: 'files' is incremented before the file is opened, so an
    unreadable file is still counted as a file (original behavior kept);
    only its line count is skipped, with a warning logged.
    """
    if file is not None:
        stats['files'] += 1
        try:
            with open(file, 'r') as f:
                # Count lines lazily instead of materializing the whole
                # file in memory with len(list(f)).
                stats['lines'] += sum(1 for _ in f)
        except IOError:
            logging.warning('File {} not found'.format(file))
def javac_stats(self):
    """Count the Java sources handed to javac and their total lines.

    Sources come either directly as '*.java' arguments or listed one
    per line inside '@argfile' arguments.

    Returns a dict {'files': <int>, 'lines': <int>}.
    """
    stats = {'files': 0, 'lines': 0}
    for arg in self.javac.original_arguments:
        # Only a plain argument naming a .java file is counted here;
        # anything else is passed through as None (a no-op).
        source = arg if arg.endswith('.java') else None
        self.file_stats(source, stats)
        if arg.startswith('@'):
            # An @file argument holds one source path per line.
            with open(arg[1:], 'r') as argfile:
                for entry in argfile:
                    self.file_stats(entry.strip(), stats)
    return stats
def update_stats(self, csv_report):
def update_stats_with_warnings(self, csv_report):
with open(csv_report, 'r') as file_in:
reader = csv.reader(file_in)
rows = [row for row in reader][1:]
@ -633,7 +599,7 @@ class Infer:
+ infer_print_cmd)
else:
clean_csv(self.args, csv_report)
self.update_stats(csv_report)
self.update_stats_with_warnings(csv_report)
utils.create_json_report(self.args.infer_out)
print('\n')
@ -646,21 +612,21 @@ class Infer:
"""Print timing information to infer_out/stats.json"""
stats_path = os.path.join(self.args.infer_out, utils.STATS_FILENAME)
self.stats['int'].update(self.javac_stats())
self.stats['float'].update({
'capture_time': self.timing.get('capture', 0.0),
'analysis_time': self.timing.get('analysis', 0.0),
'reporting_time': self.timing.get('reporting', 0.0),
})
# Adding the analyzer and the version of Infer
self.stats['normal'] = {}
self.stats['normal']['analyzer'] = self.args.analyzer
self.stats['normal']['infer_version'] = utils.infer_version()
with open(stats_path, 'w') as stats_file:
with open(stats_path, 'r+') as stats_file:
file_stats = json.load(stats_file)
self.stats['int'].update(file_stats)
self.stats['float'] = {
'capture_time': self.timing.get('capture', 0.0),
'analysis_time': self.timing.get('analysis', 0.0),
'reporting_time': self.timing.get('reporting', 0.0),
}
self.stats['normal'] = {
'analyzer': self.args.analyzer,
'infer_version': utils.infer_version()
}
stats_file.seek(0)
json.dump(self.stats, stats_file, indent=2)
stats_file.truncate()
def close(self):
if self.args.analyzer != COMPILE:
@ -693,6 +659,9 @@ class Infer:
elapsed = utils.elapsed_time(start_time)
self.timing['total'] = elapsed
self.save_stats()
files_total = self.stats['int']['files']
files_plural = '' if files_total <= 1 else 's'
print('\n%d file%s analyzed' % (files_total, files_plural))
return self.stats
else:
return dict({})

@ -35,6 +35,8 @@ let default_in_zip_results_dir = "infer"
(** Default name of Buck's output directory. *)
let default_buck_out = "buck-out"
(** Basename of the JSON file where analysis statistics are written
    (see [output_json_file_stats] in the analyzer). *)
let stats_filename = "stats.json"
(** Basename of the file holding the global tenv
    (presumably the global type environment — confirm at use sites). *)
let global_tenv_filename = "global.tenv"
(** List of paths to the directories containing specs for library functions. *)

@ -13,6 +13,7 @@
module L = Logging
module F = Format
open Utils
open Yojson.Basic.Util
(* This module, unused by default, generates random c files with procedure calls *)
module Codegen = struct
@ -355,19 +356,29 @@ let file_pname_to_cg file_pname =
let cg_fname = DB.source_dir_get_internal_file source_dir ".cg" in
Cg.load_from_file cg_fname
(** Write the number of analyzed files and total LOC to
    [results_dir/stats_filename] as JSON, overwriting any previous file. *)
let output_json_file_stats num_files num_lines =
  let file_stats =
    `Assoc [ ("files", `Int num_files); ("lines", `Int num_lines) ] in
  (* write stats file to disk, intentionally overwriting old file if it already exists *)
  let f = open_out (Filename.concat !Config.results_dir Config.stats_filename) in
  Yojson.Basic.pretty_to_channel f file_stats;
  (* bug fix: the channel was never closed, leaking the fd and risking
     buffered output never being flushed to disk *)
  close_out f
(** create clusters of minimal size in the dependence order, with recursive parts grouped together *)
let create_minimal_clusters file_cg exe_env to_analyze_map : cluster list =
if !trace_clusters then L.err "[create_minimal_clusters]@.";
let sorted_files = weak_sort_nodes file_cg in
let seen = ref Procname.Set.empty in
let clusters = ref [] in
let create_cluster_elem (file_pname, changed_procs) = (* create a cluster_elem for the file *)
let total_files = ref 0 in
let total_LOC = ref 0 in
let create_cluster_elem (file_pname, changed_procs) = (* create a cluster_elem for the file *)
let source_file = source_file_from_pname file_pname in
if !trace_clusters then L.err " [create_cluster_elem] %s@." (DB.source_file_to_string source_file);
DB.current_source := source_file;
match file_pname_to_cg file_pname with
| None -> { ce_file = source_file; ce_naprocs = 0; ce_active_procs = []; ce_source_map = Procname.Map.empty }
| Some cg ->
total_files := !total_files + 1;
total_LOC := !total_LOC + (Cg.get_nLOC cg);
(* decide whether a proc is active using pname_to_fname, i.e. whether this is the file associated to it *)
let proc_is_selected pname = match !select_proc with
| None -> true
@ -427,6 +438,7 @@ let create_minimal_clusters file_cg exe_env to_analyze_map : cluster list =
end;
build_clusters list'' in
build_clusters sorted_files;
output_json_file_stats !total_files !total_LOC;
list_rev !clusters
let cluster_nfiles cluster = list_length cluster

Loading…
Cancel
Save