From 0012b1edcc35e5bd8ced5b95c1531eb8f0d5aa87 Mon Sep 17 00:00:00 2001 From: Sungkeun Cho Date: Thu, 30 Jul 2020 02:55:57 -0700 Subject: [PATCH] [specs] Add an option to shard spec files Summary: This diff adds an option to shard spec files in `infer-out/specs`. For some big analysis targets, there can be too many of spec files in the one directory, which slows down IO speed for reading the spec files. Reviewed By: jvillard Differential Revision: D20002128 fbshipit-source-id: bd7722883 --- infer/man/man1/infer-analyze.txt | 4 ++++ infer/man/man1/infer-full.txt | 4 ++++ infer/man/man1/infer-run.txt | 4 ++++ infer/man/man1/infer.txt | 4 ++++ infer/src/IR/Procname.ml | 6 ++++-- infer/src/IR/Procname.mli | 3 +++ infer/src/backend/SpecsFiles.ml | 19 ++++--------------- infer/src/backend/Summary.ml | 20 ++++++++++++++++---- infer/src/base/Config.ml | 13 +++++++++++++ infer/src/base/Config.mli | 2 ++ infer/src/base/DB.ml | 11 +++++------ infer/src/base/DB.mli | 5 +++-- infer/src/base/Serialization.ml | 3 +++ infer/src/base/Serialization.mli | 3 +++ 14 files changed, 72 insertions(+), 29 deletions(-) diff --git a/infer/man/man1/infer-analyze.txt b/infer/man/man1/infer-analyze.txt index 401e517d1..4f3e05c5e 100644 --- a/infer/man/man1/infer-analyze.txt +++ b/infer/man/man1/infer-analyze.txt @@ -332,6 +332,10 @@ OPTIONS Activates: Enable siof and disable all other checkers (Conversely: --no-siof-only) + --specs-shard-depth int + Specify the depth of the directory structure of specs, used for + "sharding" .specs files. Zero turns sharding off. + --sqlite-cache-size int SQLite cache size in pages (if positive) or kB (if negative), follows formal of corresponding SQLite PRAGMA. diff --git a/infer/man/man1/infer-full.txt b/infer/man/man1/infer-full.txt index 5f092ef7e..3923a77bc 100644 --- a/infer/man/man1/infer-full.txt +++ b/infer/man/man1/infer-full.txt @@ -1089,6 +1089,10 @@ OPTIONS Deactivates: print code excerpts around trace elements (Conversely: --source-preview) See also infer-explore(1). + --specs-shard-depth int + Specify the depth of the directory structure of specs, used for + "sharding" .specs files. Zero turns sharding off. See also infer-analyze(1) and infer-run(1). + --sqlite-cache-size int SQLite cache size in pages (if positive) or kB (if negative), follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1). diff --git a/infer/man/man1/infer-run.txt b/infer/man/man1/infer-run.txt index 68d8f4ce3..607949440 100644 --- a/infer/man/man1/infer-run.txt +++ b/infer/man/man1/infer-run.txt @@ -133,6 +133,10 @@ OPTIONS Ignore files whose path matches the given prefix (can be specified multiple times) + --specs-shard-depth int + Specify the depth of the directory structure of specs, used for + "sharding" .specs files. Zero turns sharding off. + --sqlite-cache-size int SQLite cache size in pages (if positive) or kB (if negative), follows formal of corresponding SQLite PRAGMA. diff --git a/infer/man/man1/infer.txt b/infer/man/man1/infer.txt index 0d317c539..22f950e4f 100644 --- a/infer/man/man1/infer.txt +++ b/infer/man/man1/infer.txt @@ -1089,6 +1089,10 @@ OPTIONS Deactivates: print code excerpts around trace elements (Conversely: --source-preview) See also infer-explore(1). + --specs-shard-depth int + Specify the depth of the directory structure of specs, used for + "sharding" .specs files. Zero turns sharding off. See also infer-analyze(1) and infer-run(1). + --sqlite-cache-size int SQLite cache size in pages (if positive) or kB (if negative), follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1). diff --git a/infer/src/IR/Procname.ml b/infer/src/IR/Procname.ml index 2e21fd6c8..ffd134141 100644 --- a/infer/src/IR/Procname.ml +++ b/infer/src/IR/Procname.ml @@ -775,8 +775,7 @@ let get_qualifiers pname = QualifiedCppName.empty -(** Convert a proc name to a filename *) -let to_filename pname = +let to_filename_and_crc pname = (* filenames for clang procs are REVERSED qualifiers with '#' as separator *) let pp_rev_qualified fmt pname = let rev_qualifiers = get_qualifiers pname |> QualifiedCppName.to_rev_list in @@ -797,6 +796,9 @@ let to_filename pname = DB.append_crc_cutoff proc_id +(** Convert a proc name to a filename *) +let to_filename pname = to_filename_and_crc pname |> fst + module SQLite = struct module T = struct type nonrec t = t [@@deriving compare] diff --git a/infer/src/IR/Procname.mli b/infer/src/IR/Procname.mli index 8e7f473a5..51332e8a5 100644 --- a/infer/src/IR/Procname.mli +++ b/infer/src/IR/Procname.mli @@ -339,6 +339,9 @@ val pp_unique_id : F.formatter -> t -> unit val to_unique_id : t -> string (** Convert a proc name into a unique identifier. *) +val to_filename_and_crc : t -> string * string +(** Convert a proc name to a filename and crc. *) + val to_filename : t -> string (** Convert a proc name to a filename. *) diff --git a/infer/src/backend/SpecsFiles.ml b/infer/src/backend/SpecsFiles.ml index 749882d1c..4f3dd87fc 100644 --- a/infer/src/backend/SpecsFiles.ml +++ b/infer/src/backend/SpecsFiles.ml @@ -10,23 +10,12 @@ module F = Format module L = Logging module CLOpt = CommandLineOption -(** return the list of the .specs files in the results dir and libs, if they're defined *) +(** return the list of the .specs files in the results dir *) let load_specfiles () = - let specs_files_in_dir dir = - let is_specs_file fname = - PolyVariantEqual.(Sys.is_directory fname <> `Yes) - && Filename.check_suffix fname Config.specs_files_suffix - in - match Sys.readdir dir with - | exception Sys_error _ -> - [] - | files -> - Array.fold files ~init:[] ~f:(fun acc fname -> - let path = Filename.concat dir fname in - if is_specs_file path then path :: acc else acc ) - in + let is_specs_file fname = Filename.check_suffix fname Config.specs_files_suffix in + let do_file acc path = if is_specs_file path then path :: acc else acc in let result_specs_dir = DB.filename_to_string DB.Results_dir.specs_dir in - specs_files_in_dir result_specs_dir + Utils.directory_fold do_file [] result_specs_dir let print_usage_exit err_s = diff --git a/infer/src/backend/Summary.ml b/infer/src/backend/Summary.ml index 017c9e076..bbaa9383e 100644 --- a/infer/src/backend/Summary.ml +++ b/infer/src/backend/Summary.ml @@ -130,14 +130,26 @@ module OnDisk = struct Procname.Hash.replace cache proc_name summary - let specs_filename pname = - let pname_file = Procname.to_filename pname in - pname_file ^ Config.specs_files_suffix + let specs_filename_and_crc pname = + let pname_file, crc = Procname.to_filename_and_crc pname in + (pname_file ^ Config.specs_files_suffix, crc) + let specs_filename pname = specs_filename_and_crc pname |> fst + (** Return the path to the .specs file for the given procedure in the current results directory *) let specs_filename_of_procname pname = - DB.filename_from_string (ResultsDir.get_path Specs ^/ specs_filename pname) + let filename, crc = specs_filename_and_crc pname in + let sharded_filename = + if Serialization.is_shard_mode then + let shard_dirs = + String.sub crc ~pos:0 ~len:Config.specs_shard_depth + |> String.concat_map ~sep:"/" ~f:Char.to_string + in + shard_dirs ^/ filename + else filename + in + DB.filename_from_string (ResultsDir.get_path Specs ^/ sharded_filename) (** paths to the .specs file for the given procedure in the models folder *) diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index 941746540..254bd9232 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -2160,6 +2160,13 @@ and starvation_whole_program = "Run whole-program starvation analysis" +and specs_shard_depth = + CLOpt.mk_int ~long:"specs-shard-depth" ~default:0 + ~in_help:InferCommand.[(Analyze, manual_generic); (Run, manual_generic)] + "Specify the depth of the directory structure of specs, used for \"sharding\" .specs files. \ + Zero turns sharding off." + + and sqlite_cache_size = CLOpt.mk_int ~long:"sqlite-cache-size" ~default:2000 ~in_help: @@ -3098,6 +3105,12 @@ and sources = !sources and sourcepath = !sourcepath +and specs_shard_depth = + if 0 <= !specs_shard_depth && !specs_shard_depth <= 32 then !specs_shard_depth + else + L.die UserError "Invalid number of shard depths %d: must be between 0 and 32" !specs_shard_depth + + and sqlite_cache_size = !sqlite_cache_size and sqlite_page_size = !sqlite_page_size diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index 4ff95491d..fca3c90f9 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -553,6 +553,8 @@ val source_files_type_environment : bool val source_preview : bool +val specs_shard_depth : int + val sqlite_cache_size : int val sqlite_page_size : int diff --git a/infer/src/base/DB.ml b/infer/src/base/DB.ml index 59e41b1bc..de75a901e 100644 --- a/infer/src/base/DB.ml +++ b/infer/src/base/DB.ml @@ -25,7 +25,7 @@ let append_crc_cutoff ?(key = "") name = let name_for_crc = name ^ key in Utils.string_crc_hex32 name_for_crc in - Printf.sprintf "%s%c%s" name_up_to_cutoff crc_token crc_str + (Printf.sprintf "%s%c%s" name_up_to_cutoff crc_token crc_str, crc_str) let curr_source_file_encoding = `Enc_crc @@ -41,7 +41,7 @@ let source_file_encoding source_file = | `Enc_crc -> let base = Filename.basename source_file_s in let dir = Filename.dirname source_file_s in - append_crc_cutoff ~key:dir base + append_crc_cutoff ~key:dir base |> fst (** {2 Source Dirs} *) @@ -55,7 +55,7 @@ let source_dir_to_string source_dir = source_dir (** get the path to an internal file with the given extention (.tenv, ...) *) let source_dir_get_internal_file source_dir extension = let source_dir_name = - append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) + append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) |> fst in let fname = source_dir_name ^ extension in Filename.concat source_dir fname @@ -138,9 +138,8 @@ module Results_dir = struct let clean_specs_dir () = (* create dir just in case it doesn't exist to avoid errors *) - Utils.create_dir specs_dir ; - Array.iter (Sys.readdir specs_dir) ~f:(fun specs -> - Filename.concat specs_dir specs |> Sys.remove ) + Utils.rmtree specs_dir ; + Utils.create_dir specs_dir (** create a file at the given path, creating any missing directories *) diff --git a/infer/src/base/DB.mli b/infer/src/base/DB.mli index 89d4e8a60..e9de8c8c9 100644 --- a/infer/src/base/DB.mli +++ b/infer/src/base/DB.mli @@ -55,9 +55,10 @@ module Results_dir : sig (** create a file at the given path, creating any missing directories *) end -val append_crc_cutoff : ?key:string -> string -> string +val append_crc_cutoff : ?key:string -> string -> string * string (** Append a crc to the string, using string_crc_hex32. Cut the string if it exceeds the cutoff - limit. Use an optional key to compute the crc. *) + limit. Use an optional key to compute the crc. Return a pair of the appended result and crc + string. *) val source_file_encoding : SourceFile.t -> string (** string encoding of a source file (including path) as a single filename *) diff --git a/infer/src/base/Serialization.ml b/infer/src/base/Serialization.ml index 3b797ed20..b8d7ef2ce 100644 --- a/infer/src/base/Serialization.ml +++ b/infer/src/base/Serialization.ml @@ -31,6 +31,8 @@ end (** version of the binary files, to be incremented for each change *) let version = 27 +let is_shard_mode = (not Config.biabduction_models_mode) && Config.specs_shard_depth > 0 + let create_serializer (key : Key.t) : 'a serializer = let read_data ((key' : int), (version' : int), (value : 'a)) source_msg = if key.key <> key' then ( @@ -64,6 +66,7 @@ let create_serializer (key : Key.t) : 'a serializer = let filename = DB.filename_to_string fname in PerfEvent.( log (fun logger -> log_begin_event logger ~name:("writing " ^ key.name) ~categories:["io"] ())) ; + if is_shard_mode then Utils.create_dir (Filename.dirname filename) ; Utils.with_intermediate_temp_file_out filename ~f:(fun outc -> Marshal.to_channel outc (key.key, version, data) [] ) ; PerfEvent.(log (fun logger -> log_end_event logger ())) diff --git a/infer/src/base/Serialization.mli b/infer/src/base/Serialization.mli index b97376398..5a8561708 100644 --- a/infer/src/base/Serialization.mli +++ b/infer/src/base/Serialization.mli @@ -27,6 +27,9 @@ end (** Generic serializer *) type 'a serializer +val is_shard_mode : bool +(** If the spec directory is sharded or not *) + val create_serializer : Key.t -> 'a serializer (** create a serializer from a file name given an integer key used as double-check of the file type *)