[specs] Add an option to shard spec files

Summary: This diff adds an option to shard spec files in `infer-out/specs`. For some big analysis targets, there can be too many of spec files in the one directory, which slows down IO speed for reading the spec files.

Reviewed By: jvillard

Differential Revision: D20002128

fbshipit-source-id: bd7722883
master
Sungkeun Cho 4 years ago committed by Facebook GitHub Bot
parent 444ccc4940
commit 0012b1edcc

@ -332,6 +332,10 @@ OPTIONS
Activates: Enable siof and disable all other checkers (Conversely:
--no-siof-only)
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off.
--sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA.

@ -1089,6 +1089,10 @@ OPTIONS
Deactivates: print code excerpts around trace elements
(Conversely: --source-preview) See also infer-explore(1).
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off. See also infer-analyze(1) and infer-run(1).
--sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1).

@ -133,6 +133,10 @@ OPTIONS
Ignore files whose path matches the given prefix (can be specified
multiple times)
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off.
--sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA.

@ -1089,6 +1089,10 @@ OPTIONS
Deactivates: print code excerpts around trace elements
(Conversely: --source-preview) See also infer-explore(1).
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off. See also infer-analyze(1) and infer-run(1).
--sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1).

@ -775,8 +775,7 @@ let get_qualifiers pname =
QualifiedCppName.empty
(** Convert a proc name to a filename *)
let to_filename pname =
let to_filename_and_crc pname =
(* filenames for clang procs are REVERSED qualifiers with '#' as separator *)
let pp_rev_qualified fmt pname =
let rev_qualifiers = get_qualifiers pname |> QualifiedCppName.to_rev_list in
@ -797,6 +796,9 @@ let to_filename pname =
DB.append_crc_cutoff proc_id
(** Convert a proc name to a filename *)
let to_filename pname = to_filename_and_crc pname |> fst
module SQLite = struct
module T = struct
type nonrec t = t [@@deriving compare]

@ -339,6 +339,9 @@ val pp_unique_id : F.formatter -> t -> unit
val to_unique_id : t -> string
(** Convert a proc name into a unique identifier. *)
val to_filename_and_crc : t -> string * string
(** Convert a proc name to a filename and crc. *)
val to_filename : t -> string
(** Convert a proc name to a filename. *)

@ -10,23 +10,12 @@ module F = Format
module L = Logging
module CLOpt = CommandLineOption
(** return the list of the .specs files in the results dir and libs, if they're defined *)
(** return the list of the .specs files in the results dir *)
let load_specfiles () =
let specs_files_in_dir dir =
let is_specs_file fname =
PolyVariantEqual.(Sys.is_directory fname <> `Yes)
&& Filename.check_suffix fname Config.specs_files_suffix
in
match Sys.readdir dir with
| exception Sys_error _ ->
[]
| files ->
Array.fold files ~init:[] ~f:(fun acc fname ->
let path = Filename.concat dir fname in
if is_specs_file path then path :: acc else acc )
in
let is_specs_file fname = Filename.check_suffix fname Config.specs_files_suffix in
let do_file acc path = if is_specs_file path then path :: acc else acc in
let result_specs_dir = DB.filename_to_string DB.Results_dir.specs_dir in
specs_files_in_dir result_specs_dir
Utils.directory_fold do_file [] result_specs_dir
let print_usage_exit err_s =

@ -130,14 +130,26 @@ module OnDisk = struct
Procname.Hash.replace cache proc_name summary
let specs_filename pname =
let pname_file = Procname.to_filename pname in
pname_file ^ Config.specs_files_suffix
let specs_filename_and_crc pname =
let pname_file, crc = Procname.to_filename_and_crc pname in
(pname_file ^ Config.specs_files_suffix, crc)
let specs_filename pname = specs_filename_and_crc pname |> fst
(** Return the path to the .specs file for the given procedure in the current results directory *)
let specs_filename_of_procname pname =
DB.filename_from_string (ResultsDir.get_path Specs ^/ specs_filename pname)
let filename, crc = specs_filename_and_crc pname in
let sharded_filename =
if Serialization.is_shard_mode then
let shard_dirs =
String.sub crc ~pos:0 ~len:Config.specs_shard_depth
|> String.concat_map ~sep:"/" ~f:Char.to_string
in
shard_dirs ^/ filename
else filename
in
DB.filename_from_string (ResultsDir.get_path Specs ^/ sharded_filename)
(** paths to the .specs file for the given procedure in the models folder *)

@ -2160,6 +2160,13 @@ and starvation_whole_program =
"Run whole-program starvation analysis"
and specs_shard_depth =
CLOpt.mk_int ~long:"specs-shard-depth" ~default:0
~in_help:InferCommand.[(Analyze, manual_generic); (Run, manual_generic)]
"Specify the depth of the directory structure of specs, used for \"sharding\" .specs files. \
Zero turns sharding off."
and sqlite_cache_size =
CLOpt.mk_int ~long:"sqlite-cache-size" ~default:2000
~in_help:
@ -3098,6 +3105,12 @@ and sources = !sources
and sourcepath = !sourcepath
and specs_shard_depth =
if 0 <= !specs_shard_depth && !specs_shard_depth <= 32 then !specs_shard_depth
else
L.die UserError "Invalid number of shard depths %d: must be between 0 and 32" !specs_shard_depth
and sqlite_cache_size = !sqlite_cache_size
and sqlite_page_size = !sqlite_page_size

@ -553,6 +553,8 @@ val source_files_type_environment : bool
val source_preview : bool
val specs_shard_depth : int
val sqlite_cache_size : int
val sqlite_page_size : int

@ -25,7 +25,7 @@ let append_crc_cutoff ?(key = "") name =
let name_for_crc = name ^ key in
Utils.string_crc_hex32 name_for_crc
in
Printf.sprintf "%s%c%s" name_up_to_cutoff crc_token crc_str
(Printf.sprintf "%s%c%s" name_up_to_cutoff crc_token crc_str, crc_str)
let curr_source_file_encoding = `Enc_crc
@ -41,7 +41,7 @@ let source_file_encoding source_file =
| `Enc_crc ->
let base = Filename.basename source_file_s in
let dir = Filename.dirname source_file_s in
append_crc_cutoff ~key:dir base
append_crc_cutoff ~key:dir base |> fst
(** {2 Source Dirs} *)
@ -55,7 +55,7 @@ let source_dir_to_string source_dir = source_dir
(** get the path to an internal file with the given extention (.tenv, ...) *)
let source_dir_get_internal_file source_dir extension =
let source_dir_name =
append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir))
append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) |> fst
in
let fname = source_dir_name ^ extension in
Filename.concat source_dir fname
@ -138,9 +138,8 @@ module Results_dir = struct
let clean_specs_dir () =
(* create dir just in case it doesn't exist to avoid errors *)
Utils.create_dir specs_dir ;
Array.iter (Sys.readdir specs_dir) ~f:(fun specs ->
Filename.concat specs_dir specs |> Sys.remove )
Utils.rmtree specs_dir ;
Utils.create_dir specs_dir
(** create a file at the given path, creating any missing directories *)

@ -55,9 +55,10 @@ module Results_dir : sig
(** create a file at the given path, creating any missing directories *)
end
val append_crc_cutoff : ?key:string -> string -> string
val append_crc_cutoff : ?key:string -> string -> string * string
(** Append a crc to the string, using string_crc_hex32. Cut the string if it exceeds the cutoff
limit. Use an optional key to compute the crc. *)
limit. Use an optional key to compute the crc. Return a pair of the appended result and crc
string. *)
val source_file_encoding : SourceFile.t -> string
(** string encoding of a source file (including path) as a single filename *)

@ -31,6 +31,8 @@ end
(** version of the binary files, to be incremented for each change *)
let version = 27
let is_shard_mode = (not Config.biabduction_models_mode) && Config.specs_shard_depth > 0
let create_serializer (key : Key.t) : 'a serializer =
let read_data ((key' : int), (version' : int), (value : 'a)) source_msg =
if key.key <> key' then (
@ -64,6 +66,7 @@ let create_serializer (key : Key.t) : 'a serializer =
let filename = DB.filename_to_string fname in
PerfEvent.(
log (fun logger -> log_begin_event logger ~name:("writing " ^ key.name) ~categories:["io"] ())) ;
if is_shard_mode then Utils.create_dir (Filename.dirname filename) ;
Utils.with_intermediate_temp_file_out filename ~f:(fun outc ->
Marshal.to_channel outc (key.key, version, data) [] ) ;
PerfEvent.(log (fun logger -> log_end_event logger ()))

@ -27,6 +27,9 @@ end
(** Generic serializer *)
type 'a serializer
val is_shard_mode : bool
(** If the spec directory is sharded or not *)
val create_serializer : Key.t -> 'a serializer
(** create a serializer from a file name given an integer key used as double-check of the file type *)

Loading…
Cancel
Save