[specs] Add an option to shard spec files

Summary: This diff adds an option to shard spec files in `infer-out/specs`. For some big analysis targets, there can be too many of spec files in the one directory, which slows down IO speed for reading the spec files.

Reviewed By: jvillard

Differential Revision: D20002128

fbshipit-source-id: bd7722883
master
Sungkeun Cho 4 years ago committed by Facebook GitHub Bot
parent 444ccc4940
commit 0012b1edcc

@ -332,6 +332,10 @@ OPTIONS
Activates: Enable siof and disable all other checkers (Conversely: Activates: Enable siof and disable all other checkers (Conversely:
--no-siof-only) --no-siof-only)
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off.
--sqlite-cache-size int --sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative), SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA. follows formal of corresponding SQLite PRAGMA.

@ -1089,6 +1089,10 @@ OPTIONS
Deactivates: print code excerpts around trace elements Deactivates: print code excerpts around trace elements
(Conversely: --source-preview) See also infer-explore(1). (Conversely: --source-preview) See also infer-explore(1).
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off. See also infer-analyze(1) and infer-run(1).
--sqlite-cache-size int --sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative), SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1). follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1).

@ -133,6 +133,10 @@ OPTIONS
Ignore files whose path matches the given prefix (can be specified Ignore files whose path matches the given prefix (can be specified
multiple times) multiple times)
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off.
--sqlite-cache-size int --sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative), SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA. follows formal of corresponding SQLite PRAGMA.

@ -1089,6 +1089,10 @@ OPTIONS
Deactivates: print code excerpts around trace elements Deactivates: print code excerpts around trace elements
(Conversely: --source-preview) See also infer-explore(1). (Conversely: --source-preview) See also infer-explore(1).
--specs-shard-depth int
Specify the depth of the directory structure of specs, used for
"sharding" .specs files. Zero turns sharding off. See also infer-analyze(1) and infer-run(1).
--sqlite-cache-size int --sqlite-cache-size int
SQLite cache size in pages (if positive) or kB (if negative), SQLite cache size in pages (if positive) or kB (if negative),
follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1). follows formal of corresponding SQLite PRAGMA. See also infer-analyze(1), infer-capture(1), and infer-run(1).

@ -775,8 +775,7 @@ let get_qualifiers pname =
QualifiedCppName.empty QualifiedCppName.empty
(** Convert a proc name to a filename *) let to_filename_and_crc pname =
let to_filename pname =
(* filenames for clang procs are REVERSED qualifiers with '#' as separator *) (* filenames for clang procs are REVERSED qualifiers with '#' as separator *)
let pp_rev_qualified fmt pname = let pp_rev_qualified fmt pname =
let rev_qualifiers = get_qualifiers pname |> QualifiedCppName.to_rev_list in let rev_qualifiers = get_qualifiers pname |> QualifiedCppName.to_rev_list in
@ -797,6 +796,9 @@ let to_filename pname =
DB.append_crc_cutoff proc_id DB.append_crc_cutoff proc_id
(** Convert a proc name to a filename *)
let to_filename pname = to_filename_and_crc pname |> fst
module SQLite = struct module SQLite = struct
module T = struct module T = struct
type nonrec t = t [@@deriving compare] type nonrec t = t [@@deriving compare]

@ -339,6 +339,9 @@ val pp_unique_id : F.formatter -> t -> unit
val to_unique_id : t -> string val to_unique_id : t -> string
(** Convert a proc name into a unique identifier. *) (** Convert a proc name into a unique identifier. *)
val to_filename_and_crc : t -> string * string
(** Convert a proc name to a filename and crc. *)
val to_filename : t -> string val to_filename : t -> string
(** Convert a proc name to a filename. *) (** Convert a proc name to a filename. *)

@ -10,23 +10,12 @@ module F = Format
module L = Logging module L = Logging
module CLOpt = CommandLineOption module CLOpt = CommandLineOption
(** return the list of the .specs files in the results dir and libs, if they're defined *) (** return the list of the .specs files in the results dir *)
let load_specfiles () = let load_specfiles () =
let specs_files_in_dir dir = let is_specs_file fname = Filename.check_suffix fname Config.specs_files_suffix in
let is_specs_file fname = let do_file acc path = if is_specs_file path then path :: acc else acc in
PolyVariantEqual.(Sys.is_directory fname <> `Yes)
&& Filename.check_suffix fname Config.specs_files_suffix
in
match Sys.readdir dir with
| exception Sys_error _ ->
[]
| files ->
Array.fold files ~init:[] ~f:(fun acc fname ->
let path = Filename.concat dir fname in
if is_specs_file path then path :: acc else acc )
in
let result_specs_dir = DB.filename_to_string DB.Results_dir.specs_dir in let result_specs_dir = DB.filename_to_string DB.Results_dir.specs_dir in
specs_files_in_dir result_specs_dir Utils.directory_fold do_file [] result_specs_dir
let print_usage_exit err_s = let print_usage_exit err_s =

@ -130,14 +130,26 @@ module OnDisk = struct
Procname.Hash.replace cache proc_name summary Procname.Hash.replace cache proc_name summary
let specs_filename pname = let specs_filename_and_crc pname =
let pname_file = Procname.to_filename pname in let pname_file, crc = Procname.to_filename_and_crc pname in
pname_file ^ Config.specs_files_suffix (pname_file ^ Config.specs_files_suffix, crc)
let specs_filename pname = specs_filename_and_crc pname |> fst
(** Return the path to the .specs file for the given procedure in the current results directory *) (** Return the path to the .specs file for the given procedure in the current results directory *)
let specs_filename_of_procname pname = let specs_filename_of_procname pname =
DB.filename_from_string (ResultsDir.get_path Specs ^/ specs_filename pname) let filename, crc = specs_filename_and_crc pname in
let sharded_filename =
if Serialization.is_shard_mode then
let shard_dirs =
String.sub crc ~pos:0 ~len:Config.specs_shard_depth
|> String.concat_map ~sep:"/" ~f:Char.to_string
in
shard_dirs ^/ filename
else filename
in
DB.filename_from_string (ResultsDir.get_path Specs ^/ sharded_filename)
(** paths to the .specs file for the given procedure in the models folder *) (** paths to the .specs file for the given procedure in the models folder *)

@ -2160,6 +2160,13 @@ and starvation_whole_program =
"Run whole-program starvation analysis" "Run whole-program starvation analysis"
and specs_shard_depth =
CLOpt.mk_int ~long:"specs-shard-depth" ~default:0
~in_help:InferCommand.[(Analyze, manual_generic); (Run, manual_generic)]
"Specify the depth of the directory structure of specs, used for \"sharding\" .specs files. \
Zero turns sharding off."
and sqlite_cache_size = and sqlite_cache_size =
CLOpt.mk_int ~long:"sqlite-cache-size" ~default:2000 CLOpt.mk_int ~long:"sqlite-cache-size" ~default:2000
~in_help: ~in_help:
@ -3098,6 +3105,12 @@ and sources = !sources
and sourcepath = !sourcepath and sourcepath = !sourcepath
and specs_shard_depth =
if 0 <= !specs_shard_depth && !specs_shard_depth <= 32 then !specs_shard_depth
else
L.die UserError "Invalid number of shard depths %d: must be between 0 and 32" !specs_shard_depth
and sqlite_cache_size = !sqlite_cache_size and sqlite_cache_size = !sqlite_cache_size
and sqlite_page_size = !sqlite_page_size and sqlite_page_size = !sqlite_page_size

@ -553,6 +553,8 @@ val source_files_type_environment : bool
val source_preview : bool val source_preview : bool
val specs_shard_depth : int
val sqlite_cache_size : int val sqlite_cache_size : int
val sqlite_page_size : int val sqlite_page_size : int

@ -25,7 +25,7 @@ let append_crc_cutoff ?(key = "") name =
let name_for_crc = name ^ key in let name_for_crc = name ^ key in
Utils.string_crc_hex32 name_for_crc Utils.string_crc_hex32 name_for_crc
in in
Printf.sprintf "%s%c%s" name_up_to_cutoff crc_token crc_str (Printf.sprintf "%s%c%s" name_up_to_cutoff crc_token crc_str, crc_str)
let curr_source_file_encoding = `Enc_crc let curr_source_file_encoding = `Enc_crc
@ -41,7 +41,7 @@ let source_file_encoding source_file =
| `Enc_crc -> | `Enc_crc ->
let base = Filename.basename source_file_s in let base = Filename.basename source_file_s in
let dir = Filename.dirname source_file_s in let dir = Filename.dirname source_file_s in
append_crc_cutoff ~key:dir base append_crc_cutoff ~key:dir base |> fst
(** {2 Source Dirs} *) (** {2 Source Dirs} *)
@ -55,7 +55,7 @@ let source_dir_to_string source_dir = source_dir
(** get the path to an internal file with the given extention (.tenv, ...) *) (** get the path to an internal file with the given extention (.tenv, ...) *)
let source_dir_get_internal_file source_dir extension = let source_dir_get_internal_file source_dir extension =
let source_dir_name = let source_dir_name =
append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) append_crc_cutoff (Caml.Filename.remove_extension (Filename.basename source_dir)) |> fst
in in
let fname = source_dir_name ^ extension in let fname = source_dir_name ^ extension in
Filename.concat source_dir fname Filename.concat source_dir fname
@ -138,9 +138,8 @@ module Results_dir = struct
let clean_specs_dir () = let clean_specs_dir () =
(* create dir just in case it doesn't exist to avoid errors *) (* create dir just in case it doesn't exist to avoid errors *)
Utils.create_dir specs_dir ; Utils.rmtree specs_dir ;
Array.iter (Sys.readdir specs_dir) ~f:(fun specs -> Utils.create_dir specs_dir
Filename.concat specs_dir specs |> Sys.remove )
(** create a file at the given path, creating any missing directories *) (** create a file at the given path, creating any missing directories *)

@ -55,9 +55,10 @@ module Results_dir : sig
(** create a file at the given path, creating any missing directories *) (** create a file at the given path, creating any missing directories *)
end end
val append_crc_cutoff : ?key:string -> string -> string val append_crc_cutoff : ?key:string -> string -> string * string
(** Append a crc to the string, using string_crc_hex32. Cut the string if it exceeds the cutoff (** Append a crc to the string, using string_crc_hex32. Cut the string if it exceeds the cutoff
limit. Use an optional key to compute the crc. *) limit. Use an optional key to compute the crc. Return a pair of the appended result and crc
string. *)
val source_file_encoding : SourceFile.t -> string val source_file_encoding : SourceFile.t -> string
(** string encoding of a source file (including path) as a single filename *) (** string encoding of a source file (including path) as a single filename *)

@ -31,6 +31,8 @@ end
(** version of the binary files, to be incremented for each change *) (** version of the binary files, to be incremented for each change *)
let version = 27 let version = 27
let is_shard_mode = (not Config.biabduction_models_mode) && Config.specs_shard_depth > 0
let create_serializer (key : Key.t) : 'a serializer = let create_serializer (key : Key.t) : 'a serializer =
let read_data ((key' : int), (version' : int), (value : 'a)) source_msg = let read_data ((key' : int), (version' : int), (value : 'a)) source_msg =
if key.key <> key' then ( if key.key <> key' then (
@ -64,6 +66,7 @@ let create_serializer (key : Key.t) : 'a serializer =
let filename = DB.filename_to_string fname in let filename = DB.filename_to_string fname in
PerfEvent.( PerfEvent.(
log (fun logger -> log_begin_event logger ~name:("writing " ^ key.name) ~categories:["io"] ())) ; log (fun logger -> log_begin_event logger ~name:("writing " ^ key.name) ~categories:["io"] ())) ;
if is_shard_mode then Utils.create_dir (Filename.dirname filename) ;
Utils.with_intermediate_temp_file_out filename ~f:(fun outc -> Utils.with_intermediate_temp_file_out filename ~f:(fun outc ->
Marshal.to_channel outc (key.key, version, data) [] ) ; Marshal.to_channel outc (key.key, version, data) [] ) ;
PerfEvent.(log (fun logger -> log_end_event logger ())) PerfEvent.(log (fun logger -> log_end_event logger ()))

@ -27,6 +27,9 @@ end
(** Generic serializer *) (** Generic serializer *)
type 'a serializer type 'a serializer
val is_shard_mode : bool
(** If the spec directory is sharded or not *)
val create_serializer : Key.t -> 'a serializer val create_serializer : Key.t -> 'a serializer
(** create a serializer from a file name given an integer key used as double-check of the file type *) (** create a serializer from a file name given an integer key used as double-check of the file type *)

Loading…
Cancel
Save