From 47c623ff5150c990def5f0582ba2f5b946e2f1a2 Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Tue, 25 Oct 2016 07:29:44 -0700 Subject: [PATCH] [Merge] Use multilink files to speed up merge with buck projects. Summary: Merging the results directories of targets on buck projects involved creating symbolic links into buck-out. The bulk of files are .attr files: one per procedure. Creating these links can be a bottleneck, and the merge phase can be slower than the analysis phases on projects with many procedures. This diff introduces multilinks to speed up merge. A multilink is a file `multilink.txt` containing a sequence of paths ``` path/to/file1.ext path/to/file2.ext ... ``` A multilink file is a compact way to represent a link for each entry. This diff creates a multilink file for each `attributes/dir` directory, instead of one symbolic link for each file. Reviewed By: jberdine Differential Revision: D4067428 fbshipit-source-id: 911f8a9 --- infer/src/IR/AttributesTable.re | 4 +- infer/src/IR/Ident.re | 5 --- infer/src/backend/mergeCapture.ml | 68 +++++++++++++++++++++++++------ infer/src/base/Multilinks.re | 68 +++++++++++++++++++++++++++++++ infer/src/base/Multilinks.rei | 47 +++++++++++++++++++++ infer/src/base/Utils.ml | 8 ++++ infer/src/base/Utils.mli | 3 ++ 7 files changed, 184 insertions(+), 19 deletions(-) create mode 100644 infer/src/base/Multilinks.re create mode 100644 infer/src/base/Multilinks.rei diff --git a/infer/src/IR/AttributesTable.re b/infer/src/IR/AttributesTable.re index 0aca356f0..6ad5d4d81 100644 --- a/infer/src/IR/AttributesTable.re +++ b/infer/src/IR/AttributesTable.re @@ -43,7 +43,9 @@ let res_dir_attr_filename defined::defined pname => { /* Load the proc attribute for the defined filename if it exists, otherwise try to load the declared filename. 
*/ let load_defined_first proc_name => { - let attributes_file defined => res_dir_attr_filename defined::defined proc_name; + let attributes_file defined => Multilinks.resolve ( + res_dir_attr_filename defined::defined proc_name + ); let attr = Serialization.from_file serializer (attributes_file true); attr != None ? attr : Serialization.from_file serializer (attributes_file false) }; diff --git a/infer/src/IR/Ident.re b/infer/src/IR/Ident.re index 41fdd5e48..c87826fed 100644 --- a/infer/src/IR/Ident.re +++ b/infer/src/IR/Ident.re @@ -127,11 +127,6 @@ let idlist_to_idset ids => IList.fold_left (fun set id => IdentSet.add id set) I /** {2 Conversion between Names and Strings} */ -let module StringHash = Hashtbl.Make { - type t = string; - let equal (s1: string) (s2: string) => s1 == s2; - let hash = Hashtbl.hash; -}; let module NameHash = Hashtbl.Make { type t = name; diff --git a/infer/src/backend/mergeCapture.ml b/infer/src/backend/mergeCapture.ml index 2f4717a74..8d4637e75 100644 --- a/infer/src/backend/mergeCapture.ml +++ b/infer/src/backend/mergeCapture.ml @@ -14,6 +14,8 @@ module F = Format (** Module to merge the results of capture for different buck targets. *) +let use_multilinks = true + (** Flag to control whether the timestamp of symbolic links is used to determine whether a captured directory needs to be merged. *) let check_timestamp_of_symlinks = true @@ -41,12 +43,14 @@ let debug = 0 type stats = { mutable files_linked: int; + mutable files_multilinked: int; mutable targets_merged: int; } let empty_stats () = { files_linked = 0; + files_multilinked = 0; targets_merged = 0; } @@ -56,6 +60,51 @@ let link_exists s = true with Unix.Unix_error _ -> false +(* Table mapping directories to multilinks. + Used for the hashed directories where attribute files are stored. *) +let multilinks_dir_table = StringHash.create 16 + + +(* Add a multilink for attributes to the internal per-directory table. + The files will be created by create_multilinks.
*) +let add_multilink_attr ~stats src dst = + let attr_dir = Filename.dirname dst in + let attr_dir_name = Filename.basename attr_dir in + let multilinks = + try + StringHash.find multilinks_dir_table attr_dir_name + with + | Not_found -> + let multilinks = match Multilinks.read ~dir:attr_dir with + | Some multilinks -> + (* incremental merge: start from the existing file on disk *) + multilinks + | None -> + Multilinks.create () in + StringHash.add multilinks_dir_table attr_dir_name multilinks; + multilinks in + Multilinks.add multilinks src; + stats.files_multilinked <- stats.files_multilinked + 1 + +let create_link ~stats src dst = + if link_exists dst then Unix.unlink dst; + Unix.symlink src dst; + (* Set the accessed and modified time of the original file slightly in the past. Due to + the coarse precision of the timestamps, it is possible for the source and destination of a + link to have the same modification time. When this happens, the files will be considered to + need re-analysis every time, indefinitely. *) + let near_past = Unix.gettimeofday () -. 1. in + Unix.utimes src near_past near_past; + stats.files_linked <- stats.files_linked + 1 + +let create_multilinks () = + let do_dir dir multilinks = + let attributes_dir = + Filename.concat (Filename.concat Config.results_dir Config.attributes_dir_name) dir in + Multilinks.write multilinks ~dir:attributes_dir in + StringHash.iter do_dir multilinks_dir_table + + (** Create symbolic links recursively from the destination to the source. Replicate the structure of the source directory in the destination, with files replaced by links to the source. *) @@ -75,18 +124,9 @@ let rec slink ~stats ~skiplevels src dst = items end else if skiplevels > 0 then () - else - begin - if link_exists dst then Unix.unlink dst; - Unix.symlink src dst; - (* Set the accessed and modified time of the original file slightly in the past. 
Due to - the coarse precision of the timestamps, it is possible for the source and destination of a - link to have the same modification time. When this happens, the files will be considered to - need re-analysis every time, indefinitely. *) - let near_past = Unix.gettimeofday () -. 1. in - Unix.utimes src near_past near_past; - stats.files_linked <- stats.files_linked + 1; - end + else if use_multilinks && Filename.check_suffix dst ".attr" + then add_multilink_attr ~stats src dst + else create_link ~stats src dst (** Determine if the destination should link to the source. To check if it was linked before, check if all the captured source files @@ -164,9 +204,11 @@ let process_merge_file deps_file = Option.may (fun lines -> IList.iter process_line lines) (read_file deps_file); + create_multilinks (); L.stdout "Captured results merged.@."; L.stdout "Targets merged: %d@." stats.targets_merged; - L.stdout "Files linked: %d@." stats.files_linked + L.stdout "Files linked: %d@." stats.files_linked; + L.stdout "Files multilinked: %d@." stats.files_multilinked let merge_captured_targets () = diff --git a/infer/src/base/Multilinks.re b/infer/src/base/Multilinks.re new file mode 100644 index 000000000..0d813f572 --- /dev/null +++ b/infer/src/base/Multilinks.re @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ +open! 
Utils; + +let module F = Format; + +let module L = Logging; + +let multilink_file_name = "multilink.txt"; + +type t = StringHash.t string; + +let add multilinks fname => StringHash.replace multilinks (Filename.basename fname) fname; + +let create () :t => StringHash.create 1; + +/* Cache of multilinks files read from disk */ +let multilink_files_cache = StringHash.create 1; + +let reset_cache () => StringHash.reset multilink_files_cache; + +let read dir::dir :option t => { + let multilink_fname = Filename.concat dir multilink_file_name; + switch (Utils.read_file multilink_fname) { + | None => None + | Some lines => + let links = create (); + IList.iter (fun line => StringHash.add links (Filename.basename line) line) lines; + StringHash.add multilink_files_cache dir links; + Some links + } +}; + +/* Write a multilink file in the given directory */ +let write multilinks dir::dir => { + let fname = Filename.concat dir multilink_file_name; + let outc = open_out fname; + StringHash.iter (fun _ src => output_string outc (src ^ "\n")) multilinks; + close_out outc +}; + +let lookup dir::dir => + try (Some (StringHash.find multilink_files_cache dir)) { + | Not_found => read dir::dir + }; + +let resolve fname => { + let fname_s = DB.filename_to_string fname; + if (Sys.file_exists fname_s) { + fname + } else { + let base = Filename.basename fname_s; + let dir = Filename.dirname fname_s; + switch (lookup dir::dir) { + | None => fname + | Some links => + try (DB.filename_from_string (StringHash.find links base)) { + | Not_found => fname + } + } + } +}; diff --git a/infer/src/base/Multilinks.rei b/infer/src/base/Multilinks.rei new file mode 100644 index 000000000..d7b463d90 --- /dev/null +++ b/infer/src/base/Multilinks.rei @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2015 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. 
An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ +open! Utils; + +let module F = Format; + +let module L = Logging; + + +/** In-memory representation of multilink files. */ +type t; + + +/** Add a link. */ +let add: t => string => unit; + + +/** Create a new multilink. */ +let create: unit => t; + + +/** Name of the multilink file. + A multilink file is recognized by its file name. */ +let multilink_file_name: string; + + +/** Read a multilink file from disk. */ +let read: dir::string => option t; + + +/** Resolve a filename following multilinks. + The cache is updated if a new multilinks file is read. */ +let resolve: DB.filename => DB.filename; + + +/** Reset the cache of multilink files */ +let reset_cache: unit => unit; + + +/** Write a multilink file in the given directory */ +let write: t => dir::string => unit; diff --git a/infer/src/base/Utils.ml b/infer/src/base/Utils.ml index 4de908e4f..104c44376 100644 --- a/infer/src/base/Utils.ml +++ b/infer/src/base/Utils.ml @@ -107,6 +107,14 @@ module IntSet = let compare = int_compare end) +(** Hash table over strings *) +module StringHash = Hashtbl.Make ( + struct + type t = string + let equal (s1: string) (s2: string) = s1 = s2 + let hash = Hashtbl.hash + end) + (** Set of strings *) module StringSet = Set.Make(String) diff --git a/infer/src/base/Utils.mli b/infer/src/base/Utils.mli index 567ea7a86..e51daf84d 100644 --- a/infer/src/base/Utils.mli +++ b/infer/src/base/Utils.mli @@ -90,6 +90,9 @@ val int_of_bool : bool -> int (** Set of integers *) module IntSet : Set.S with type elt = int +(** Hash table over strings *) +module StringHash : Hashtbl.S with type key = string + (** Set of strings *) module StringSet : Set.S with type elt = string