[Merge] Use multilink files to speed up merge with buck projects.

Summary:
Merging the results directories of targets on buck projects involved creating symbolic links into buck-out.
The bulk of files are .attr files: one per procedure. Creating these links can be a bottleneck, and the merge phase can be slower than the analysis phases on projects with many procedures.
This diff introduces multilinks to speed up merge.
A multilink is a file `multilink.txt` containing a sequence of paths
```
path/to/file1.ext
path/to/file2.ext
...
```
A multilink file is a compact way to represent a link for each entry.

This diff creates a multilink file for each `attributes/dir` directory, instead of one symbolic link for each file.

Reviewed By: jberdine

Differential Revision: D4067428

fbshipit-source-id: 911f8a9
master
Cristiano Calcagno 8 years ago committed by Facebook Github Bot
parent a31658a9b8
commit 47c623ff51

@ -43,7 +43,9 @@ let res_dir_attr_filename defined::defined pname => {
/* Load the proc attribute for the defined filename if it exists, /* Load the proc attribute for the defined filename if it exists,
otherwise try to load the declared filename. */ otherwise try to load the declared filename. */
let load_defined_first proc_name => { let load_defined_first proc_name => {
let attributes_file defined => res_dir_attr_filename defined::defined proc_name; let attributes_file defined => Multilinks.resolve (
res_dir_attr_filename defined::defined proc_name
);
let attr = Serialization.from_file serializer (attributes_file true); let attr = Serialization.from_file serializer (attributes_file true);
attr != None ? attr : Serialization.from_file serializer (attributes_file false) attr != None ? attr : Serialization.from_file serializer (attributes_file false)
}; };

@ -127,11 +127,6 @@ let idlist_to_idset ids => IList.fold_left (fun set id => IdentSet.add id set) I
/** {2 Conversion between Names and Strings} */ /** {2 Conversion between Names and Strings} */
let module StringHash = Hashtbl.Make {
type t = string;
let equal (s1: string) (s2: string) => s1 == s2;
let hash = Hashtbl.hash;
};
let module NameHash = Hashtbl.Make { let module NameHash = Hashtbl.Make {
type t = name; type t = name;

@ -14,6 +14,8 @@ module F = Format
(** Module to merge the results of capture for different buck targets. *) (** Module to merge the results of capture for different buck targets. *)
let use_multilinks = true
(** Flag to control whether the timestamp of symbolic links (** Flag to control whether the timestamp of symbolic links
is used to determine whether a captured directory needs to be merged. *) is used to determine whether a captured directory needs to be merged. *)
let check_timestamp_of_symlinks = true let check_timestamp_of_symlinks = true
@ -41,12 +43,14 @@ let debug = 0
type stats = type stats =
{ {
mutable files_linked: int; mutable files_linked: int;
mutable files_multilinked: int;
mutable targets_merged: int; mutable targets_merged: int;
} }
let empty_stats () = let empty_stats () =
{ {
files_linked = 0; files_linked = 0;
files_multilinked = 0;
targets_merged = 0; targets_merged = 0;
} }
@ -56,6 +60,51 @@ let link_exists s =
true true
with Unix.Unix_error _ -> false with Unix.Unix_error _ -> false
(* Table mapping directories to multilinks.
   Used for the hashed directories where attribute files are stored.
   Keys are directory base names (see add_multilink_attr), not full paths. *)
let multilinks_dir_table = StringHash.create 16
(* Record [src] as a multilink entry for the attributes directory containing [dst].
   Nothing is written to disk here: entries accumulate in multilinks_dir_table,
   keyed by the base name of that directory, and create_multilinks emits the files.
   Increments [stats.files_multilinked]. *)
let add_multilink_attr ~stats src dst =
  let attr_dir = Filename.dirname dst in
  let key = Filename.basename attr_dir in
  (* First time this directory is seen: seed the entry from the multilink file
     already on disk, if any (incremental merge), otherwise start empty. *)
  let register_entry () =
    let entry = match Multilinks.read ~dir:attr_dir with
      | Some on_disk -> on_disk
      | None -> Multilinks.create () in
    StringHash.add multilinks_dir_table key entry;
    entry in
  let entry =
    try StringHash.find multilinks_dir_table key
    with Not_found -> register_entry () in
  Multilinks.add entry src;
  stats.files_multilinked <- stats.files_multilinked + 1
(* Make [dst] a symbolic link to [src], replacing any link already at [dst],
   and increment [stats.files_linked]. *)
let create_link ~stats src dst =
  let () = if link_exists dst then Unix.unlink dst in
  let () = Unix.symlink src dst in
  (* Push the access/modification time of the link source one second into the past.
     Timestamps are coarse, so source and link could otherwise share the same
     modification time, which would make the files look in need of re-analysis
     on every run, indefinitely. *)
  let one_second_ago = Unix.gettimeofday () -. 1. in
  Unix.utimes src one_second_ago one_second_ago;
  stats.files_linked <- succ stats.files_linked
(* Write one multilink file per entry of multilinks_dir_table, each inside the
   matching sub-directory of the attributes directory of the results dir. *)
let create_multilinks () =
  let attributes_root =
    Filename.concat Config.results_dir Config.attributes_dir_name in
  StringHash.iter
    (fun dir_name multilinks ->
       Multilinks.write multilinks ~dir:(Filename.concat attributes_root dir_name))
    multilinks_dir_table
(** Create symbolic links recursively from the destination to the source. (** Create symbolic links recursively from the destination to the source.
Replicate the structure of the source directory in the destination, Replicate the structure of the source directory in the destination,
with files replaced by links to the source. *) with files replaced by links to the source. *)
@ -75,18 +124,9 @@ let rec slink ~stats ~skiplevels src dst =
items items
end end
else if skiplevels > 0 then () else if skiplevels > 0 then ()
else else if use_multilinks && Filename.check_suffix dst ".attr"
begin then add_multilink_attr ~stats src dst
if link_exists dst then Unix.unlink dst; else create_link ~stats src dst
Unix.symlink src dst;
(* Set the accessed and modified time of the original file slightly in the past. Due to
the coarse precision of the timestamps, it is possible for the source and destination of a
link to have the same modification time. When this happens, the files will be considered to
need re-analysis every time, indefinitely. *)
let near_past = Unix.gettimeofday () -. 1. in
Unix.utimes src near_past near_past;
stats.files_linked <- stats.files_linked + 1;
end
(** Determine if the destination should link to the source. (** Determine if the destination should link to the source.
To check if it was linked before, check if all the captured source files To check if it was linked before, check if all the captured source files
@ -164,9 +204,11 @@ let process_merge_file deps_file =
Option.may Option.may
(fun lines -> IList.iter process_line lines) (fun lines -> IList.iter process_line lines)
(read_file deps_file); (read_file deps_file);
create_multilinks ();
L.stdout "Captured results merged.@."; L.stdout "Captured results merged.@.";
L.stdout "Targets merged: %d@." stats.targets_merged; L.stdout "Targets merged: %d@." stats.targets_merged;
L.stdout "Files linked: %d@." stats.files_linked L.stdout "Files linked: %d@." stats.files_linked;
L.stdout "Files multilinked: %d@." stats.files_multilinked
let merge_captured_targets () = let merge_captured_targets () =

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
open! Utils;
let module F = Format;
let module L = Logging;
/* Name of the file that stores the multilinks of a directory. */
let multilink_file_name = "multilink.txt";
/* In-memory multilinks: maps the base name of a file to the path it links to. */
type t = StringHash.t string;
/* Add a link to [fname], keyed by its base name.
   A link previously stored under the same base name is replaced. */
let add multilinks fname => StringHash.replace multilinks (Filename.basename fname) fname;
/* Create an empty set of multilinks. */
let create () :t => StringHash.create 1;
/* Cache of multilinks files read from disk, keyed by the directory passed to [read] */
let multilink_files_cache = StringHash.create 1;
/* Forget every multilink file cached so far. */
let reset_cache () => StringHash.reset multilink_files_cache;
/* Read the multilink file of directory [dir].
   Returns [None] when [Utils.read_file] yields nothing for that file; otherwise
   returns the links it lists (one path per line, keyed by base name) and records
   them in the cache under [dir]. */
let read dir::dir :option t => {
  let multilink_fname = Filename.concat dir multilink_file_name;
  switch (Utils.read_file multilink_fname) {
  | None => None
  | Some lines =>
    let links = create ();
    /* Insert through [add] so that reading follows the same replace-on-duplicate
       policy as adding: a repeated base name keeps a single binding instead of
       stacking shadowed ones that [write] would emit again. */
    IList.iter (add links) lines;
    /* Replace rather than add: reading the same directory twice must not pile up
       stale tables behind the current cache entry. */
    StringHash.replace multilink_files_cache dir links;
    Some links
  }
};
/* Write a multilink file in the given directory.
   One linked path is written per line, in the iteration order of the table.
   The output channel is closed on the failure path too, so an error while
   writing does not leak a file descriptor; the original exception is re-raised. */
let write multilinks dir::dir => {
  let fname = Filename.concat dir multilink_file_name;
  let outc = open_out fname;
  let write_links () => {
    StringHash.iter (fun _ src => output_string outc (src ^ "\n")) multilinks;
    close_out outc
  };
  try (write_links ()) {
  | exn =>
    /* Best-effort close: never mask the error that brought us here. */
    close_out_noerr outc;
    raise exn
  }
};
/* Return the multilinks of [dir]: served from the cache when present,
   otherwise read from disk via [read]. */
let lookup dir::dir => {
  if (StringHash.mem multilink_files_cache dir) {
    Some (StringHash.find multilink_files_cache dir)
  } else {
    read dir::dir
  }
};
/* Resolve [fname] through multilinks.
   A file that exists on disk resolves to itself. Otherwise the multilinks of its
   directory are consulted and, if they hold an entry for its base name, the linked
   path is returned. In every other case [fname] is returned unchanged. */
let resolve fname => {
  let path = DB.filename_to_string fname;
  if (not (Sys.file_exists path)) {
    let dir = Filename.dirname path;
    switch (lookup dir::dir) {
    | Some links =>
      let key = Filename.basename path;
      try (DB.filename_from_string (StringHash.find links key)) {
      | Not_found => fname
      }
    | None => fname
    }
  } else {
    fname
  }
};

@ -0,0 +1,47 @@
/*
* Copyright (c) 2015 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
open! Utils;
let module F = Format;
let module L = Logging;
/** In-memory representation of multilink files:
    a table from the base name of a file to the path it links to. */
type t;
/** Add a link to the given path, keyed by the path's base name.
    A link already stored under the same base name is replaced. */
let add: t => string => unit;
/** Create a new, empty multilink. */
let create: unit => t;
/** Name of the multilink file.
A multilink file is recognized by its file name. */
let multilink_file_name: string;
/** Read the multilink file of directory [dir] from disk.
    Returns [None] when the file content cannot be obtained (presumably when the
    file is missing or unreadable; this depends on [Utils.read_file]).
    On success the result is also stored in the cache used by [resolve]. */
let read: dir::string => option t;
/** Resolve a filename following multilinks.
A file that exists on disk, or that has no multilink entry, is returned unchanged.
The cache is updated if a new multilinks file is read. */
let resolve: DB.filename => DB.filename;
/** Reset the cache of multilink files */
let reset_cache: unit => unit;
/** Write a multilink file in the given directory, one linked path per line */
let write: t => dir::string => unit;

@ -107,6 +107,14 @@ module IntSet =
let compare = int_compare let compare = int_compare
end) end)
(** Hash tables keyed by strings: keys are compared by content and hashed
    with the generic [Hashtbl.hash]. *)
module StringHash = Hashtbl.Make (
  struct
    type t = string
    let hash (key: t) = Hashtbl.hash key
    let equal (left: t) (right: t) = String.compare left right = 0
  end)
(** Set of strings *) (** Set of strings *)
module StringSet = Set.Make(String) module StringSet = Set.Make(String)

@ -90,6 +90,9 @@ val int_of_bool : bool -> int
(** Set of integers *) (** Set of integers *)
module IntSet : Set.S with type elt = int module IntSet : Set.S with type elt = int
(** Hash table over strings *)
module StringHash : Hashtbl.S with type key = string
(** Set of strings *) (** Set of strings *)
module StringSet : Set.S with type elt = string module StringSet : Set.S with type elt = string

Loading…
Cancel
Save