[buck/java2] hashcons the global tenv during merging

Summary:
One "interesting" feature of the approach of merging the captured targets in Java, is that we union their type environments, as opposed to store partial tenvs together with each source file, which is the case for Clang.

This means
- the final global type environment is potentially huge because it contains all the types in all targets.
- all analysis workers start by loading that tenv in memory, meaning we consume `|size of tenv| x #cpus` memory, which can tip the balance towards OOMs

This diff attempts to economise on global tenv size. This is done by increasing sharing which is then preserved by marshalling.  It's done in a brute force way, with hashtables for each struct component, and is not fully effective due to the recursion amongst types and types names, as well types appearing inside other constructs such as procnames.

This is done when calling `Tenv.store` so that
- the computation can be parallelised somewhat (capture is parallel, merging is not)
- buck caching will benefit from smaller tenvs.

This saves about 24% of total memory devoted to the type environment.

Reviewed By: mbouaziz

Differential Revision: D15840054

fbshipit-source-id: 6f03be1a4
master
Nikos Gorogiannis 6 years ago committed by Facebook Github Bot
parent 8776a31f7d
commit 013d153538

@ -19,6 +19,10 @@ type t =
; parameters: parameters (** currently only one string parameter *) }
[@@deriving compare]
let equal = [%compare.equal: t]
let hash = Hashtbl.hash
let volatile = {class_name= "volatile"; parameters= []}
let final = {class_name= "final"; parameters= []}

@ -19,6 +19,10 @@ type t =
; parameters: parameters (** currently only one string parameter *) }
[@@deriving compare]
val equal : t -> t -> bool
val hash : t -> int
val volatile : t
(** annotation for fields marked with the "volatile" keyword *)

@ -10,14 +10,8 @@ module L = Logging
(** Module for Type Environments. *)
(** Hash tables on strings. *)
module TypenameHash = Hashtbl.Make (struct
type t = Typ.Name.t
let equal tn1 tn2 = Typ.Name.equal tn1 tn2
let hash = Hashtbl.hash
end)
(** Hash tables on type names. *)
module TypenameHash = Hashtbl.Make (Typ.Name)
(** Type for type environment. *)
type t = Typ.Struct.t TypenameHash.t
@ -163,8 +157,143 @@ let store_to_filename tenv tenv_filename =
if Config.debug_mode then store_debug_file tenv tenv_filename
(** Use a prime for the initial hashtable size, since, typically, we expect a lot of types.
Prime sizes make hash functions happier across table resizes. *)
let medium_size_prime = 1003
module type HashconsS = sig
module Hashtable : Caml.Hashtbl.S
val reset : unit -> unit
val hashcons : Hashtable.key -> Hashtable.key
end
module Hashcons (H : Caml.Hashtbl.S) : HashconsS with module Hashtable = H = struct
module Hashtable = H
let reset, hashcons =
let tbl : H.key H.t = H.create medium_size_prime in
( (fun () -> H.reset tbl)
, fun x -> match H.find_opt tbl x with Some x' -> x' | None -> H.add tbl x x ; x )
end
module HashconsList (T : Caml.Hashtbl.HashedType) (H : HashconsS with type Hashtable.key = T.t) :
HashconsS with type Hashtable.key = T.t list = struct
module Hashtable = Hashtbl.Make (struct
type t = T.t list
let hash = Hashtbl.hash
let equal (x : t) (y : t) =
let open Polymorphic_compare in
x = y
end)
let reset, hashcons =
let tbl : T.t list Hashtable.t = Hashtable.create medium_size_prime in
( (fun () -> Hashtable.reset tbl)
, fun x ->
let x = IList.map_changed x ~equal:phys_equal ~f:H.hashcons in
match Hashtable.find_opt tbl x with Some x' -> x' | None -> Hashtable.add tbl x x ; x )
end
module PnameHC = Hashcons (Typ.Procname.Hash)
module PnameListHC = HashconsList (Typ.Procname) (PnameHC)
module TnameHC = Hashcons (TypenameHash)
module TnameListHC = HashconsList (Typ.Name) (TnameHC)
module StringHC = Hashcons (Hashtbl.Make (String))
module StringListHC = HashconsList (String) (StringHC)
module FieldnameHC = Hashcons (Hashtbl.Make (Typ.Fieldname))
module TypHC = Hashcons (Hashtbl.Make (Typ))
module AnnotHC = struct
include Hashcons (Hashtbl.Make (Annot))
let hashcons ({class_name; parameters} : Annot.t) : Annot.t =
{class_name= StringHC.hashcons class_name; parameters= StringListHC.hashcons parameters}
end
module AnnotVis = struct
type t = Annot.t * bool [@@deriving compare]
let equal = [%compare.equal: t]
let hash = Hashtbl.hash
end
module AnnotVisHC = struct
include Hashcons (Hashtbl.Make (AnnotVis))
let hashcons (annot, visibility) = hashcons (AnnotHC.hashcons annot, visibility)
end
module AnnotItemHC = HashconsList (AnnotVis) (AnnotVisHC)
module Field = struct
type t = Typ.Fieldname.t * Typ.t * Annot.Item.t [@@deriving compare]
let equal = [%compare.equal: t]
let hash = Hashtbl.hash
end
module FieldHC = struct
include Hashcons (Hashtbl.Make (Field))
let hashcons (fieldname, typ, annot_item) =
hashcons (FieldnameHC.hashcons fieldname, TypHC.hashcons typ, AnnotItemHC.hashcons annot_item)
end
module FieldListHC = HashconsList (Field) (FieldHC)
let reset_hashtables () =
PnameHC.reset () ;
PnameListHC.reset () ;
TnameHC.reset () ;
TnameListHC.reset () ;
StringHC.reset () ;
StringListHC.reset () ;
FieldnameHC.reset () ;
TypHC.reset () ;
AnnotHC.reset () ;
AnnotVisHC.reset () ;
AnnotItemHC.reset () ;
FieldHC.reset () ;
FieldListHC.reset ()
(** Global tenv size is a problem in the genrule capture integration for java.
This function tries to improve sharing of values in the tenv, and assumes
Java data structures (it's still correct for Clangs, just not necessarily
as effective. *)
let canonicalize tenv =
reset_hashtables () ;
let result = create () in
let canonicalize_one tname
({fields; statics; supers; methods; exported_objc_methods; annots} : Typ.Struct.t) =
let tname = TnameHC.hashcons tname in
let tstruct =
Typ.Struct.internal_mk_struct ~supers:(TnameListHC.hashcons supers)
~fields:(FieldListHC.hashcons fields) ~statics:(FieldListHC.hashcons statics)
~methods:(PnameListHC.hashcons methods)
~exported_objc_methods:(PnameListHC.hashcons exported_objc_methods)
~annots:(AnnotItemHC.hashcons annots) ()
in
TypenameHash.add result tname tstruct
in
TypenameHash.iter canonicalize_one tenv ;
reset_hashtables () ;
result
let store_global tenv =
(* update in-memory global tenv for later uses by this process, e.g. in single-core mode the
frontend and backend run in the same process *)
L.debug Capture Quiet "Tenv.store: global tenv has size %d bytes.@."
(Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ;
let tenv = canonicalize tenv in
L.debug Capture Quiet "Tenv.store: canonicalized tenv has size %d bytes.@."
(Obj.(reachable_words (repr tenv)) * (Sys.word_size / 8)) ;
global_tenv := Some tenv ;
store_to_filename tenv global_tenv_path

@ -231,6 +231,8 @@ end
include T
let hash = Hashtbl.hash
let mk_type_quals ?default ?is_const ?is_restrict ?is_volatile () =
let default_ = {is_const= false; is_restrict= false; is_volatile= false} in
let mk_aux ?(default = default_) ?(is_const = default.is_const)
@ -349,6 +351,8 @@ module Name = struct
let equal = [%compare.equal: t]
let hash = Hashtbl.hash
let qual_name = function
| CStruct name | CUnion name | ObjcClass name | ObjcProtocol name ->
name
@ -1392,6 +1396,8 @@ module Fieldname = struct
let equal = [%compare.equal: t]
let hash = Hashtbl.hash
module T = struct
type nonrec t = t

@ -127,6 +127,8 @@ and template_spec_info =
; args: template_arg list }
[@@deriving compare]
val hash : t -> int
val pp_template_spec_info : Pp.env -> F.formatter -> template_spec_info -> unit [@@warning "-32"]
val mk : ?default:t -> ?quals:type_quals -> desc -> t
@ -157,6 +159,8 @@ module Name : sig
val equal : t -> t -> bool
(** Equality for typenames *)
val hash : t -> int
val to_string : t -> string
(** convert the typename to a string *)
@ -623,6 +627,9 @@ module Fieldname : sig
type t [@@deriving compare]
val equal : t -> t -> bool
val hash : t -> int
(** Equality for field names. *)
(** Set for fieldnames *)

@ -11,6 +11,7 @@ module L = Logging
(** Module to merge the results of capture for different buck targets. *)
let merge_global_tenvs infer_deps_file =
let time0 = Mtime_clock.counter () in
let global_tenv = Tenv.create () in
let merge infer_out_src =
let global_tenv_path =
@ -20,7 +21,8 @@ let merge_global_tenvs infer_deps_file =
|> Option.iter ~f:(fun tenv -> Tenv.merge ~src:tenv ~dst:global_tenv)
in
MergeResults.iter_infer_deps infer_deps_file ~f:merge ;
Tenv.store_global global_tenv
Tenv.store_global global_tenv ;
L.progress "Merging type environments took %a@." Mtime.Span.pp (Mtime_clock.count time0)
let merge_captured_targets () =

Loading…
Cancel
Save