From 5f8514a8c2300ec9e767b996c94172cd60ccc8e9 Mon Sep 17 00:00:00 2001 From: Mehdi Bouaziz Date: Fri, 21 Jun 2019 02:57:07 -0700 Subject: [PATCH] [sqlite] Normalize blobs used for comparison Summary: Using `Marshal.to_string` to create SQLite values used in comparisons is brittle as there is no guarantee that it will return the same value for structurally equal values. When adding sharing, this will definitely break. From the SQLite queries I found, only `SourceFile` and `Procname` are used in comparisons. I haven't tested performance. It shouldn't change anything for `SourceFile` as there is no possible sharing. It shouldn't change much for `Procname` as they are pretty small anyway. Reviewed By: ngorogiannis Differential Revision: D15923122 fbshipit-source-id: ce4af1fe3 --- infer/src/IR/ProcAttributes.ml | 2 +- infer/src/IR/Procdesc.ml | 2 +- infer/src/IR/Tenv.ml | 12 ++++++++---- infer/src/IR/Typ.ml | 28 +++++++++++++--------------- infer/src/base/SourceFile.ml | 15 ++++++++++----- infer/src/base/SqliteUtils.ml | 26 +++++++++++++++++++------- infer/src/base/SqliteUtils.mli | 9 +++++++-- 7 files changed, 59 insertions(+), 35 deletions(-) diff --git a/infer/src/IR/ProcAttributes.ml b/infer/src/IR/ProcAttributes.ml index 4a2c432a3..fff0b1aae 100644 --- a/infer/src/IR/ProcAttributes.ml +++ b/infer/src/IR/ProcAttributes.ml @@ -173,6 +173,6 @@ let pp f F.fprintf f "; proc_id= %s }@]" (Typ.Procname.to_unique_id proc_name) -module SQLite = SqliteUtils.MarshalledData (struct +module SQLite = SqliteUtils.MarshalledDataNOTForComparison (struct type nonrec t = t end) diff --git a/infer/src/IR/Procdesc.ml b/infer/src/IR/Procdesc.ml index b1c582e17..8f81c4dc9 100644 --- a/infer/src/IR/Procdesc.ml +++ b/infer/src/IR/Procdesc.ml @@ -867,7 +867,7 @@ let is_connected proc_desc = error -module SQLite = SqliteUtils.MarshalledNullableData (struct +module SQLite = SqliteUtils.MarshalledNullableDataNOTForComparison (struct type nonrec t = t end) diff --git a/infer/src/IR/Tenv.ml b/infer/src/IR/Tenv.ml index 10be5d4f3..3c0027f5c 100644 --- a/infer/src/IR/Tenv.ml +++ b/infer/src/IR/Tenv.ml @@ -76,6 +76,10 @@ let pp_per_file fmt = function module SQLite : SqliteUtils.Data with type t = per_file = struct + module Serializer = SqliteUtils.MarshalledDataNOTForComparison (struct + type nonrec t = t + end) + type t = per_file let global_string = "global" @@ -84,14 +88,14 @@ module SQLite : SqliteUtils.Data with type t = per_file = struct | Global -> Sqlite3.Data.TEXT global_string | FileLocal tenv -> - Sqlite3.Data.BLOB (Marshal.to_string tenv []) + Serializer.serialize tenv - let deserialize = function[@warning "-8"] + let deserialize = function | Sqlite3.Data.TEXT g when String.equal g global_string -> Global - | Sqlite3.Data.BLOB b -> - FileLocal (Marshal.from_string b 0) + | blob -> + FileLocal (Serializer.deserialize blob) end let merge ~src ~dst = TypenameHash.iter (fun pname cfg -> TypenameHash.replace dst pname cfg) src diff --git a/infer/src/IR/Typ.ml b/infer/src/IR/Typ.ml index 871fcddc9..769de621f 100644 --- a/infer/src/IR/Typ.ml +++ b/infer/src/IR/Typ.ml @@ -19,7 +19,7 @@ module IntegerWidths = struct let java = {char_width= 16; short_width= 16; int_width= 32; long_width= 64; longlong_width= 64} - module SQLite = SqliteUtils.MarshalledNullableData (struct + module SQLite = SqliteUtils.MarshalledNullableDataNOTForComparison (struct type nonrec t = t end) @@ -1359,33 +1359,31 @@ module Procname = struct let to_filename ?crc_only pname = to_concrete_filename ?crc_only pname module SQLite = struct - let pname_to_key = - Base.Hashtbl.create - ( module struct - type nonrec t = t + module T = struct + type nonrec t = t - let compare = compare + let compare = compare - let hash = hash + let hash = hash - let sexp_of_t p = Sexp.Atom (to_string p) - end ) + let sexp_of_t p = Sexp.Atom (to_string p) + end + + module Serializer = SqliteUtils.MarshalledDataForComparison (T) + let pname_to_key = Base.Hashtbl.create (module T) let serialize pname = - let default () = Sqlite3.Data.BLOB (Marshal.to_string pname []) in + let default () = Serializer.serialize pname in Base.Hashtbl.find_or_add pname_to_key pname ~default - let deserialize : Sqlite3.Data.t -> t = function[@warning "-8"] - | Sqlite3.Data.BLOB b -> - Marshal.from_string b 0 - + let deserialize = Serializer.deserialize let clear_cache () = Base.Hashtbl.clear pname_to_key end - module SQLiteList = SqliteUtils.MarshalledData (struct + module SQLiteList = SqliteUtils.MarshalledDataNOTForComparison (struct type nonrec t = t list end) end diff --git a/infer/src/base/SourceFile.ml b/infer/src/base/SourceFile.ml index 252be1307..ffb15c52e 100644 --- a/infer/src/base/SourceFile.ml +++ b/infer/src/base/SourceFile.ml @@ -180,19 +180,24 @@ let changed_sources_from_changed_files changed_files = module SQLite = struct - type nonrec t = t + module T = struct + type nonrec t = t + end + + module Serializer = SqliteUtils.MarshalledDataForComparison (T) + include T let serialize = function | RelativeProjectRoot path -> (* show the most common paths as text (for debugging, possibly perf) *) Sqlite3.Data.TEXT path | _ as x -> - Sqlite3.Data.BLOB (Marshal.to_string x []) + Serializer.serialize x - let deserialize = function[@warning "-8"] + let deserialize = function | Sqlite3.Data.TEXT rel_path -> RelativeProjectRoot rel_path - | Sqlite3.Data.BLOB b -> - Marshal.from_string b 0 + | blob -> + Serializer.deserialize blob end diff --git a/infer/src/base/SqliteUtils.ml b/infer/src/base/SqliteUtils.ml index 54cbfbf21..6ee16dac1 100644 --- a/infer/src/base/SqliteUtils.ml +++ b/infer/src/base/SqliteUtils.ml @@ -107,10 +107,25 @@ module type Data = sig val deserialize : Sqlite3.Data.t -> t end -module MarshalledData (D : sig +module type T = sig type t -end) = -struct +end + +module MarshalledDataForComparison (D : T) = struct + type t = D.t + + let deserialize = function[@warning "-8"] Sqlite3.Data.BLOB b -> Marshal.from_string b 0 + + (* + If the serialized data is used for comparison (e.g. used in WHERE clause), we need to normalize it. + Marshalling is brittle as it depends on sharing. + + For now let's suppose that marshalling with no sharing is normalizing. + *) + let serialize x = Sqlite3.Data.BLOB (Marshal.to_string x [Marshal.No_sharing]) +end + +module MarshalledDataNOTForComparison (D : T) = struct type t = D.t let deserialize = function[@warning "-8"] Sqlite3.Data.BLOB b -> Marshal.from_string b 0 @@ -118,10 +133,7 @@ struct let serialize x = Sqlite3.Data.BLOB (Marshal.to_string x []) end -module MarshalledNullableData (D : sig - type t -end) = -struct +module MarshalledNullableDataNOTForComparison (D : T) = struct type t = D.t option let deserialize = function[@warning "-8"] diff --git a/infer/src/base/SqliteUtils.mli b/infer/src/base/SqliteUtils.mli index 90c22391e..1b4a99a65 100644 --- a/infer/src/base/SqliteUtils.mli +++ b/infer/src/base/SqliteUtils.mli @@ -72,12 +72,17 @@ module type Data = sig val deserialize : Sqlite3.Data.t -> t end +(** A default implementation of the Data API that encodes every objects as marshalled blobs with no sharing *) +module MarshalledDataForComparison (D : sig + type t +end) : Data with type t = D.t + (** A default implementation of the Data API that encodes every objects as marshalled blobs *) -module MarshalledData (D : sig +module MarshalledDataNOTForComparison (D : sig type t end) : Data with type t = D.t (** A default implementation of the Data API that encodes None as a NULL SQLite value *) -module MarshalledNullableData (D : sig +module MarshalledNullableDataNOTForComparison (D : sig type t end) : Data with type t = D.t option