[clang] reimplement fuzzy matching of list of qualifiers

Summary:
Reimplement whitelists as a match against a single regexp. This makes it possible to
precompile the whitelist into one regexp, so that checking a name against a whitelist of fuzzy
qualifiers is fast, instead of requiring a number of checks linear in the size of the whitelist.

Reviewed By: akotulski

Differential Revision: D4588278

fbshipit-source-id: 3bac614
master
Jules Villard 8 years ago committed by Facebook Github Bot
parent 7e1f1f9101
commit f1698f3816

@ -575,51 +575,11 @@ let module Set = Caml.Set.Make {
/** Print every proc name of [set] on [fmt], each one followed by a single space */
let pp_set fmt set => {
  let pp_one pname => F.fprintf fmt "%a " pp pname;
  Set.iter pp_one set
};
/** Whether [qualifiers] matches [fuzzy_qualifiers] once template arguments are ignored. When
[fuzzy_qualifiers] starts with "std", [qualifiers] only has to start with "std" and end with the
remaining fuzzy qualifiers; otherwise both lists must be equal element by element. */
let fuzzy_qualifiers_equal fuzzy_qualifiers::fuzzy_qualifiers qualifiers => {
let qual_equal q1 q2 => {
/* qual_name may have qualifiers with template parameters -
drop them to whitelist all instantiations */
let no_template_name s => List.hd_exn (String.split on::'<' s);
String.equal (no_template_name q1) (no_template_name q2)
/** Qualifiers of a proc name: the split qualified name for [C], the split class name followed by
the method name for [ObjC_Cpp], and the empty list for every other kind of proc name. */
let get_qualifiers pname =>
switch pname {
| C c => fst c |> QualifiedCppName.qualifiers_of_qual_name
| ObjC_Cpp objc_cpp =>
List.append
(QualifiedCppName.qualifiers_of_qual_name objc_cpp.class_name) [objc_cpp.method_name]
| _ => []
};
let is_std_qual = String.equal "std";
switch fuzzy_qualifiers {
| [first, ...rest] when is_std_qual first =>
/* add special handling for std:: namespace to avoid problems with inconsistent
inline namespaces (such as __1 in libc++) */
/* both lists are reversed so that the prefix test compares the trailing qualifiers */
List.hd qualifiers |> Option.value_map default::false f::is_std_qual &&
List.is_prefix (List.rev qualifiers) prefix::(List.rev rest) equal::qual_equal
| _ => List.equal equal::qual_equal fuzzy_qualifiers qualifiers
}
};
/* Split a qualified name on "::". The split knows nothing about templates, so it gives the wrong
   answer in some cases, eg "foo<bar::baz<goo>>::someMethod" is split into ["foo<bar",
   "baz<goo>>", "someMethod"]. Ideally, we would keep the list of qualifiers in the procname,
   which would save us from having to properly parse them.

   The separator regexp is an argument of a partial application, so it is built only once, when
   this binding is evaluated. */
let qualifiers_of_qual_name = Str.split (Str.regexp_string "::");
/* Split a fuzzy qualified name into its qualifiers. A name containing '<' is rejected: template
   instantiations are not taken into account when fuzzy matching, and templates may produce wrong
   results when parsing qualified names. */
let qualifiers_of_fuzzy_qual_name qual_name => {
  let has_template_marker = String.contains qual_name '<';
  if (has_template_marker) {
    failwithf "Unexpected template in fuzzy qualified name %s." qual_name
  };
  qualifiers_of_qual_name qual_name
};
/* Fuzzily compare the qualifiers of [pname] with [fuzzy_qualifiers]. Proc names that are neither
   [C] nor [ObjC_Cpp] are treated as having no qualifiers at all. */
let fuzzy_equal fuzzy_qualifiers::fuzzy_qualifiers pname => {
  let split = qualifiers_of_qual_name;
  let pname_qualifiers =
    switch pname {
    | C c => split (fst c)
    | ObjC_Cpp objc_cpp => List.append (split objc_cpp.class_name) [objc_cpp.method_name]
    | _ => []
    };
  fuzzy_qualifiers_equal fuzzy_qualifiers::fuzzy_qualifiers pname_qualifiers
};

@ -276,37 +276,4 @@ let to_unique_id: t => string;
/** Convert a proc name to a filename. */
let to_filename: t => string;
/** Return whether two qualified C++ procnames match up to namespaces and templating. In particular,
this deals with the following issues:
1. 'std::' namespace may have inline namespace afterwards: std::move becomes std::__1::move. This
happens on libc++ and to some extent on libstdc++. To work around this problem, make matching
against 'std::' fuzzier: std::X::Y::Z will match std::.*::X::Y::Z (but only for the
'std' namespace).
2. The names are allowed not to commit to a template specialization: we want std::move to match
std::__1::move<const X&> and std::__1::move<int>. To do so, comparison function for qualifiers
will ignore template specializations.
For example:
["std", "move"]:
matches: ["std", "blah", "blah<int>","move"]
does not match: ["std","blah", "move", "BAD"] - we don't want std::.*::X::.* to pass
does not match: ["stdBAD", "move"], - it's not std namespace anymore
["folly", "someFunction"]
matches: ["folly","someFunction"]
matches: ["folly","someFunction<int>"]
matches: ["folly<int>","someFunction"]
does not match: ["folly", "BAD", "someFunction"] - unlike 'std' any other namespace needs all
qualifiers to match
does not match: ["folly","someFunction<int>", "BAD"] - same as previous example
*/
let fuzzy_equal: fuzzy_qualifiers::list string => t => bool;
/** fuzzy comparison of two lists of qualifiers: template arguments are ignored, and a leading
"std" fuzzy qualifier tolerates extra qualifiers right after it */
let fuzzy_qualifiers_equal: fuzzy_qualifiers::list string => list string => bool;
/** parse the argument into a list::of::qualifiers::without::templates; fails if the argument
contains '<' */
let qualifiers_of_fuzzy_qual_name: string => list string;
/** qualifiers of a proc name: the qualifiers of the name for C functions, the qualifiers of the
class followed by the method name for ObjC/C++ methods, and the empty list otherwise */
let get_qualifiers: t => list string;

@ -0,0 +1,65 @@
/*
* Copyright (c) 2017 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
open! IStd;
/* a whitelist of fuzzy qualified names, compiled into a single regexp */
type quals_matcher = Str.regexp;
/* Source text of the regexp recognising one fuzzy name, given as its list of qualifiers. The
   regexp is meant to be matched against qualifiers joined with "::" and is anchored at the end. */
let regexp_string_of_qualifiers quals => {
  /* the given qualifiers, taken literally, followed by the end of the name */
  let exact_suffix qs => Str.quote (String.concat sep::"::" qs) ^ "$";
  switch quals {
  | ["std", ...[_, ..._] as tail] =>
    /* special handling for the std:: namespace to avoid problems with inconsistent inline
       namespaces (such as __1 in libc++): accept at most one extra qualifier after "std" */
    Str.quote "std" ^ "\\(::[^:]*\\)?::" ^ exact_suffix tail
  | _ => exact_suffix quals
  }
};
/* Compile a list of fuzzy names (each one a list of qualifiers) into one regexp that is the
   alternation of the regexps of the individual names. */
let qualifiers_list_matcher quals_list => {
  let regexp_source =
    switch quals_list {
    | [] => "a^" /* regexp that does not match anything */
    | _ => String.concat sep::"\\|" (List.map f::regexp_string_of_qualifiers quals_list)
    };
  Str.regexp regexp_source
};
/* Whether the name made of [quals] is recognised by [matcher]. The match is attempted at the
   start of the "::"-joined qualifiers. */
let match_qualifiers matcher quals => {
  /* qualifiers may carry template parameters: keep only what precedes the first '<' so that all
     instantiations are whitelisted */
  let strip_template_args qual => List.hd_exn (String.split on::'<' qual);
  let joined_name = quals |> List.map f::strip_template_args |> String.concat sep::"::";
  Str.string_match matcher joined_name 0
};
/* Cut a qualified name at every "::". This is simplistic and will give the wrong answer in some
   cases, eg "foo<bar::baz<goo>>::someMethod" will get parsed as ["foo<bar", "baz<goo>>",
   "someMethod"]. Ideally, we would keep the list of qualifiers in the procname, which would save
   us from having to properly parse them.

   The pipeline below runs once, when this binding is evaluated, so the separator regexp is only
   computed once; what is bound is the resulting splitting function. */
let qualifiers_of_qual_name = "::" |> Str.regexp_string |> Str.split;
/* Qualifiers of a fuzzy qualified name. Fails if the name contains '<': template instantiations
   are not taken into account when fuzzy matching, and templates may produce wrong results when
   parsing qualified names. */
let qualifiers_of_fuzzy_qual_name qual_name => {
  switch (String.contains qual_name '<') {
  | true => failwithf "Unexpected template in fuzzy qualified name %s." qual_name
  | false => ()
  };
  qualifiers_of_qual_name qual_name
};
/* Build the matcher for a whitelist given as fuzzy qualified names: each name is first split
   into qualifiers, then the whole list is compiled into a single regexp. */
let quals_matcher_of_fuzzy_qual_names fuzzy_qual_names => {
  let qualifiers_list = List.map f::qualifiers_of_fuzzy_qual_name fuzzy_qual_names;
  qualifiers_list_matcher qualifiers_list
};

@ -0,0 +1,46 @@
/*
* Copyright (c) 2017 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
open! IStd;
/* Module to match qualified C++ procnames "fuzzily", that is up to namespaces and templating. In
particular, this deals with the following issues:
1. 'std::' namespace may have inline namespace afterwards: std::move becomes std::__1::move. This
happens on libc++ and to some extent on libstdc++. To work around this problem, make matching
against 'std::' fuzzier: std::X::Y::Z will also match std::W::X::Y::Z, where W is any single
qualifier (but only for the 'std' namespace).
2. The names are allowed not to commit to a template specialization: we want std::move to match
std::__1::move<const X&> and std::__1::move<int>. To do so, comparison function for qualifiers
will ignore template specializations.
For example:
["std", "move"]:
matches: ["std", "blah", "move"]
matches: ["std", "blah<int>", "move"]
does not match: ["std","blah", "move", "BAD"] - we don't want std::.*::X::.* to pass
does not match: ["stdBAD", "move"], - it's not std namespace anymore
["folly", "someFunction"]
matches: ["folly","someFunction"]
matches: ["folly","someFunction<int>"]
matches: ["folly<int>","someFunction"]
does not match: ["folly", "BAD", "someFunction"] - unlike 'std' any other namespace needs all
qualifiers to match
does not match: ["folly","someFunction<int>", "BAD"] - same as previous example
*/
/** a whitelist of fuzzy qualified names, compiled for matching */
type quals_matcher;
/** build a matcher from a list of fuzzy qualified names such as "std::move"; fails if one of the
names contains '<' */
let quals_matcher_of_fuzzy_qual_names: list string => quals_matcher;
/** whether the given list of qualifiers fuzzily matches one of the names the matcher was built
from; template arguments in the qualifiers are ignored */
let match_qualifiers: quals_matcher => list string => bool;
/** attempts to parse the argument into a list::of::possibly::templated<T>::qualifiers */
let qualifiers_of_qual_name: string => list string;

@ -14,14 +14,11 @@ module L = Logging
module GlobalsAccesses = SiofTrace.GlobalsAccesses
(* one list of qualifiers per entry of [Config.siof_safe_methods] *)
let whitelisted_models =
  Config.siof_safe_methods |> List.map ~f:Procname.qualifiers_of_fuzzy_qual_name
(* matcher for the fuzzy names listed in [Config.siof_safe_methods], built once *)
let methods_whitelist =
  Config.siof_safe_methods |> QualifiedCppName.quals_matcher_of_fuzzy_qual_names
(* Whether [pname] matches one of the fuzzy names listed in [Config.siof_safe_methods] *)
let is_whitelisted (pname : Procname.t) =
(* This is linear in the number of whitelisted models, which is not good if there are many
models... *)
List.exists whitelisted_models
~f:(fun fuzzy_qualifiers -> Procname.fuzzy_equal pname ~fuzzy_qualifiers)
(* the qualifiers of [pname] are checked against the precompiled matcher *)
Procname.get_qualifiers pname
|> QualifiedCppName.match_qualifiers methods_whitelist
module Summary = Summary.Make (struct
type summary = SiofDomain.astate

@ -137,21 +137,22 @@ struct
(* Run [process_one_method_decl] on every declaration of [decl_list], in order, always with the
   same context arguments *)
let process_methods trans_unit_ctx tenv cg cfg curr_class decl_list =
  let process_decl = process_one_method_decl trans_unit_ctx tenv cg cfg curr_class in
  IList.iter process_decl decl_list
(* Whether [qual_name], a list of qualifiers, fuzzily equals at least one entry of [whitelist] *)
let is_whitelisted_qual_name qual_name whitelist =
  let matches fuzzy_qualifiers = Procname.fuzzy_qualifiers_equal ~fuzzy_qualifiers qual_name in
  List.exists whitelist ~f:matches
(** Given REVERSED list of method qualifiers (method_name::class_name::rest_quals), return
whether method should be translated based on method and class whitelists *)
let is_whitelisted_cpp_method =
(* everything bound before the [fun] below is computed once, when this value is defined, and is
   shared by all calls *)
let method_whitelist =
List.map ~f:Procname.qualifiers_of_fuzzy_qual_name Config.whitelisted_cpp_methods in
let class_whitelist =
List.map ~f:Procname.qualifiers_of_fuzzy_qual_name Config.whitelisted_cpp_classes in
let method_matcher =
QualifiedCppName.quals_matcher_of_fuzzy_qual_names Config.whitelisted_cpp_methods in
let class_matcher =
QualifiedCppName.quals_matcher_of_fuzzy_qual_names Config.whitelisted_cpp_classes in
fun qual_method_rev ->
(* the method is either explicitly whitelisted, or all methods of its class are whitelisted *)
is_whitelisted_qual_name (List.rev qual_method_rev) method_whitelist ||
is_whitelisted_qual_name (List.tl_exn qual_method_rev |> List.rev) class_whitelist
(* either the method is explicitly whitelisted, or the whole class is whitelisted *)
QualifiedCppName.match_qualifiers method_matcher (List.rev qual_method_rev) ||
match qual_method_rev with
| _::(_::_ as class_name_rev) ->
(* make sure the class name is not empty; in particular, it cannot be a C function *)
QualifiedCppName.match_qualifiers class_matcher (List.rev class_name_rev)
| _ ->
false
let should_translate_decl trans_unit_ctx dec decl_trans_context =
let info = Clang_ast_proj.get_decl_tuple dec in

@ -10,4 +10,5 @@ open! IStd
(* test suites gathered from the [tests] value of each listed test module *)
let tests = [
CiOSVersionNumbersTests.tests;
QualifiedCppNameTests.tests;
]

@ -0,0 +1,133 @@
(*
* Copyright (c) 2017 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*)
open! IStd
open OUnit2
(** Table-driven tests of fuzzy matching. Each case is
    (test name, whitelist of fuzzy qualified names, qualifiers to match, expected result):
    a matcher is built from the whitelist and run on the qualifiers. *)
let test_fuzzy_match =
  let create_test fuzzy_qual_names qualifiers expected_match _ =
    let matcher = QualifiedCppName.quals_matcher_of_fuzzy_qual_names fuzzy_qual_names in
    let output = QualifiedCppName.match_qualifiers matcher qualifiers in
    (* the printer lets a failure report show the expected and actual booleans *)
    assert_equal ~cmp:Bool.equal ~printer:Bool.to_string expected_match output in
  (* whitelists shared by several cases *)
  let simple_names = ["foo::bar::baz"; "foo::baz"; "goo::goo"] in
  let std_names = ["std::foo"] in
  [
    ("test_simple_match1", simple_names, ["foo"; "baz"], true);
    ("test_simple_match2", simple_names, ["foo"; "bar"; "baz"], true);
    ("test_simple_match3", simple_names, ["goo"; "goo"], true);
    ("test_no_simple_match1", simple_names, ["foo"; "bar"], false);
    ("test_no_simple_match2", simple_names, ["goo"; "foo"], false);
    ("test_no_simple_match3", simple_names, ["moo"], false);
    ("test_no_simple_match4", simple_names, ["foo"; "bar"; "baz"; "bad"], false);
    ("test_no_simple_match5", simple_names, ["foo"; "bad"; "bar"; "baz"], false);
    (
      "test_template_match",
      ["foo::bar::baz"],
      ["foo"; "bar<goo::moo<int,std::string>,const X&>"; "baz<int>"],
      true
    );
    ("test_std_direct_match", std_names, ["std"; "foo"], true);
    ("test_std_direct_no_match1", std_names, ["std"; "goo"], false);
    ("test_std_direct_no_match2", std_names, ["std"; "foo"; "bad"], false);
    ("test_std_direct_no_match3", std_names, ["stdBAD"; "foo"], false);
    ("test_std_fuzzy_match1", std_names, ["std"; "__1"; "foo"], true);
    ("test_std_fuzzy_match2", std_names, ["std"; "goo<int>"; "foo"], true);
    ("test_std_fuzzy_match3", std_names, ["std"; "goo<int>"; "foo<const X&>"], true);
    ("test_std_fuzzy_no_match1", std_names, ["std"; "__1"; "__2"; "foo"], false);
    ("test_std_fuzzy_no_match2", std_names, ["std"; "__1"; "foo"; "bad"], false);
    (* an empty whitelist must not match anything *)
    ("test_empty_whitelist", [], ["foo"], false);
  ]
  |> List.map
    ~f:(fun (name, fuzzy_qual_names, qualifiers, expected_output) ->
        name >:: create_test fuzzy_qual_names qualifiers expected_output)
(* all the fuzzy matching cases, grouped under one named suite *)
let tests = "qualified_cpp_name_fuzzy_match" >::: test_fuzzy_match

@ -0,0 +1,11 @@
(*
* Copyright (c) 2017 - present Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*)
open! IStd
(** the test suite exported by this module *)
val tests: OUnit2.test
Loading…
Cancel
Save