|
|
|
(*
|
|
|
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
|
|
*
|
|
|
|
* This source code is licensed under the MIT license found in the
|
|
|
|
* LICENSE file in the root directory of this source tree.
|
|
|
|
*)
|
|
|
|
|
|
|
|
(** sledge-buck integration *)
|
|
|
|
|
|
|
|
module Process = Shexp_process
|
|
|
|
open Process.Infix
|
|
|
|
|
|
|
|
(* directory relative to buck root containing llvm binaries *)
|
|
|
|
let llvm_bin = lazy (Option.value (Config.find "llvm-bin-dir") ~default:"")
|
|
|
|
|
|
|
|
(* buck build mode to specify an LTO build *)
|
|
|
|
let mode = lazy (Config.find_exn "buck-build-mode")
|
|
|
|
|
|
|
|
(* resolve relative path wrt root *)
|
|
|
|
let make_absolute root path =
|
|
|
|
if Filename.is_relative path then Filename.concat root path else path
|
|
|
|
|
|
|
|
(* initial working directory *)
|
|
|
|
let cwd = Unix.getcwd ()
|
|
|
|
|
|
|
|
(* query buck for root *)
|
|
|
|
let buck_root =
|
|
|
|
let open Process in
|
|
|
|
lazy
|
|
|
|
(String.strip
|
|
|
|
(eval (run "buck" ["root"; "@mode/" ^ Lazy.force mode] |- read_all)))
|
|
|
|
|
|
|
|
(* use buck root for working directory *)
|
|
|
|
let context () =
|
|
|
|
let open Process in
|
|
|
|
Context.create ~cwd:(Path (Lazy.force buck_root)) ()
|
|
|
|
|
|
|
|
(* invoke the LTO build for the target *)
|
|
|
|
let buck_build ~context target =
|
|
|
|
let open Process in
|
|
|
|
eval ~context
|
|
|
|
(run "buck"
|
|
|
|
[ "build"
|
|
|
|
; "@mode/" ^ Lazy.force mode
|
|
|
|
; "-c"
|
|
|
|
; "sledge.build=True"
|
|
|
|
; target ])
|
|
|
|
|
|
|
|
(* split a fully-qualified buck target into file and rule *)
|
|
|
|
let parse_target target =
|
|
|
|
try Scanf.sscanf target "//%s@:%s" (fun file rule -> (file, rule))
|
|
|
|
with Scanf.Scan_failure _ | End_of_file ->
|
|
|
|
fail "could not parse target: %s" target ()
|
|
|
|
|
|
|
|
(* compute the filename of the linker.argsfile for the target *)
|
|
|
|
let argsfile ~context target =
|
|
|
|
let file, rule = parse_target target in
|
|
|
|
Printf.sprintf "%s/buck-out/%s/bin/%s/%s#binary/linker.argsfile"
|
|
|
|
Process.(eval ~context cwd_logical)
|
|
|
|
(Lazy.force mode) file rule
|
|
|
|
|
|
|
|
(* add a file to the list of modules, if it exists *)
|
|
|
|
let add_module ?archive_name ~context file rev_modules =
|
|
|
|
let open Process in
|
|
|
|
if eval ~context (file_exists file) then file :: rev_modules
|
|
|
|
else (
|
|
|
|
( match archive_name with
|
|
|
|
| None -> warn "%s doesn't exist" file ()
|
|
|
|
| Some archive_name ->
|
|
|
|
warn "%s doesn't exist in archive: %s" file
|
|
|
|
(Filename.basename archive_name)
|
|
|
|
() ) ;
|
|
|
|
rev_modules )
|
|
|
|
|
|
|
|
(* use llvm-ar to get toc of an archive *)
|
|
|
|
let expand_thin_archive ~context archive_name rev_modules =
|
|
|
|
let open Process in
|
|
|
|
eval ~context
|
|
|
|
( run (Lazy.force llvm_bin ^ "llvm-ar") ["t"; archive_name]
|
|
|
|
|- fold_lines ~init:rev_modules ~f:(fun rev_modules line ->
|
|
|
|
return (add_module ~archive_name ~context line rev_modules) ) )
|
|
|
|
|
|
|
|
(* Use llvm-ar to check if the archive contains any bitcode files; if it
|
|
|
|
does, fail for now as it doesn't seem to happen. *)
|
|
|
|
let expand_arch_archive ~context archive_name =
|
|
|
|
let number_of_bitcode_files =
|
|
|
|
let open Process in
|
|
|
|
eval ~context
|
|
|
|
( run (Lazy.force llvm_bin ^ "llvm-ar") ["t"; archive_name]
|
|
|
|
|- fold_lines ~init:0 ~f:(fun acc name ->
|
|
|
|
return
|
|
|
|
(let _, is_bc =
|
|
|
|
eval ~context
|
|
|
|
(* Need to run_exit_status, because otherwise llvm-ar
|
|
|
|
gets -8 signal; which is strange. Repeating the
|
|
|
|
command in bash doesn't signal and this works. *)
|
|
|
|
( run_exit_status
|
|
|
|
(Lazy.force llvm_bin ^ "llvm-ar")
|
|
|
|
["p"; archive_name; name]
|
|
|
|
|+ run "head" ["-c2"]
|
|
|
|
|+ read_all )
|
|
|
|
in
|
|
|
|
if String.equal is_bc "BC" then (
|
|
|
|
warn "found bc file %s in %s" name archive_name () ;
|
|
|
|
acc + 1 )
|
|
|
|
else acc ) ) )
|
|
|
|
in
|
|
|
|
number_of_bitcode_files = 0
|
|
|
|
|| fail "found %d bitcode files in archive %s" number_of_bitcode_files
|
|
|
|
archive_name ()
|
|
|
|
|
|
|
|
(* find bitcode module(s) in a linker arg *)
|
|
|
|
let parse_linker_arg ~context rev_modules arg =
|
|
|
|
if String.is_suffix arg ~suffix:".o" then
|
|
|
|
add_module ~context arg rev_modules
|
|
|
|
else if String.is_suffix arg ~suffix:".a" then
|
|
|
|
let thin_archive =
|
|
|
|
String.strip Process.(eval (run "head" ["-1"; arg] |- read_all))
|
|
|
|
in
|
|
|
|
if String.equal thin_archive "!<thin>" then
|
|
|
|
expand_thin_archive ~context arg rev_modules
|
|
|
|
else if String.equal thin_archive "!<arch>" then (
|
|
|
|
assert (expand_arch_archive ~context arg) ;
|
|
|
|
rev_modules )
|
|
|
|
else fail "unknown type of archive file %s" thin_archive ()
|
|
|
|
else rev_modules
|
|
|
|
|
|
|
|
(* build target and find constituent bitcode modules *)
|
|
|
|
let bitcode_files_of ~target =
|
|
|
|
let target =
|
|
|
|
if
|
|
|
|
List.exists (Config.find_list "buck-target-patterns")
|
|
|
|
~f:(fun substring -> String.is_substring target ~substring)
|
|
|
|
then target ^ "_sledge"
|
|
|
|
else target
|
|
|
|
in
|
|
|
|
let context = context () in
|
|
|
|
buck_build ~context target ;
|
|
|
|
let modules =
|
|
|
|
In_channel.with_file
|
|
|
|
(argsfile ~context target)
|
|
|
|
~f:(In_channel.fold_lines ~init:[] ~f:(parse_linker_arg ~context))
|
|
|
|
|> List.rev
|
|
|
|
in
|
|
|
|
List.map ~f:(make_absolute (Lazy.force buck_root)) modules
|
|
|
|
|
|
|
|
(* link and optimize the modules *)
|
|
|
|
let llvm_link_opt ~fuzzer ~bitcode_output modules =
|
|
|
|
let context = context () in
|
|
|
|
let modules = if fuzzer then "-" :: modules else modules in
|
|
|
|
let open Process in
|
|
|
|
eval ~context
|
|
|
|
( ( if fuzzer then
|
|
|
|
echo ~n:() (Option.value_exn (Model.read "/lib_fuzzer_main.bc"))
|
|
|
|
else return () )
|
|
|
|
|- run (Lazy.force llvm_bin ^ "llvm-link") ("-o=-" :: modules)
|
[sledge] Add globalopt pass to remove globals
Summary:
This adds a globalopt optimization pass to sledge.
Consider code like:
```
const char *a_string = "I'm a string";
int an_int = 0;
int c() {
return an_int;
}
int main() {
char *c1 = a_string;
return c();
}
```
When compiled there are 2 levels of indirection. For example
`return an_int` Get's compiled as
```
%0 = load i32, i32* an_int1
ret i32 %0
```
Global opt reduces this (if `an_int` is internal) to just
` ret i32 0`.
Similarly and more importantly
`c1 = a_string;` get's compiled into
```
@.str = private unnamed_addr constant [13 x i8] c"I'm a string\00"
a_string = dso_local global i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)
%c1 = alloca i8*, align 8
%0 = load i8*, i8** a_string, align 8, !dbg !25
store i8* %0, i8** %c1, align 8, !dbg !24
```
So there is a level of indirection between `c1` and `.str` where the string is stored.
With global opt, this gets reduced to:
```
@.str = private unnamed_addr constant [13 x i8] c"I'm a string\00"
%c1 = alloca i8*, align 8
store i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i8** %c1, align 8, !dbg !23
```
and `a_string` variable gets deleted.
On sledge this has the effect of reducing the complexity of the symbolic heap significantly.
Without this optimisation, running
`sledge.dbg llvm analyze -trace Domain.call global_vars.bc`
Gives prints the following segments:
```
∧ %.str -[)-> ⟨13,{}⟩
* %a_string -[)-> ⟨8,%.str⟩
* %an_int -[)-> ⟨4,0⟩
* %c1 -[)-> ⟨8,%.str⟩
* %retval -[)-> ⟨4,0⟩
```
So there are `an_int` and `a_string` segments, which are redundant.
with the optimisation, the heap looks like:
`∧ %.str -[)-> ⟨13,{}⟩ * %c1 -[)-> ⟨8,%.str⟩ * %retval -[)-> ⟨4,0⟩`,
Where we only have the `.str` segment and the `c1` segment, which are the two we need.
Reviewed By: ngorogiannis
Differential Revision: D15649195
fbshipit-source-id: 5f71e56e8
6 years ago
|
|
|
|- run
|
|
|
|
(Lazy.force llvm_bin ^ "opt")
|
|
|
|
[ "-o=" ^ bitcode_output
|
|
|
|
; "-internalize"
|
|
|
|
; "-internalize-public-api-list="
|
|
|
|
^ String.concat ~sep:"," (Config.find_list "entry-points")
|
|
|
|
; "-globaldce"
|
|
|
|
; "-globalopt"
|
|
|
|
; "-mergefunc"
|
|
|
|
; "-constmerge"
|
|
|
|
; "-argpromotion"
|
|
|
|
; "-ipsccp"
|
|
|
|
; "-mem2reg"
|
|
|
|
; "-dce"
|
|
|
|
; "-globaldce"
|
|
|
|
; "-deadargelim"
|
|
|
|
; "-global-merge-on-const"
|
|
|
|
; "-global-merge-ignore-single-use=false"
|
|
|
|
; "-global-merge-group-by-use=false"
|
|
|
|
(* global-merge-max-offset is set to 0 by default. If a global
|
|
|
|
variable has larger allocation size than the max-offset, it is
|
|
|
|
not merged, therefore the global-merge pass is a noop. We set
|
|
|
|
it to something big, so that it merges as much as possible. *)
|
|
|
|
; "-global-merge-max-offset=1000000"
|
|
|
|
; "-global-merge" ] )
|
|
|
|
|
|
|
|
(** command line interface *)
|
|
|
|
|
|
|
|
open Command.Let_syntax
|
|
|
|
|
|
|
|
let ( |*> ) a' f' = a' |> Command.Param.apply f'
|
|
|
|
let ( |**> ) = Command.Param.map2 ~f:(fun a f b -> f b a)
|
|
|
|
|
|
|
|
let abs_path_arg =
|
|
|
|
Command.Param.(Arg_type.map string ~f:(make_absolute cwd))
|
|
|
|
|
|
|
|
let main ~(command : unit Command.basic_command) ~analyze =
|
|
|
|
let bitcode_inputs =
|
|
|
|
let%map_open target = anon ("<target>" %: string)
|
|
|
|
and modules =
|
|
|
|
flag "modules" (optional string)
|
|
|
|
~doc:
|
|
|
|
"<file> write list of bitcode files to <file>, or to standard \
|
|
|
|
output if <file> is `-`"
|
|
|
|
in
|
|
|
|
let bitcode_files = bitcode_files_of ~target in
|
|
|
|
( match modules with
|
|
|
|
| Some "-" ->
|
|
|
|
Format.printf "%a"
|
|
|
|
(List.pp " " Format.pp_print_string)
|
|
|
|
bitcode_files
|
|
|
|
| Some file -> Out_channel.write_lines file bitcode_files
|
|
|
|
| None -> () ) ;
|
|
|
|
bitcode_files
|
|
|
|
in
|
|
|
|
let bitcode_cmd =
|
|
|
|
let summary = "report bitcode files in buck target" in
|
|
|
|
let readme () =
|
|
|
|
"Build a buck target and report the included bitcode files."
|
|
|
|
in
|
|
|
|
let param = bitcode_inputs >>| fun _ () -> () in
|
|
|
|
command ~summary ~readme param
|
|
|
|
in
|
|
|
|
let analyze_cmd =
|
|
|
|
let summary = "analyze buck target" in
|
|
|
|
let readme () =
|
|
|
|
"Analyze code in a buck target. This is a convenience wrapper for \
|
|
|
|
the sequence `sledge buck bitcode`; `sledge llvm translate`; \
|
|
|
|
`sledge analyze`."
|
|
|
|
in
|
|
|
|
let param = bitcode_inputs |*> analyze in
|
|
|
|
command ~summary ~readme param
|
|
|
|
in
|
|
|
|
let link_cmd =
|
|
|
|
let summary = "link buck target to LLVM bitcode" in
|
|
|
|
let readme () =
|
|
|
|
"Link code in a buck target to a single LLVM bitcode module. This \
|
|
|
|
also internalizes all symbols except `main` and removes dead code."
|
|
|
|
in
|
|
|
|
let link =
|
|
|
|
let%map_open bitcode_output =
|
|
|
|
flag "bitcode-output" (required abs_path_arg)
|
|
|
|
~doc:"<file> write linked bitcode to <file>"
|
|
|
|
and fuzzer =
|
|
|
|
flag "fuzzer" no_arg ~doc:"add a harness for libFuzzer targets"
|
|
|
|
in
|
|
|
|
fun () -> llvm_link_opt ~fuzzer ~bitcode_output
|
|
|
|
in
|
|
|
|
let param = bitcode_inputs |**> link in
|
|
|
|
command ~summary ~readme param
|
|
|
|
in
|
|
|
|
let summary = "integration with Buck" in
|
|
|
|
let readme () =
|
|
|
|
"Code can be provided by a buck build target, such as \
|
|
|
|
//fully/qualified/build:target. The mechanism used to integrate with \
|
|
|
|
buck uses the arguments passed to the linker, so the target must \
|
|
|
|
specify a binary that will be linked, not for instance a library \
|
|
|
|
archive. Sledge passes the --config sledge.build=True flag to buck, \
|
|
|
|
which can be used to configure buck targets for sledge."
|
|
|
|
in
|
|
|
|
Command.group ~summary ~readme ~preserve_subcommand_order:()
|
|
|
|
[("analyze", analyze_cmd); ("bitcode", bitcode_cmd); ("link", link_cmd)]
|