From b9ba97a2fd58db9fa21f8b0d4c1af8411c460df6 Mon Sep 17 00:00:00 2001 From: Timotej Kapus Date: Wed, 5 Jun 2019 10:27:17 -0700 Subject: [PATCH] [sledge] Add globalopt pass to remove globals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This adds a globalopt optimization pass to sledge. Consider code like: ``` const char *a_string = "I'm a string"; int an_int = 0; int c() { return an_int; } int main() { char *c1 = a_string; return c(); } ``` When compiled there are 2 levels of indirection. For example `return an_int` Get's compiled as ``` %0 = load i32, i32* an_int1 ret i32 %0 ``` Global opt reduces this (if `an_int` is internal) to just ` ret i32 0`. Similarly and more importantly `c1 = a_string;` get's compiled into ``` @.str = private unnamed_addr constant [13 x i8] c"I'm a string\00" a_string = dso_local global i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0) %c1 = alloca i8*, align 8 %0 = load i8*, i8** a_string, align 8, !dbg !25 store i8* %0, i8** %c1, align 8, !dbg !24 ``` So there is a level of indirection between `c1` and `.str` where the string is stored. With global opt, this gets reduced to: ``` @.str = private unnamed_addr constant [13 x i8] c"I'm a string\00" %c1 = alloca i8*, align 8 store i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i8** %c1, align 8, !dbg !23 ``` and `a_string` variable gets deleted. On sledge this has the effect of reducing the complexity of the symbolic heap significantly. Without this optimisation, running `sledge.dbg llvm analyze -trace Domain.call global_vars.bc` Gives prints the following segments: ``` ∧ %.str -[)-> ⟨13,{}⟩ * %a_string -[)-> ⟨8,%.str⟩ * %an_int -[)-> ⟨4,0⟩ * %c1 -[)-> ⟨8,%.str⟩ * %retval -[)-> ⟨4,0⟩ ``` So there are `an_int` and `a_string` segments, which are redundant. with the optimisation, the heap looks like: `∧ %.str -[)-> ⟨13,{}⟩ * %c1 -[)-> ⟨8,%.str⟩ * %retval -[)-> ⟨4,0⟩`, Where we only have the `.str` segment and the `c1` segment, which are the two we need. Reviewed By: ngorogiannis Differential Revision: D15649195 fbshipit-source-id: 5f71e56e8 --- sledge/src/llair/frontend.ml | 1 + sledge/src/sledge_buck.ml | 4 +++- sledge/test/exec/global_vars.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 sledge/test/exec/global_vars.c diff --git a/sledge/src/llair/frontend.ml b/sledge/src/llair/frontend.ml index 45a518aa3..4b52951b7 100644 --- a/sledge/src/llair/frontend.ml +++ b/sledge/src/llair/frontend.ml @@ -1328,6 +1328,7 @@ let transform : Llvm.llmodule -> unit = ["__llair_main"; "_Z12__llair_mainv"; "main"] ~f:(String.equal fn) ) ; Llvm_ipo.add_global_dce pm ; + Llvm_ipo.add_global_optimizer pm ; Llvm_scalar_opts.add_lower_atomic pm ; Llvm_scalar_opts.add_scalar_repl_aggregation pm ; Llvm_scalar_opts.add_scalarizer pm ; diff --git a/sledge/src/sledge_buck.ml b/sledge/src/sledge_buck.ml index db0bbab22..7552a7a20 100644 --- a/sledge/src/sledge_buck.ml +++ b/sledge/src/sledge_buck.ml @@ -138,7 +138,9 @@ let llvm_link_opt ~output modules = (Lazy.force llvm_bin ^ "llvm-link") ( "-internalize" :: "-internalize-public-api-list=main" :: "-o=-" :: modules ) - |- run (Lazy.force llvm_bin ^ "opt") ["-o=" ^ output; "-globaldce"] ) + |- run + (Lazy.force llvm_bin ^ "opt") + ["-o=" ^ output; "-globaldce"; "-globalopt"] ) (** command line interface *) diff --git a/sledge/test/exec/global_vars.c b/sledge/test/exec/global_vars.c new file mode 100644 index 000000000..47fc292b1 --- /dev/null +++ b/sledge/test/exec/global_vars.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +/* The symbolic heap for this example should look like: + ^ %.str --> <13,{}> * %c1 --> <8,%.str> * %retval --> <4,0> + instead of + ^ %.str --> <13,{}> + * %a_string --> <8,%.str> + * %an_int --> <4,0> + * %c1 --> <8,%.str> + * %retval --> <4,0> + Which has an_int and a_string indirections. There can be obtained by + sledge.dbg llvm analyze -trace Domain.call global_vars.bc */ + +const char* a_string = "I'm a string"; +int an_int = 0; + +int c() { return an_int; } + +int main() { + char* c1 = a_string; + + return c(); +}