From 446ac6d87ce085f5552ec5194ee8ef35aac51ea1 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Fri, 23 Mar 2018 15:33:25 -0700 Subject: [PATCH] Add initial version of LLAIR and LLVM to LLAIR translation Reviewed By: mbouaziz Differential Revision: D6892016 fbshipit-source-id: 3720749 --- sledge/.clang-format | 2 + sledge/.gitignore | 13 + sledge/.ocamlformat | 4 + sledge/.ocp-indent | 13 + sledge/Makefile | 73 ++ sledge/SETUP.org | 50 + sledge/TODO.org | 145 +++ sledge/jbuild-common.in | 40 + sledge/jbuild-workspace.in | 2 + sledge/llair.opam | 16 + sledge/model/cxxabi.cpp | 26 + sledge/model/llair_intrinsics.h | 13 + sledge/sledge.opam | 15 + sledge/src/config.ml | 84 ++ sledge/src/config.mli | 13 + sledge/src/import/import.ml | 98 ++ sledge/src/import/import.mli | 88 ++ sledge/src/import/jbuild.in | 13 + sledge/src/import/vector.ml | 77 ++ sledge/src/import/vector.mli | 180 +++ sledge/src/jbuild.in | 12 + sledge/src/llair/exp.ml | 482 ++++++++ sledge/src/llair/exp.mli | 255 ++++ sledge/src/llair/frontend.ml | 1334 +++++++++++++++++++++ sledge/src/llair/frontend.mli | 14 + sledge/src/llair/global.ml | 10 + sledge/src/llair/global.mli | 10 + sledge/src/llair/jbuild.in | 13 + sledge/src/llair/llair.ml | 495 ++++++++ sledge/src/llair/llair.mli | 202 ++++ sledge/src/llair/loc.ml | 30 + sledge/src/llair/loc.mli | 22 + sledge/src/llair/typ.ml | 142 +++ sledge/src/llair/typ.mli | 76 ++ sledge/src/llair/var.ml | 10 + sledge/src/llair/var.mli | 10 + sledge/src/ppx_trace/jbuild | 5 + sledge/src/ppx_trace/ppx_trace.ml | 120 ++ sledge/src/ppx_trace/ppx_trace.mli | 1 + sledge/src/sledge.ml | 36 + sledge/src/sledge.mli | 1 + sledge/src/trace/jbuild.in | 13 + sledge/src/trace/trace.ml | 58 + sledge/src/trace/trace.mli | 25 + sledge/src/version.ml.in | 12 + sledge/src/version.mli | 12 + sledge/test/Makefile | 151 +++ sledge/test/frontend/address_of_label.cpp | 24 + sledge/test/frontend/cond_alloca.cpp | 16 + sledge/test/frontend/destructor_bases.cpp | 48 + sledge/test/frontend/exceptions.cpp | 50 + sledge/test/frontend/rethrow.cpp | 57 + sledge/test/frontend/struct.cpp | 27 + sledge/test/frontend/vector.cpp | 16 + sledge/test/frontend/virtual.cpp | 26 + sledge/test/llvm | 1 + sledge/tools/gen_version.sh | 32 + 57 files changed, 4813 insertions(+) create mode 100644 sledge/.clang-format create mode 100644 sledge/.ocamlformat create mode 100644 sledge/.ocp-indent create mode 100644 sledge/Makefile create mode 100644 sledge/SETUP.org create mode 100644 sledge/TODO.org create mode 100644 sledge/jbuild-common.in create mode 100644 sledge/jbuild-workspace.in create mode 100644 sledge/llair.opam create mode 100644 sledge/model/cxxabi.cpp create mode 100644 sledge/model/llair_intrinsics.h create mode 100644 sledge/sledge.opam create mode 100644 sledge/src/config.ml create mode 100644 sledge/src/config.mli create mode 100644 sledge/src/import/import.ml create mode 100644 sledge/src/import/import.mli create mode 100644 sledge/src/import/jbuild.in create mode 100644 sledge/src/import/vector.ml create mode 100644 sledge/src/import/vector.mli create mode 100644 sledge/src/jbuild.in create mode 100644 sledge/src/llair/exp.ml create mode 100644 sledge/src/llair/exp.mli create mode 100644 sledge/src/llair/frontend.ml create mode 100644 sledge/src/llair/frontend.mli create mode 100644 sledge/src/llair/global.ml create mode 100644 sledge/src/llair/global.mli create mode 100644 sledge/src/llair/jbuild.in create mode 100644 sledge/src/llair/llair.ml create mode 100644 sledge/src/llair/llair.mli create mode 100644 sledge/src/llair/loc.ml create mode 100644 sledge/src/llair/loc.mli create mode 100644 sledge/src/llair/typ.ml create mode 100644 sledge/src/llair/typ.mli create mode 100644 sledge/src/llair/var.ml create mode 100644 sledge/src/llair/var.mli create mode 100644 sledge/src/ppx_trace/jbuild create mode 100644 sledge/src/ppx_trace/ppx_trace.ml create mode 100644 sledge/src/ppx_trace/ppx_trace.mli create mode 100644 sledge/src/sledge.ml create mode 100644 sledge/src/sledge.mli create mode 100644 sledge/src/trace/jbuild.in create mode 100644 sledge/src/trace/trace.ml create mode 100644 sledge/src/trace/trace.mli create mode 100644 sledge/src/version.ml.in create mode 100644 sledge/src/version.mli create mode 100644 sledge/test/Makefile create mode 100644 sledge/test/frontend/address_of_label.cpp create mode 100644 sledge/test/frontend/cond_alloca.cpp create mode 100644 sledge/test/frontend/destructor_bases.cpp create mode 100644 sledge/test/frontend/exceptions.cpp create mode 100644 sledge/test/frontend/rethrow.cpp create mode 100644 sledge/test/frontend/struct.cpp create mode 100644 sledge/test/frontend/vector.cpp create mode 100644 sledge/test/frontend/virtual.cpp create mode 120000 sledge/test/llvm create mode 100755 sledge/tools/gen_version.sh diff --git a/sledge/.clang-format b/sledge/.clang-format new file mode 100644 index 000000000..053b4da1c --- /dev/null +++ b/sledge/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: llvm +PointerAlignment: Left diff --git a/sledge/.gitignore b/sledge/.gitignore index e69de29bb..da0182c12 100644 --- a/sledge/.gitignore +++ b/sledge/.gitignore @@ -0,0 +1,13 @@ +.merlin +/llvm/ +/model/cxxabi.bc +/src/import/jbuild +/src/jbuild +/src/llair/jbuild +/src/trace/jbuild +/src/version.ml +/test/*/*.bc +/test/*/*.bc.err +/test/*/*.bc.out +_build +jbuild-workspace diff --git a/sledge/.ocamlformat b/sledge/.ocamlformat new file mode 100644 index 000000000..aef6ee634 --- /dev/null +++ b/sledge/.ocamlformat @@ -0,0 +1,4 @@ +break-string-literals wrap +margin 77 +sparse false +wrap-comments true diff --git a/sledge/.ocp-indent b/sledge/.ocp-indent new file mode 100644 index 000000000..53f1b4f27 --- /dev/null +++ b/sledge/.ocp-indent @@ -0,0 +1,13 @@ +normal +base = 2 +type = 2 +in = 0 +with = 0 +match_clause = 4 +ppx_stritem_ext = 2 +max_indent = 4 +strict_with = auto +strict_else = always +strict_comments = true +align_ops = true +align_params = always diff --git a/sledge/Makefile b/sledge/Makefile new file mode 100644 index 000000000..74f4ee16e --- /dev/null +++ b/sledge/Makefile @@ -0,0 +1,73 @@ +# Copyright (c) 2018 - present Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. + +SHELL=bash + +OCAMLDOT?=ocamldot +OCAMLFORMAT?=ocamlformat + +.PHONY: default +default: exe + +.PHONY: src/version.ml +src/version.ml: + @tools/gen_version.sh $@ + +.PHONY: version +version: src/version.ml + +jbuild-workspace: jbuild-workspace.in + sed -e "s|@OPAM_SWITCH[@]|$$(opam switch show)|g" $< > $@ + +JBUILDS=$(patsubst %.in,%,$(shell find * -name jbuild.in)) + +.PHONY: jbuilds +jbuilds: $(JBUILDS) + +%/jbuild: jbuild-common.in %/jbuild.in + @cat $+ > $@ + +.PHONY: setup +setup: src/version.ml jbuild-workspace jbuilds + +.PHONY: exe +exe: setup + jbuilder build src/sledge.exe + +.PHONY: bc +bc: setup + jbuilder build _build/dbg/src/sledge.bc + +.PHONY: dbg +dbg: setup + jbuilder build _build/dbg/src/sledge.exe + +.PHONY: opt +opt: setup + jbuilder build _build/default/src/sledge.exe + +SRCS=$(shell \ls src/{,*/}*.ml{,i}) + +mod_dep.dot: $(SRCS) + ocamldep.opt $(SRCS) | $(OCAMLDOT) -r Sledge -fullgraph > _build/mod_dep.dot + +.PHONY: clean +clean: + rm -rf _build jbuild-workspace $(JBUILDS) + +.PHONY: cleaner +cleaner: clean + rm -rf src/version.ml + +.PHONY: fmt +fmt: + $(OCAMLFORMAT) -i $(SRCS) + clang-format -i model/llair_intrinsics.h model/cxxabi.cpp + +# print any variable for Makefile debugging +print-%: + @printf '$*='; printf '$($*)'; printf '\n' diff --git a/sledge/SETUP.org b/sledge/SETUP.org new file mode 100644 index 000000000..f0e23ec04 --- /dev/null +++ b/sledge/SETUP.org @@ -0,0 +1,50 @@ +1. NOTE: These instructions use ~/.local/llvm_$(opam config var switch) as an install root for llvm/clang, but installing elsewhere should be fine. Also note that you probably don't want to use this clang for anything else because it is a default debug build and hence very slow. So some PATH management is likely needed to have ~/.local/llvm_$(opam config var switch) in PATH only when building / running llair. This is done automatically for the targets in test/Makefile. +2. clone llvm, clang, and libcxxabi + - cd ~/path/to/sledge + - git clone git+ssh://git@github.com/jberdine/llvm.git --branch ocaml_api + - git clone https://github.com/llvm-mirror/clang.git llvm/tools/clang + - git clone https://github.com/llvm-mirror/libcxxabi.git llvm/projects/libcxxabi +3. build llvm & clang + - brew install cmake ninja + - cd ~/path/to/sledge/llvm + - mkdir _build + - cd _build + - cmake -G Ninja -DCMAKE_INSTALL_PREFIX=~/.local/llvm_$(opam config var switch) -DLLVM_OCAML_INSTALL_PATH=~/.local/llvm_$(opam config var switch)/lib/ocaml .. + - ninja -j8 + - ninja ocaml_doc + - ninja install +4. install deps + - add ~/.local/llvm_$(opam config var switch)/bin to shell PATH + + because installing the opam package for llvm will look for it + + export PATH=~/.local/llvm_$(opam config var switch)/bin:$PATH + - pin conf-llvm + + opam pin add -n conf-llvm --dev + + opam pin edit conf-llvm + - edit version number to match that of the llvm clone (= 7.0.0) + - pin llvm + + cd ~/path/to/sledge/llvm + + opam pin add -n -k git llvm . + + opam pin edit llvm + - edit llvm and conf-llvm version number to match that of the llvm clone (= 7.0.0) + - when prompted, ok to save new opam file +5. hush `ld: warning: directory not found for option '-L/opt/local/lib'` + - the zarith package adds a spurious linker option unless you have both brew and macports, so if you see this linker warning when compiling, execute + + sudo mkdir -p /opt/local/lib +6. install dev tools + - opam install merlin ocp-indent tuareg user-setup + - install ocamlformat +7. if needed: point new clang to xcode c++ lib + - cd ~/.local/llvm_$(opam config var switch)/include + - ln -s /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++ . +8. llvm dev workflow + - modify llvm sources + - cd ~/path/to/sledge/llvm/_build; ninja -j8 + - iterate + - (optional?) git add -u; git commit -m ... + - ninja -j8 && ninja ocaml_doc && ninja install && opam upgrade llvm + - cd ~/path/to/sledge; make + - it is not uncommon to get "inconsistent assumptions" errors: clean and re-make +9. llvm emacs mode + - (add-to-list 'load-path (expand-file-name "~/path/to/sledge/llvm/utils/emacs")) + - (require 'llvm-mode) + - (require 'autodisass-llvm-bitcode) diff --git a/sledge/TODO.org b/sledge/TODO.org new file mode 100644 index 000000000..e49905e83 --- /dev/null +++ b/sledge/TODO.org @@ -0,0 +1,145 @@ +* llvm +** squash the debug loc commits +** squash the Token enum, GlobalIFunc, and instr enum commits +** reword isLiteral commit to mention OCaml API +* trace and ppx_trace +** if a traced fun has a type annotation +copy it to the left arg of |> and to the arg of Trace.retn +** use toplevel module name +rather than nearest enclosing +** use name of enclosing function that is a structure item +rather than nearest enclosing +** implement fine-grained control over which functions get traced +* import +** make types of [fail] and [warn] consistent +** ? undeprecate Caml.Printexc +- or is there some other way to use Printexc.get_raw_backtrace and Printexc.raise_with_backtrace +** implement the rest of the Array operations in Vector +** find some way to keep interface of Vector in sync with Array +* config +** document cli +* llair +** move Exp.t to Exp.T.t and remove Exp.Global.init +** ? add lazy_t to all types which might be inhabited by cyclic values +** define Label module for Exp.Label and Llair.label, to unify how functions and blocks are named +** check/ensure that generated names do not clash +- name ^ ".ti" xlate_instr LandingPad +** ? expose the roots computed by Llair.mk +** ? change blocks to take all free variables as args ++ currently the scope of an identifier bound by e.g. Load is the continuation of the inst as well as all the conts that it dominates, this is somewhat messy ++ build a table from blocks to conts ++ build a table from blocks to free vars ++ need a fixed-point computation for blocks to vars table ++ to xlate a block + - get the terminator + - if all the destination blocks except the current block are already in the table + * then + - xlate block itself like now + + when get to the terminal + + look up free vars vector of the jump destinaton in table + + map over the vector + * if the var is the name of a PHI instr + - find and translate the arg for the src block of the jmp instr + use the find_map of find_jump_args + * else use the var + + use this vector for the jump args + - compute the free vars of its code + - use this vector for the cont params + - add free vars to table + - add block to cont mapping to table + * else recurse over the destination blocks except the current block ++ after entry block (and recursively everything reachable from it) is xlated, map over the function block list looking up from the table to get order of conts to match order of blocks +** ? format #line directives in programs +** ? function types could include the types of throw continuation args +but they are currently the same for all functions: i8* +* frontend +** kill the frontend memo tables when translate returns +** translate %malloc to alloc +- call Llvm.use_begin to see if the result is immediately cast +- call Llvm.size_of on the cast-to type +- divide to compute the number of elements +- fall back to the i8* return type of malloc +** translate %free to free +** hoist alloca's to the beginning of the entry block whenever possible +** clean up translation of intrinsics +separation between xlate_intrinsic (which translates an intrinsic function name to an expression constructor) and the Call case of xlate_instr (which translates calls to intrinsic functions to instructions) is not clear +** extract struct field names from llvm debug info +** normalize cfg +- remove unreachable blocks +- combine blocks with cmnd= []; term= Unreachable into one +** support variadic functions +- lower by implementing in terms of the core +- implement the va_list type as a pair or pointers into a stack represented as a linked-list, one pointer to the current element and one to the head +- a call to a variadic function pushes the args in reverse order, so that the first arg is at the top of the stack, and passes a pointer to the top as the last arg to the callee +- va_start intrinsic returns a pointer to the first va arg, by just projecting the current pointer from the last arg +- va_arg instruction returns the current va arg using argument va_list pointer to the stack, and sets the argument va_list current pointer to the next stack element +- va_copy is just a pointer copy of the source to destination va_list arguments, creating another pointer into the stack of va args, the head pointer of copies is null +- va_end deallocates the list starting from the head pointer +** support dynamic sized stack allocation (alloca in non-entry blocks) +- lower by implementing in terms of the core +- add a linked list of stack slots data structure +- each element contains + + a pointer to some memory allocated for that slot's contents + + a pointer to the next older slot + + a pointer to the beginning of the function's stack frame +- add a global variable that always points to the head of the stack +- alloca in non-entry blocks adds an element and stores the result of alloc in it, sets next, and uses the frame pointer of the previous head +- function call adds a 'frame sentinel' element whose frame pointer points to itself, slot pointer is null (but used for va_arg below) +- function return (and other popping terminators) traverses the stack, popping elements, calling free on the slot pointers, until the element pointed to by the frame pointer is encountered +- stacksave intrinsic returns a pointer to a stack element +- stackrestore intrinsic pops the stack like return but only back to the argument pointer +** handle inline asm enough to over-approximate control-flow +- inline asm can take addresses of blocks as args, that can be jumped to +- treating inline asm conservatively requires considering these control flows +** support missing intrinsics +** support vector operations +- by lowering into multiple scalar operations +- most cases handled by Frontend.transform +- tests have a few exceptions, possibly for only unrealistic code +** combine scan_locs and scan_names into a single pass +** exceptions +- is it correct to translate landingpad clauses not matching to unreachable, or should the exception be re-thrown +- check suspicious translation of landingpads + The translation of landingpads with cleanup and other clauses ignores the other clauses. This seems suspicious, is this semantics correct? +- handle subtyping + + xlate_instr on LandingPad uses Eq and Ne of type_info values. This ignores subtyping. Subtyping info is encoded into the type_info values. +- ? implement c++ abi functions instead of using libcxxabi + + implement eh abi in C + + see cxxabi https://libcxxabi.llvm.org/spec.html and itanium abi http://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html + + __cxa_call_unexpected + - translate to Unreachable, possibly warn + + __cxa_get_exception_ptr + - translate as identity function + + __cxa_allocate_exception + - translate to Alloc of exception struct type + + __cxa_begin_catch + - increment handler count of arg + - add arg to caught stack unless it is already there (next not null iff in stack) + - return arg + + __cxa_rethrow + - set rethrown field of top of caught stack, std::terminate if stack empty + - call __cxa_throw on top of caught stack + + __cxa_end_catch + - find top of caught stack + - decrement its handler count + + if handler count reaches 0 + - remove from stack + - if rethrown flag not set + + call destructor + + deallocate memory allocated by __cxa_allocate_exception +** improve treatment of Typ.is_sized on Opaque types +so that xlate_type can preserve sizedness, e.g. add a post-condition that Typ.is_sized iff Llvm.type_is_sized +** run translate in a forked subprocess +- so that when llvm crashes it does not take down sledge and an error can be returned +- will require serializing an deserializing the translated program +- alternatively: install a signal handler to catch and recover from crashes from llvm +** scalarizer does not work on functions with [optnone] attribute +- repro: llvm/Transforms/FunctionAttrs/optnone-simple.ll +- one solution: pre-process llvm to remove [optnone] attributes before running scalarizer pass +** ? move is_zero to Exp +** ? remove Exp.Nondet, replace with free variables +it is not obvious whether it will be simpler to use free variables instead of Nondet in the frontend, or to treat Nondet as a single-occurrence existential variable in the analyzer +** llvm bugs? +- Why aren't shufflevector instructions with zeroinitializer masks eliminated by the scalarizer pass? +* symbolic execution +* build diff --git a/sledge/jbuild-common.in b/sledge/jbuild-common.in new file mode 100644 index 000000000..67c4f400f --- /dev/null +++ b/sledge/jbuild-common.in @@ -0,0 +1,40 @@ +(* -*- tuareg -*- *) + +let common_flags = + {|(-w +a-3-4-9-18-40-42-44-48@50 + -strict-formats -strict-sequence -principal + -short-paths -bin-annot -keep-docs + -unboxed-types)|} + + +let ocamlc_flags = + match Jbuild_plugin.V1.context with + | "dbg" -> "-g -opaque" + | _ -> "-w -26-32 -noassert" + + +let ocamlopt_flags = + match Jbuild_plugin.V1.context with + | "dbg" -> ocamlc_flags + | _ -> ocamlc_flags ^ " -w -a -O3" + + +let ppx_trace_flags = + match Jbuild_plugin.V1.context with + | "dbg" -> "--debug" + | _ -> "" + + +let flags deps = + Printf.sprintf + {|(flags (%s %s)) + (ocamlc_flags (%s)) + (ocamlopt_flags (%s)) + (preprocess (pps (ppx_compare ppx_sexp_conv ppx_trace %s ppx_driver.runner)))|} + common_flags + (String.concat " " + (List.map (fun d -> "-open " ^ String.capitalize_ascii d) ("core_kernel" :: deps))) + ocamlc_flags ocamlopt_flags ppx_trace_flags + + +let libraries deps = String.concat " " ("core_kernel" :: deps) diff --git a/sledge/jbuild-workspace.in b/sledge/jbuild-workspace.in new file mode 100644 index 000000000..91dac1867 --- /dev/null +++ b/sledge/jbuild-workspace.in @@ -0,0 +1,2 @@ +(context ((switch @OPAM_SWITCH@) (name dbg) (merlin))) +(context ((switch @OPAM_SWITCH@) (name default))) diff --git a/sledge/llair.opam b/sledge/llair.opam new file mode 100644 index 000000000..6e6c98821 --- /dev/null +++ b/sledge/llair.opam @@ -0,0 +1,16 @@ +opam-version: "1.2" +maintainer: "Josh Berdine " +authors: "Josh Berdine " +build: [ + ["tools/gen_version.sh" "src/version.ml" version] + [make "jbuilds"] + ["jbuilder" "build" "-p" name "-j" jobs] +] +depends: [ + "cmdliner" + "core_kernel" {>= "v0.10.0"} + "jbuilder" {build} + "llvm" {build & = "7.0.0"} + "ppx_compare" {>= "v0.10.0"} + "zarith" +] diff --git a/sledge/model/cxxabi.cpp b/sledge/model/cxxabi.cpp new file mode 100644 index 000000000..0c72c20d9 --- /dev/null +++ b/sledge/model/cxxabi.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/* A simple implementation of the Itanium Base ABI for analysis purposes. + See http://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html */ + +#include "abort_message.cpp" +#include "cxa_exception.cpp" +#include "cxa_exception_storage.cpp" +#include "cxa_handlers.cpp" +#include "llair_intrinsics.h" + +extern "C" { + +__attribute__((always_inline)) _Unwind_Reason_Code +_Unwind_RaiseException(_Unwind_Exception* unwind_exception) { + __llair_throw(thrown_object_from_cxa_exception( + abi::cxa_exception_from_exception_unwind_exception(unwind_exception))); +} +} diff --git a/sledge/model/llair_intrinsics.h b/sledge/model/llair_intrinsics.h new file mode 100644 index 000000000..73cf1f4a5 --- /dev/null +++ b/sledge/model/llair_intrinsics.h @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +extern "C" { + +__attribute__((noreturn)) void __llair_throw(void* thrown_exception); +} diff --git a/sledge/sledge.opam b/sledge/sledge.opam new file mode 100644 index 000000000..e7e88b07c --- /dev/null +++ b/sledge/sledge.opam @@ -0,0 +1,15 @@ +opam-version: "1.2" +maintainer: "Josh Berdine " +authors: "Josh Berdine " +build: [ + ["make setup"] + ["jbuilder" "build" "-p" name "-j" jobs] +] +depends: [ + "cmdliner" + "core_kernel" {>= "v0.10.0"} + "jbuilder" {build} + "llvm" {build & = "7.0.0"} + "ppx_compare" {>= "v0.10.0"} + "zarith" +] diff --git a/sledge/src/config.ml b/sledge/src/config.ml new file mode 100644 index 000000000..bd31e328f --- /dev/null +++ b/sledge/src/config.ml @@ -0,0 +1,84 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Configuration options *) + +(** Extension of Cmdliner supporting lighter-weight option definition *) +module Cmdliner : sig + include module type of Cmdliner + + val mk : default:'a -> 'a Term.t -> 'a ref + (** [mk ~default term] is a ref which, after [parse] is called, contains + the value of the command line option specified by [term]. *) + + val parse : Term.info -> (unit -> unit Term.ret) -> unit + (** [parse info validate] parses the command line according to the options + declared by calls to [mk], using manual and version [info], and + calling [validate] to check usage constraints not expressible in the + [Term] language. *) +end = struct + include Cmdliner + + (** existential package of a Term and a setter for a ref to receive the + parsed value *) + type arg = Arg: 'a Term.t * ('a -> unit) -> arg + + (** convert a list of arg packages to a term for the tuple of all the arg + terms, and apply it to a function that sets all the receiver refs *) + let tuple args = + let pair (Arg (trm_x, set_x)) (Arg (trm_y, set_y)) = + let trm_xy = Term.(const (fun a b -> (a, b)) $ trm_x $ trm_y) in + let set_xy (a, b) = set_x a ; set_y b in + Arg (trm_xy, set_xy) + in + let init = Arg (Term.const (), fun () -> ()) in + let Arg (trm, set) = List.fold_right ~f:pair args ~init in + Term.app (Term.const set) trm + + + let args : arg list ref = ref [] + + let mk ~default arg = + let var = ref default in + let set x = var := x in + args := Arg (arg, set) :: !args ; + var + + + let parse info validate = + match Term.eval (Term.(ret (const validate $ tuple !args)), info) with + | `Ok () -> () + | `Error _ -> Caml.exit 1 + | `Help | `Version -> Caml.exit 0 +end + +open Cmdliner + +let input = + mk ~default:"" + Arg.(required & pos ~rev:true 0 (some string) None & info []) + + +let output = + let default = None in + mk ~default Arg.(value & opt (some string) default & info ["o"; "output"]) + + +let trace_all = + let default = false in + mk ~default Arg.(value & flag & info ["t"; "trace-all"]) + + +let info = Term.info "sledge" ~version:Version.version + +let validate () = `Ok () + +;; parse info validate + +let run main = + Trace.init ~trace_all:!trace_all ; + main ~input:!input ~output:!output diff --git a/sledge/src/config.mli b/sledge/src/config.mli new file mode 100644 index 000000000..4b15e2901 --- /dev/null +++ b/sledge/src/config.mli @@ -0,0 +1,13 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Configuration options *) + +val run : (input:string -> output:string option -> 'a) -> 'a +(** [run main] parses command line options, performs some imperative + initialization, and then executes [main] passing the configuration + options. *) diff --git a/sledge/src/import/import.ml b/sledge/src/import/import.ml new file mode 100644 index 000000000..0f4284628 --- /dev/null +++ b/sledge/src/import/import.ml @@ -0,0 +1,98 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Global namespace opened in each source file by the build system *) + +include Option.Monad_infix +module Vector = Vector +include Vector.Infix + +module Z = struct + include Z + + let sexp_of_t z = Sexp.Atom (Z.to_string z) + + let t_of_sexp = function + | Sexp.Atom s -> Z.of_string s + | _ -> assert false +end + +let fst3 (x, _, _) = x + +let snd3 (_, y, _) = y + +let trd3 (_, _, z) = z + +let ( >> ) f g x = g (f x) + +let ( $ ) f g x = f x ; g x + +let ( $> ) x f = f x ; x + +let ( <$ ) f x = f x ; x + +type ('a, 'b) fmt_str = ('a, Format.formatter, unit, 'b) format4 + +type 'a fmt = Format.formatter -> 'a -> unit + +let option_fmt fmt pp ff = function + | Some x -> Format.fprintf ff fmt pp x + | None -> () + + +let rec list_fmt sep pp ff = function + | [] -> () + | [x] -> pp ff x + | x :: xs -> Format.fprintf ff "%a%( %)%a" pp x sep (list_fmt sep pp) xs + + +let vector_fmt sep pp ff v = list_fmt sep pp ff (Vector.to_list v) + +let warn fmt = + let ff = Format.std_formatter in + Format.pp_open_box ff 2 ; + Format.pp_print_string ff "Warning: " ; + Format.kfprintf + (fun ff -> + Format.pp_close_box ff () ; + Format.pp_force_newline ff () ) + ff fmt + + +let raisef exn fmt = + let bt = Caml.Printexc.get_raw_backtrace () in + let ff = Format.str_formatter in + Format.pp_open_box ff 2 ; + Format.kfprintf + (fun ff () -> + Format.pp_close_box ff () ; + let msg = Format.flush_str_formatter () in + let exn = exn msg in + Caml.Printexc.raise_with_backtrace exn bt ) + ff fmt + + +exception Unimplemented of string + +let todo fmt = raisef (fun msg -> Unimplemented msg) fmt + +let fail fmt = raisef (fun msg -> Failure msg) fmt + +let assertf cnd fmt = + if not cnd then fail fmt + else Format.ikfprintf (fun _ () -> ()) Format.str_formatter fmt + + +let checkf cnd fmt = + if not cnd then fail fmt + else Format.ikfprintf (fun _ () -> true) Format.str_formatter fmt + + +type 'a or_error = ('a, exn * Caml.Printexc.raw_backtrace) Result.t + +let or_error f x () = + try Ok (f x) with exn -> Error (exn, Caml.Printexc.get_raw_backtrace ()) diff --git a/sledge/src/import/import.mli b/sledge/src/import/import.mli new file mode 100644 index 000000000..3761ddf26 --- /dev/null +++ b/sledge/src/import/import.mli @@ -0,0 +1,88 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Global namespace opened in each source file by the build system *) + +include module type of Option.Monad_infix + +module Z : sig + include module type of Z + + val t_of_sexp : Sexp.t -> t + + val sexp_of_t : t -> Sexp.t +end + +module Vector = Vector + +include module type of Vector.Infix + +val fst3 : 'a * _ * _ -> 'a +(** First projection from a triple. *) + +val snd3 : _ * 'a * _ -> 'a +(** Second projection from a triple. *) + +val trd3 : _ * _ * 'a -> 'a +(** Third projection from a triple. *) + +val ( >> ) : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c +(** Composition of functions: [(f >> g) x] is exactly equivalent to [g (f + (x))]. Left associative. *) + +val ( $ ) : ('a -> unit) -> ('a -> 'b) -> 'a -> 'b +(** Sequential composition of functions: [(f $ g) x] is exactly equivalent + to [(f x) ; (g x)]. Left associative. *) + +val ( $> ) : 'a -> ('a -> unit) -> 'a +(** Apply and ignore function: [x $> f] is exactly equivalent to [f x ; x]. + Left associative. *) + +val ( <$ ) : ('a -> unit) -> 'a -> 'a +(** Reverse apply and ignore function: [f <$ x] is exactly equivalent to [f + x ; x]. Left associative. *) + +(** Format strings. *) +type ('a, 'b) fmt_str = ('a, Format.formatter, unit, 'b) format4 + +(** Formatting function for argument type. *) +type 'a fmt = Format.formatter -> 'a -> unit + +val option_fmt : + ('a_fmt -> 'a -> unit, unit) fmt_str -> 'a_fmt -> 'a option fmt +(** Format an option. *) + +val list_fmt : (unit, unit) fmt_str -> 'a fmt -> 'a list fmt +(** Format a list. *) + +val vector_fmt : (unit, unit) fmt_str -> 'a fmt -> 'a vector fmt +(** Format a vector. *) + +exception Unimplemented of string + +val warn : ('a, unit) fmt_str -> 'a +(** Issue a warning for a survivable problem. *) + +val todo : ('a, unit -> _) fmt_str -> 'a +(** Raise an [Unimplemented] exception indicating that an input is valid but + not handled by the current implementation. *) + +val assertf : bool -> ('a, unit -> unit) fmt_str -> 'a +(** Raise an [Failure] exception if the bool argument is false, indicating + that the expected condition was not satisfied. *) + +val checkf : bool -> ('a, unit -> bool) fmt_str -> 'a +(** As [assertf] but returns the argument bool. *) + +val fail : ('a, unit -> _) fmt_str -> 'a +(** Raise a [Failure] exception indicating a fatal error not covered by + [assertf], [checkf], or [todo]. *) + +type 'a or_error = ('a, exn * Caml.Printexc.raw_backtrace) Result.t + +val or_error : ('a -> 'b) -> 'a -> unit -> 'b or_error +(** [or_error f x] runs [f x] and converts unhandled exceptions to errors. *) diff --git a/sledge/src/import/jbuild.in b/sledge/src/import/jbuild.in new file mode 100644 index 000000000..bcea0faea --- /dev/null +++ b/sledge/src/import/jbuild.in @@ -0,0 +1,13 @@ +(* -*- tuareg -*- *) + +let deps = [] + +;; Jbuild_plugin.V1.send @@ Format.sprintf " +(library + ((name import) + (public_name llair.import) + %s + (libraries (zarith %s)))) +" +(flags deps) +(libraries deps) diff --git a/sledge/src/import/vector.ml b/sledge/src/import/vector.ml new file mode 100644 index 000000000..2bf9d76b6 --- /dev/null +++ b/sledge/src/import/vector.ml @@ -0,0 +1,77 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Vector - Immutable view of an array *) + +open Base + +(** = 'a array but covariant since imperative operations hidden *) +type +'a t + +let v (a: 'a array) : 'a t = Caml.Obj.magic a + +let a (v: 'a t) : 'a array = Caml.Obj.magic v + +let compare cmp x y = Array.compare cmp (a x) (a y) + +let t_of_sexp a_of_sexp s = v (Array.t_of_sexp a_of_sexp s) + +let sexp_of_t sexp_of_a x = Array.sexp_of_t sexp_of_a (a x) + +module Infix = struct + type +'a vector = 'a t [@@deriving compare, sexp] +end + +let create ~len x = v (Array.create ~len x) + +let empty = v [||] + +let find x ~f = Array.find (a x) ~f + +let find_exn x ~f = Array.find_exn (a x) ~f + +let fold x ~init ~f = Array.fold (a x) ~init ~f + +let fold_right x ~f ~init = Array.fold_right (a x) ~f ~init + +let for_all x ~f = Array.for_all (a x) ~f + +let for_all2_exn x y ~f = Array.for_all2_exn (a x) (a y) ~f + +external get : 'a t -> int -> 'a = "%array_safe_get" + +let init n ~f = v (Array.init n ~f) + +let is_empty x = Array.is_empty (a x) + +let iter x ~f = Array.iter (a x) ~f + +let rev_iter x ~f = Array.fold_right (a x) ~init:() ~f:(fun e () -> f e) + +let iter2_exn x y ~f = Array.iter2_exn (a x) (a y) ~f + +let iteri x ~f = Array.iteri (a x) ~f + +let length x = Array.length (a x) + +let map x ~f = v (Array.map (a x) ~f) + +let mapi x ~f = v (Array.mapi (a x) ~f) + +let slice x i j = v (Array.slice (a x) i j) + +let of_array = v + +let of_list x = v (Array.of_list x) + +let of_list_rev x = v (Array.of_list_rev x) + +let of_option x = v (Option.to_array x) + +let to_list x = Array.to_list (a x) + +let to_array = a diff --git a/sledge/src/import/vector.mli b/sledge/src/import/vector.mli new file mode 100644 index 000000000..17a16cfc0 --- /dev/null +++ b/sledge/src/import/vector.mli @@ -0,0 +1,180 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Vector - Immutable view of an array + + Note that vectors and arrays can be interconverted without copying. So + Vector is not a safe immutable data structure, it is only attempts to + make it inconvenient to mutate. *) + +open Base + +type +'a t + +module Infix : sig + type +'a vector = 'a t [@@deriving compare, sexp] +end + +val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int + +val t_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a t + +val sexp_of_t : ('a -> Sexp.t) -> 'a t -> Sexp.t + +(* val t_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a t *) +(* val sexp_of_t : ('a -> Sexp.t) -> 'a t -> Sexp.t *) +(* val binary_search : *) +(* ('a t, 'a, 'key) Base.Binary_searchable_intf.binary_search *) +(* val binary_search_segmented : *) +(* ('a t, 'a) Binary_searchable_intf.binary_search_segmented *) +(* val mem : 'a t -> 'a -> equal:('a -> 'a -> bool) -> bool *) + +val length : 'a t -> int + +val is_empty : 'a t -> bool + +val iter : 'a t -> f:('a -> unit) -> unit + +val rev_iter : 'a t -> f:('a -> unit) -> unit + +val fold : 'a t -> init:'accum -> f:('accum -> 'a -> 'accum) -> 'accum + +(* val fold_result : *) +(* 'a t -> init:'accum -> f:('accum -> 'a -> ('accum, 'e) result) *) +(* -> ('accum, 'e) result *) +(* val fold_until : *) +(* 'a t -> init:'accum *) +(* -> f:('accum -> 'a -> ('accum, 'stop) Container_intf.Continue_or_stop.t) *) +(* -> ('accum, 'stop) Container_intf.Finished_or_stopped_early.t *) +(* val exists : 'a t -> f:('a -> bool) -> bool *) + +val for_all : 'a t -> f:('a -> bool) -> bool + +(* val count : 'a t -> f:('a -> bool) -> int *) +(* val sum : *) +(* (module Base__.Commutative_group.S with type t = 'sum) -> *) +(* 'a t -> f:('a -> 'sum) -> 'sum *) + +val find : 'a t -> f:('a -> bool) -> 'a option + +(* val find_map : 'a t -> f:('a -> 'b option) -> 'b option *) + +val to_list : 'a t -> 'a list + +val to_array : 'a t -> 'a array +(** [to_array v] is an array that shares its representation with vector [v], + therefore mutating [to_array v] changes [v] (as well as all the shallow + copies of [v] that are likely to exist). The intended use for [to_array] + is e.g. to pattern match on a vector, or to interface with some existing + array code that is known to not mutate its arguments. *) + +(* val min_elt : 'a t -> cmp:('a -> 'a -> int) -> 'a option *) +(* val max_elt : 'a t -> cmp:('a -> 'a -> int) -> 'a option *) +(* val invariant : 'a Base__Invariant_intf.t -> 'a t Base__Invariant_intf.t *) +(* val max_length : int *) + +external get : 'a t -> int -> 'a = "%array_safe_get" + +(* external unsafe_get : 'a t -> int -> 'a = "%array_unsafe_get" *) + +val create : len:int -> 'a -> 'a t + +val init : int -> f:(int -> 'a) -> 'a t + +(* val make_matrix : dimx:int -> dimy:int -> 'a -> 'a t t *) +(* val append : 'a t -> 'a t -> 'a t *) +(* val concat : 'a t list -> 'a t *) +(* val copy : 'a t -> 'a t *) + +val of_array : 'a array -> 'a t +(** [of_array a] is a vector that shares its representation with array [a], + therefore mutating [a] changes [of_array a]. The intended use for + [of_array] is for converting an array to a vector when the array will + not be used after conversion, or with care for multi-step initialization + of a vector. *) + +val of_list : 'a list -> 'a t + +val of_option : 'a option -> 'a t + +val map : 'a t -> f:('a -> 'b) -> 'b t + +(* val folding_map : 'a t -> init:'b -> f:('b -> 'a -> 'b * 'c) -> 'c t *) +(* val folding_mapi : 'a t -> init:'b -> f:(int -> 'b -> 'a -> 'b * 'c) -> + 'c t *) +(* val fold_map : 'a t -> init:'b -> f:('b -> 'a -> 'b * 'c) -> 'b * 'c t *) +(* val fold_mapi : *) +(* 'a t -> init:'b -> f:(int -> 'b -> 'a -> 'b * 'c) -> 'b * 'c t *) + +val iteri : 'a t -> f:(int -> 'a -> unit) -> unit + +val mapi : 'a t -> f:(int -> 'a -> 'b) -> 'b t + +(* val foldi : 'a t -> init:'b -> f:(int -> 'b -> 'a -> 'b) -> 'b *) + +val fold_right : 'a t -> f:('a -> 'b -> 'b) -> init:'b -> 'b + +(* val sort : 'a t -> cmp:('a -> 'a -> int) -> 'a t *) +(* val stable_sort : 'a t -> cmp:('a -> 'a -> int) -> 'a t *) +(* val is_sorted : 'a t -> cmp:('a -> 'a -> int) -> bool *) +(* val is_sorted_strictly : 'a t -> cmp:('a -> 'a -> int) -> bool *) +(* val concat_map : 'a t -> f:('a -> 'b t) -> 'b t *) +(* val concat_mapi : 'a t -> f:(int -> 'a -> 'b t) -> 'b t *) +(* val partition_tf : 'a t -> f:('a -> bool) -> 'a t * 'a t *) +(* val partitioni_tf : 'a t -> f:(int -> 'a -> bool) -> 'a t * 'a t *) +(* val cartesian_product : 'a t -> 'b t -> ('a * 'b) t *) +(* val transpose : 'a t t -> 'a t t option *) +(* val transpose_exn : 'a t t -> 'a t t *) +(* val normalize : 'a t -> int -> int *) + +val slice : 'a t -> int -> int -> 'a t + +(* val nget : 'a t -> int -> 'a *) +(* val filter_opt : 'a option t -> 'a t *) +(* val filter_map : 'a t -> f:('a -> 'b option) -> 'b t *) +(* val filter_mapi : 'a t -> f:(int -> 'a -> 'b option) -> 'b t *) +(* val for_alli : 'a t -> f:(int -> 'a -> bool) -> bool *) +(* val existsi : 'a t -> f:(int -> 'a -> bool) -> bool *) +(* val counti : 'a t -> f:(int -> 'a -> bool) -> int *) + +val iter2_exn : 'a t -> 'b t -> f:('a -> 'b -> unit) -> unit + +(* val map2_exn : 'a t -> 'b t -> f:('a -> 'b -> 'c) -> 'c t *) +(* val fold2_exn : 'a t -> 'b t -> init:'c -> f:('c -> 'a -> 'b -> 'c) -> 'c *) + +val for_all2_exn : 'a t -> 'b t -> f:('a -> 'b -> bool) -> bool + +(* val exists2_exn : 'a t -> 'b t -> f:('a -> 'b -> bool) -> bool *) +(* val filter : 'a t -> f:('a -> bool) -> 'a t *) +(* val filteri : 'a t -> f:(int -> 'a -> bool) -> 'a t *) + +val of_list_rev : 'a list -> 'a t + +(* val of_list_map : 'a list -> f:('a -> 'b) -> 'b t *) +(* val of_list_rev_map : 'a list -> f:('a -> 'b) -> 'b t *) + +val find_exn : 'a t -> f:('a -> bool) -> 'a + +(* val find_map_exn : 'a t -> f:('a -> 'b option) -> 'b *) +(* val findi : 'a t -> f:(int -> 'a -> bool) -> (int * 'a) option *) +(* val findi_exn : 'a t -> f:(int -> 'a -> bool) -> int * 'a *) +(* val find_mapi : 'a t -> f:(int -> 'a -> 'b option) -> 'b option *) +(* val find_mapi_exn : 'a t -> f:(int -> 'a -> 'b option) -> 'b *) +(* val find_consecutive_duplicate : *) +(* 'a t -> equal:('a -> 'a -> bool) -> ('a * 'a) option *) +(* val reduce : 'a t -> f:('a -> 'a -> 'a) -> 'a option *) +(* val reduce_exn : 'a t -> f:('a -> 'a -> 'a) -> 'a *) +(* val random_element : ?random_state:Random.State.t -> 'a t -> 'a option *) +(* val random_element_exn : ?random_state:Random.State.t -> 'a t -> 'a *) +(* val zip : 'a t -> 'b t -> ('a * 'b) t option *) +(* val zip_exn : 'a t -> 'b t -> ('a * 'b) t *) +(* val unzip : ('a * 'b) t -> 'a t * 'b t *) +(* val last : 'a t -> 'a *) + +val empty : 'a t +(* val equal : 'a t -> 'a t -> equal:('a -> 'a -> bool) -> bool *) +(* val to_sequence : 'a t -> 'a Sequence.t *) diff --git a/sledge/src/jbuild.in b/sledge/src/jbuild.in new file mode 100644 index 000000000..21f0e460b --- /dev/null +++ b/sledge/src/jbuild.in @@ -0,0 +1,12 @@ +(* -*- tuareg -*- *) + +let deps = ["import"; "trace"; "llair_"] + +;; Jbuild_plugin.V1.send @@ Format.sprintf " +(executable + ((name sledge) + %s + (libraries (cmdliner %s)))) +" +(flags deps) +(libraries deps) diff --git a/sledge/src/llair/exp.ml b/sledge/src/llair/exp.ml new file mode 100644 index 000000000..db1de2003 --- /dev/null +++ b/sledge/src/llair/exp.ml @@ -0,0 +1,482 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Expressions *) + +type t = + | Var of {name: string; typ: Typ.t; loc: Loc.t} + | Global of {name: string; init: t option; typ: Typ.t; loc: Loc.t} + | Nondet of {typ: Typ.t; loc: Loc.t; msg: string} + | Label of {parent: string; name: string; loc: Loc.t} + | Null of {typ: Typ.t} + | App of {op: t; arg: t; loc: Loc.t} + | AppN of {op: t; args: t vector; loc: Loc.t} + (* NOTE: may be cyclic *) + | PtrFld of {fld: int} + | PtrIdx + | PrjFld of {fld: int} + | PrjIdx + | UpdFld of {fld: int} + | UpdIdx + | Integer of {data: Z.t; typ: Typ.t} + | Float of {data: string; typ: Typ.t} + | Array of {typ: Typ.t} + | Struct of {typ: Typ.t} + | Cast of {typ: Typ.t} + | Conv of {signed: bool; typ: Typ.t} + | Select + (* binary: comparison *) + | Eq + | Ne + | Gt + | Ge + | Lt + | Le + | Ugt + | Uge + | Ult + | Ule + | Ord + | Uno + (* binary: boolean / bitwise *) + | And + | Or + | Xor + | Shl + | LShr + | AShr + (* binary: arithmetic *) + | Add + | Sub + | Mul + | Div + | UDiv + | Rem + | URem +[@@deriving compare, sexp] + +let equal = [%compare.equal : t] + +let uncurry exp = + let rec uncurry_ args op = + match op with + | App {op; arg} -> uncurry_ (arg :: args) op + | AppN {op; args} -> (op, Vector.to_list args) + | _ -> (op, args) + in + uncurry_ [] exp + + +let rec fmt ff exp = + let pf fmt = + Format.pp_open_box ff 2 ; + Format.kfprintf (fun ff -> Format.pp_close_box ff ()) ff fmt + in + match[@warning "p"] uncurry exp with + | Var {name}, [] -> pf "%%%s" name + | Global {name}, [] -> + pf "@%s%t" name (fun ff -> + let demangled = Llvm.demangle name in + if not (String.is_empty demangled || String.equal name demangled) + then Format.fprintf ff "“%s”" demangled ) + | Nondet {msg}, [] -> pf "nondet \"%s\"" msg + | Label {name}, [] -> pf "%s" name + | Null _, [] -> pf "null" + | Integer {data}, [] -> pf "%a" Z.pp_print data + | Float {data}, [] -> pf "%s" data + | PtrFld {fld}, [ptr] -> pf "%a ⊕ %i" fmt ptr fld + | PtrIdx, [arr; idx] -> pf "%a ⊕ %a" fmt arr fmt idx + | PrjFld {fld}, [ptr] -> pf "%a[%i]" fmt ptr fld + | PrjIdx, [arr; idx] -> pf "%a[%a]" fmt arr fmt idx + | UpdFld {fld}, [agg; elt] -> + pf "{%a@ @[| %i → %a@]}" fmt agg fld fmt elt + | UpdIdx, [agg; elt; idx] -> pf "[%a | %a → %a]" fmt agg fmt idx fmt elt + | Array _, elts -> pf "[%a]" (list_fmt ",@ " fmt) elts + | Struct _, elts -> pf "{%a}" (list_fmt ",@ " fmt) elts + | Cast {typ}, [arg] -> pf "(@[(%a)@ %a@])" Typ.fmt typ fmt arg + | Conv {typ}, [arg] -> pf "(@[%c%a>@ %a@])" '<' Typ.fmt typ fmt arg + | Select, [cnd; thn; els] -> pf "(%a@ ? %a@ : %a)" fmt cnd fmt thn fmt els + | Eq, [x; y] -> pf "(%a@ = %a)" fmt x fmt y + | Ne, [x; y] -> pf "(%a@ != %a)" fmt x fmt y + | Gt, [x; y] -> pf "(%a@ > %a)" fmt x fmt y + | Ge, [x; y] -> pf "(%a@ >= %a)" fmt x fmt y + | Lt, [x; y] -> pf "(%a@ < %a)" fmt x fmt y + | Le, [x; y] -> pf "(%a@ <= %a)" fmt x fmt y + | Ugt, [x; y] -> pf "(%a@ u> %a)" fmt x fmt y + | Uge, [x; y] -> pf "(%a@ u>= %a)" fmt x fmt y + | Ult, [x; y] -> pf "(%a@ u< %a)" fmt x fmt y + | Ule, [x; y] -> pf "(%a@ u<= %a)" fmt x fmt y + | Ord, [x; y] -> pf "(%a@ ord %a)" fmt x fmt y + | Uno, [x; y] -> pf "(%a@ uno %a)" fmt x fmt y + | And, [x; y] -> pf "(%a@ && %a)" fmt x fmt y + | Or, [x; y] -> pf "(%a@ || %a)" fmt x fmt y + | Xor, [x; y] -> pf "(%a@ ^ %a)" fmt x fmt y + | Shl, [x; y] -> pf "(%a@ << %a)" fmt x fmt y + | LShr, [x; y] -> pf "(%a@ >> %a)" fmt x fmt y + | AShr, [x; y] -> pf "(%a@ >>a %a)" fmt x fmt y + | Add, [x; y] -> pf "(%a@ + %a)" fmt x fmt y + | Sub, [x; y] -> pf "(%a@ - %a)" fmt x fmt y + | Mul, [x; y] -> pf "(%a@ * %a)" fmt x fmt y + | Div, [x; y] -> pf "(%a@ / %a)" fmt x fmt y + | UDiv, [x; y] -> pf "(%a@ u/ %a)" fmt x fmt y + | Rem, [x; y] -> pf "(%a@ %% %a)" fmt x fmt y + | URem, [x; y] -> pf "(%a@ u%% %a)" fmt x fmt y + + +(** Queries *) + +let rec typ_of : t -> Typ.t = function[@warning "p"] + | Var {typ} + |Global {typ} + |Nondet {typ} + |Null {typ} + |Integer {typ} + |Float {typ} + |Array {typ} + |Struct {typ} + |App {op= Cast {typ} | Conv {typ}} -> + typ + | Label _ -> Typ.i8p + | App {op= PtrFld {fld}; arg} -> ( + match[@warning "p"] typ_of arg + with Pointer {elt= Tuple {elts} | Struct {elts}} -> + Typ.mkPointer ~elt:(Vector.get elts fld) ) + | App {op= App {op= PtrIdx; arg}} -> ( + match[@warning "p"] typ_of arg with Pointer {elt= Array {elt}} -> + Typ.mkPointer ~elt ) + | App {op= PrjFld {fld}; arg} -> ( + match[@warning "p"] typ_of arg with + | Tuple {elts} | Struct {elts} -> Vector.get elts fld ) + | App {op= App {op= PrjIdx; arg}} -> ( + match[@warning "p"] typ_of arg with Array {elt} -> elt ) + | App {op= App {op= UpdFld _; arg}} + |App {op= App {op= App {op= UpdIdx; arg}}} -> + typ_of arg + | App + { op= + App + { op= + ( Eq | Ne | Gt | Ge | Lt | Le | Ugt | Uge | Ult | Ule | Ord + | Uno ) } } -> + Typ.i1 + | App + { op= + App + { op= + ( And | Or | Xor | Shl | LShr | AShr | Add | Sub | Mul | Div + | UDiv | Rem | URem ) + ; arg } } + |App {op= App {op= App {op= Select}}; arg} -> + typ_of arg + | AppN {op} -> typ_of op + + +let valid_fld fld elts = 0 <= fld && fld < Vector.length elts + +(** Re-exported modules *) + +(* Variables are the expressions constructed by [Var] *) +module Var = struct + module T = struct + type nonrec t = t [@@deriving compare, sexp] + + let equal = equal + end + + include T + include Comparator.Make (T) + + let fmt = fmt + + let mk ?(loc= Loc.none) name typ = Var {name; typ; loc} + + let name = function[@warning "p"] Var {name} -> name + + let typ = function[@warning "p"] Var {typ} -> typ + + let loc = function[@warning "p"] Var {loc} -> loc +end + +(* Globals are the expressions constructed by [Global] *) +module Global = struct + type init = t + + module T = struct + type nonrec t = t [@@deriving compare, sexp] + + let equal = equal + + let hash = Hashtbl.hash + end + + include T + include Comparator.Make (T) + + let fmt_defn ff g = + let[@warning "p"] Global {init; typ} = g in + let[@warning "p"] Typ.Pointer {elt= typ} = typ in + Format.fprintf ff "@[<2>%a %a%a@]" Typ.fmt typ fmt g + (option_fmt " =@ @[%a@]" fmt) + init + + + let fmt = fmt + + let mk ?init ?(loc= Loc.none) name typ = + assert ( + Option.for_all init ~f:(fun exp -> + Typ.equal typ (Typ.mkPointer ~elt:(typ_of exp)) ) ) ; + Global {name; init; typ; loc} + + + let of_exp e = match e with Global _ -> Some e | _ -> None + + let name = function[@warning "p"] Global {name} -> name + + let typ = function[@warning "p"] Global {typ} -> typ + + let loc = function[@warning "p"] Global {loc} -> loc +end + +(** Constructors *) + +let locate loc exp = + match exp with + | Var {name; typ} -> Var {name; typ; loc} + | Global {name; init; typ} -> Global {name; init; typ; loc} + | Nondet {typ; msg} -> Nondet {typ; loc; msg} + | Label {parent; name} -> Label {parent; name; loc} + | App {op; arg} -> App {op; arg; loc} + | AppN {op; args} -> AppN {op; args; loc} + | _ -> exp + + +let mkApp1 op arg = App {op; arg; loc= Loc.none} + +let mkApp2 op x y = mkApp1 (mkApp1 op x) y + +let mkApp3 op x y z = mkApp1 (mkApp1 (mkApp1 op x) y) z + +let mkAppN op args = AppN {op; args; loc= Loc.none} + +let mkVar = Fn.id + +let mkGlobal = Fn.id + +let mkNondet (typ: Typ.t) msg = + assert (match typ with Function _ -> false | _ -> true) ; + Nondet {typ; loc= Loc.none; msg} + + +let mkLabel ~parent ~name = Label {parent; name; loc= Loc.none} + +let mkNull (typ: Typ.t) = + assert (match typ with Opaque _ | Function _ -> false | _ -> true) ; + Null {typ} + + +let mkPtrFld ~ptr ~fld = + assert ( + match typ_of ptr with + | Pointer {elt= Tuple {elts} | Struct {elts}} -> valid_fld fld elts + | _ -> false ) ; + mkApp1 (PtrFld {fld}) ptr + + +let mkPtrIdx ~ptr ~idx = + assert ( + match (typ_of ptr, typ_of idx) with + | Pointer {elt= Array _}, Integer _ -> true + | _ -> false ) ; + mkApp2 PtrIdx ptr idx + + +let mkPrjFld ~agg ~fld = + assert ( + match typ_of agg with + | Tuple {elts} | Struct {elts} -> valid_fld fld elts + | _ -> false ) ; + mkApp1 (PrjFld {fld}) agg + + +let mkPrjIdx ~arr ~idx = + assert ( + match (typ_of arr, typ_of idx) with + | Array _, Integer _ -> true + | _ -> false ) ; + mkApp2 PrjIdx arr idx + + +let mkUpdFld ~agg ~elt ~fld = + assert ( + match typ_of agg with + | Tuple {elts} | Struct {elts} -> + valid_fld fld elts && Typ.equal (Vector.get elts fld) (typ_of elt) + | _ -> false ) ; + mkApp2 (UpdFld {fld}) agg elt + + +let mkUpdIdx ~arr ~elt ~idx = + assert ( + match (typ_of arr, typ_of idx) with + | Array {elt= typ}, Integer _ -> Typ.equal typ (typ_of elt) + | _ -> false ) ; + mkApp3 UpdIdx arr elt idx + + +let mkInteger data (typ: Typ.t) = + assert ( + let in_range num bits = + let lb = Z.(-(if bits = 1 then ~$1 else ~$1 lsl Int.(bits - 1))) + and ub = Z.(~$1 lsl bits) in + Z.(leq lb num && lt num ub) + in + match typ with Integer {bits} -> in_range data bits | _ -> false ) ; + Integer {data; typ} + + +let mkBool b = mkInteger (Z.of_int (Bool.to_int b)) Typ.i1 + +let mkFloat data (typ: Typ.t) = + assert (match typ with Float _ -> true | _ -> false) ; + Float {data; typ} + + +let mkArray elts (typ: Typ.t) = + assert ( + match typ with + | Array {elt= elt_typ; len} -> + Vector.for_all elts ~f:(fun elt -> Typ.equal (typ_of elt) elt_typ) + && Vector.length elts = len + | _ -> false ) ; + mkAppN (Array {typ}) elts + + +let mkStruct elts (typ: Typ.t) = + assert ( + match typ with + | Tuple {elts= elt_typs} | Struct {elts= elt_typs} -> + Vector.for_all2_exn elts elt_typs ~f:(fun elt elt_typ -> + Typ.equal (typ_of elt) elt_typ ) + | _ -> false ) ; + mkAppN (Struct {typ}) elts + + +let mkStruct_rec key = + let memo_id = Hashtbl.create key () in + let dummy = Null {typ= Typ.mkBytes} in + let mkStruct_ ~id elt_thks typ = + match Hashtbl.find memo_id id with + | None -> + (* Add placeholder to prevent computing [elts] in calls to + [mkStruct_rec] from [elts] for recursive occurrences of [id]. *) + let elta = Array.create ~len:(Vector.length elt_thks) dummy in + let elts = Vector.of_array elta in + Hashtbl.set memo_id ~key:id ~data:elts ; + Vector.iteri elt_thks ~f:(fun i elt_thk -> + elta.(i) <- Lazy.force elt_thk ) ; + mkStruct elts typ + | Some elts -> + (* Do not call [mkStruct] as types will be checked by the call to + [mkStruct] above after the thunks are forced, before which + type-checking may spuriously fail. Note that it is important that + the value constructed here shares the array in the memo table, so + that the update after forcing the recursive thunks also updates + this value. *) + mkAppN (Struct {typ}) elts + in + Staged.stage mkStruct_ + + +let mkCast exp typ = + assert (Typ.compatible (typ_of exp) typ) ; + mkApp1 (Cast {typ}) exp + + +let mkConv exp ?(signed= false) typ = + assert (Typ.compatible (typ_of exp) typ) ; + mkApp1 (Conv {signed; typ}) exp + + +let mkSelect ~cnd ~thn ~els = + assert ( + match (typ_of cnd, typ_of thn, typ_of els) with + | Integer {bits= 1}, s, t -> Typ.equal s t + | Array {elt= Integer {bits= 1}; len= m}, (Array {len= n} as s), t -> + m = n && Typ.equal s t + | _ -> false ) ; + mkApp3 Select cnd thn els + + +let binop op x y = + assertf + (let typ = typ_of x in + match (op, typ) with + | ( (Eq | Ne | Gt | Ge | Lt | Le | Ugt | Uge | Ult | Ule) + , (Integer _ | Float _ | Pointer _) ) + |(Add | Sub | Mul | Div | Rem), (Integer _ | Float _) + |(And | Or | Xor | Shl | LShr | AShr | UDiv | URem), Integer _ + |(Ord | Uno), Float _ -> + Typ.equal typ (typ_of y) + | _ -> false) + "ill-typed: %a" fmt (mkApp2 op x y) () ; + mkApp2 op x y + + +let mkEq = binop Eq + +let mkNe = binop Ne + +let mkGt = binop Gt + +let mkGe = binop Ge + +let mkLt = binop Lt + +let mkLe = binop Le + +let mkUgt = binop Ugt + +let mkUge = binop Uge + +let mkUlt = binop Ult + +let mkUle = binop Ule + +let mkAnd = binop And + +let mkOr = binop Or + +let mkXor = binop Xor + +let mkShl = binop Shl + +let mkLShr = binop LShr + +let mkAShr = binop AShr + +let mkAdd = binop Add + +let mkSub = binop Sub + +let mkMul = binop Mul + +let mkDiv = binop Div + +let mkUDiv = binop UDiv + +let mkRem = binop Rem + +let mkURem = binop URem + +let mkOrd = binop Ord + +let mkUno = binop Uno + +(** Queries *) + +let typ = typ_of diff --git a/sledge/src/llair/exp.mli b/sledge/src/llair/exp.mli new file mode 100644 index 000000000..003d302b2 --- /dev/null +++ b/sledge/src/llair/exp.mli @@ -0,0 +1,255 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Expressions + + Pure (heap-independent) expressions are complex arithmetic, + bitwise-logical, etc. operations over literal values and registers. + + Expressions are represented in curried form, where the only recursive + constructor is [App], which is an application of a function symbol to an + argument. This is done to simplify the definition of 'subexpression' and + make it explicit, which is a significant help for treating equality + between expressions using congruence closure. The specific constructor + functions indicate and check the expected arity and types of the + function symbols. *) + +type t = private + | Var of {name: string; typ: Typ.t; loc: Loc.t} + (** Local variable / virtual register *) + | Global of {name: string; init: t option; typ: Typ.t; loc: Loc.t} + (** Global variable, with initalizer *) + | Nondet of {typ: Typ.t; loc: Loc.t; msg: string} + (** Anonymous local variable with arbitrary value, representing + non-deterministic approximation of value described by [msg]. *) + | Label of {parent: string; name: string; loc: Loc.t} + (** Address of named code block within parent function *) + | Null of {typ: Typ.t} + (** Pointer value that never refers to an object *) + | App of {op: t; arg: t; loc: Loc.t} + (** Application of function symbol to argument, curried *) + | AppN of {op: t; args: t vector; loc: Loc.t} + (** Application of function symbol to arguments. NOTE: may be cyclic + when [op] is [Struct]. *) + | PtrFld of {fld: int} (** Pointer to a field of a struct *) + | PtrIdx (** Pointer to an index of an array *) + | PrjFld of {fld: int} (** Project a field from a constant struct *) + | PrjIdx (** Project an index from a constant array *) + | UpdFld of {fld: int} (** Constant struct with updated field *) + | UpdIdx (** Constant array with updated index *) + | Integer of {data: Z.t; typ: Typ.t} (** Integer constant *) + | Float of {data: string; typ: Typ.t} (** Floating-point constant *) + | Array of {typ: Typ.t} (** Array constant *) + | Struct of {typ: Typ.t} (** Struct constant *) + | Cast of {typ: Typ.t} (** Cast to specified type, invertible *) + | Conv of {signed: bool; typ: Typ.t} + (** Convert to specified type, possibly with loss of information *) + | Select (** Conditional *) + (* binary: comparison *) + | Eq (** Equal test *) + | Ne (** Not-equal test *) + | Gt (** Greater-than test *) + | Ge (** Greater-than-or-equal test *) + | Lt (** Less-than test *) + | Le (** Less-than-or-equal test *) + | Ugt (** Unordered or greater-than test *) + | Uge (** Unordered or greater-than-or-equal test *) + | Ult (** Unordered or less-than test *) + | Ule (** Unordered or less-than-or-equal test *) + | Ord (** Ordered test (neither arg is nan) *) + | Uno (** Unordered test (some arg is nan) *) + (* binary: boolean / bitwise *) + | And (** Conjunction *) + | Or (** Disjunction *) + | Xor (** Exclusive-or / Boolean disequality *) + | Shl (** Shift left *) + | LShr (** Logical shift right *) + | AShr (** Arithmetic shift right *) + (* binary: arithmetic *) + | Add (** Addition *) + | Sub (** Subtraction *) + | Mul (** Multiplication *) + | Div (** Division *) + | UDiv (** Unsigned division *) + | Rem (** Remainder of division *) + | URem (** Remainder of unsigned division *) + +val compare : t -> t -> int + +val equal : t -> t -> bool + +val t_of_sexp : Sexp.t -> t + +val sexp_of_t : t -> Sexp.t + +val fmt : t fmt + +(** Re-exported modules *) + +module Var : sig + type nonrec t = private t + + include Comparator.S with type t := t + + val compare : t -> t -> int + + val equal : t -> t -> bool + + val t_of_sexp : Sexp.t -> t + + val sexp_of_t : t -> Sexp.t + + val fmt : t fmt + + val mk : ?loc:Loc.t -> string -> Typ.t -> t + + val name : t -> string + + val typ : t -> Typ.t + + val loc : t -> Loc.t +end + +module Global : sig + type init = t + + type nonrec t = private t + + include Comparator.S with type t := t + + val compare : t -> t -> int + + val equal : t -> t -> bool + + val t_of_sexp : Sexp.t -> t + + val sexp_of_t : t -> Sexp.t + + val hash : t -> int + + val fmt : t fmt + + val fmt_defn : t fmt + + val mk : ?init:init -> ?loc:Loc.t -> string -> Typ.t -> t + + val of_exp : init -> t option + + val name : t -> string + + val typ : t -> Typ.t + + val loc : t -> Loc.t +end + +(** Constructors *) + +val mkVar : Var.t -> t + +val mkGlobal : Global.t -> t + +val mkNondet : Typ.t -> string -> t + +val mkLabel : parent:string -> name:string -> t + +val mkNull : Typ.t -> t + +val mkPtrFld : ptr:t -> fld:int -> t + +val mkPtrIdx : ptr:t -> idx:t -> t + +val mkPrjFld : agg:t -> fld:int -> t + +val mkPrjIdx : arr:t -> idx:t -> t + +val mkUpdFld : agg:t -> elt:t -> fld:int -> t + +val mkUpdIdx : arr:t -> elt:t -> idx:t -> t + +val mkBool : bool -> t + +val mkInteger : Z.t -> Typ.t -> t + +val mkFloat : string -> Typ.t -> t + +val mkArray : t vector -> Typ.t -> t + +val mkStruct : t vector -> Typ.t -> t + +val mkStruct_rec : + (module Hashtbl.Key_plain with type t = 'id) + -> (id:'id -> t lazy_t vector -> Typ.t -> t) Staged.t +(** [mkStruct_rec Id id element_thunks typ] constructs a possibly-cyclic + [Struct] value. Cycles are detected using [Id]. The caller of + [mkStruct_rec Id] must ensure that a single unstaging of [mkStruct_rec + Id] is used for each complete cyclic value. Also, the caller must ensure + that recursive calls to [mkStruct_rec Id] provide [id] values that + uniquely identify at least one point on each cycle. Failure to obey + these requirements will lead to stack overflow. *) + +val mkCast : t -> Typ.t -> t + +val mkConv : t -> ?signed:bool -> Typ.t -> t + +val mkSelect : cnd:t -> thn:t -> els:t -> t + +val mkEq : t -> t -> t + +val mkNe : t -> t -> t + +val mkGt : t -> t -> t + +val mkGe : t -> t -> t + +val mkLt : t -> t -> t + +val mkLe : t -> t -> t + +val mkUgt : t -> t -> t + +val mkUge : t -> t -> t + +val mkUlt : t -> t -> t + +val mkUle : t -> t -> t + +val mkOrd : t -> t -> t + +val mkUno : t -> t -> t + +val mkAnd : t -> t -> t + +val mkOr : t -> t -> t + +val mkXor : t -> t -> t + +val mkShl : t -> t -> t + +val mkLShr : t -> t -> t + +val mkAShr : t -> t -> t + +val mkAdd : t -> t -> t + +val mkSub : t -> t -> t + +val mkMul : t -> t -> t + +val mkDiv : t -> t -> t + +val mkUDiv : t -> t -> t + +val mkRem : t -> t -> t + +val mkURem : t -> t -> t + +val locate : Loc.t -> t -> t +(** Update the debug location *) + +(** Queries *) + +val typ : t -> Typ.t diff --git a/sledge/src/llair/frontend.ml b/sledge/src/llair/frontend.ml new file mode 100644 index 000000000..adaa72add --- /dev/null +++ b/sledge/src/llair/frontend.ml @@ -0,0 +1,1334 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Translate LLVM to LLAIR *) + +let fmt_lltype ff t = Format.pp_print_string ff (Llvm.string_of_lltype t) + +let fmt_llvalue ff t = Format.pp_print_string ff (Llvm.string_of_llvalue t) + +let fmt_llblock ff t = + Format.pp_print_string ff (Llvm.string_of_llvalue (Llvm.value_of_block t)) + + +(* gather debug locations *) +let (scan_locs: Llvm.llmodule -> unit), (find_loc: Llvm.llvalue -> Loc.t) = + let loc_of_global g = + Loc.mk + ?dir:(Llvm.get_global_debug_loc_directory g) + ?file:(Llvm.get_global_debug_loc_filename g) + ~line:(Llvm.get_global_debug_loc_line g) + ?col:None + in + let loc_of_function f = + Loc.mk + ?dir:(Llvm.get_function_debug_loc_directory f) + ?file:(Llvm.get_function_debug_loc_filename f) + ~line:(Llvm.get_function_debug_loc_line f) + ?col:None + in + let loc_of_instr i = + Loc.mk + ?dir:(Llvm.get_debug_loc_directory i) + ?file:(Llvm.get_debug_loc_filename i) + ~line:(Llvm.get_debug_loc_line i) + ~col:(Llvm.get_debug_loc_column i) + in + let loc_tbl = Hashtbl.Poly.create () in + let add ~key ~data = + Hashtbl.update loc_tbl key ~f:(fun prev -> + Option.iter prev ~f:(fun loc -> + if Option.is_none + (List.find_a_dup ~compare:Loc.compare [loc; data; Loc.none]) + then + warn "ignoring location %a conflicting with %a for %a" Loc.fmt + loc Loc.fmt data fmt_llvalue key ) ; + data ) + in + let scan_locs m = + let scan_instr i = + let loc = loc_of_instr i in + add ~key:i ~data:loc ; + match Llvm.instr_opcode i with + | Call -> ( + match Llvm.(value_name (operand i (num_arg_operands i))) with + | "llvm.dbg.declare" -> ( + match Llvm.(get_mdnode_operands (operand i 0)) with + | [|var|] when not (String.is_empty (Llvm.value_name var)) -> + add ~key:var ~data:loc + | _ -> + warn "could not find variable for debug info %a at %a" + fmt_llvalue (Llvm.operand i 0) Loc.fmt loc ) + | _ -> () ) + | _ -> () + in + let scan_block b = Llvm.iter_instrs scan_instr b in + let scan_function f = + add ~key:f ~data:(loc_of_function f) ; + Llvm.iter_blocks scan_block f + in + let scan_global g = add ~key:g ~data:(loc_of_global g) in + Llvm.iter_globals scan_global m ; + Llvm.iter_functions scan_function m + in + let find_loc v = + Option.value (Hashtbl.find loc_tbl v) ~default:Loc.none + in + (scan_locs, find_loc) + + +let (scan_names: Llvm.llmodule -> unit), (find_name: Llvm.llvalue -> string) = + let name_tbl = Hashtbl.Poly.create () in + let scan_name = + let scope_tbl = Hashtbl.Poly.create () in + fun llv -> + let next, void_tbl = + let scope = + match Llvm.classify_value llv with + | Argument -> `Fun (Llvm.param_parent llv) + | BasicBlock -> `Fun (Llvm.block_parent (Llvm.block_of_value llv)) + | Function | GlobalAlias | GlobalIFunc | GlobalVariable -> + `Mod (Llvm.global_parent llv) + | Instruction _ -> + `Fun (Llvm.block_parent (Llvm.instr_parent llv)) + | _ -> fail "scan_name: %a" fmt_llvalue llv () + in + Hashtbl.find_or_add scope_tbl scope ~default:(fun () -> + (ref 0, Hashtbl.Poly.create ()) ) + in + let name = + match Llvm.classify_type (Llvm.type_of llv) with + | Void -> ( + let fname = + match Llvm.classify_value llv with + | Instruction (Call | Invoke) -> ( + match + Llvm.value_name + (Llvm.operand llv (Llvm.num_operands llv - 1)) + with + | "" -> Int.to_string (!next - 1) + | s -> s ) + | _ -> "void" + in + match Hashtbl.find void_tbl fname with + | None -> + Hashtbl.set void_tbl ~key:fname ~data:1 ; + Format.sprintf "%s.void" fname + | Some count -> + Hashtbl.set void_tbl ~key:fname ~data:(count + 1) ; + Format.sprintf "%s.void.%i" fname count ) + | _ -> + match Llvm.value_name llv with + | "" -> + (* anonymous values take the next SSA name *) + let name = !next in + next := name + 1 ; + Int.to_string name + | name -> + match Int.of_string name with + | _ -> + (* escape to avoid clash with names of anonymous values *) + Format.sprintf "\"%s\"" name + | exception _ -> name + in + Hashtbl.add_exn name_tbl ~key:llv ~data:name + in + let scan_names m = + let scan_global g = scan_name g in + let scan_instr i = scan_name i in + let scan_block b = + scan_name (Llvm.value_of_block b) ; + Llvm.iter_instrs scan_instr b + in + let scan_function f = + scan_name f ; + Llvm.iter_params scan_name f ; + Llvm.iter_blocks scan_block f + in + Llvm.iter_globals scan_global m ; + Llvm.iter_functions scan_function m + in + let find_name v = Hashtbl.find_exn name_tbl v in + (scan_names, find_name) + + +let label_of_block : Llvm.llbasicblock -> string = + fun blk -> find_name (Llvm.value_of_block blk) + + +let anon_struct_name : (Llvm.lltype, string) Hashtbl.t = + Hashtbl.Poly.create () + + +let struct_name : Llvm.lltype -> string = + fun llt -> + match Llvm.struct_name llt with + | Some name -> name + | None -> + Hashtbl.find_or_add anon_struct_name llt ~default:(fun () -> + Int.to_string (Hashtbl.length anon_struct_name) ) + + +let memo_type : (Llvm.lltype, Typ.t) Hashtbl.t = Hashtbl.Poly.create () + +let rec xlate_type : Llvm.lltype -> Typ.t = + fun llt -> + let xlate_type_ llt = + match Llvm.classify_type llt with + | Half -> Typ.mkFloat ~bits:16 ~enc:`IEEE + | Float -> Typ.mkFloat ~bits:32 ~enc:`IEEE + | Double -> Typ.mkFloat ~bits:64 ~enc:`IEEE + | X86fp80 -> Typ.mkFloat ~bits:80 ~enc:`Extended + | Fp128 -> Typ.mkFloat ~bits:128 ~enc:`IEEE + | Ppc_fp128 -> Typ.mkFloat ~bits:128 ~enc:`Pair + | Integer -> Typ.mkInteger ~bits:(Llvm.integer_bitwidth llt) + | X86_mmx -> Typ.mkInteger ~bits:64 + | Function -> + let return = xlate_type_opt (Llvm.return_type llt) in + let llargs = Llvm.param_types llt in + let len = Array.length llargs in + let args = Vector.init len ~f:(fun i -> xlate_type llargs.(i)) in + Typ.mkFunction ~return ~args + | Pointer -> + let elt = xlate_type (Llvm.element_type llt) in + Typ.mkPointer ~elt + | Vector -> + let elt = xlate_type (Llvm.element_type llt) in + let len = Llvm.vector_size llt in + Typ.mkArray ~elt ~len + | Array -> + let elt = xlate_type (Llvm.element_type llt) in + let len = Llvm.array_length llt in + Typ.mkArray ~elt ~len + | Struct -> + let llelts = Llvm.struct_element_types llt in + let len = Array.length llelts in + let packed = Llvm.is_packed llt in + if Llvm.is_literal llt then + let elts = Vector.init len ~f:(fun i -> xlate_type llelts.(i)) in + Typ.mkTuple ~packed elts + else + let name = struct_name llt in + if Llvm.is_opaque llt then Typ.mkOpaque ~name + else + let elts = + Vector.init len ~f:(fun i -> lazy (xlate_type llelts.(i))) + in + Typ.mkStruct ~name ~packed elts + | Token -> Typ.i8p + | Void | Label | Metadata -> assert false + in + Hashtbl.find_or_add memo_type llt ~default:(fun () -> + [%Trace.call fun pf -> pf "%a" fmt_lltype llt] + ; + xlate_type_ llt + |> + [%Trace.retn fun pf -> pf "%a" Typ.fmt_defn] ) + + +and xlate_type_opt : Llvm.lltype -> Typ.t option = + fun llt -> + match Llvm.classify_type llt with + | Void -> None + | _ -> Some (xlate_type llt) + + +let rec is_zero : Exp.t -> bool = + fun exp -> + match exp with + | Null _ -> true + | Integer {data} -> Z.equal Z.zero data + | App {op= Array _ | Struct _; arg} -> is_zero arg + | App {op; arg} -> is_zero op && is_zero arg + | _ -> false + + +let suffix_after_space : string -> string = + fun str -> String.slice str (String.rindex_exn str ' ' + 1) 0 + + +let xlate_int : Llvm.llvalue -> Exp.t = + fun llv -> + let typ = xlate_type (Llvm.type_of llv) in + let data = + match Llvm.int64_of_const llv with + | Some n -> Z.of_int64 n + | None -> Z.of_string (suffix_after_space (Llvm.string_of_llvalue llv)) + in + Exp.mkInteger data typ + + +let xlate_float : Llvm.llvalue -> Exp.t = + fun llv -> + let typ = xlate_type (Llvm.type_of llv) in + let data = suffix_after_space (Llvm.string_of_llvalue llv) in + Exp.mkFloat data typ + + +let xlate_name_opt : Llvm.llvalue -> Var.t option = + fun instr -> + Option.map + (xlate_type_opt (Llvm.type_of instr)) + ~f:(fun typ -> + let name = find_name instr in + let loc = find_loc instr in + Var.mk name typ ~loc ) + + +let xlate_name : Llvm.llvalue -> Var.t = + fun instr -> Option.value_exn (xlate_name_opt instr) + + +let xlate_intrinsic_exp : string -> (Exp.t -> Exp.t) option = + fun name -> + let i32 = Typ.mkInteger ~bits:32 in + match name with + | "llvm.eh.typeid.for" -> Some (fun arg -> Exp.mkCast arg i32) + | _ -> None + + +let memo_value : (Llvm.llvalue, Exp.t) Hashtbl.t = Hashtbl.Poly.create () + +module Llvalue = struct + type t = Llvm.llvalue + + let hash = Hashtbl.hash + + let compare = Poly.compare + + let sexp_of_t llv = Sexp.Atom (Llvm.string_of_llvalue llv) +end + +let mkStruct_rec = Staged.unstage (Exp.mkStruct_rec (module Llvalue)) + +let rec xlate_value : Llvm.llvalue -> Exp.t = + fun llv -> + let xlate_value_ llv = + let typ = xlate_type (Llvm.type_of llv) in + ( match Llvm.classify_value llv with + | Instruction Call -> ( + let func = Llvm.operand llv (Llvm.num_arg_operands llv) in + let fname = Llvm.value_name func in + match xlate_intrinsic_exp fname with + | Some mkIntrinsic -> mkIntrinsic (xlate_value (Llvm.operand llv 0)) + | None -> Exp.mkVar (xlate_name llv) ) + | Instruction (Invoke | Alloca | Load | PHI | LandingPad | VAArg) + |Argument -> + Exp.mkVar (xlate_name llv) + | Function | GlobalVariable -> Exp.mkGlobal (xlate_global llv) + | GlobalAlias -> xlate_value (Llvm.operand llv 0) + | ConstantInt -> xlate_int llv + | ConstantFP -> xlate_float llv + | ConstantPointerNull | ConstantAggregateZero -> Exp.mkNull typ + | ConstantVector | ConstantArray -> + let len = Llvm.num_operands llv in + let f i = xlate_value (Llvm.operand llv i) in + Exp.mkArray (Vector.init len ~f) typ + | ConstantDataVector -> + let len = Llvm.vector_size (Llvm.type_of llv) in + let f i = xlate_value (Llvm.const_element llv i) in + Exp.mkArray (Vector.init len ~f) typ + | ConstantDataArray -> + let len = Llvm.array_length (Llvm.type_of llv) in + let f i = xlate_value (Llvm.const_element llv i) in + Exp.mkArray (Vector.init len ~f) typ + | ConstantStruct -> + let elt_thks = + Vector.init (Llvm.num_operands llv) ~f:(fun i -> + lazy (xlate_value (Llvm.operand llv i)) ) + in + mkStruct_rec ~id:llv elt_thks typ + | BlockAddress -> + let parent = find_name (Llvm.operand llv 0) in + let name = find_name (Llvm.operand llv 1) in + Exp.mkLabel ~parent ~name + | UndefValue -> Exp.mkNondet typ (Llvm.string_of_llvalue llv) + | Instruction + ( ( Trunc | ZExt | SExt | FPToUI | FPToSI | UIToFP | SIToFP + | FPTrunc | FPExt | PtrToInt | IntToPtr | BitCast | AddrSpaceCast + | Add | FAdd | Sub | FSub | Mul | FMul | UDiv | SDiv | FDiv | URem + | SRem | FRem | Shl | LShr | AShr | And | Or | Xor | ICmp | FCmp + | Select | GetElementPtr | ExtractElement | InsertElement + | ExtractValue | InsertValue | ShuffleVector ) as opcode ) -> + xlate_opcode llv opcode + | ConstantExpr -> xlate_opcode llv (Llvm.constexpr_opcode llv) + | GlobalIFunc -> todo "ifuncs: %a" fmt_llvalue llv () + | Instruction (CatchPad | CleanupPad | CatchSwitch) -> + todo "msvc exceptions: %a" fmt_llvalue llv () + | Instruction + ( Invalid | Ret | Br | Switch | IndirectBr | Invalid2 | Unreachable + | Store | UserOp1 | UserOp2 | Fence | AtomicCmpXchg | AtomicRMW + | Resume | CleanupRet | CatchRet ) + |NullValue | BasicBlock | InlineAsm | MDNode | MDString -> + fail "xlate_value: %a" fmt_llvalue llv () ) + |> Exp.locate (find_loc llv) + in + Hashtbl.find_or_add memo_value llv ~default:(fun () -> + [%Trace.call fun pf -> pf "%a" fmt_llvalue llv] + ; + xlate_value_ llv + |> + [%Trace.retn fun pf exp -> + let typ = xlate_type (Llvm.type_of llv) in + let typ' = Exp.typ exp in + assertf (Typ.equal typ typ') + "xlate_value translated type %a to %a of %a" Typ.fmt typ Typ.fmt + typ' fmt_llvalue llv () ; + pf "%a" Exp.fmt exp] ) + + +and xlate_opcode : Llvm.llvalue -> Llvm.Opcode.t -> Exp.t = + fun llv opcode -> + [%Trace.call fun pf -> pf "%a" fmt_llvalue llv] + ; + let xlate_rand i = xlate_value (Llvm.operand llv i) in + let cast () = Exp.mkCast (xlate_rand 0) (xlate_type (Llvm.type_of llv)) in + let conv signed = + Exp.mkConv (xlate_rand 0) ~signed (xlate_type (Llvm.type_of llv)) + in + let binary mk = + if Poly.equal (Llvm.classify_type (Llvm.type_of llv)) Vector then + todo "vector operations: %a" fmt_llvalue llv () ; + mk (xlate_rand 0) (xlate_rand 1) + in + let unordered_or mk = + binary (fun x y -> Exp.mkOr (Exp.mkUno x y) (mk x y)) + in + ( match opcode with + | BitCast | AddrSpaceCast -> cast () + | Trunc | ZExt | FPToUI | UIToFP | FPTrunc | FPExt | PtrToInt | IntToPtr -> + conv false + | SExt | FPToSI | SIToFP -> conv true + | ICmp -> ( + match Option.value_exn (Llvm.icmp_predicate llv) with + | Eq -> binary Exp.mkEq + | Ne -> binary Exp.mkNe + | Ugt -> binary Exp.mkUgt + | Uge -> binary Exp.mkUge + | Ult -> binary Exp.mkUlt + | Ule -> binary Exp.mkUle + | Sgt -> binary Exp.mkGt + | Sge -> binary Exp.mkGe + | Slt -> binary Exp.mkLt + | Sle -> binary Exp.mkLe ) + | FCmp -> ( + match Llvm.fcmp_predicate llv with + | None | Some False -> binary (fun _ _ -> Exp.mkBool false) + | Some Oeq -> binary Exp.mkEq + | Some Ogt -> binary Exp.mkGt + | Some Oge -> binary Exp.mkGe + | Some Olt -> binary Exp.mkLt + | Some Ole -> binary Exp.mkLe + | Some One -> binary Exp.mkNe + | Some Ord -> binary Exp.mkOrd + | Some Uno -> binary Exp.mkUno + | Some Ueq -> unordered_or Exp.mkEq + | Some Ugt -> unordered_or Exp.mkGt + | Some Uge -> unordered_or Exp.mkGe + | Some Ult -> unordered_or Exp.mkLt + | Some Ule -> unordered_or Exp.mkLe + | Some Une -> unordered_or Exp.mkNe + | Some True -> binary (fun _ _ -> Exp.mkBool true) ) + | Add | FAdd -> binary Exp.mkAdd + | Sub | FSub -> binary Exp.mkSub + | Mul | FMul -> binary Exp.mkMul + | SDiv | FDiv -> binary Exp.mkDiv + | UDiv -> binary Exp.mkUDiv + | SRem | FRem -> binary Exp.mkRem + | URem -> binary Exp.mkURem + | Shl -> binary Exp.mkShl + | LShr -> binary Exp.mkLShr + | AShr -> binary Exp.mkAShr + | And -> binary Exp.mkAnd + | Or -> binary Exp.mkOr + | Xor -> binary Exp.mkXor + | Select -> + Exp.mkSelect ~cnd:(xlate_rand 0) ~thn:(xlate_rand 1) + ~els:(xlate_rand 2) + | ExtractElement -> Exp.mkPrjIdx ~arr:(xlate_rand 0) ~idx:(xlate_rand 1) + | InsertElement -> + Exp.mkUpdIdx ~arr:(xlate_rand 0) ~elt:(xlate_rand 1) + ~idx:(xlate_rand 2) + | ExtractValue | InsertValue -> + let agg = xlate_rand 0 in + let indices = Llvm.indices llv in + let num = Array.length indices in + let rec xlate_indices i agg = + let agg_i, mkUpd = + match Exp.typ agg with + | Tuple _ | Struct _ -> + let fld = indices.(i) in + (Exp.mkPrjFld ~agg ~fld, Exp.mkUpdFld ~agg ~fld) + | Array _ -> + let idx = + let n = indices.(i) in + let bits = if n = 0 then 1 else 1 + Int.floor_log2 n in + Exp.mkInteger (Z.of_int n) (Typ.mkInteger ~bits) + in + (Exp.mkPrjIdx ~arr:agg ~idx, Exp.mkUpdIdx ~arr:agg ~idx) + | _ -> fail "xlate_value: %a" fmt_llvalue llv () + in + let upd_or_ret elt ret = + match[@warning "p"] opcode with + | InsertValue -> mkUpd ~elt:(Lazy.force elt) + | ExtractValue -> ret + in + if i < num - 1 then + let elt = xlate_indices (i + 1) agg_i in + upd_or_ret (lazy elt) elt + else + let elt = lazy (xlate_rand 1) in + upd_or_ret elt agg_i + in + xlate_indices 0 agg + | GetElementPtr -> + if Poly.equal (Llvm.classify_type (Llvm.type_of llv)) Vector then + todo "vector operations: %a" fmt_llvalue llv () ; + let len = Llvm.num_operands llv in + if len <= 1 then cast () + else + let rec xlate_indices i = + let idx = xlate_rand i in + if i = 1 then + let base = xlate_rand 0 in + if is_zero idx then base + else + (* translate [gep t*, iN M] as [gep [1 x t]*, iN M] *) + let ptr = + match Exp.typ base with + | Pointer {elt} -> + Exp.mkCast base + (Typ.mkPointer ~elt:(Typ.mkArray ~elt ~len:1)) + | _ -> fail "xlate_opcode: %a" fmt_llvalue llv () + in + Exp.mkPtrIdx ~ptr ~idx + else + let ptr = xlate_indices (i - 1) in + match (Exp.typ ptr, idx) with + | Pointer {elt= Array _}, _ -> Exp.mkPtrIdx ~ptr ~idx + | Pointer {elt= Tuple _ | Struct _}, Integer {data} -> + Exp.mkPtrFld ~ptr ~fld:(Z.to_int data) + | _ -> fail "xlate_opcode: %a" fmt_llvalue llv () + in + xlate_indices (len - 1) + | ShuffleVector -> ( + (* translate shufflevector %x, _, zeroinitializer to + %x *) + let exp = xlate_value (Llvm.operand llv 0) in + let llmask = Llvm.operand llv 2 in + let mask = xlate_value llmask in + match (Exp.typ exp, Exp.typ mask) with + | Array {len= m}, Array {len= n} when m = n && Llvm.is_null llmask -> + exp + | _ -> fail "xlate_opcode: %a" fmt_llvalue llv () ) + | VAArg -> todo "variadic functions: %a" fmt_llvalue llv () + | Invalid | Ret | Br | Switch | IndirectBr | Invoke | Invalid2 + |Unreachable | Alloca | Load | Store | PHI | Call | UserOp1 | UserOp2 + |Fence | AtomicCmpXchg | AtomicRMW | Resume | LandingPad | CleanupRet + |CatchRet | CatchPad | CleanupPad | CatchSwitch -> + fail "xlate_opcode: %a" fmt_llvalue llv () ) + |> Exp.locate (find_loc llv) + |> + [%Trace.retn fun pf exp -> + let typ = xlate_type (Llvm.type_of llv) in + let typ' = Exp.typ exp in + assertf (Typ.equal typ typ') + "xlate_opcode translated type %a to %a of %a" Typ.fmt typ Typ.fmt typ' + fmt_llvalue llv () ; + pf "%a" Exp.fmt exp] + + +and xlate_global : Llvm.llvalue -> Global.t = + fun llg -> + let init = + match (Llvm.classify_value llg, Llvm.linkage llg) with + | _, (External | External_weak) -> None + | GlobalVariable, _ -> Some (xlate_value (Llvm.global_initializer llg)) + | _ -> None + in + let g = xlate_name llg in + Global.mk (Var.name g) (Var.typ g) ~loc:(Var.loc g) ?init + + +let xlate_global : Llvm.llvalue -> Global.t = + fun llg -> + [%Trace.call fun pf -> pf "%a" fmt_llvalue llg] + ; + xlate_global llg + |> + [%Trace.retn fun pf -> pf "%a" Global.fmt] + + +type pop_thunk = Loc.t -> Llair.inst list + +let pop_stack_frame_of_function + : Llvm.llvalue -> Llvm.llbasicblock -> pop_thunk = + fun func entry_blk -> + let append_stack_vars blk vars = + Llvm.fold_right_instrs + (fun instr vars -> + match Llvm.instr_opcode instr with + | Alloca -> xlate_name instr :: vars + | _ -> vars ) + blk vars + in + let entry_vars = append_stack_vars entry_blk [] in + Llvm.iter_blocks + (fun blk -> + if not (Poly.equal entry_blk blk) then + Llvm.iter_instrs + (fun instr -> + match Llvm.instr_opcode instr with + | Alloca -> + todo "stack allocation after function entry:@ %a" + fmt_llvalue instr () + | _ -> () ) + blk ) + func ; + let pop retn_loc = + List.map entry_vars ~f:(fun var -> + Llair.Inst.mkFree ~ptr:(Exp.mkVar var) ~loc:retn_loc ) + in + pop + + +(** construct the types involved in landingpads: i32, std::type_info*, and + __cxa_exception *) +let landingpad_typs : Llvm.llvalue -> Typ.t * Typ.t * Typ.t = + fun instr -> + let i32 = Typ.mkInteger ~bits:32 in + if match xlate_type (Llvm.type_of instr) with + | Tuple {elts} | Struct {elts} -> ( + match Vector.to_array elts with + | [|i8p'; i32'|] -> + not (Typ.equal Typ.i8p i8p') || not (Typ.equal i32 i32') + | _ -> true ) + | _ -> true + then + todo "landingpad of type other than {i8*, i32}: %a" fmt_llvalue instr () ; + let llcontext = + Llvm.(module_context (global_parent (block_parent (instr_parent instr)))) + in + let lli8p = Llvm.(pointer_type (integer_type llcontext 8)) in + let ti = Llvm.(named_struct_type llcontext "class.std::type_info") in + let tip = Llvm.pointer_type ti in + let void = Llvm.void_type llcontext in + let dtor = Llvm.(pointer_type (function_type void [|lli8p|])) in + let cxa_exception = Llvm.struct_type llcontext [|tip; dtor|] in + (i32, xlate_type tip, xlate_type cxa_exception) + + +(** construct the argument of a landingpad block, mainly fix the encoding + scheme for landingpad instruction name to block arg name *) +let landingpad_arg : Llvm.llvalue -> Var.t = + fun instr -> + Var.mk (find_name instr ^ ".exc") Typ.i8p ~loc:(find_loc instr) + + +(** [rev_map_phis ~f blk] returns [(retn_arg, rev_args, pos)] by rev_mapping + over the prefix of [PHI] instructions at the beginning of [blk]. + [retn_arg], if any, is [f] applied to the [PHI] instruction which takes + the return value of every [Invoke] predecessor of [blk]. [rev_args] is + the result of applying [f] to each of the other [PHI] instructions. + [pos] is the instruction iterator position before the first non-[PHI] + instruction of [blk]. *) +let rev_map_phis + : f:(Llvm.llvalue -> 'a) -> Llvm.llbasicblock + -> 'a option * 'a list * _ Llvm.llpos = + fun ~f blk -> + let rec block_args_ found_invoke_pred retn_arg rev_args pos = + match (pos : _ Llvm.llpos) with + | Before instr -> ( + match Llvm.instr_opcode instr with + | PHI -> + (* [has_invoke_pred] holds if some value selected by this PHI is + the return value of an [invoke] instr. [is_retn_arg] holds if + for each predecessor terminated by an invoke instr, this PHI + instr takes the value of the invoke's return value. *) + let has_invoke_pred, is_retn_arg = + List.fold (Llvm.incoming instr) ~init:(false, true) ~f: + (fun (has_invoke_pred, is_retn_arg) (arg, pred) -> + match Llvm.block_terminator pred with + | Some instr -> ( + match Llvm.instr_opcode instr with + | Invoke when Poly.equal arg instr -> (true, is_retn_arg) + | Invoke -> (has_invoke_pred, false) + | _ -> (has_invoke_pred, is_retn_arg) ) + | None -> fail "rev_map_phis: %a" fmt_llblock blk () ) + in + if found_invoke_pred && has_invoke_pred then + (* Supporting multiple PHI instructions that take the return + values of invoke instructions will require adding trampolines + for the invoke instructions to return to, that each reorder + arguments and invoke the translation of this block. *) + todo "multiple PHI instructions taking invoke return values: %a" + fmt_llblock blk () ; + let retn_arg, rev_args = + if has_invoke_pred && is_retn_arg then (Some (f instr), rev_args) + else (None, f instr :: rev_args) + in + block_args_ has_invoke_pred retn_arg rev_args + (Llvm.instr_succ instr) + | LandingPad when Option.is_some retn_arg -> + (* Supporting returning and throwing to the same block, with + different arguments, will require adding trampolines. *) + todo + "return and throw to the same block with different arguments: %a" + fmt_llblock blk () + | _ -> (retn_arg, rev_args, pos) ) + | At_end blk -> fail "rev_map_phis: %a" fmt_llblock blk () + in + block_args_ false None [] (Llvm.instr_begin blk) + + +(** [trampoline_args jump_instr dest_block] is the actual arguments to which + the translation of [dest_block] should be partially-applied, to yield a + trampoline accepting the return parameter of the block and then jumping + with all the args. *) +let trampoline_args : Llvm.llvalue -> Llvm.llbasicblock -> Exp.t vector = + fun jmp dst -> + let src = Llvm.instr_parent jmp in + rev_map_phis dst ~f:(fun instr -> + List.find_map_exn (Llvm.incoming instr) ~f:(fun (arg, pred) -> + if Poly.equal pred src then Some (xlate_value arg) else None ) ) + |> snd3 |> Vector.of_list_rev + + +(** [unique_pred blk] is the unique predecessor of [blk], or [None] if there + are 0 or >1 predecessors. *) +let unique_pred : Llvm.llbasicblock -> Llvm.llvalue option = + fun blk -> + match Llvm.use_begin (Llvm.value_of_block blk) with + | Some use -> ( + match Llvm.use_succ use with + | None -> Some (Llvm.user use) + | Some _ -> None ) + | None -> None + + +(** [return_formal_is_used instr] holds if the return value of [instr] is + used anywhere. *) +let return_formal_is_used : Llvm.llvalue -> bool = + fun instr -> Option.is_some (Llvm.use_begin instr) + + +(** [need_return_trampoline instr blk] holds when the return formal of + [instr] is used, but the returned to block [blk] does not take it as an + argument (e.g. if it has multiple predecessors and no PHI node). *) +let need_return_trampoline : Llvm.llvalue -> Llvm.llbasicblock -> bool = + fun instr blk -> + Option.is_none (fst3 (rev_map_phis blk ~f:Fn.id)) + && Option.is_none (unique_pred blk) && return_formal_is_used instr + + +(** [unique_used_invoke_pred blk] is the unique predecessor of [blk], if it + is an [Invoke] instruction, whose return value is used. *) +let unique_used_invoke_pred : Llvm.llbasicblock -> 'a option = + fun blk -> + let is_invoke i = Poly.equal (Llvm.instr_opcode i) Invoke in + match unique_pred blk with + | Some instr when is_invoke instr && return_formal_is_used instr -> + Some instr + | _ -> None + + +(** formal parameters accepted by a block *) +let block_formals : Llvm.llbasicblock -> Var.t list * _ Llvm.llpos = + fun blk -> + let retn_arg, rev_args, pos = rev_map_phis blk ~f:xlate_name in + match pos with + | Before instr -> + let instr_arg = + match Llvm.instr_opcode instr with + | LandingPad -> + assert (Option.is_none retn_arg (* ensured by rev_map_phis *)) ; + Some (landingpad_arg instr) + | _ -> + Option.first_some retn_arg + (Option.map (unique_used_invoke_pred blk) ~f:xlate_name) + in + (List.rev_append rev_args (Option.to_list instr_arg), pos) + | At_end blk -> fail "block_formals: %a" fmt_llblock blk () + + +(** actual arguments passed by a jump to a block *) +let jump_args : Llvm.llvalue -> Llvm.llbasicblock -> Exp.t vector = + fun jmp dst -> + let src = Llvm.instr_parent jmp in + let retn_arg, rev_args, _ = + rev_map_phis dst ~f:(fun phi -> + Option.value_exn + (List.find_map (Llvm.incoming phi) ~f:(fun (arg, pred) -> + if Poly.equal pred src then Some (xlate_value arg) else None + )) ) + in + let retn_arg = + Option.first_some retn_arg + (Option.map (unique_used_invoke_pred dst) ~f:(fun invoke -> + Exp.mkVar (xlate_name invoke) )) + in + Vector.of_list (List.rev_append rev_args (Option.to_list retn_arg)) + + +(** An LLVM instruction is translated to a sequence of LLAIR instructions + and a terminator, plus some additional blocks to which it may refer + (that is, essentially a function body). These are needed since LLVM and + LLAIR blocks are not in 1:1 correspondence. *) +type code = Llair.inst list * Llair.term * Llair.block list + +let fmt_code ff (insts, term, blocks) = + Format.fprintf ff "@[@[%a%t@]%t@[%a@]@]" + (list_fmt "@ " Llair.Inst.fmt) + insts + (fun ff -> + match term with + | Llair.Unreachable -> () + | _ -> + Format.fprintf ff "%t%a" + (fun ff -> + if List.is_empty insts then () else Format.fprintf ff "@ " ) + Llair.Term.fmt term ) + (fun ff -> if List.is_empty blocks then () else Format.fprintf ff "@\n") + (list_fmt "@ " Llair.Block.fmt) + blocks + + +let rec xlate_func_name llv = + match Llvm.classify_value llv with + | Function -> + let fname = find_name llv in + let lltyp = Llvm.type_of llv in + let typ = xlate_type lltyp in + Exp.mkGlobal (Global.mk fname typ ~loc:(find_loc llv)) + | ConstantExpr -> xlate_opcode llv (Llvm.constexpr_opcode llv) + | Argument | Instruction _ -> xlate_value llv + | GlobalAlias -> xlate_func_name (Llvm.operand llv 0) + | GlobalIFunc -> todo "ifunc: %a" fmt_llvalue llv () + | InlineAsm -> todo "inline asm: %a" fmt_llvalue llv () + | _ -> fail "unknown function: %a" fmt_llvalue llv () + + +let xlate_instr + : pop_thunk -> Llvm.llvalue + -> ((Llair.inst list * Llair.term -> code) -> code) -> code = + fun pop instr continue -> + [%Trace.call fun pf -> pf "%a" fmt_llvalue instr] + ; + let continue insts_term_to_code = + [%Trace.retn + fun pf () -> + pf "%a" fmt_code (insts_term_to_code ([], Llair.Term.mkUnreachable))] + () ; + continue insts_term_to_code + in + let terminal insts term blocks = + [%Trace.retn fun pf () -> pf "%a" fmt_code (insts, term, blocks)] () ; + (insts, term, blocks) + in + let name = find_name instr in + let loc = find_loc instr in + let opcode = Llvm.instr_opcode instr in + match opcode with + | Load -> + let reg = xlate_name instr in + let ptr = xlate_value (Llvm.operand instr 0) in + continue (fun (insts, term) -> + (Llair.Inst.mkLoad ~reg ~ptr ~loc :: insts, term, []) ) + | Store -> + let exp = xlate_value (Llvm.operand instr 0) in + let ptr = xlate_value (Llvm.operand instr 1) in + continue (fun (insts, term) -> + (Llair.Inst.mkStore ~ptr ~exp ~loc :: insts, term, []) ) + | Alloca -> + let reg = xlate_name instr in + let num = xlate_value (Llvm.operand instr 0) in + continue (fun (insts, term) -> + (Llair.Inst.mkAlloc ~reg ~num ~loc :: insts, term, []) ) + | Call -> ( + let llfunc = Llvm.operand instr (Llvm.num_operands instr - 1) in + let lltyp = Llvm.type_of llfunc in + let fname = Llvm.value_name llfunc in + let reg = xlate_name_opt instr in + let skip msg = + warn "ignoring uninterpreted %s %s" msg fname ; + let msg = Llvm.string_of_llvalue instr in + continue (fun (insts, term) -> + (Llair.Inst.mkNondet ~reg ~msg ~loc :: insts, term, []) ) + in + match String.split fname ~on:'.' with + | "llvm" :: "memcpy" :: _ -> + let dst = xlate_value (Llvm.operand instr 0) in + let src = xlate_value (Llvm.operand instr 1) in + let len = xlate_value (Llvm.operand instr 2) in + continue (fun (insts, term) -> + (Llair.Inst.mkMemcpy ~dst ~src ~len ~loc :: insts, term, []) + ) + | "llvm" :: "memmov" :: _ -> + let dst = xlate_value (Llvm.operand instr 0) in + let src = xlate_value (Llvm.operand instr 1) in + let len = xlate_value (Llvm.operand instr 2) in + continue (fun (insts, term) -> + (Llair.Inst.mkMemmov ~dst ~src ~len ~loc :: insts, term, []) + ) + | "llvm" :: "memset" :: _ -> + let dst = xlate_value (Llvm.operand instr 0) in + let byt = xlate_value (Llvm.operand instr 1) in + let len = xlate_value (Llvm.operand instr 2) in + continue (fun (insts, term) -> + (Llair.Inst.mkMemset ~dst ~byt ~len ~loc :: insts, term, []) + ) + | _ when Option.is_some (xlate_intrinsic_exp fname) -> + continue (fun (insts, term) -> (insts, term, [])) + | ["llvm"; "dbg"; ("declare" | "value")] + |"llvm" :: "lifetime" :: ("start" | "end") :: _ -> + continue (fun (insts, term) -> (insts, term, [])) + | ["llvm"; ("stacksave" | "stackrestore")] -> + todo "stack allocation after function entry:@ %a" fmt_llvalue + instr () + | ["llvm"; ("va_start" | "va_copy" | "va_end")] -> + skip "variadic function intrinsic" + | "llvm" :: _ -> skip "intrinsic" + | _ when Poly.equal (Llvm.classify_value llfunc) InlineAsm -> + skip "inline asm" + | ["__llair_throw"] -> + let exc = xlate_value (Llvm.operand instr 0) in + terminal (pop loc) (Llair.Term.mkThrow ~exc ~loc) [] + | _ -> + let func = xlate_func_name llfunc in + let lbl = name ^ ".ret" in + let call = + let args = + let num_args = + if not (Llvm.is_var_arg (Llvm.element_type lltyp)) then + Llvm.num_arg_operands instr + else ( + warn + "ignoring variable arguments to variadic function: %a" + fmt_llvalue instr ; + Array.length (Llvm.param_types (Llvm.element_type lltyp)) ) + in + Vector.init num_args ~f:(fun i -> + xlate_value (Llvm.operand instr i) ) + in + let return = Llair.Jump.mk lbl Vector.empty in + Llair.Term.mkCall ~func ~args ~loc ~return ~throw:None + ~ignore_result:false + in + let params = Vector.of_option reg in + continue (fun (insts, term) -> + let cmnd = Vector.of_list insts in + ([], call, [Llair.Block.mk ~lbl ~params ~cmnd ~term]) ) ) + | Invoke -> + let llfunc = Llvm.operand instr (Llvm.num_operands instr - 3) in + let lltyp = Llvm.type_of llfunc in + let fname = Llvm.value_name llfunc in + let unwind_blk = Llvm.get_unwind_dest instr in + let unwind_dst = label_of_block unwind_blk in + let args = + let num_args = + if not (Llvm.is_var_arg (Llvm.element_type lltyp)) then + Llvm.num_arg_operands instr + else ( + warn "ignoring variable arguments to variadic function: %a" + fmt_llvalue instr ; + Array.length (Llvm.param_types (Llvm.element_type lltyp)) ) + in + Vector.init num_args ~f:(fun i -> xlate_value (Llvm.operand instr i)) + in + if String.equal fname "__llair_throw" then + let key = Exp.mkInteger Z.zero Typ.i1 in + let tbl = Vector.empty in + let els = Llair.Jump.mk unwind_dst args in + terminal [] (Llair.Term.mkSwitch ~key ~tbl ~els ~loc) [] + else + let func = xlate_func_name llfunc in + let typ = xlate_type (Llvm.type_of llfunc) in + let ignore_result = + match typ with + | Pointer {elt= Function {return= Some _}} -> + not (return_formal_is_used instr) + | _ -> false + in + let return, blocks = + let blk = Llvm.get_normal_dest instr in + if not (need_return_trampoline instr blk) then + let dst = label_of_block blk in + let args = trampoline_args instr blk in + (Llair.Jump.mk dst args, []) + else + let lbl = name ^ ".ret" in + let block = + let params = Vector.of_array [|xlate_name instr|] in + let cmnd = Vector.empty in + let term = + let key = Exp.mkInteger Z.zero Typ.i1 in + let tbl = Vector.empty in + let dst = label_of_block blk in + let args = jump_args instr blk in + let els = Llair.Jump.mk dst args in + Llair.Term.mkSwitch ~key ~tbl ~els ~loc + in + Llair.Block.mk ~lbl ~params ~cmnd ~term + in + (Llair.Jump.mk lbl Vector.empty, [block]) + in + let throw = + let dst = unwind_dst in + let args = trampoline_args instr unwind_blk in + Some (Llair.Jump.mk dst args) + in + terminal [] + (Llair.Term.mkCall ~func ~args ~loc ~return ~throw ~ignore_result) + blocks + | Ret -> + let exp = + if Llvm.num_operands instr = 0 then None + else Some (xlate_value (Llvm.operand instr 0)) + in + terminal (pop loc) (Llair.Term.mkReturn ~exp ~loc) [] + | Br -> ( + match Option.value_exn (Llvm.get_branch instr) with + | `Unconditional blk -> + let key = Exp.mkInteger Z.zero Typ.i1 in + let tbl = Vector.empty in + let dst = label_of_block blk in + let args = jump_args instr blk in + let els = Llair.Jump.mk dst args in + terminal [] (Llair.Term.mkSwitch ~key ~tbl ~els ~loc) [] + | `Conditional (cnd, thn, els) -> + let key = xlate_value cnd in + let thn_lbl = label_of_block thn in + let thn_args = jump_args instr thn in + let thn = Llair.Jump.mk thn_lbl thn_args in + let tbl = Vector.of_array [|(Z.one, thn)|] in + let els_lbl = label_of_block els in + let els_args = jump_args instr els in + let els = Llair.Jump.mk els_lbl els_args in + terminal [] (Llair.Term.mkSwitch ~key ~tbl ~els ~loc) [] ) + | Switch -> + let key = xlate_value (Llvm.operand instr 0) in + let cases = + let num_cases = Llvm.num_operands instr / 2 - 1 in + let rec xlate_cases i = + if i <= num_cases then + let idx = Llvm.operand instr (2 * i) in + let blk = + Llvm.block_of_value (Llvm.operand instr (2 * i + 1)) + in + let num = + match xlate_value idx with + | Exp.Integer {data} -> data + | _ -> fail "xlate_instr: %a" fmt_llvalue instr () + in + let dst = label_of_block blk in + let args = jump_args instr blk in + let rest = xlate_cases (i + 1) in + (num, Llair.Jump.mk dst args) :: rest + else [] + in + xlate_cases 1 + in + let tbl = Vector.of_list cases in + let blk = Llvm.block_of_value (Llvm.operand instr 1) in + let dst = label_of_block blk in + let args = jump_args instr blk in + let els = Llair.Jump.mk dst args in + terminal [] (Llair.Term.mkSwitch ~key ~tbl ~els ~loc) [] + | IndirectBr -> + let ptr = xlate_value (Llvm.operand instr 0) in + let num_dests = Llvm.num_operands instr - 1 in + let lldests = + let rec dests i = + if i <= num_dests then + let v = Llvm.operand instr i in + let blk = Llvm.block_of_value v in + let dst = label_of_block blk in + let args = jump_args instr blk in + let rest = dests (i + 1) in + Llair.Jump.mk dst args :: rest + else [] + in + dests 1 + in + let tbl = Vector.of_list lldests in + terminal [] (Llair.Term.mkISwitch ~ptr ~tbl ~loc) [] + | LandingPad -> + (* Translate the landingpad clauses to code to load the type_info from + the thrown exception, and test the type_info against the clauses, + eventually jumping to the handler code following the landingpad, + passing a value for the selector which the handler code tests to + e.g. either cleanup or rethrow. *) + let i32, tip, cxa_exception = landingpad_typs instr in + let exc = Exp.mkVar (landingpad_arg instr) in + let ti = Var.mk (name ^ ".ti") tip ~loc in + (* std::type_info* ti = ((__cxa_exception* )exc - 1)->exceptionType *) + let load_ti = + (* index of the exceptionType member of __cxa_exception *) + let exceptionType = 0 in + let ptr = + Exp.mkPtrFld + ~ptr: + (Exp.mkCast + (Exp.mkPtrIdx + ~ptr: + (Exp.mkCast exc + (Typ.mkPointer + ~elt:(Typ.mkArray ~elt:cxa_exception ~len:1))) + ~idx: + (Exp.mkInteger Z.minus_one (Typ.mkInteger ~bits:64))) + (Typ.mkPointer ~elt:cxa_exception)) + ~fld:exceptionType + in + Llair.Inst.mkLoad ~reg:ti ~ptr ~loc + in + let ti = Exp.mkVar ti in + let typeid = + Option.value_exn (xlate_intrinsic_exp "llvm.eh.typeid.for") ti + in + let lbl = name ^ ".unwind" in + let param = xlate_name instr in + let params = Vector.of_array [|param|] in + let jump_unwind sel = + let dst = lbl in + let args = + Vector.of_array + [|Exp.mkStruct (Vector.of_array [|exc; sel|]) (Var.typ param)|] + in + Llair.Jump.mk dst args + in + let goto_unwind sel = + let key = Exp.mkInteger Z.zero Typ.i1 in + let tbl = Vector.empty in + let els = jump_unwind sel in + Llair.Term.mkSwitch ~key ~tbl ~els ~loc + in + let term_unwind, rev_blocks = + if Llvm.is_cleanup instr then + (goto_unwind (Exp.mkInteger Z.zero i32), []) + else + let num_clauses = Llvm.num_operands instr in + let lbl i = name ^ "." ^ Int.to_string i in + let jump i = Llair.Jump.mk (lbl i) Vector.empty in + let block i term = + Llair.Block.mk ~lbl:(lbl i) ~params:Vector.empty + ~cmnd:Vector.empty ~term + in + let match_filter = + jump_unwind (Exp.mkSub (Exp.mkInteger Z.zero i32) typeid) + in + let xlate_clause i = + let clause = Llvm.operand instr i in + let num_tis = Llvm.num_operands clause in + if num_tis = 0 then + let key = Exp.mkInteger Z.zero Typ.i1 in + let tbl = Vector.empty in + let els = match_filter in + Llair.Term.mkSwitch ~key ~tbl ~els ~loc + else + match Llvm.classify_type (Llvm.type_of clause) with + | Array (* filter *) -> ( + match Llvm.classify_value clause with + | ConstantArray -> + let rec xlate_filter i = + let tiI = + Exp.mkCast (xlate_value (Llvm.operand clause i)) tip + in + if i < num_tis - 1 then + Exp.mkAnd (Exp.mkNe tiI ti) (xlate_filter (i + 1)) + else Exp.mkNe tiI ti + in + let key = xlate_filter 0 in + let thn = match_filter in + let tbl = Vector.of_array [|(Z.one, thn)|] in + Llair.Term.mkSwitch ~key ~tbl ~els:(jump (i + 1)) ~loc + | _ -> fail "xlate_instr: %a" fmt_llvalue instr () ) + | _ (* catch *) -> + let clause = Exp.mkCast (xlate_value clause) tip in + let key = + Exp.mkOr + (Exp.mkEq clause (Exp.mkNull tip)) + (Exp.mkEq clause ti) + in + let thn = jump_unwind typeid in + let tbl = Vector.of_array [|(Z.one, thn)|] in + Llair.Term.mkSwitch ~key ~tbl ~els:(jump (i + 1)) ~loc + in + let rec rev_blocks i z = + if i < num_clauses then + rev_blocks (i + 1) (block i (xlate_clause i) :: z) + else block i Llair.Term.mkUnreachable :: z + in + (xlate_clause 0, rev_blocks 1 []) + in + continue (fun (insts, term) -> + ( [load_ti] + , term_unwind + , List.rev_append rev_blocks + [ Llair.Block.mk ~lbl ~params ~cmnd:(Vector.of_list insts) + ~term ] ) ) + | Resume -> + let exc = + Exp.mkPrjFld ~agg:(xlate_value (Llvm.operand instr 0)) ~fld:0 + in + terminal (pop loc) (Llair.Term.mkThrow ~exc ~loc) [] + | Unreachable -> terminal [] Llair.Term.mkUnreachable [] + | Trunc | ZExt | SExt | FPToUI | FPToSI | UIToFP | SIToFP | FPTrunc + |FPExt | PtrToInt | IntToPtr | BitCast | AddrSpaceCast | Add | FAdd + |Sub | FSub | Mul | FMul | UDiv | SDiv | FDiv | URem | SRem | FRem + |Shl | LShr | AShr | And | Or | Xor | ICmp | FCmp | Select + |GetElementPtr | ExtractElement | InsertElement | ShuffleVector + |ExtractValue | InsertValue -> + continue (fun (insts, term) -> (insts, term, [])) + | VAArg -> + let reg = xlate_name_opt instr in + let msg = Llvm.string_of_llvalue instr in + warn "variadic function argument: %s" msg ; + continue (fun (insts, term) -> + (Llair.Inst.mkNondet ~reg ~msg ~loc :: insts, term, []) ) + | CleanupRet | CatchRet | CatchPad | CleanupPad | CatchSwitch -> + todo "msvc exceptions: %a" fmt_llvalue instr () + | Fence | AtomicCmpXchg | AtomicRMW -> + fail "xlate_instr: %a" fmt_llvalue instr () + | PHI | Invalid | Invalid2 | UserOp1 | UserOp2 -> assert false + + +let rec xlate_instrs : pop_thunk -> _ Llvm.llpos -> code = + fun pop -> function + | Before instrI -> + xlate_instr pop instrI (fun xlate_instrI -> + let instrJ = Llvm.instr_succ instrI in + let instsJ, termJ, blocksJN = xlate_instrs pop instrJ in + let instsI, termI, blocksI = xlate_instrI (instsJ, termJ) in + (instsI, termI, blocksI @ blocksJN) ) + | At_end blk -> fail "xlate_instrs: %a" fmt_llblock blk () + + +let xlate_block : pop_thunk -> Llvm.llbasicblock -> Llair.block list = + fun pop blk -> + [%Trace.call fun pf -> pf "%a" fmt_llblock blk] + ; + let lbl = label_of_block blk in + let args, pos = block_formals blk in + let insts, term, blocks = xlate_instrs pop pos in + Llair.Block.mk ~lbl ~params:(Vector.of_list args) + ~cmnd:(Vector.of_list insts) ~term + :: blocks + |> + [%Trace.retn fun pf blocks -> pf "%s" (List.hd_exn blocks).Llair.lbl] + + +let xlate_function : Llvm.llvalue -> Llair.func = + fun llf -> + [%Trace.call fun pf -> pf "%a" fmt_llvalue llf] + ; + let name = xlate_global llf in + let params = + Llvm.fold_left_params + (fun rev_args param -> xlate_name param :: rev_args) + [] llf + |> Vector.of_list_rev + in + ( match Llvm.block_begin llf with + | Before entry_blk -> + let pop = pop_stack_frame_of_function llf entry_blk in + let[@warning "p"] entry_block :: entry_blocks = + xlate_block pop entry_blk + in + let entry = + let {Llair.lbl; cmnd; term} = entry_block in + assert (Vector.is_empty entry_block.params) ; + Llair.Block.mk ~lbl ~params ~cmnd ~term + in + let cfg = + let rec trav_blocks rev_cfg prev = + match Llvm.block_succ prev with + | Before blk -> + trav_blocks + (List.rev_append (xlate_block pop blk) rev_cfg) + blk + | At_end _ -> Vector.of_list_rev rev_cfg + in + trav_blocks (List.rev entry_blocks) entry_blk + in + Llair.Func.mk ~name ~entry ~cfg + | At_end _ -> Llair.Func.mk_undefined ~name ~params ) + |> + [%Trace.retn fun pf -> pf "@\n%a" Llair.Func.fmt] + + +let transform : Llvm.llmodule -> unit = + fun llmodule -> + let pm = Llvm.PassManager.create () in + Llvm_scalar_opts.add_lower_atomic pm ; + Llvm_scalar_opts.add_scalar_repl_aggregation pm ; + Llvm_scalar_opts.add_scalarizer pm ; + Llvm_scalar_opts.add_merge_return pm ; + Llvm_scalar_opts.add_cfg_simplification pm ; + Llvm.PassManager.run_module llmodule pm |> (ignore : bool -> _) ; + Llvm.PassManager.dispose pm + + +exception Invalid_llvm of string + +let invalid_llvm : string -> 'a = + fun msg -> + let first_line = + Option.value_map (String.index msg '\n') ~default:msg + ~f:(String.slice msg 0) + in + Format.printf "@\n%s@\n" msg ; + raise (Invalid_llvm first_line) + + +let translate : string -> Llair.t = + fun file -> + [%Trace.call fun pf -> pf "%s" file] + ; + Llvm.install_fatal_error_handler invalid_llvm ; + let llcontext = Llvm.global_context () in + let llmodule = + let llmemorybuffer = Llvm.MemoryBuffer.of_file file in + try Llvm_irreader.parse_ir llcontext llmemorybuffer + with Llvm_irreader.Error msg -> invalid_llvm msg + in + Llvm_analysis.verify_module llmodule |> Option.iter ~f:invalid_llvm ; + transform llmodule ; + scan_locs llmodule ; + scan_names llmodule ; + let globals = + Llvm.fold_left_globals + (fun globals llg -> xlate_global llg :: globals) + [] llmodule + in + let functions = + Llvm.fold_left_functions + (fun functions llf -> + let name = Llvm.value_name llf in + if String.is_prefix name ~prefix:"__llair_" + || String.is_prefix name ~prefix:"llvm." + then functions + else xlate_function llf :: functions ) + [] llmodule + in + let typ_defns = + let by_name x y = + let name = function[@warning "p"] + | Typ.Struct {name} | Opaque {name} -> name + in + String.compare (name x) (name y) + in + Hashtbl.fold memo_type ~init:[] ~f:(fun ~key:_ ~data defns -> + match data with + | Typ.Struct _ | Opaque _ -> data :: defns + | _ -> defns ) + |> List.sort ~cmp:by_name + in + Llvm.dispose_module llmodule ; + Llvm.dispose_context llcontext ; + Llair.mk ~typ_defns ~globals ~functions + |> + [%Trace.retn fun pf _ -> pf ""] diff --git a/sledge/src/llair/frontend.mli b/sledge/src/llair/frontend.mli new file mode 100644 index 000000000..41cae7621 --- /dev/null +++ b/sledge/src/llair/frontend.mli @@ -0,0 +1,14 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Translate LLVM to LLAIR *) + +exception Invalid_llvm of string + +val translate : string -> Llair.t +(** Translate the compilation unit in the named (llvm or bitcode) file to + LLAIR. Attempts to raise [Invalid_llvm] when the input is invalid LLVM. *) diff --git a/sledge/src/llair/global.ml b/sledge/src/llair/global.ml new file mode 100644 index 000000000..637c8d0a0 --- /dev/null +++ b/sledge/src/llair/global.ml @@ -0,0 +1,10 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Global variables *) + +include Exp.Global diff --git a/sledge/src/llair/global.mli b/sledge/src/llair/global.mli new file mode 100644 index 000000000..91b0f412a --- /dev/null +++ b/sledge/src/llair/global.mli @@ -0,0 +1,10 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Global variables *) + +include module type of Exp.Global diff --git a/sledge/src/llair/jbuild.in b/sledge/src/llair/jbuild.in new file mode 100644 index 000000000..48ebe0551 --- /dev/null +++ b/sledge/src/llair/jbuild.in @@ -0,0 +1,13 @@ +(* -*- tuareg -*- *) + +let deps = ["import"; "trace"] + +;; Jbuild_plugin.V1.send @@ Format.sprintf " +(library + ((name llair_) + (public_name llair) + %s + (libraries (llvm llvm.irreader llvm.analysis llvm.scalar_opts %s)))) +" +(flags deps) +(libraries deps) diff --git a/sledge/src/llair/llair.ml b/sledge/src/llair/llair.ml new file mode 100644 index 000000000..45cc2ae8e --- /dev/null +++ b/sledge/src/llair/llair.ml @@ -0,0 +1,495 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Programs / translation units *) + +type inst = + | Load of {reg: Var.t; ptr: Exp.t; loc: Loc.t} + | Store of {ptr: Exp.t; exp: Exp.t; loc: Loc.t} + | Memcpy of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} + | Memmov of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} + | Memset of {dst: Exp.t; byt: Exp.t; len: Exp.t; loc: Loc.t} + | Alloc of {reg: Var.t; num: Exp.t; loc: Loc.t} + | Free of {ptr: Exp.t; loc: Loc.t} + | Nondet of {reg: Var.t option; msg: string; loc: Loc.t} +[@@deriving sexp] + +type cmnd = inst vector [@@deriving sexp] + +type label = string [@@deriving sexp] + +type 'a control_transfer = + {mutable dst: 'a; args: Exp.t vector; mutable retreating: bool} +[@@deriving sexp] + +type jump = block control_transfer + +and term = + | Switch of {key: Exp.t; tbl: (Z.t * jump) vector; els: jump; loc: Loc.t} + | ISwitch of {ptr: Exp.t; tbl: jump vector; loc: Loc.t} + | Call of + { call: Exp.t control_transfer + ; return: jump + ; throw: jump option + ; ignore_result: bool + ; loc: Loc.t } + | Return of {exp: Exp.t option; loc: Loc.t} + | Throw of {exc: Exp.t; loc: Loc.t} + | Unreachable + +and block = + { lbl: label + ; params: Var.t vector + ; cmnd: cmnd + ; term: term + ; mutable parent: func + ; mutable sort_index: int } + +and cfg = block vector + +and func = {name: Global.t; entry: block; cfg: cfg} [@@deriving sexp] + +type t = + {typ_defns: Typ.t list; globals: Global.t vector; functions: func vector} + +(** Initial cyclic values *) + +let rec dummy_block = + { lbl= "dummy" + ; params= Vector.empty + ; cmnd= Vector.empty + ; term= Unreachable + ; parent= dummy_func + ; sort_index= 0 } + + +and dummy_func = + {name= Global.mk "dummy" Typ.i8p; entry= dummy_block; cfg= Vector.empty} + + +module Inst = struct + type t = inst + + let mkLoad ~reg ~ptr ~loc = + assert ( + match Exp.typ ptr with + | Pointer {elt} -> Typ.equal elt (Var.typ reg) && Typ.is_sized elt + | _ -> false ) ; + Load {reg; ptr; loc} + + + let mkStore ~ptr ~exp ~loc = + assert ( + match Exp.typ ptr with + | Pointer {elt} -> Typ.equal elt (Exp.typ exp) && Typ.is_sized elt + | _ -> false ) ; + Store {ptr; exp; loc} + + + let mkMemcpy ~dst ~src ~len ~loc = + assert ( + match (Exp.typ dst, Exp.typ src, Exp.typ len) with + | (Pointer {elt} as ptr1), ptr2, Integer _ -> + Typ.equal ptr1 ptr2 && Typ.is_sized elt + | _ -> false ) ; + Memcpy {dst; src; len; loc} + + + let mkMemmov ~dst ~src ~len ~loc = + assert ( + match (Exp.typ dst, Exp.typ src, Exp.typ len) with + | (Pointer {elt} as ptr1), ptr2, Integer _ -> + Typ.equal ptr1 ptr2 && Typ.is_sized elt + | _ -> false ) ; + Memmov {dst; src; len; loc} + + + let mkMemset ~dst ~byt ~len ~loc = + assert ( + match (Exp.typ dst, Exp.typ byt, Exp.typ len) with + | Pointer {elt}, Integer {bits= 8}, Integer _ -> Typ.is_sized elt + | _ -> false ) ; + Memset {dst; byt; len; loc} + + + let mkAlloc ~reg ~num ~loc = + assert ( + match (Var.typ reg, Exp.typ num) with + | Pointer {elt}, Integer _ -> Typ.is_sized elt + | _ -> false ) ; + Alloc {reg; num; loc} + + + let mkFree ~ptr ~loc = + assert ( + match Exp.typ ptr with + | Pointer {elt} -> Typ.is_sized elt + | _ -> false ) ; + Free {ptr; loc} + + + let mkNondet ~reg ~msg ~loc = + assert (Option.for_all ~f:(Var.typ >> Typ.is_sized) reg) ; + Nondet {reg; msg; loc} + + + let fmt ff inst = + let pf fmt = Format.fprintf ff fmt in + match inst with + | Load {reg; ptr} -> pf "load %a %a;" Var.fmt reg Exp.fmt ptr + | Store {ptr; exp} -> pf "store %a %a;" Exp.fmt ptr Exp.fmt exp + | Memcpy {dst; src; len} -> + pf "memcpy %a %a %a;" Exp.fmt dst Exp.fmt src Exp.fmt len + | Memmov {dst; src; len} -> + pf "memmov %a %a %a;" Exp.fmt dst Exp.fmt src Exp.fmt len + | Memset {dst; byt; len} -> + pf "memset %a %a %a;" Exp.fmt dst Exp.fmt byt Exp.fmt len + | Alloc {reg; num} -> + let[@warning "p"] Typ.Pointer {elt} = Var.typ reg in + pf "alloc %a [%a x %a];" Var.fmt reg Exp.fmt num Typ.fmt elt + | Free {ptr} -> pf "free %a;" Exp.fmt ptr + | Nondet {reg; msg} -> + pf "nondet %a\"%s\";" (option_fmt "%a " Var.fmt) reg msg +end + +let fmt_cmnd = vector_fmt "@ " Inst.fmt + +let fmt_args fmt_arg ff args = + Format.fprintf ff "@ (@[%a@])" (vector_fmt ",@ " fmt_arg) args + + +let fmt_param ff var = + Format.fprintf ff "%a %a" Typ.fmt (Var.typ var) Var.fmt var + + +module Jump = struct + type t = jump + + let mk lbl args = {dst= {dummy_block with lbl}; args; retreating= false} + + let fmt ff {dst= {lbl}; args; retreating} = + Format.fprintf ff "@[<2>%s%%%s%a@]" + (if retreating then "↑" else "") + lbl (fmt_args Exp.fmt) args +end + +module Term = struct + type t = term + + let mkSwitch ~key ~tbl ~els ~loc = + assert (match Exp.typ key with Integer _ -> true | _ -> false) ; + Switch {key; tbl; els; loc} + + + let mkISwitch ~ptr ~tbl ~loc = + assert ( + match Exp.typ ptr with + | Pointer {elt= Integer {bits= 8}} -> true + | _ -> false ) ; + ISwitch {ptr; tbl; loc} + + + let mkCall ~func ~args ~return ~throw ~ignore_result ~loc = + assert ( + match Exp.typ func with + | Pointer {elt= Function {args= typs}} -> + Vector.for_all2_exn typs args ~f:(fun typ arg -> + Typ.equal typ (Exp.typ arg) ) + | _ -> false ) ; + Call + { call= {dst= func; args; retreating= false} + ; return + ; throw + ; ignore_result + ; loc } + + + let mkReturn ~exp ~loc = Return {exp; loc} + + let mkThrow ~exc ~loc = Throw {exc; loc} + + let mkUnreachable = Unreachable + + let fmt ff term = + let pf fmt = Format.fprintf ff fmt in + let fmt_goto ff jmp = Format.fprintf ff "goto %a;" Jump.fmt jmp in + match term with + | Switch {key; tbl; els} -> ( + match Vector.to_array tbl with + | [||] -> pf "%a" fmt_goto els + | [|(z, jmp)|] when Z.equal Z.one z -> + pf "@[if (%a)@;<1 2>%a@ @[else@;<1 2>%a@]@]" Exp.fmt key fmt_goto + jmp fmt_goto els + | _ -> + pf "@[<2>switch %a@ @[%a@ else: %a@]@]" Exp.fmt key + (vector_fmt "@ " (fun ff (z, jmp) -> + Format.fprintf ff "%a: %a" Z.pp_print z fmt_goto jmp )) + tbl fmt_goto els ) + | ISwitch {ptr; tbl} -> + pf "@[<2>iswitch %a@ @[%a@]@]" Exp.fmt ptr + (vector_fmt "@ " (fun ff ({dst= {lbl}; _} as jmp) -> + Format.fprintf ff "%s: %a" lbl fmt_goto jmp )) + tbl + | Call {call= {dst; args; retreating}; return; throw} -> + pf "@[<2>@[<7>call @[<2>%s%a%a@]@]@ @[returnto %a%a;@]@]" + (if retreating then "↑" else "") + Exp.fmt dst (fmt_args Exp.fmt) args Jump.fmt return + (option_fmt "@ throwto %a" Jump.fmt) + throw + | Return {exp} -> pf "return%a" (option_fmt " %a" Exp.fmt) exp + | Throw {exc} -> pf "throw %a" Exp.fmt exc + | Unreachable -> pf "unreachable" +end + +module Block = struct + type t = block [@@deriving sexp] + + let mk ~lbl ~params ~cmnd ~term = + assert ( + not (List.contains_dup (Vector.to_list params) ~compare:Var.compare) + ) ; + {dummy_block with lbl; params; cmnd; term} + + + (* blocks in a [t] are uniquely identified by [sort_index] *) + let compare x y = Int.compare x.sort_index y.sort_index + + let hash {sort_index} = Int.hash sort_index + + let fmt ff {lbl; params; cmnd; term; sort_index} = + Format.fprintf ff "@[@[<4>%s%a@]: #%i@ @[%a%t%a@]@]" lbl + (fmt_args fmt_param) params sort_index fmt_cmnd cmnd + (fun ff -> if Vector.is_empty cmnd then () else Format.fprintf ff "@ ") + Term.fmt term +end + +module Func = struct + type t = func + + let find functions func = + Vector.find functions ~f:(fun {name} -> Global.equal func name) + + + let is_undefined = function + | {entry= {cmnd; term= Unreachable}} -> Vector.is_empty cmnd + | _ -> false + + + let fold_term {entry; cfg} ~init ~f = + let fold_block {term} ~init ~f = f init term in + Vector.fold cfg ~init:(fold_block entry ~init ~f) ~f:(fun z k -> + fold_block k ~init:z ~f ) + + + let mk ~name ~entry ~cfg = + let func = {name; entry; cfg} in + let resolve_parent_and_jumps block = + block.parent <- func ; + let lookup cfg lbl : block = + Vector.find_exn cfg ~f:(fun k -> String.equal lbl k.lbl) + in + let set_dst jmp = jmp.dst <- lookup cfg jmp.dst.lbl in + match block.term with + | Switch {tbl; els} -> + Vector.iter tbl ~f:(fun (_, jmp) -> set_dst jmp) ; + set_dst els + | ISwitch {tbl} -> Vector.iter tbl ~f:set_dst + | Call {return; throw} -> + set_dst return ; + Option.iter throw ~f:set_dst + | Return _ | Throw _ | Unreachable -> () + in + resolve_parent_and_jumps entry ; + Vector.iter cfg ~f:resolve_parent_and_jumps ; + assert ( + not + (List.contains_dup (Vector.to_list cfg) ~compare:(fun b1 b2 -> + String.compare b1.lbl b2.lbl )) ) ; + assert ( + let check_jump ?retn_typ {dst= {params}; args} = + let rev_frml_typs = + Vector.fold params ~init:[] ~f:(fun rev_frml_typs frml -> + Var.typ frml :: rev_frml_typs ) + in + let rev_actl_typs = + Option.to_list retn_typ + @ Vector.fold args ~init:[] ~f:(fun rev_actl_typs actl -> + Exp.typ actl :: rev_actl_typs ) + in + List.iter2_exn rev_frml_typs rev_actl_typs ~f:(fun frml actl -> + assert (Typ.equal frml actl && Typ.is_sized frml) ) + in + fold_term func ~init:() ~f:(fun () -> function + | Switch {tbl; els} -> + Vector.iter tbl ~f:(fun (_, jmp) -> check_jump jmp) ; + check_jump els + | ISwitch {tbl} -> Vector.iter tbl ~f:check_jump + | Call {call= {dst; args= actls}; return; throw; ignore_result} -> ( + match Exp.typ dst with + | Pointer {elt= Function {args= frmls; return= retn_typ}} -> + Vector.iter2_exn frmls actls ~f:(fun frml actl -> + assert (Typ.equal frml (Exp.typ actl)) ) ; + check_jump return + ?retn_typ:(if ignore_result then None else retn_typ) ; + Option.iter throw ~f:(fun throw -> + check_jump throw ~retn_typ:Typ.i8p ) + | _ -> assert false ) + | Return {exp} -> ( + let typ = Option.map exp ~f:Exp.typ in + match Global.typ name with + | Pointer {elt= Function {return}} -> + assert (Option.equal Typ.equal typ return) + | _ -> assert false ) + | Throw {exc} -> assert (Typ.equal Typ.i8p (Exp.typ exc)) + | Unreachable -> () ) ; + true ) ; + func + + + let mk_undefined ~name ~params = + let entry = + Block.mk ~lbl:"" ~params ~cmnd:Vector.empty ~term:Term.mkUnreachable + in + let cfg = Vector.empty in + mk ~name ~entry ~cfg + + + let fmt ff ({name; entry= {params; cmnd; term; sort_index}; cfg} as func) = + let fmt_if cnd str ff = if cnd then Format.fprintf ff str in + let[@warning "p"] Typ.Pointer {elt= Function {return}} = + Global.typ name + in + Format.fprintf ff "@[@[%a@[<2>%a%a@]%t@]" + (option_fmt "%a " Typ.fmt) + return Global.fmt name (fmt_args fmt_param) params (fun ff -> + if is_undefined func then Format.fprintf ff " #%i@]" sort_index + else + Format.fprintf ff " { #%i @;<1 4>@[%a@ %a@]%t%a@]@ }" + sort_index fmt_cmnd cmnd Term.fmt term + (fmt_if (not (Vector.is_empty cfg)) "@ @ ") + (vector_fmt "@\n@\n " Block.fmt) + cfg ) +end + +module Block_id = struct + type t = block [@@deriving sexp] + + (* block labels within a function are unique *) + let compare x y = + [%compare : string * Global.t] + (x.lbl, x.parent.name) (y.lbl, y.parent.name) + + + let hash b = Hashtbl.hash (b.lbl, b.parent.name) +end + +module BS = Set.Make (Block_id) +module BQ = Hash_queue.Make (Block_id) +module FQ = Hash_queue.Make (Global) + +let set_derived_metadata functions = + let compute_roots functions = + let roots = FQ.create () in + Array.iter functions ~f:(fun func -> FQ.enqueue_exn roots func.name func) ; + Array.iter functions ~f:(fun func -> + Func.fold_term func ~init:() ~f:(fun () -> function + | Call {call= {dst}} -> ( + match Global.of_exp dst with + | Some callee -> FQ.remove roots callee |> ignore + | None -> () ) + | _ -> () ) ) ; + roots + in + let topsort functions roots = + let tips_to_roots = BQ.create () in + let rec visit ancestors func src = + if not (BQ.mem tips_to_roots src) then ( + let ancestors = BS.add ancestors src in + let jump jmp = + if BS.mem ancestors jmp.dst then jmp.retreating <- true + else visit ancestors func jmp.dst + in + ( match src.term with + | Switch {tbl; els} -> + Vector.iter tbl ~f:(fun (_, jmp) -> jump jmp) ; + jump els + | ISwitch {tbl} -> Vector.iter tbl ~f:jump + | Call {call= {dst} as call; return; throw} -> + ( match Global.of_exp dst with + | Some name -> ( + match Func.find (Vector.of_array functions) name with + | Some func -> + if BS.mem ancestors func.entry then + call.retreating <- true + else visit ancestors func func.entry + | None -> + fail "Call to unknown function: %a" Global.fmt name () ) + | None -> + (* conservatively assume all virtual calls are recursive *) + call.retreating <- true ) ; + jump return ; Option.iter ~f:jump throw + | Return _ | Throw _ | Unreachable -> () ) ; + BQ.enqueue_exn tips_to_roots src () ) + in + FQ.iter roots ~f:(fun root -> visit BS.empty root root.entry) ; + tips_to_roots + in + let set_sort_indices tips_to_roots = + let index = ref (BQ.length tips_to_roots) in + BQ.iteri tips_to_roots ~f:(fun ~key:block ~data:_ -> + block.sort_index <- !index ; + decr index ) + in + let sort_cfgs functions = + Array.iter functions ~f:(fun {cfg} -> + Array.sort + ~cmp:(fun x y -> Int.compare x.sort_index y.sort_index) + (Vector.to_array cfg) ) + in + let sort_functions functions = + Array.sort + ~cmp:(fun x y -> Int.compare x.entry.sort_index y.entry.sort_index) + functions + in + let functions = Array.of_list functions in + let roots = compute_roots functions in + let tips_to_roots = topsort functions roots in + set_sort_indices tips_to_roots ; + sort_cfgs functions ; + sort_functions functions ; + Vector.of_array functions + + +let mk ~typ_defns ~globals ~functions = + assert ( + not + (List.contains_dup typ_defns ~compare:(fun (s: Typ.t) (t: Typ.t) -> + match (s, t) with + | ( (Struct {name= n1} | Opaque {name= n1}) + , (Struct {name= n2} | Opaque {name= n2}) ) -> + String.compare n1 n2 + | _ -> Typ.compare s t )) + && not + (List.contains_dup globals ~compare:(fun g1 g2 -> + String.compare (Global.name g1) (Global.name g2) )) + && not + (List.contains_dup functions ~compare:(fun f1 f2 -> + String.compare (Global.name f1.name) (Global.name f2.name) )) + ) ; + { typ_defns + ; globals= Vector.of_list_rev globals + ; functions= set_derived_metadata functions } + + +let fmt ff {typ_defns; globals; functions} = + Format.fprintf ff "@[@[%a@]@ @ @ @[%a@]@ @ @ @[%a@]@]" + (list_fmt "@\n@\n" Typ.fmt_defn) + typ_defns + (vector_fmt "@\n@\n" Global.fmt_defn) + globals + (vector_fmt "@\n@\n" Func.fmt) + functions diff --git a/sledge/src/llair/llair.mli b/sledge/src/llair/llair.mli new file mode 100644 index 000000000..c3925b827 --- /dev/null +++ b/sledge/src/llair/llair.mli @@ -0,0 +1,202 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Programs / translation units + + LLAIR (Low-Level Analysis Internal Representation) is an IR optimized + for static analysis using a low-level model of memory. Compared to a + compiler IR such as LLVM, an analyzer does not need to perform register + allocation, instruction selection, code generation, etc. or even much + code transformation, so the constraints on the IR are very different. + + LLAIR is a "Functional SSA" form where control transfers pass arguments + instead of using ϕ-nodes. An analyzer will need good support for + parameter passing anyhow, and ϕ-nodes make it hard to express program + properties as predicates on states, since some execution history is + needed to evaluate ϕ instructions. SSA form is beneficial for analysis + as it means that all "modified variables" side-conditions on program + logic rules trivially hold. An alternative view is that the scope of + variables [reg] assigned in instructions such as [Load] is the successor + block as well as all blocks the instruction dominates in the + control-flow graph. This language is first-order, and a term structure + for the code constituting the scope of variables is not needed, so SSA + and not CPS suffices. + + Additionally, the focus on memory analysis leads to a design where the + arithmetic and logic operations are not "instructions" but instead are + complex expressions (see [Exp]) that refer to registers (see [Var]). *) + +(** Instructions for memory manipulation or other non-control effects. *) +type inst = private + | Load of {reg: Var.t; ptr: Exp.t; loc: Loc.t} + (** Read the contents of memory at address [ptr] into [reg]. *) + | Store of {ptr: Exp.t; exp: Exp.t; loc: Loc.t} + (** Write [exp] into memory at address [ptr]. *) + | Memcpy of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} + (** Copy [len] bytes starting from address [src] to [dst], undefined + if ranges overlap. *) + | Memmov of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} + (** Copy [len] bytes starting from address [src] to [dst]. *) + | Memset of {dst: Exp.t; byt: Exp.t; len: Exp.t; loc: Loc.t} + (** Store byte [byt] into [len] memory addresses starting from [dst]. *) + | Alloc of {reg: Var.t; num: Exp.t; loc: Loc.t} + (** Allocate a block of memory large enough to store [num] elements of + type [t] where [reg : t*] and bind [reg] to the first address. *) + | Free of {ptr: Exp.t; loc: Loc.t} + (** Deallocate the previously allocated block at address [ptr]. *) + | Nondet of {reg: Var.t option; msg: string; loc: Loc.t} + (** Bind [reg] to an arbitrary value of its type, representing + non-deterministic approximation of behavior described by [msg]. *) + +(** A (straight-line) command is a sequence of instructions. *) +type cmnd = inst vector + +(** A label is a name of a block. *) +type label = string + +(** A jump with arguments. *) +type 'a control_transfer = + { mutable dst: 'a + ; args: Exp.t vector + ; mutable retreating: bool + (** Holds if [dst] is an ancestor in a depth-first traversal. *) } + +(** A jump with arguments to a block. *) +type jump = block control_transfer + +(** Block terminators for function call/return or other control transfers. *) +and term = private + | Switch of {key: Exp.t; tbl: (Z.t * jump) vector; els: jump; loc: Loc.t} + (** Invoke the [jump] in [tbl] associated with the integer [z] which + is equal to [key], if any, otherwise invoke [els]. *) + | ISwitch of {ptr: Exp.t; tbl: jump vector; loc: Loc.t} + (** Invoke the [jump] in [tbl] whose [dst] is equal to [ptr]. *) + | Call of + { call: Exp.t control_transfer (** [Global] for non-virtual call. *) + ; return: jump (** Return destination or trampoline. *) + ; throw: jump option (** Handler destination or trampoline. *) + ; ignore_result: bool (** Drop return value when invoking return. *) + ; loc: Loc.t } + (** Call function [call.dst] with arguments [call.args]. *) + | Return of {exp: Exp.t option; loc: Loc.t} + (** Invoke [return] of the dynamically most recent [Call]. *) + | Throw of {exc: Exp.t; loc: Loc.t} + (** Invoke [throw] of the dynamically most recent [Call] with [throw] + not [None]. *) + | Unreachable + (** Halt as control is assumed to never reach [Unreachable]. *) + +(** A block is a destination of a jump with arguments, contains code. *) +and block = private + { lbl: label + ; params: Var.t vector + ; cmnd: cmnd + ; term: term + ; mutable parent: func + ; mutable sort_index: int + (** Position in a topological order, ignoring [retreating] edges. *) + } + +and cfg + +(** A function is a control-flow graph with distinguished entry block, whose + arguments are the function arguments. *) +and func = private {name: Global.t; entry: block; cfg: cfg} + +type t = private + { typ_defns: Typ.t list (** Type definitions. *) + ; globals: Global.t vector (** Global variable definitions. *) + ; functions: func vector (** (Global) function definitions. *) } + +val mk : + typ_defns:Typ.t list -> globals:Global.t list -> functions:func list -> t + +val fmt : t fmt + +module Inst : sig + type t = inst + + val mkLoad : reg:Var.t -> ptr:Exp.t -> loc:Loc.t -> inst + + val mkStore : ptr:Exp.t -> exp:Exp.t -> loc:Loc.t -> inst + + val mkMemcpy : dst:Exp.t -> src:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + + val mkMemmov : dst:Exp.t -> src:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + + val mkMemset : dst:Exp.t -> byt:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + + val mkAlloc : reg:Var.t -> num:Exp.t -> loc:Loc.t -> inst + + val mkFree : ptr:Exp.t -> loc:Loc.t -> inst + + val mkNondet : reg:Var.t option -> msg:string -> loc:Loc.t -> inst + + val fmt : t fmt +end + +module Jump : sig + type t = jump + + val mk : string -> Exp.t vector -> jump + + val fmt : jump fmt +end + +module Term : sig + type t = term + + val mkSwitch : + key:Exp.t -> tbl:(Z.t * jump) vector -> els:jump -> loc:Loc.t -> term + + val mkISwitch : ptr:Exp.t -> tbl:jump vector -> loc:Loc.t -> term + + val mkCall : + func:Exp.t -> args:Exp.t vector -> return:jump -> throw:jump option + -> ignore_result:bool -> loc:Loc.t -> term + + val mkReturn : exp:Exp.t option -> loc:Loc.t -> term + + val mkThrow : exc:Exp.t -> loc:Loc.t -> term + + val mkUnreachable : term + + val fmt : t fmt +end + +module Block : sig + type t = block + + val mk : + lbl:label -> params:Var.t vector -> cmnd:cmnd -> term:term -> block + + val fmt : t fmt + + val compare : t -> t -> int + + val hash : t -> int + + val sexp_of_t : t -> Sexp.t + + val t_of_sexp : Sexp.t -> t +end + +module Func : sig + type t = func + + val mk : name:Global.t -> entry:block -> cfg:block vector -> func + + val mk_undefined : name:Global.t -> params:Var.t vector -> t + + val find : func vector -> Global.t -> func option + (** Look up a function of the given name in the given functions. *) + + val is_undefined : func -> bool + (** Holds of functions that are declared but not defined. *) + + val fmt : t fmt +end diff --git a/sledge/src/llair/loc.ml b/sledge/src/llair/loc.ml new file mode 100644 index 000000000..3be9e77c2 --- /dev/null +++ b/sledge/src/llair/loc.ml @@ -0,0 +1,30 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Source code debug locations *) + +type t = {dir: string; file: string; line: int; col: int} +[@@deriving compare, sexp] + +let none = {dir= ""; file= ""; line= 0; col= 0} + +let mk ?(dir= none.dir) ?(file= none.file) ?(col= none.col) ~line = + {dir; file; line; col} + + +let fmt ff {dir; file; line; col} = + Format.pp_print_string ff dir ; + if not (String.is_empty dir) then + Format.pp_print_string ff Filename.dir_sep ; + Format.pp_print_string ff file ; + if not (String.is_empty file) then Format.pp_print_char ff ':' ; + if line > 0 then ( + Format.pp_print_int ff line ; + Format.pp_print_char ff ':' ) ; + if col > 0 then ( + Format.pp_print_int ff col ; + Format.pp_print_char ff ':' ) diff --git a/sledge/src/llair/loc.mli b/sledge/src/llair/loc.mli new file mode 100644 index 000000000..fb7fbb7da --- /dev/null +++ b/sledge/src/llair/loc.mli @@ -0,0 +1,22 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Source code debug locations *) + +type t = {dir: string; file: string; line: int; col: int} + +val compare : t -> t -> int + +val t_of_sexp : Sexp.t -> t + +val sexp_of_t : t -> Sexp.t + +val fmt : t fmt + +val none : t + +val mk : ?dir:string -> ?file:string -> ?col:int -> line:int -> t diff --git a/sledge/src/llair/typ.ml b/sledge/src/llair/typ.ml new file mode 100644 index 000000000..27324b4a6 --- /dev/null +++ b/sledge/src/llair/typ.ml @@ -0,0 +1,142 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Types *) + +type t = + | Function of {return: t option; args: t vector} + | Integer of {bits: int} + | Float of {bits: int; enc: [`IEEE | `Extended | `Pair]} + | Pointer of {elt: t} + | Array of {elt: t; len: int} + | Tuple of {elts: t vector; packed: bool} + | Struct of + { name: string + ; elts: t vector [@compare.ignore] (* possibly cyclic, name unique *) + ; packed: bool } + | Opaque of {name: string} + | Bytes +[@@deriving compare, sexp] + +let equal x y = compare x y = 0 + +let rec fmt ff typ = + match typ with + | Function {return; args} -> + Format.fprintf ff "@[%a@ (@[%a@])@]" (option_fmt "%a" fmt) return fmts + args + | Integer {bits} -> Format.fprintf ff "@[i%i@]" bits + | Float {bits; enc} -> + let fmt_enc ff = function + | `IEEE -> () + | `Extended -> Format.pp_print_string ff "extend" + | `Pair -> Format.pp_print_string ff "pair" + in + Format.fprintf ff "@[f%i%a@]" bits fmt_enc enc + | Pointer {elt} -> Format.fprintf ff "@[%a*@]" fmt elt + | Array {elt; len} -> Format.fprintf ff "@[[%i x %a]@]" len fmt elt + | Tuple {elts; packed} -> + let opn, cls = if packed then ("<{", "}>") else ("{", "}") in + Format.fprintf ff "@[%s @[%a@] %s@]" opn fmts elts cls + | Struct {name} | Opaque {name} -> Format.fprintf ff "@[%%%s@]" name + | Bytes -> Format.fprintf ff "bytes" + + +and fmts ff typs = vector_fmt ",@ " fmt ff typs + +let fmt_defn ff = function + | Struct {name; elts; packed} -> + let opn, cls = if packed then ("<{", "}>") else ("{", "}") in + Format.fprintf ff "@[<2>%%%s =@ @[%s %a@] %s@]" name opn fmts elts cls + | Opaque {name} -> Format.fprintf ff "@[<2>%%%s =@ opaque@]" name + | typ -> fmt ff typ + + +let is_sized = function + | Function _ | Bytes -> false + | Integer _ | Float _ | Pointer _ | Array _ | Tuple _ | Struct _ -> true + | Opaque _ -> + (* This is optimisic since sizedness of Opaque types is indeterminate, + as they are not sized but may become sized through linking. *) + true + + +let rec prim_bit_size_of = function + | Integer {bits} | Float {bits} -> Some bits + | Array {elt; len} -> + Option.map (prim_bit_size_of elt) ~f:(fun n -> n * len) + | Opaque _ | Function _ | Pointer _ | Tuple _ | Struct _ | Bytes -> None + + +let rec compatible t0 t1 = + match (t0, t1, prim_bit_size_of t0, prim_bit_size_of t1) with + | ( (Integer _ | Float _ | Pointer _) + , (Integer _ | Float _ | Pointer _) + , _ + , _ ) -> + true + | Array {elt= t; len= m}, Array {elt= u; len= n}, _, _ + when m = n && compatible t u -> + true + | ( (Integer _ | Float _ | Pointer _ | Array _) + , (Integer _ | Float _ | Pointer _ | Array _) + , Some s0 + , Some s1 ) + when s0 = s1 -> + true + | _ -> false + + +let mkFunction ~return ~args = + assert ( + Option.for_all ~f:is_sized return && Vector.for_all ~f:is_sized args ) ; + Function {return; args} + + +let mkInteger ~bits = Integer {bits} + +let mkFloat ~bits ~enc = Float {bits; enc} + +let mkPointer ~elt = Pointer {elt} + +let mkArray ~elt ~len = + assert (is_sized elt) ; + Array {elt; len} + + +let defns : (string, t) Hashtbl.t = Hashtbl.create (module String) () + +let mkTuple ~packed elts = + assert (Vector.for_all ~f:is_sized elts) ; + Tuple {elts; packed} + + +let mkStruct ~name ~packed elt_thks = + match Hashtbl.find defns name with + | Some typ -> typ + | None -> + (* Add placeholder defn to prevent computing [elts] in calls to + [mkStruct] from [elts] for recursive occurrences of [name]. *) + let elts = + Array.create ~len:(Vector.length elt_thks) (mkInteger ~bits:0) + in + let typ = Struct {name; elts= Vector.of_array elts; packed} in + Hashtbl.set defns ~key:name ~data:typ ; + Vector.iteri elt_thks ~f:(fun i elt_thk -> + let elt = Lazy.force elt_thk in + assert (is_sized elt) ; + elts.(i) <- elt ) ; + typ + + +let mkOpaque ~name = Opaque {name} + +let mkBytes = Bytes + +let i1 = mkInteger ~bits:1 + +let i8p = mkPointer ~elt:(mkInteger ~bits:8) diff --git a/sledge/src/llair/typ.mli b/sledge/src/llair/typ.mli new file mode 100644 index 000000000..c440a385a --- /dev/null +++ b/sledge/src/llair/typ.mli @@ -0,0 +1,76 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Types *) + +type t = private + | Function of {return: t option; args: t vector} + (** (Global) function names have type Pointer to Function. *) + | Integer of {bits: int} (** Integer of given bitwidth *) + | Float of {bits: int; enc: [`IEEE | `Extended | `Pair]} + (** Floating-point numbers of given bitwidth and encoding *) + | Pointer of {elt: t} (** Pointer to element type *) + | Array of {elt: t; len: int} + (** Statically-sized array of [len] elements of type [elt]. *) + | Tuple of {elts: t vector; packed: bool} + (** Anonymous aggregate of heterogeneous types. *) + | Struct of {name: string; elts: t vector; packed: bool} + (** Uniquely named aggregate of heterogeneous types. Every cycle of + recursive types contains a [Struct]. NOTE: recursive [Struct] + types are represented by cyclic values. *) + | Opaque of {name: string} + (** Uniquely named aggregate type whose definition is hidden. *) + | Bytes (** Dynamically-sized byte-array. *) + +val compare : t -> t -> int + +val equal : t -> t -> bool + +val t_of_sexp : Sexp.t -> t + +val sexp_of_t : t -> Sexp.t + +val fmt : t fmt + +val fmt_defn : t fmt + +(** Constructors *) + +val mkFunction : return:t option -> args:t vector -> t + +val mkInteger : bits:int -> t + +val mkFloat : bits:int -> enc:[`Extended | `IEEE | `Pair] -> t + +val mkPointer : elt:t -> t + +val mkArray : elt:t -> len:int -> t + +val mkTuple : packed:bool -> t vector -> t + +val mkStruct : name:string -> packed:bool -> t lazy_t vector -> t + +val mkOpaque : name:string -> t + +val mkBytes : t + +(** Special types *) + +val i1 : t +(** Booleans are represented by 1-bit integers. *) + +val i8p : t +(** Byte-pointers are effectively a universal type. *) + +(** Queries *) + +val compatible : t -> t -> bool +(** Compatible types are those that can be cast or converted between, + perhaps with some loss of information. *) + +val is_sized : t -> bool +(** Holds of types which are first-class and have a statically-known size. *) diff --git a/sledge/src/llair/var.ml b/sledge/src/llair/var.ml new file mode 100644 index 000000000..bba50a6a1 --- /dev/null +++ b/sledge/src/llair/var.ml @@ -0,0 +1,10 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Variables *) + +include Exp.Var diff --git a/sledge/src/llair/var.mli b/sledge/src/llair/var.mli new file mode 100644 index 000000000..f85a34e94 --- /dev/null +++ b/sledge/src/llair/var.mli @@ -0,0 +1,10 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Variables *) + +include module type of Exp.Var diff --git a/sledge/src/ppx_trace/jbuild b/sledge/src/ppx_trace/jbuild new file mode 100644 index 000000000..b7f37d17f --- /dev/null +++ b/sledge/src/ppx_trace/jbuild @@ -0,0 +1,5 @@ +(library + ((name ppx_trace) + (kind ppx_rewriter) + (preprocess no_preprocessing) + (libraries (ppx_core ppx_driver)))) diff --git a/sledge/src/ppx_trace/ppx_trace.ml b/sledge/src/ppx_trace/ppx_trace.ml new file mode 100644 index 000000000..a251d6206 --- /dev/null +++ b/sledge/src/ppx_trace/ppx_trace.ml @@ -0,0 +1,120 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Extension point rewriter for debug trace logging + + This ppx rewriter declares a [--debug] command line option, to be passed + by the build system in debug but not optimized build modes. + + It rewrites [[%Trace.info f]] to a call [Trace.info mod_name fun_name f] + where [mod_name] and [fun_name] are the enclosing module and function + names in the parsetree. This is only done in debug mode, if [--debug] is + not passed, then [[%Trace.info f]] is rewritten to [()]. + + Similarly, [[%Trace.call]] is rewritten to a call to [Trace.call] or + [()], and [[%Trace.retn]] to a call to [Trace.retn] or [Fn.id]. + + For example, this enables writing + + [[%Trace.call fun pf -> pf "%a" fmt_arg_type arg] ; f arg |> + [%Trace.retn fun pf -> pf "%a" fmt_result_type]] + + to trace calls to [f] in debug mode while completely compiling out the + debug code in non-debug builds. + + This mechanism can also be used e.g. to dynamically check assertions + only in debug mode. + + Additionally, [[%debug]] is rewritten to the compile-time boolean + constant determined by whether or not [--debug] is passed. *) + +open Ppx_core +open Ast_builder.Default +module Ast_mapper = Selected_ast.Ast.Ast_mapper + +let debug = ref false + +;; Ppx_driver.add_arg "--debug" (Caml.Arg.Set debug) + ~doc:"Enable debug tracing output" + +let rec get_fun_name pat = + match pat.ppat_desc with + | Ppat_var {txt; _} -> txt + | Ppat_alias (pat, _) | Ppat_constraint (pat, _) -> get_fun_name pat + | _ -> + Location.raise_errorf ~loc:pat.ppat_loc + "Unexpected pattern in binding containing [%%Trace]: %a" + (fun f p -> + Ocaml_common.Pprintast.pattern f + (Selected_ast.To_ocaml.copy_pattern p) ) + pat + + +let vb_stack_with, vb_stack_top = + let stack = ref [] in + let with_ x ~f = + stack := x :: !stack ; + let r = f () in + stack := List.tl_exn !stack ; + r + in + let top () = List.hd_exn !stack in + (with_, top) + + +let mapper = + let value_binding (m: Ast_mapper.mapper) vb = + vb_stack_with vb.pvb_pat ~f:(fun () -> + Ast_mapper.default_mapper.value_binding m vb ) + in + let expr (m: Ast_mapper.mapper) exp = + let append_here_args args = + let mod_name = evar ~loc:Location.none "Caml.__MODULE__" in + let fun_name = + estring ~loc:Location.none (get_fun_name (vb_stack_top ())) + in + (Nolabel, mod_name) :: (Nolabel, fun_name) :: args + in + match exp.pexp_desc with + | Pexp_extension ({txt= "debug"; loc}, PStr []) -> ebool ~loc !debug + | Pexp_extension + ( {txt= "Trace.call"; loc= call_loc} + , PStr [{pstr_desc= Pstr_eval (call_fun, []); _}] ) -> + if not !debug then eunit ~loc:exp.pexp_loc + else + pexp_apply ~loc:exp.pexp_loc + (evar ~loc:call_loc "Trace.call") + (append_here_args [(Nolabel, call_fun)]) + | Pexp_extension + ( {txt= "Trace.info"; loc= info_loc} + , PStr [{pstr_desc= Pstr_eval (arg, []); _}] ) -> + if not !debug then eunit ~loc:exp.pexp_loc + else + let args = + match arg.pexp_desc with + | Pexp_apply (op, args) -> (Nolabel, op) :: args + | _ -> [(Nolabel, arg)] + in + pexp_apply ~loc:exp.pexp_loc + (evar ~loc:info_loc "Trace.info") + (append_here_args args) + | Pexp_extension + ( {txt= "Trace.retn"; loc= retn_loc} + , PStr [{pstr_desc= Pstr_eval (retn_fun, []); _}] ) -> + if not !debug then evar ~loc:exp.pexp_loc "Fn.id" + else + pexp_apply ~loc:exp.pexp_loc + (evar ~loc:retn_loc "Trace.retn") + (append_here_args [(Nolabel, retn_fun)]) + | _ -> Ast_mapper.default_mapper.expr m exp + in + {Ast_mapper.default_mapper with expr; value_binding} + + +let impl = Selected_ast.Ast.map_structure mapper + +;; Ppx_driver.register_transformation "trace" ~impl diff --git a/sledge/src/ppx_trace/ppx_trace.mli b/sledge/src/ppx_trace/ppx_trace.mli new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/sledge/src/ppx_trace/ppx_trace.mli @@ -0,0 +1 @@ + diff --git a/sledge/src/sledge.ml b/sledge/src/sledge.ml new file mode 100644 index 000000000..d65415ec3 --- /dev/null +++ b/sledge/src/sledge.ml @@ -0,0 +1,36 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Sledge executable entry point *) + +let main ~input ~output = + try + let program = Frontend.translate input in + Trace.flush () ; + Option.iter output ~f:(function + | "-" -> Format.printf "%a@." Llair.fmt program + | filename -> + Out_channel.with_file filename ~f:(fun oc -> + let ff = Format.formatter_of_out_channel oc in + Format.fprintf ff "%a@." Llair.fmt program ) ) ; + Format.printf "@\nRESULT: Success@." + with exn -> + let bt = Caml.Printexc.get_raw_backtrace () in + Trace.flush () ; + ( match exn with + | Frontend.Invalid_llvm msg -> + Format.printf "@\nRESULT: Invalid input: %s@." msg + | Unimplemented msg -> + Format.printf "@\nRESULT: Unimplemented: %s@." msg + | Failure msg -> Format.printf "@\nRESULT: Internal error: %s@." msg + | _ -> + Format.printf "@\nRESULT: Unknown error: %s@." + (Caml.Printexc.to_string exn) ) ; + Caml.Printexc.raise_with_backtrace exn bt + + +;; Config.run main diff --git a/sledge/src/sledge.mli b/sledge/src/sledge.mli new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/sledge/src/sledge.mli @@ -0,0 +1 @@ + diff --git a/sledge/src/trace/jbuild.in b/sledge/src/trace/jbuild.in new file mode 100644 index 000000000..2f813edad --- /dev/null +++ b/sledge/src/trace/jbuild.in @@ -0,0 +1,13 @@ +(* -*- tuareg -*- *) + +let deps = ["import"] + +;; Jbuild_plugin.V1.send @@ Format.sprintf " +(library + ((name trace) + (public_name llair.trace) + %s + (libraries (%s)))) +" +(flags deps) +(libraries deps) diff --git a/sledge/src/trace/trace.ml b/sledge/src/trace/trace.ml new file mode 100644 index 000000000..258c129dd --- /dev/null +++ b/sledge/src/trace/trace.ml @@ -0,0 +1,58 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Debug trace logging *) + +type 'a printf = ('a, Format.formatter, unit) format -> 'a + +let ff = Format.err_formatter + +let flush = Format.pp_print_newline ff + +let margin = 100 + +let trace_all = ref false + +let init ~trace_all:ta = + Format.set_margin margin ; + Format.set_max_indent (margin - 1) ; + Format.pp_set_margin ff margin ; + Format.pp_set_max_indent ff (margin - 1) ; + Format.pp_open_vbox ff 0 ; + Caml.at_exit flush ; + trace_all := ta + + +(* selective tracing not yet implemented *) +let enabled _ = !trace_all + +let info mod_name fun_name fmt = + if enabled [fun_name; mod_name] then ( + Format.fprintf ff "@\n@[<2>| " ; + Format.kfprintf (fun ff -> Format.fprintf ff "@]") ff fmt ) + else Format.ifprintf ff fmt + + +let incf rev_prefix name fmt = + if enabled (name :: rev_prefix) then ( + Format.fprintf ff "@\n@[<2>@[( %s: " name ; + Format.kfprintf (fun ff -> Format.fprintf ff "@]") ff fmt ) + else Format.ifprintf ff fmt + + +let decf rev_prefix name fmt = + if enabled (name :: rev_prefix) then ( + Format.fprintf ff "@]@\n@[<2>) %s:@ " name ; + Format.kfprintf (fun ff -> Format.fprintf ff "@]") ff fmt ) + else Format.ifprintf ff fmt + + +let call mod_name fun_name k = k (incf [mod_name] fun_name) + +let retn mod_name fun_name k result = + k (decf [mod_name] fun_name) result ; + result diff --git a/sledge/src/trace/trace.mli b/sledge/src/trace/trace.mli new file mode 100644 index 000000000..9b188a393 --- /dev/null +++ b/sledge/src/trace/trace.mli @@ -0,0 +1,25 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Debug trace logging *) + +val init : trace_all:bool -> unit +(** Initialize the configuration of debug tracing. *) + +type 'a printf = ('a, Format.formatter, unit) format -> 'a + +val info : string -> string -> 'a printf +(** Emit a message at the current indentation level, if enabled. *) + +val call : string -> string -> ('a printf -> 'b) -> 'b +(** Increase indentation level and emit a message, if enabled. *) + +val retn : string -> string -> ('a printf -> 'b -> unit) -> 'b -> 'b +(** Decrease indentation level and emit a message, if enabled. *) + +val flush : unit -> unit +(** Flush the internal buffers. *) diff --git a/sledge/src/version.ml.in b/sledge/src/version.ml.in new file mode 100644 index 000000000..2d3f8fc0d --- /dev/null +++ b/sledge/src/version.ml.in @@ -0,0 +1,12 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Version information populated by build system *) + +let debug = [%debug ] + +let version = "%%VERSION%%" ^ if debug then "-dbg" else "-opt" diff --git a/sledge/src/version.mli b/sledge/src/version.mli new file mode 100644 index 000000000..bd9f90240 --- /dev/null +++ b/sledge/src/version.mli @@ -0,0 +1,12 @@ +(* Copyright (c) 2018 - present Facebook, Inc. All rights reserved. + + This source code is licensed under the BSD style license found in the + LICENSE file in the root directory of this source tree. An additional + grant of patent rights can be found in the PATENTS file in the same + directory. *) + +(** Version information populated by build system *) + +val debug : bool + +val version : string diff --git a/sledge/test/Makefile b/sledge/test/Makefile new file mode 100644 index 000000000..6f66814c4 --- /dev/null +++ b/sledge/test/Makefile @@ -0,0 +1,151 @@ +# Copyright (c) 2018 - present Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. + +# additional arguments to pass to clang +OPT_ARGS?=-O0 +CLANG_ARGS?=-g $(OPT_ARGS) + +# additional arguments to pass to sledge +SLEDGE_ARGS?= + +# limits for each test run +TIMEOUT?=30 +MEMOUT?=4096 +MEMOUTk=$(shell echo $$(( $(MEMOUT) * 1024 ))) + +# executable to test +SLEDGE_EXE=$(CURDIR)/../_build/dbg/src/sledge.exe + +MODEL_DIR=$(CURDIR)/../model + +# configure the non-host llvm and clang +LLVM_PREFIX=$(HOME)/.local/llvm_$(shell opam config var switch) +export PATH := $(LLVM_PREFIX)/bin:$(PATH) + +# code to test +SrcCs:=$(shell find -L * -not -path 'llvm/*' -name '*.c') +SrcCPPs:=$(shell find -L * -not -path 'llvm/*' -name '*.cpp') +SrcLLs:=$(shell find -L * -name '*.ll') +SrcBCs:=$(shell find -L * -name '*.bc') + +GenBCs:=$(patsubst %.c,%.bc,$(SrcCs)) $(patsubst %.cpp,%.bc,$(SrcCPPs)) +GenLLs:=$(patsubst %.bc,%.ll,$(GenBCs) $(SrcBCs)) + +TestBCs:=$(GenBCs) $(SrcBCs) +TestLLs:=$(SrcLLs) +Tests:=$(TestBCs) $(TestLLs) +Outs:=$(patsubst %.bc,%.bc.out,$(TestBCs)) $(patsubst %.ll,%.ll.out,$(TestLLs)) + +default: test + +$(MODEL_DIR)/cxxabi.bc: $(MODEL_DIR)/cxxabi.cpp + @(cd $(MODEL_DIR) && clang $(CLANG_ARGS) -I../llvm/projects/libcxxabi/include -I../llvm/projects/libcxxabi/src -c -emit-llvm cxxabi.cpp) + +# compile c to llvm bitcode +%.bc : %.c $(MODEL_DIR)/cxxabi.bc + @(cd $(dir $*) && clang $(CLANG_ARGS) -c -emit-llvm $(notdir $*).c -o - | llvm-link $(MODEL_DIR)/cxxabi.bc - | opt $(OPT_ARGS) -o $(notdir $*).bc) + +# compile c++ to llvm bitcode +%.bc : %.cpp $(MODEL_DIR)/cxxabi.bc + @(cd $(dir $*) && clang $(CLANG_ARGS) -c -emit-llvm $(notdir $*).cpp -o - | llvm-link $(MODEL_DIR)/cxxabi.bc - | opt $(OPT_ARGS) -o $(notdir $*).bc) + +# disassemble bitcode to llvm assembly +%.ll : %.bc + @(cd $(dir $*) && llvm-dis -show-annotations -o $(notdir $*).ll $(notdir $*).bc) + +# analyze one test input +define analyze + @bash -c ' \ + if test -e $(1).$(2); then \ + cd $(dir $(1)); \ + status=$$( \ + ( ulimit -t $(TIMEOUT) -v $(MEMOUTk) \ + ; $(SLEDGE_EXE) $(SLEDGE_ARGS) $(notdir $(1).$(2)) 1> $(notdir $(1).$(2)).out 2> $(notdir $(1).$(2)).err ) \ + )$$?; \ + case $$status in \ + ( 0 | 2 ) ;; \ + ( 152 ) echo -e "RESULT: TIMEOUT" >> $(notdir $(1).$(2)).out ;; \ + ( * ) echo -e "\nRESULT: Internal error: "$$status >> $(notdir $(1).$(2)).out ;; \ + esac ; \ + exit $$status ; \ + fi; ' +endef + +# analyze ll +%.ll.out : %.ll + $(call analyze,$*,ll) + +# analyze bc +%.bc.out : %.bc + $(call analyze,$*,bc) + +# compile all c to bc +compile: $(GenBCs) + +# disassemble all bc to ll +disasm: $(GenLLs) + +# run all tests +test: $(Outs) + +# report all results +full-report: + @find -L * -name '*.out' \ + | xargs grep "RESULT:" \ + | sed 's/.out:/: /' | column -ts$$'\t' | sort -s -k 3 | uniq + +# report errors +report: + @find -L * -name '*.out' \ + | xargs grep "RESULT:" \ + | grep -E -v "RESULT: Unimplemented: (landingpad of type other than {i8\*, i32}|msvc exceptions):" \ + | grep -E -v "RESULT: (Success|Invalid input)" \ + | sed 's/.out:/: /' | column -ts$$'\t' | sort -s -k 3 | uniq + +# report warnings +warnings: + @find -L * -name '*.out' | xargs grep -h "Warning:" | sort | uniq + +# remove generated bitcode files +cleanBC: + @rm -f $(GenBCs) + +# remove generated assembly files +cleanLL: + @rm -f $(GenLLs) + +# remove result files +cleanout: + @find -L * -name "*.out" -or -name '*.err' \ + | xargs rm -f + +# remove result files for TIMEOUTs +cleanTO: + @find -L * -name "*.out" -or -name '*.err' \ + | xargs grep -l TIMEOUT \ + | xargs rm -f + +# remove result files for MEMOUTs +cleanMO: + @find -L * -name "*.out" -or -name '*.err' \ + | xargs grep -l MEMOUT \ + | xargs rm -f + +# remove result files for Internal Errors +cleanIE: + @find -L * -name "*.out" -or -name '*.err' \ + | xargs grep -l -E "RESULT: (Internal error|Unimplemented|Unknown error)" \ + | xargs rm -f + +clean: cleanBC cleanLL cleanout + +fmt: + clang-format -i $(SrcCs) $(SrcCPPs) + +# print any variable for Makefile debugging +print-%: + @printf '$*='; printf '$($*)'; printf '\n' diff --git a/sledge/test/frontend/address_of_label.cpp b/sledge/test/frontend/address_of_label.cpp new file mode 100644 index 000000000..f0045d54e --- /dev/null +++ b/sledge/test/frontend/address_of_label.cpp @@ -0,0 +1,24 @@ +/* from http://blog.llvm.org/2010/01/address-of-label-and-indirect-branches.html + */ + +static int fn(const char* opcodes) { + static const void* codetable[] = {&&RETURN, &&INCREMENT, &&DECREMENT, + &&DOUBLE, &&SWAPWORD}; + int result = 0; + + goto* codetable[*(opcodes++)]; +RETURN: + return result; +INCREMENT: + result++; + goto* codetable[*(opcodes++)]; +DECREMENT: + result--; + goto* codetable[*(opcodes++)]; +DOUBLE: + result <<= 1; + goto* codetable[*(opcodes++)]; +SWAPWORD: + result = (result << 16) | (result >> 16); + goto* codetable[*(opcodes++)]; +} diff --git a/sledge/test/frontend/cond_alloca.cpp b/sledge/test/frontend/cond_alloca.cpp new file mode 100644 index 000000000..a50c3f02c --- /dev/null +++ b/sledge/test/frontend/cond_alloca.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +int main(int argc, const char* argv[]) { + int a = 0; + if (argc > 0) { + int b = 1; + }; + return 0; +} diff --git a/sledge/test/frontend/destructor_bases.cpp b/sledge/test/frontend/destructor_bases.cpp new file mode 100644 index 000000000..baef1a798 --- /dev/null +++ b/sledge/test/frontend/destructor_bases.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma clang diagnostic ignored "-Winaccessible-base" + +struct T { + T(){}; + ~T(){}; +}; + +struct A : virtual T { + A(){}; + ~A(){}; +}; + +struct B : virtual A { + B(){}; + ~B(){}; +}; + +struct C { + C(){}; + ~C(){}; +}; + +struct D : A, C { + B b; + D(){}; + ~D() { A a; }; +}; + +struct E : B, C, D { + E(){}; + ~E(){}; +}; + +struct F : B, virtual C, D { + F(){}; + ~F(){}; +}; + +int main() { struct F f; } diff --git a/sledge/test/frontend/exceptions.cpp b/sledge/test/frontend/exceptions.cpp new file mode 100644 index 000000000..f9cc62fa1 --- /dev/null +++ b/sledge/test/frontend/exceptions.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include + +struct E { + E() { printf(" E default constructor, this=%p\n", this); } + E(const E& other) { + printf(" E copy-constructor, this=%p from that=%p\n", this, &other); + } + virtual ~E() { printf(" E destructor, this=%p\n", this); } +}; + +struct F : public E { + F() { printf(" F default constructor, this=%p\n", this); } + F(const F& other) { + printf(" F copy-constructor, this=%p from that=%p\n", this, &other); + } + virtual ~F() { printf(" F destructor, this=%p\n", this); } +}; + +__attribute__((noinline)) void f() { throw 20; } + +void g() throw(E) { + F e; + throw e; +} + +int main() { + try { + f(); + throw 20; + } catch (int e) { + if (e > 0) { + try { + g(); + } catch (E e2) { + throw; + } + } else + return e; + } + return 0; +} diff --git a/sledge/test/frontend/rethrow.cpp b/sledge/test/frontend/rethrow.cpp new file mode 100644 index 000000000..8d9f765b1 --- /dev/null +++ b/sledge/test/frontend/rethrow.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include + +struct B { + B() { printf(" Base default constructor, this=%p\n", this); } + B(const B& other) { + printf(" Base copy-constructor, this=%p from that=%p\n", this, &other); + } + virtual ~B() { printf(" Base destructor, this=%p\n", this); } +}; + +struct D : public B { + D() { printf(" Derived default constructor, this=%p\n", this); } + D(const D& other) { + printf(" Derived copy-constructor, this=%p from that=%p\n", this, &other); + } + virtual ~D() { printf(" Derived destructor, this=%p\n", this); } +}; + +void f() { + D e; + throw e; +} + +int main() { + try { + try { + f(); + } catch (B& err) { + printf("A Inner catch, &err=%p\n", &err); + throw; + } + } catch (B& err) { + printf("A Outer catch, &err=%p\n", &err); + } + printf("---\n"); + try { + try { + D e; + throw e; + } catch (B& err) { + printf("B Inner catch, &err=%p\n", &err); + throw err; + } + } catch (B& err) { + printf("B Outer catch, &err=%p\n", &err); + } + return 0; +} diff --git a/sledge/test/frontend/struct.cpp b/sledge/test/frontend/struct.cpp new file mode 100644 index 000000000..c24b01234 --- /dev/null +++ b/sledge/test/frontend/struct.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include + +struct list_head { + struct list_head *next, *prev; +}; + +struct list_head glob = {0}; + +struct list_head ears = {&ears, &ears}; + +int main() { + struct list_head* node = (struct list_head*)malloc(sizeof(struct list_head)); + node->next = node->prev = node; + free(node); + if (&glob != &ears) + return 1; + return 0; +} diff --git a/sledge/test/frontend/vector.cpp b/sledge/test/frontend/vector.cpp new file mode 100644 index 000000000..8f193dfc0 --- /dev/null +++ b/sledge/test/frontend/vector.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +typedef double vector4 __attribute__((ext_vector_type(4))); + +int main() { + vector4 x = {1, 2, 3, 4}; + vector4 y = 2 * x; + return 0; +} diff --git a/sledge/test/frontend/virtual.cpp b/sledge/test/frontend/virtual.cpp new file mode 100644 index 000000000..ba5a768bf --- /dev/null +++ b/sledge/test/frontend/virtual.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018 - present Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +class B { +public: + virtual void f() { g(); }; + virtual void g(){}; +}; + +class C : public B { +public: + virtual void f() { B::f(); }; + virtual void g(){}; +}; + +int main() { + B* p = new C; + p->f(); + return 0; +} diff --git a/sledge/test/llvm b/sledge/test/llvm new file mode 120000 index 000000000..4405e5249 --- /dev/null +++ b/sledge/test/llvm @@ -0,0 +1 @@ +../llvm/test \ No newline at end of file diff --git a/sledge/tools/gen_version.sh b/sledge/tools/gen_version.sh new file mode 100755 index 000000000..e5f1a00e5 --- /dev/null +++ b/sledge/tools/gen_version.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# +# Copyright (c) 2018 - present Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. +# + +# usage: gen_version.sh [] + +FILE="$1" + +if [[ ! -z "$2" ]]; then + # second arg passed when called from opam + VERSION="$2" +else + # second arg omitted when called from src/jbuild + if [[ ! "%%VERSION%%" == "%%"*"%%" ]]; then + # file has been watermarked when building distrib archive + VERSION="%%VERSION%%" + else + # file has not been watermarked when building in dev git tree + VERSION=$(git describe --tags --dirty --always) + fi +fi + +(test -e $FILE || touch $FILE); + +sed -e "s|%%VERSION%[%]|$VERSION|g" $FILE.in | diff $FILE - | patch --silent $FILE