From fc0ca6b6db36954b04ffc663bdfee2420c09be54 Mon Sep 17 00:00:00 2001 From: Rohan Jacob-Rao Date: Mon, 13 Jul 2015 16:32:02 -0700 Subject: [PATCH] Initial LLVM parser. --- infer/src/Makefile | 20 +++- infer/src/llvm/ast.ml | 19 +++ infer/src/llvm/lexer.mll | 151 ++++++++++++++++++++++++ infer/src/llvm/main.ml | 20 ++++ infer/src/llvm/parser.mly | 240 ++++++++++++++++++++++++++++++++++++++ infer/src/llvm/pretty.ml | 39 +++++++ 6 files changed, 486 insertions(+), 3 deletions(-) create mode 100644 infer/src/llvm/ast.ml create mode 100644 infer/src/llvm/lexer.mll create mode 100644 infer/src/llvm/main.ml create mode 100644 infer/src/llvm/parser.mly create mode 100644 infer/src/llvm/pretty.ml diff --git a/infer/src/Makefile b/infer/src/Makefile index f06edb0e0..987052954 100644 --- a/infer/src/Makefile +++ b/infer/src/Makefile @@ -100,6 +100,12 @@ CLANG_ATDGEN_STUBS = $(addprefix $(CLANG_SOURCES)/$(CLANG_AST_BASE_NAME), $(ATDG INFER_CLANG_AST_PROJ = $(addprefix $(CLANG_SOURCES)/, clang_ast_proj.ml clang_ast_proj.mli) FCP_CLANG_AST_PROJ = $(addprefix $(CLANG_OCAML_BUILD)/, clang_ast_proj.ml clang_ast_proj.mli) +#### LLVM declarations #### + +LLVM_SOURCES = llvm +INFERLLVM_MAIN = $(LLVM_SOURCES)/main +INFERLLVM_BINARY = $(BINDIR)/InferLLVM + #### End of declarations #### # Check whether .facebook file exists in a root directory. @@ -114,20 +120,25 @@ DEPENDENCIES = $(BACKEND_SOURCES) checkers facebook/checkers facebook/checkers/g OCAMLBUILD = ocamlbuild -build-dir $(BUILDDIR) -j 0 $(addprefix -I , $(DEPENDENCIES)) $(GLOBAL_OPTIONS) $(ATDGEN_OPTIONS) $(JAVA_OPTIONS) -.PHONY: all java clang build_java build_clang annotations init sanitize version clean +.PHONY: all java clang llvm build_java build_clang build_llvm annotations init sanitize version clean -all: java clang +all: java clang llvm java: build_java annotations $(INFERANALYZE_BINARY) $(INFERPRINT_BINARY) $(INFERJAVA_BINARY) clang: build_clang annotations $(INFERANALYZE_BINARY) $(INFERPRINT_BINARY) $(INFERCLANG_BINARY) +llvm: build_llvm annotations $(INFERLLVM_BINARY) + build_java: init $(INFERPRINT_ATDGEN_STUBS) $(OCAMLBUILD) $(TYPEPROP_MAIN).native $(INFERANALYZE_MAIN).native $(INFERPRINT_MAIN).native $(INFERJAVA_MAIN).native build_clang: init $(INFERPRINT_ATDGEN_STUBS) check_clang_plugin $(CLANG_ATDGEN_STUBS) $(OCAMLBUILD) $(INFERANALYZE_MAIN).native $(INFERPRINT_MAIN).native $(INFERCLANG_MAIN).native +build_llvm: + $(OCAMLBUILD) -use-menhir $(INFERLLVM_MAIN).native + annotations: rsync -r --delete --exclude=*.ml* --exclude=*.o --exclude=*.cm* --exclude=*.native $(BUILDDIR)/* $(ANNOTDIR) @@ -189,10 +200,13 @@ $(INFERJAVA_BINARY): $(BUILDDIR)/$(INFERJAVA_MAIN).native $(INFERCLANG_BINARY): $(BUILDDIR)/$(INFERCLANG_MAIN).native $(COPY) $(BUILDDIR)/$(INFERCLANG_MAIN).native $(INFERCLANG_BINARY) +$(INFERLLVM_BINARY): $(BUILDDIR)/$(INFERLLVM_MAIN).native + $(COPY) $(BUILDDIR)/$(INFERLLVM_MAIN).native $(INFERLLVM_BINARY) + $(TYPEPROP_BINARY): $(BUILDDIR)/$(TYPEPROP_MAIN).native $(COPY) $(BUILDDIR)/$(TYPEPROP_MAIN).native $(TYPEPROP_BINARY) clean: $(BUILDDIR) $(OCAMLBUILD) -clean $(REMOVE_DIR) $(ANNOTDIR) - $(REMOVE) $(TYPEPROP_BINARY) $(INFERANALYZE_BINARY) $(INFERPRINT_BINARY) $(INFERJAVA_BINARY) $(INFERCLANG_BINARY) + $(REMOVE) $(TYPEPROP_BINARY) $(INFERANALYZE_BINARY) $(INFERPRINT_BINARY) $(INFERJAVA_BINARY) $(INFERCLANG_BINARY) $(INFERLLVM_BINARY) diff --git a/infer/src/llvm/ast.ml b/infer/src/llvm/ast.ml new file mode 100644 index 000000000..8fbb67b19 --- /dev/null +++ b/infer/src/llvm/ast.ml @@ -0,0 +1,19 @@ +(* + * Copyright (c) 2015 - Facebook. + * All rights reserved. + *) +type prog = Prog of func_def list +and func_def = FuncDef of string * typ option * (typ * string) list * instr list +and typ = + | Tint of int + | Tfloat (* just one type for now *) + | Tptr of typ + | Tvector of int * typ + | Tarray of int * typ +and instr = + | Ret of (typ * value) option +and value = + | True + | False + | Intlit of int + | Null diff --git a/infer/src/llvm/lexer.mll b/infer/src/llvm/lexer.mll new file mode 100644 index 000000000..e27e223bb --- /dev/null +++ b/infer/src/llvm/lexer.mll @@ -0,0 +1,151 @@ +(* + * Copyright (c) 2015 - Facebook. + * All rights reserved. + *) +{ + open Lexing + open Parser + + exception LexingError of string +} + +let space = [' ' '\t'] +let newline = '\n' +let comment = ';' [^ '\n']* + +let nonzero_digit = ['1'-'9'] +let digit = ['0'-'9'] +let pos_int = nonzero_digit digit* +let intlit = '-'? digit+ + +let lower = ['a'-'z'] +let upper = ['A'-'Z'] +let id_special_char = ['-' '$' '.' '_'] +let id_char = lower | upper | id_special_char +let id = ['%' '@'] id_char (id_char | digit)* + +rule token = parse + | space | comment { token lexbuf } + | newline { token lexbuf } + + (* keywords *) + | "define" { DEFINE } + + (* delimiters *) + | ',' { COMMA } + | '(' { LPAREN } + | ')' { RPAREN } + | '{' { LBRACE } + | '}' { RBRACE } + | '<' { LANGLE } + | '>' { RANGLE } + | '[' { LSQBRACK } + | ']' { RSQBRACK } + (* symbols *) + | '=' { EQUALS } + | '*' { STAR } + | ['x' 'X'] { X } + + (* TYPES *) + | "void" { VOID } + | 'i' (pos_int as width) { INT (int_of_string width) } + | "half" { HALF } + | "float" { FLOAT } + | "double" { DOUBLE } + | "fp128" { FP128 } + | "x86_fp80" { X86_FP80 } + | "ppc_fp128" { PPC_FP128 } + | "x86_mmx" { X86_MMX } + | "label" { LABEL } + | "metadata" { METADATA } + + | pos_int as size { SIZE (int_of_string size) } + (* CONSTANTS *) + | "true" { TRUE } + | "false" { FALSE } + | intlit as i { INTLIT (int_of_string i) } + (* floating point constants *) + | "null" { NULL } + + (* INSTRUCTIONS *) + (* terminator instructions *) + | "ret" { RET } + | "br" { BR } + | "switch" { SWITCH } + | "indirectbr" { INDIRECTBR } + | "invoke" { INVOKE } + | "resume" { RESUME } + | "unreachable" { UNREACHABLE } + (* binary operations *) + | "add" { ADD } + | "fadd" { FADD } + | "sub" { SUB } + | "fsub" { FSUB } + | "mul" { MUL } + | "fmul" { FMUL } + | "udiv" { UDIV } + | "sdiv" { SDIV } + | "fdiv" { FDIV } + | "urem" { UREM } + | "srem" { SREM } + | "frem" { FREM } + (* arithmetic options *) + | "nuw" { NUW } + | "nsw" { NSW } + | "exact" { EXACT } + (* floating point options *) + | "nnan" { NNAN } + | "ninf" { NINF } + | "nsz" { NSZ } + | "arcp" { ARCP } + | "fast" { FAST } + (* bitwise binary operations *) + | "shl" { SHL } + | "lshr" { LSHR } + | "ashr" { ASHR } + | "and" { AND } + | "or" { OR } + | "xor" { XOR } + (* vector operations *) + | "extractelement" { EXTRACTELEMENT } + | "insertelement" { INSERTELEMENT } + | "shufflevector" { SHUFFLEVECTOR } + (* aggregate operations *) + | "extractvalue" { EXTRACTVALUE } + | "insertvalue" { INSERTVALUE } + (* memory access and addressing operations *) + | "alloca" { ALLOCA } + | "load" { LOAD } + | "store" { STORE } + | "fence" { FENCE } + | "cmpxchg" { CMPXCHG } + | "atomicrmw" { ATOMICRMW } + | "getelementptr" { GETELEMENTPTR } + (* conversion operations *) + | "trunc" { TRUNC } (* e.g. trunc ... to ... *) + | "zext" { ZEXT } + | "sext" { SEXT } + | "fptrunc" { FPTRUNC } + | "fpext" { FPEXT } + | "fptoui" { FPTOUI } + | "fptosi" { FPTOSI } + | "uitofp" { UITOFP } + | "sitofp" { SITOFP } + | "ptrtoint" { PTRTOINT } + | "inttoptr" { INTTOPTR } + | "bitcast" { BITCAST } + | "addrspacecast" { ADDRSPACECAST } + | "to" { TO } (* all conversion operations include this keyword *) + (* other operations *) + | "icmp" { ICMP } + | "fcmp" { FCMP } + | "phi" { PHI } + | "select" { SELECT } + | "call" { CALL } + | "va_arg" { VA_ARG } + | "landingpad" { LANDINGPAD } + + (* identifiers - make this complete *) + | id as str { IDENT str } + + | eof { EOF } diff --git a/infer/src/llvm/main.ml b/infer/src/llvm/main.ml new file mode 100644 index 000000000..11abcfea4 --- /dev/null +++ b/infer/src/llvm/main.ml @@ -0,0 +1,20 @@ +(* + * Copyright (c) 2015 - Facebook. + * All rights reserved. + *) +open Lexing +open Printf + +exception UsageError of string + +let () = try + if Array.length Sys.argv < 2 then + raise (UsageError ("Missing source file as first command line argument.")) + else + let filename = Sys.argv.(1) in + let lexbuf = Lexing.from_channel (open_in filename) in + let prog = Parser.prog Lexer.token lexbuf in + let pretty = Pretty.pretty_prog prog in + print_string pretty +with + | UsageError msg -> print_string ("Usage error: " ^ msg ^ "\n") diff --git a/infer/src/llvm/parser.mly b/infer/src/llvm/parser.mly new file mode 100644 index 000000000..7179fcf00 --- /dev/null +++ b/infer/src/llvm/parser.mly @@ -0,0 +1,240 @@ +(* + * Copyright (c) 2015 - Facebook. + * All rights reserved. + *) +%{ + open Ast +%} + +(* keywords *) +%token DEFINE + +(* delimiters *) +%token COMMA +%token LPAREN +%token RPAREN +%token LBRACE +%token RBRACE +%token LANGLE +%token RANGLE +%token LSQBRACK +%token RSQBRACK +(* symbols *) +%token EQUALS +%token STAR +%token X + +(* TYPES *) +%token VOID +%token INT +%token HALF +%token FLOAT +%token DOUBLE +%token FP128 +%token X86_FP80 +%token PPC_FP128 +%token X86_MMX +%token LABEL +%token METADATA + +%token SIZE +(* CONSTANTS *) +%token TRUE +%token FALSE +%token INTLIT +%token NULL + +(* INSTRUCTIONS *) +(* terminator instructions *) +%token RET +%token BR +%token SWITCH +%token INDIRECTBR +%token INVOKE +%token RESUME +%token UNREACHABLE +(* binary operations *) +%token ADD +%token FADD +%token SUB +%token FSUB +%token MUL +%token FMUL +%token UDIV +%token SDIV +%token FDIV +%token UREM +%token SREM +%token FREM +(* arithmetic options *) +%token NUW +%token NSW +%token EXACT +(* floating point options *) +%token NNAN +%token NINF +%token NSZ +%token ARCP +%token FAST +(* bitwise binary operations *) +%token SHL +%token LSHR +%token ASHR +%token AND +%token OR +%token XOR +(* vector operations *) +%token EXTRACTELEMENT +%token INSERTELEMENT +%token SHUFFLEVECTOR +(* aggregate operations *) +%token EXTRACTVALUE +%token INSERTVALUE +(* memory access and addressing operations *) +%token ALLOCA +%token LOAD +%token STORE +%token FENCE +%token CMPXCHG +%token ATOMICRMW +%token GETELEMENTPTR +(* conversion operations *) +%token TRUNC +%token ZEXT +%token SEXT +%token FPTRUNC +%token FPEXT +%token FPTOUI +%token FPTOSI +%token UITOFP +%token SITOFP +%token PTRTOINT +%token INTTOPTR +%token BITCAST +%token ADDRSPACECAST +%token TO +(* other operations *) +%token ICMP +%token FCMP +%token PHI +%token SELECT +%token CALL +%token VA_ARG +%token LANDINGPAD + +%token IDENT + +%token EOF + +%start prog +%type prog +%type func_def +%type ret_typ +%type typ + +%% + +prog: + | defs=list(func_def) EOF { Prog defs } + +func_def: + | DEFINE ret_tp=ret_typ name=IDENT LPAREN params=separated_list(COMMA, pair(typ, IDENT)) RPAREN instrs=block { + FuncDef (name, ret_tp, params, instrs) } + +ret_typ: + | VOID { None } + | tp=typ { Some tp } + +typ: + | tp=element_typ { tp } + (*| X86_MMX { () }*) + | tp=vector_typ { tp } + | LSQBRACK sz=SIZE X tp=element_typ RSQBRACK { Tarray (sz, tp) } (* array type *) + (*| LABEL { () } + | METADATA { () }*) + (* TODO structs *) + +vector_typ: + | LANGLE sz=SIZE X tp=element_typ RANGLE { Tvector (sz, tp) } + +element_typ: + | width=INT { Tint width } + | floating_typ { Tfloat } + | tp=ptr_typ { tp } + +floating_typ: + | HALF { () } + | FLOAT { () } + | DOUBLE { () } + | FP128 { () } + | X86_FP80 { () } + | PPC_FP128 { () } + +ptr_typ: + | tp=typ STAR { Tptr tp } + +block: + | LBRACE instrs=list(instr) RBRACE { instrs } + +instr: + | term=terminator { term } + | IDENT EQUALS binop { Ret None } + +terminator: + | RET tp=typ v=value { Ret (Some (tp, v)) } + | RET VOID { Ret None } + (* + | switch + | indirectbr + | invoke + | resume + | unreachable + *) + +binop: + | ADD arith_options binop_args { () } + | FADD fast_math_flags binop_args { () } + | SUB arith_options binop_args { () } + | FSUB fast_math_flags binop_args { () } + | MUL binop_args { () } + | FMUL fast_math_flags binop_args { () } + | UDIV option(EXACT) binop_args { () } + | SDIV option(EXACT) binop_args { () } + | FDIV fast_math_flags binop_args { () } + | UREM binop_args { () } + | SREM binop_args { () } + | FREM fast_math_flags binop_args { () } + (* bitwise *) + | SHL arith_options binop_args { () } + | LSHR option(EXACT) binop_args { () } + | ASHR option(EXACT) binop_args { () } + | AND binop_args { () } + | OR binop_args { () } + | XOR binop_args { () } + (* vector *) + | EXTRACTELEMENT vector_typ value COMMA typ IDENT { () } + | INSERTELEMENT vector_typ value COMMA typ element COMMA typ IDENT { () } + +arith_options: + | option(NUW) option(NSW) { () } + +fast_math_flags: + | option(NNAN) option(NINF) option(NSZ) option(ARCP) option(FAST) { () } + +binop_args: + | typ operand COMMA operand { () } + +(* below is fuzzy *) + +operand: + (* variable *) + | v=value { v } + +element: + | v=value { v } + +value: + | TRUE { True } + | FALSE { False } + | i=INTLIT { Intlit i } + | NULL { Null } diff --git a/infer/src/llvm/pretty.ml b/infer/src/llvm/pretty.ml new file mode 100644 index 000000000..a2f295d41 --- /dev/null +++ b/infer/src/llvm/pretty.ml @@ -0,0 +1,39 @@ +(* + * Copyright (c) 2015 - Facebook. + * All rights reserved. + *) +open Ast + +let concatmap (sep : string) (f : 'a -> string) (l : 'a list) : string = String.concat sep (List.map f l) + +let rec pretty_prog : prog -> string = function + Prog defs -> concatmap "" pretty_func_def defs + +and pretty_func_def : func_def -> string = function + FuncDef (name, ret_tp, params, instrs) -> + "define " ^ pretty_ret_typ ret_tp ^ " " ^ name ^ "(" ^ + concatmap ", " (fun (tp, id) -> pretty_typ tp ^ " " ^ id) params ^ ") {\n" ^ + concatmap "" pretty_instr_ln instrs ^ "}\n" + +and pretty_ret_typ : typ option -> string = function + | None -> "void" + | Some tp -> pretty_typ tp + +and pretty_typ : typ -> string = function + | Tint width -> "i" ^ string_of_int width + | Tfloat (* just one type for now *) -> "float" + | Tptr tp -> pretty_typ tp ^ "*" + | Tvector (sz, tp) -> "<" ^ string_of_int sz ^ " x " ^ pretty_typ tp ^ ">" + | Tarray (sz, tp) -> "[" ^ string_of_int sz ^ " x " ^ pretty_typ tp ^ "]" + +and pretty_instr_ln (i : instr) : string = pretty_instr i ^ "\n" + +and pretty_instr : instr -> string = function + | Ret None -> "ret void" + | Ret (Some (tp, v)) -> "ret " ^ pretty_typ tp ^ " " ^ pretty_value v + +and pretty_value : value -> string = function + | True -> "true" + | False -> "false" + | Intlit i -> string_of_int i + | Null -> "null"