[sledge] Test infra fixes and improvements

Summary: - Fix clamping of percentage change of memory quantities - Improve sorting of coverage changes relative to unchanged error results - Filter out results for invalid LLVM code - Fix printing multiple status values - Fix passing args to sledge, different subcommands need different args - Fix cleaning commands - Add an unknown error status to the report if sledge exits without producing a status in the report - Internalize globals for tests, as otherwise C++ tests are unreadable due to the runtime system models Reviewed By: jvillard Differential Revision: D25756567 fbshipit-source-id: 3b4c003f4
5 years ago · 13f3933927
parent c25f5feafb
commit 13f3933927
4 changed files with 120 additions and 96 deletions
--- a/sledge/report/sledge_report.ml
+++ b/sledge/report/sledge_report.ml
@ -382,7 +382,7 @@ let write_html ranges rows chan =
          (Base.Float.round_decimal ~decimal_digits:2 r)
      in
      let delta_mem max pct w ppf d =
-        let r = if Float.(d < 0.000001) then 0. else 100. *. d /. w in
+        let r = if Float.(abs d < 0.000001) then 0. else 100. *. d /. w in
        Printf.fprintf ppf
          "<td align=\"right\" bgcolor=\"%s\">%s</td>\n\
           <td align=\"right\" bgcolor=\"%s\">%12.0f%%</td>\n"
@ -434,22 +434,18 @@ let write_html ranges rows chan =
        let attr = if List.is_empty cs then "" else " class=\"neutral\"" in
        Printf.fprintf ppf "%a" (coverage attr) cs
      in
-      let stat ppf = function
-        | [] ->
-            Printf.fprintf ppf
-              "<td style=\"border-left: 2px solid #eee8d5\";></td>\n"
+      let stat ppf ss =
+        Printf.fprintf ppf "<td style=\"border-left: 2px solid #eee8d5\";" ;
+        ( match ss with
+        | [] -> Printf.fprintf ppf ">"
        | ss ->
            List.iter ss ~f:(fun s ->
                match (s : Report.status) with
                | Safe _ | Unsafe _ | Ok ->
-                    Printf.fprintf ppf
-                      "<td style=\"border-left: 2px solid #eee8d5\";>%a</td>\n"
-                      pf_status s
-                | _ ->
-                    Printf.fprintf ppf
-                      "<td style=\"border-left: 2px solid #eee8d5\"; \
-                       class=\"regress\">%a</td>\n"
-                      pf_status s )
+                    Printf.fprintf ppf ">%a" pf_status s
+                | _ -> Printf.fprintf ppf "class=\"regress\">%a" pf_status s )
+        ) ;
+        Printf.fprintf ppf "</td>\n"
      in
      let statd ppf = function
        | None | Some [] -> Printf.fprintf ppf "<td></td>\n"
@ -632,7 +628,7 @@ let add_total rows =
  in
  Iter.cons total rows

-let cmp x y =
+let cmp perf x y =
  match (x.status_deltas, y.status_deltas) with
  | Some xs, Some ys ->
      List.compare Report.compare_status xs ys
@ -640,21 +636,47 @@ let cmp x y =
  | Some _, None -> -1
  | None, Some _ -> 1
  | None, None -> (
+      let max =
+        Option.map_or ~default:0 ~f:(fun cds ->
+            List.fold cds 0 ~f:(fun {Report.steps} m ->
+                Int.(max (abs steps) m) ) )
+      in
+      -Int.compare (max x.cov_deltas) (max y.cov_deltas)
+      |> fun o ->
+      if o <> 0 then o
+      else
        match (List.hd x.status, List.hd y.status) with
-    | Some (Safe _ | Unsafe _ | Ok), Some (Safe _ | Unsafe _ | Ok) -> (
+        | Some (Safe _ | Unsafe _ | Ok), Some (Safe _ | Unsafe _ | Ok)
+          when perf -> (
          match (x.times_deltas, y.times_deltas) with
          | Some xtd, Some ytd ->
-          Float.(compare (abs ytd.utime) (abs xtd.utime))
+              -Float.(compare (abs xtd.utime) (abs ytd.utime))
+              |> fun o -> if o <> 0 then o else String.compare x.name y.name
+          | Some _, None -> 1
+          | None, Some _ -> -1
+          | None, None -> String.compare x.name y.name )
+        | Some (Safe _ | Unsafe _ | Ok), Some (Safe _ | Unsafe _ | Ok) -> (
+          match (x.gcs_deltas, y.gcs_deltas) with
+          | Some xgc, Some ygc ->
+              -Float.(
+                 compare
+                   (abs xgc.Report.allocated)
+                   (abs ygc.Report.allocated))
+              |> fun o -> if o <> 0 then o else String.compare x.name y.name
          | Some _, None -> 1
          | None, Some _ -> -1
          | None, None -> String.compare x.name y.name )
        | _, Some (Safe _ | Unsafe _ | Ok) -> -1
        | Some (Safe _ | Unsafe _ | Ok), _ -> 1
-    | s, t -> Option.compare (Ord.opp Report.compare_status) s t )
+        | s, t ->
+            Option.compare (Ord.opp Report.compare_status) s t
+            |> fun o -> if o <> 0 then o else String.compare x.name y.name )

 let filter rows =
  Iter.filter rows ~f:(fun {status} ->
-      List.exists status ~f:(function Unimplemented _ -> false | _ -> true) )
+      List.exists status ~f:(function
+        | InvalidInput _ | Unimplemented _ -> false
+        | _ -> true ) )

 let input_rows ?baseline current =
  let b_tbl = Option.map ~f:read baseline in
@ -672,13 +694,13 @@ let input_rows ?baseline current =
      let c_result = Tbl.find_opt c_tbl name in
      combine name b_result c_result )

-let generate_html ?baseline current output =
+let generate_html perf ?baseline current output =
  let rows = input_rows ?baseline current in
  let rows = Iter.map ~f:average rows in
  let rows = filter rows in
  let rows = Iter.persistent rows in
  let ranges = ranges rows in
-  let rows = Iter.sort ~cmp rows in
+  let rows = Iter.sort ~cmp:(cmp perf) rows in
  let rows = add_total rows in
  Out_channel.with_file output ~f:(write_html ranges rows)

@ -691,8 +713,10 @@ let html_cmd =
  and output =
    flag "output" (required string)
      ~doc:"<file> write html report to <file>"
+  and perf =
+    flag "perf" no_arg ~doc:"sort results for a performance comparison"
  in
-  fun () -> generate_html ?baseline current output
+  fun () -> generate_html perf ?baseline current output

 let write_status ?baseline rows chan =
  let rows =
--- a/sledge/src/report.ml
+++ b/sledge/src/report.ml
@ -143,8 +143,8 @@ let name = ref ""

 let output entry =
  Option.iter !chan ~f:(fun chan ->
-      Out_channel.output_lines chan
-        [Sexp.to_string (sexp_of_t {name= !name; entry})] )
+      Sexp.output chan (sexp_of_t {name= !name; entry}) ;
+      Out_channel.newline chan )

 let init ?append filename =
  (chan :=
--- a/sledge/test/Makefile
+++ b/sledge/test/Makefile
@ -10,11 +10,14 @@ CLANG_ARGS?=-O0
 SLEDGE_DBG=$(CURDIR)/../_build/debug/cli/sledge_cli.exe
 SLEDGE_OPT=$(CURDIR)/../_build/release/cli/sledge_cli.exe

-# additional arguments to pass to sledge
-SLEDGE_ARGS?=-bound 5
+# additional arguments to pass to sledge translate, disassemble, analyze, and smt
+SLEDGE_T_ARGS?=
+SLEDGE_D_ARGS?=
+SLEDGE_A_ARGS?=-bound 5
+SLEDGE_S_ARGS?=

 # limits for each test run
-TIMEOUT?=100
+TIMEOUT?=300
 MEMOUT?=4096

 sledge_dbg=./wrap.sh $(TIMEOUT) $(MEMOUT) $(SLEDGE_DBG)
@ -34,25 +37,25 @@ export LANG := C
 default: test

 # compile c to llvm bitcode
-%.bc : %.c
-	@(cd $(@D) && clang -g -c -emit-llvm -include $(CURDIR)/../model/llair_intrinsics.h -Wno-main-return-type $(CLANG_ARGS) -o $(@F) $(<F))
+%.bc : %.c $(CURDIR)/../model/llair_intrinsics.h
+	(cd $(@D) && clang -g -c -emit-llvm -include $(CURDIR)/../model/llair_intrinsics.h -Wno-main-return-type $(CLANG_ARGS) -o $(@F) $(<F))

 # compile c++ to llvm bitcode
 %.bc : %.cpp
-	@(cd $(@D) && clang++ -g -c -emit-llvm $(CLANG_ARGS) -o $(@F) $(<F))
+	(cd $(@D) && clang++ -g -c -emit-llvm $(CLANG_ARGS) -o $(@F) $(<F))

 # disassemble bitcode to llvm assembly
 %.ll : %.bc
-	@(cd $(@D) && llvm-dis -show-annotations -o $(@F) $(<F))
+	(cd $(@D) && llvm-dis -show-annotations -o $(@F) $(<F))

 %.bllair : %.bc
 	$(SLEDGE_DBG) llvm translate $< -output $@

 %.llair : %.bc
-	$(SLEDGE_DBG) llvm disassemble -no-models -no-internalize $< -llair-output $@
+	$(SLEDGE_DBG) llvm disassemble $< -llair-output $@

 %.llair : %.ll
-	$(SLEDGE_DBG) llvm disassemble -no-models -no-internalize $< -llair-output $@
+	$(SLEDGE_DBG) llvm disassemble $< -llair-output $@

 # code to test sledge translate
 TranslateCs:=$(shell find -L {,local/}translate -name '*.c' 2>/dev/null)
@ -85,21 +88,20 @@ compile: $(GeneratedBCs)
 # run sledge llvm translate tests
 .PHONY: translate
 translate: compile
-	-@parallel --bar $(sledge_dbg) llvm disassemble -no-models -no-internalize $(SLEDGE_ARGS) ::: $(TranslateTests)
+	-parallel --bar $(sledge_dbg) llvm disassemble $(SLEDGE_D_ARGS) ::: $(TranslateTests)

 .PHONY: translate.sexp
 translate.sexp:
-	@find -L {,local/,extra/}translate -not -path 'baseline/*' -name '*.sexp' 2>/dev/null \
-	 | xargs cat > translate.sexp
+	find -L {,local/,extra/}translate -not -path 'baseline/*' -name '*.sexp' 2>/dev/null | xargs cat > translate.sexp

 baseline/translate.sexp: translate.sexp
-	@cp translate.sexp $@
+	cp translate.sexp $@

 translate.html: translate.sexp
-	@$(sledge_report) html -baseline baseline/translate.sexp translate.sexp -output $@
+	$(sledge_report) html -baseline baseline/translate.sexp translate.sexp -output $@

 translate-status: translate.sexp
-	@$(sledge_report) status -baseline baseline/translate.sexp translate.sexp | column -ts$$'\t'
+	$(sledge_report) status -baseline baseline/translate.sexp translate.sexp | column -ts$$'\t'

 #
 # analyze (backend) tests
@ -108,35 +110,34 @@ translate-status: translate.sexp
 # run analyze tests
 .PHONY: analyze
 analyze: compile
-	-@parallel --bar $(sledge_dbg) llvm analyze -trace Report+Control $(SLEDGE_ARGS) ::: $(AnalyzeTests)
+	-parallel --bar $(sledge_dbg) llvm analyze -trace Report+Control $(SLEDGE_A_ARGS) ::: $(AnalyzeTests)

-parallel_sledge_opt=parallel --shuf --bar $(sledge_opt) llvm analyze -append-report $(SLEDGE_ARGS) :::
+parallel_sledge_opt=parallel --use-cores-instead-of-threads --shuf --bar $(sledge_opt) llvm analyze -append-report $(SLEDGE_A_ARGS) :::

 analyze-perf1: compile
-	@$(parallel_sledge_opt) $(AnalyzeTests)
+	-@$(parallel_sledge_opt) $(AnalyzeTests)

 analyze-perf3: compile
-	@$(parallel_sledge_opt) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests)
+	-@$(parallel_sledge_opt) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests)

 analyze-perf5: compile
-	@$(parallel_sledge_opt) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests)
+	-@$(parallel_sledge_opt) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests)

 analyze-perf7: compile
-	@$(parallel_sledge_opt) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests)
+	-@$(parallel_sledge_opt) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests) $(AnalyzeTests)

 .PHONY: analyze.sexp
 analyze.sexp:
-	@find -L {,local/,extra/}analyze -not -path 'baseline/*' -name '*.sexp' 2>/dev/null \
-	 | xargs cat > analyze.sexp
+	find -L {,local/,extra/}analyze -not -path 'baseline/*' -name '*.sexp' 2>/dev/null | xargs cat > analyze.sexp

 baseline/analyze.sexp: analyze.sexp
-	@cp analyze.sexp $@
+	cp analyze.sexp $@

 analyze.html: analyze.sexp
-	@$(sledge_report) html -baseline baseline/analyze.sexp analyze.sexp -output $@
+	$(sledge_report) html $(SLEDGE_R_ARGS) -baseline baseline/analyze.sexp analyze.sexp -output $@

 analyze-status: analyze.sexp
-	@$(sledge_report) status -baseline baseline/analyze.sexp analyze.sexp | column -ts$$'\t'
+	$(sledge_report) status -baseline baseline/analyze.sexp analyze.sexp | column -ts$$'\t'

 #
 # smt tests
@ -147,9 +148,9 @@ SmtTests:=$(shell find smt -name '*.smt2' 2>/dev/null)
 # run sledge smt tests
 .PHONY: smt
 smt:
-	-@parallel --bar $(sledge_dbg) smt $(SLEDGE_ARGS) ::: $(SmtTests)
+	-parallel --bar $(sledge_dbg) smt $(SLEDGE_S_ARGS) ::: $(SmtTests)

-parallel_sledge_opt_smt=parallel --shuf --bar $(sledge_opt) smt -append-report $(SLEDGE_ARGS) :::
+parallel_sledge_opt_smt=parallel --shuf --bar $(sledge_opt) smt -append-report $(SLEDGE_S_ARGS) :::

 smt-perf1:
 	@$(parallel_sledge_opt_smt) $(SmtTests)
@ -165,17 +166,16 @@ smt-perf7:

 .PHONY: smt.sexp
 smt.sexp:
-	@find -L {,local/,extra/}smt -not -path 'baseline/*' -name '*.sexp' 2>/dev/null \
-	 | xargs cat > smt.sexp
+	find -L {,local/,extra/}smt -not -path 'baseline/*' -name '*.sexp' 2>/dev/null | xargs cat > smt.sexp

 baseline/smt.sexp: smt.sexp
-	@cp smt.sexp $@
+	cp smt.sexp $@

 smt.html: smt.sexp
-	@$(sledge_report) html -baseline baseline/smt.sexp smt.sexp -output $@
+	$(sledge_report) html -baseline baseline/smt.sexp smt.sexp -output $@

 smt-status: smt.sexp
-	@$(sledge_report) status -baseline baseline/smt.sexp smt.sexp | column -ts$$'\t'
+	$(sledge_report) status -baseline baseline/smt.sexp smt.sexp | column -ts$$'\t'

 #
 # llvm tests
@ -184,22 +184,20 @@ smt-status: smt.sexp
 # run sledge llvm translate tests
 .PHONY: llvm
 llvm:
-	-@find -L llvm -name '*.ll' -or -name '*.bc' 2>/dev/null \
-	 | parallel --bar $(sledge_dbg) llvm translate -no-models -no-internalize $(SLEDGE_ARGS)
+	-find -L llvm -name '*.ll' -or -name '*.bc' 2>/dev/null | parallel --bar $(sledge_dbg) llvm translate -no-models -no-internalize $(SLEDGE_T_ARGS)

 .PHONY: llvm.sexp
 llvm.sexp:
-	@find -L {,local/,extra/}llvm -not -path 'baseline/*' -name '*.sexp' 2>/dev/null \
-	 | xargs cat > llvm.sexp
+	find -L {,local/,extra/}llvm -not -path 'baseline/*' -name '*.sexp' 2>/dev/null | xargs cat > llvm.sexp

 baseline/llvm.sexp: llvm.sexp
-	@cp llvm.sexp $@
+	cp llvm.sexp $@

 llvm.html: llvm.sexp
-	@$(sledge_report) html -baseline baseline/llvm.sexp llvm.sexp -output $@
+	$(sledge_report) html -baseline baseline/llvm.sexp llvm.sexp -output $@

 llvm-status: llvm.sexp
-	@$(sledge_report) status -baseline baseline/llvm.sexp llvm.sexp | column -ts$$'\t'
+	$(sledge_report) status -baseline baseline/llvm.sexp llvm.sexp | column -ts$$'\t'

 #
 # test workflow
@ -207,27 +205,27 @@ llvm-status: llvm.sexp

 # report warnings
 warnings:
-	@find -L * -name '*.out' | xargs grep -h "Warning:" | sort
+	find -L * -name '*.out' | xargs grep -h "Warning:" | sort

 test-translate:
-	-@find -L {,local/,extra/}translate -name "*.sexp" -or -name "*.out" -or -name '*.err' -or -name '*.bllair' -or -name '*.llair' -delete 2>/dev/null
-	-@$(MAKE) --no-print-directory translate
-	@$(MAKE) --no-print-directory translate-status
+	-rm $(TranslateTests:=.sexp) $(TranslateTests:=.out) $(TranslateTests:=.err) $(TranslateTests:=.bllair) $(TranslateTests:=.llair) 2>/dev/null
+	-$(MAKE) --no-print-directory translate
+	$(MAKE) --no-print-directory translate-status

 test-analyze:
-	-@find -L {,local/,extra/}analyze -name "*.sexp" -or -name "*.out" -or -name '*.err' -delete 2>/dev/null
-	-@$(MAKE) --no-print-directory analyze
-	@$(MAKE) --no-print-directory analyze-status
+	-rm $(AnalyzeTests:=.sexp) $(AnalyzeTests:=.out) $(AnalyzeTests:=.err) 2>/dev/null
+	-$(MAKE) --no-print-directory analyze
+	$(MAKE) --no-print-directory analyze-status

 test-smt:
-	-@find -L {,local/,extra/}smt -name "*.sexp" -or -name "*.out" -or -name '*.err' -delete 2>/dev/null
-	-@$(MAKE) --no-print-directory smt
-	@$(MAKE) --no-print-directory smt-status
+	-rm $(SmtTests:=.sexp) $(SmtTests:=.out) $(SmtTests:=.err) 2>/dev/null
+	-$(MAKE) --no-print-directory smt
+	$(MAKE) --no-print-directory smt-status

 test-llvm:
-	-@find -L {,local/,extra/}llvm -name "*.sexp" -or -name "*.out" -or -name '*.err' -or -name '*.bllair' -delete 2>/dev/null
-	-@$(MAKE) --no-print-directory llvm
-	@$(MAKE) --no-print-directory llvm-status
+	-find -L {,local/,extra/}llvm \( -name "*.sexp" -or -name "*.out" -or -name '*.err' -or -name '*.bllair' \) -delete 2>/dev/null
+	-$(MAKE) --no-print-directory llvm
+	$(MAKE) --no-print-directory llvm-status

 # run tests and check against expected results
 test: test-translate translate.html test-analyze analyze.html test-llvm llvm.html
@ -237,24 +235,23 @@ promote: baseline/translate.sexp baseline/analyze.sexp baseline/smt.sexp baselin

 # copy the current out and err files to the baseline dir
 promote-out:
-	@find -L * -not -path 'baseline/*' \( -name '*.out' -or -name '*.err' \) | xargs gcp --parents -t baseline
+	find -L * -not -path 'baseline/*' \( -name '*.out' -or -name '*.err' \) | xargs gcp --parents -t baseline

 # diff the current out files against the ones in the baseline dir
 diff-out:
-	@$(diff) -mask-uniques -include '.*\.out' baseline -alt-next current .
+	$(diff) -mask-uniques -include '.*\.out' baseline -alt-next current .

 # diff the current err files against the ones in the baseline dir
 diff-err:
-	@$(diff) -mask-uniques -include '.*\.err' baseline -alt-next current .
+	$(diff) -mask-uniques -include '.*\.err' baseline -alt-next current .

 # remove generated bitcode files
 cleanbc:
-	@rm -f $(GeneratedBCs)
+	rm -f $(GeneratedBCs)

 # remove result files
 cleanout:
-	@find -L * -not -path 'baseline/*' -name "*.sexp" -or -name "*.out" -or -name '*.err' -or -name '*.bllair' -or -name '*.llair' \
-	 | xargs rm -f
+	find -L * \( -not -path 'baseline/*' -name "*.sexp" -or -name "*.out" -or -name '*.err' -or -name '*.bllair' -or -name '*.llair' \) -delete

 clean: cleanbc cleanout

--- a/sledge/test/wrap.sh
+++ b/sledge/test/wrap.sh
@ -15,26 +15,26 @@ set -u
 timeout=$1
 memout=$(( $2*1024 ))
 command=${@: 3: $#-3}
-test_ext=${@: -1}
-test=${test_ext%.*}
+test=${@: -1}

 unknown_error () {
-  echo -e "((name $test)(entry(Status(UnknownError \"$1\"))))" >> $test.sexp
+  echo "((name $test)(entry(Status(UnknownError \"$1\"))))" >> $test.sexp
 }

 timeout () {
-  echo -e "((name $test)(entry(Status Timeout)))" >> $test.sexp
+  echo "((name $test)(entry(Status Timeout)))" >> $test.sexp
 }

 memout () {
-  echo -e "((name $test)(entry(Status Memout)))" >> $test.sexp
+  echo "((name $test)(entry(Status Memout)))" >> $test.sexp
 }

 (
  ulimit -t $timeout -v $memout
-  $command -report $test.sexp $test_ext 1> $test.out 2> $test.err
+  $command -report $test.sexp $test 1> $test.out 2> $test.err
 ) &> /dev/null
 status=$?
+
 case $status in
  ( 0 | 1 ) ;;
  ( 132 ) unknown_error "illegal instruction" ;;
@ -44,4 +44,7 @@ case $status in
  ( 137 | 152 ) timeout ;;
  ( * ) unknown_error "exit $status" ;;
 esac
+
+(test -f $test.sexp && grep -q "Status" $test.sexp) || unknown_error "exit $status"
+
 exit $status