Benchmark improvements.

* Build `+RTS -A256m -RTS` into the default ghc-options for the benchmark, so we don't have to specify this separately on the command line. This is necessary to get accurate benchmark results; otherwise we are largely measuring garbage collection, some of it unrelated to the current benchmark.
* Switch back from gauge to tasty-bench.
* Allow specifying a BASELINE file in 'make bench' for comparison (otherwise the latest is chosen by default).
* Remove obsolete reference to weigh-pandoc from CONTRIBUTING.md.
* Remove `-Rghc-timing` from 'make bench'.
2021-03-17 13:34:17 -07:00 · 2021-03-17 13:34:17 -07:00 · c6e5cf2e74
commit c6e5cf2e74
parent 84836719aa
4 changed files with 13 additions and 18 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -318,9 +318,6 @@ With stack:

    stack bench

-You can also build `weigh-pandoc` (`stack build pandoc:weigh-pandoc`)
-to get some statistics on memory usage.  (Eventually this should
-be incorporated into the benchmark suite.)

 Using the REPL
 --------------
--- a/Makefile
+++ b/Makefile
@@ -7,21 +7,19 @@ DOCKERIMAGE=registry.gitlab.b-data.ch/ghc/ghc4pandoc:8.10.4
 COMMIT=$(shell git rev-parse --short HEAD)
 TIMESTAMP=$(shell date "+%Y%m%d_%H%M")
 LATESTBENCH=$(word 1,$(shell ls -t bench_*.csv 2>/dev/null))
-ifeq ($(LATESTBENCH),)
-BASELINE=
+BASELINE?=$(LATESTBENCH)
+ifeq ($(BASELINE),)
+BASELINECMD=
 else
-BASELINE=--baseline $(LATESTBENCH)
+BASELINECMD=--baseline $(BASELINE)
 endif
 GHCOPTS=-fdiagnostics-color=always -j4 +RTS -A256m -RTS
 WEBSITE=../../web/pandoc.org
 REVISION?=1
-# Note: for benchmarks we use +RTS -A256m -I0 -RTS ; otherwise the benchmarks
-# are measuring garbage collecting, and this can vary depending on which
-# other benchmarks are run.
 # For gauge:
-BENCHARGS?=--small --ci=0.90 --match=pattern $(PATTERN) +RTS -T -A256m -I0 -RTS
+# BENCHARGS?=--small --ci=0.90 --match=pattern $(PATTERN)
 # For tasty-bench:
-# BENCHARGS?=--csv bench_$(TIMESTAMP).csv --timeout=6 +RTS -T -A256m -I0 -RTS $(if $(PATTERN),--pattern "$(PATTERN)",)
+BENCHARGS?=--csv bench_$(TIMESTAMP).csv $(BASELINECMD) --timeout=6 +RTS -T -RTS $(if $(PATTERN),--pattern "$(PATTERN)",)

 quick:
 	stack install --ghc-options='$(GHCOPTS)' --install-ghc --flag 'pandoc:embed_data_files' --fast --test --ghc-options='$(GHCOPTS)' --test-arguments='-j4 --hide-successes $(TESTARGS)'
@@ -58,11 +56,9 @@ ghcid-test:

 bench:
 	stack bench \
-	  --ghc-options '-Rghc-timing $(GHCOPTS)' \
+	  --ghc-options '$(GHCOPTS)' \
 	  --benchmark-arguments='$(BENCHARGS)' 2>&1 | \
 	  tee "bench_latest.txt"
-	perl -ne 'if (/\r/) { s/\x1b\[[0-9;]*[mGK]//g;s/^.*\r//;print; }' \
-	  bench_latest.txt > "bench_$(TIMESTAMP).txt"

 reformat:
 	for f in $(SOURCEFILES); do echo $$f; stylish-haskell -i $$f ; done
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@@ -23,8 +23,8 @@ import Control.Monad.Except (throwError)
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Data.ByteString as B
 import qualified Data.Text as T
--- import Test.Tasty.Bench
-import Gauge
+import Test.Tasty.Bench
+-- import Gauge
 import qualified Data.ByteString.Lazy as BL
 import Data.Maybe (mapMaybe)
 import Data.List (sortOn)
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -826,9 +826,11 @@ benchmark benchmark-pandoc
  hs-source-dirs:  benchmark
  build-depends:   bytestring,
                   containers,
-                   gauge       >= 0.2     && < 0.3,
-                   -- tasty-bench >= 0.2     && <= 0.3,
+                   -- gauge       >= 0.2     && < 0.3,
+                   tasty-bench >= 0.2     && <= 0.3,
                   mtl         >= 2.2     && < 2.3,
                   text        >= 1.1.1.0 && < 1.3,
                   time,
                   deepseq
+  -- we increase heap size to avoid benchmarking garbage collection:
+  ghc-options:     -rtsopts -with-rtsopts=-A256m -threaded