pandoc/Benchmark.hs
John MacFarlane 904050fa36 New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate.

* API changes for Text.Pandoc.Readers.HTML:
   - removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag,
     anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType,
     htmlBlockElement, htmlComment
   - added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag

* tagsoup is a new dependency.

* Text.Pandoc.Parsing: Generalized type on readWith.

* Benchmark.hs: Added length calculation to force full evaluation.

* Updated HTML reader tests.

* Updated markdown and textile readers to use the functions from
  the HTML reader.

* Note: The markdown reader now correctly handles some cases it did not
  before. For example:

    <hr/>

  is reproduced without adding a space.

    <script>
      a = '<b>';
    </script>

  is parsed correctly.
2010-12-30 13:55:40 -08:00

42 lines
1.7 KiB
Haskell

import Text.Pandoc
import Text.Pandoc.Shared (readDataFile, normalize)
import Criterion.Main
import Data.List (isSuffixOf)
readerBench :: Pandoc
-> (String, ParserState -> String -> Pandoc)
-> Benchmark
readerBench doc (name, reader) =
let writer = case lookup name writers of
Just w -> w
Nothing -> error $ "Could not find writer for " ++ name
inp = writer defaultWriterOptions{ writerWrapText = True
, writerLiterateHaskell =
"+lhs" `isSuffixOf` name } doc
-- we compute the length to force full evaluation
getLength (Pandoc (Meta a b c) d) =
length a + length b + length c + length d
in bench (name ++ " reader") $ whnf (getLength .
reader defaultParserState{ stateSmart = True
, stateStandalone = True
, stateLiterateHaskell =
"+lhs" `isSuffixOf` name }) inp
writerBench :: Pandoc
-> (String, WriterOptions -> Pandoc -> String)
-> Benchmark
writerBench doc (name, writer) = bench (name ++ " writer") $ nf
(writer defaultWriterOptions{
writerWrapText = True
, writerLiterateHaskell = "+lhs" `isSuffixOf` name }) doc
normalizeBench :: Pandoc -> Benchmark
normalizeBench doc = bench "normalize" $ whnf normalize doc
main = do
inp <- readDataFile (Just ".") "README"
let ps = defaultParserState{ stateSmart = True }
let doc = readMarkdown ps inp
let readerBs = map (readerBench doc) readers
defaultMain $ map (writerBench doc) writers ++ readerBs ++ [normalizeBench doc]