* The new reader is faster and more accurate. * API changes for Text.Pandoc.Readers.HTML: - removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement, htmlComment - added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag * tagsoup is a new dependency. * Text.Pandoc.Parsing: Generalized type on readWith. * Benchmark.hs: Added length calculation to force full evaluation. * Updated HTML reader tests. * Updated markdown and textile readers to use the functions from the HTML reader. * Note: The markdown reader now correctly handles some cases it did not before. For example: <hr/> is reproduced without adding a space. <script> a = '<b>'; </script> is parsed correctly.
42 lines
1.7 KiB
Haskell
42 lines
1.7 KiB
Haskell
import Text.Pandoc
|
|
import Text.Pandoc.Shared (readDataFile, normalize)
|
|
import Criterion.Main
|
|
import Data.List (isSuffixOf)
|
|
|
|
readerBench :: Pandoc
|
|
-> (String, ParserState -> String -> Pandoc)
|
|
-> Benchmark
|
|
readerBench doc (name, reader) =
|
|
let writer = case lookup name writers of
|
|
Just w -> w
|
|
Nothing -> error $ "Could not find writer for " ++ name
|
|
inp = writer defaultWriterOptions{ writerWrapText = True
|
|
, writerLiterateHaskell =
|
|
"+lhs" `isSuffixOf` name } doc
|
|
-- we compute the length to force full evaluation
|
|
getLength (Pandoc (Meta a b c) d) =
|
|
length a + length b + length c + length d
|
|
in bench (name ++ " reader") $ whnf (getLength .
|
|
reader defaultParserState{ stateSmart = True
|
|
, stateStandalone = True
|
|
, stateLiterateHaskell =
|
|
"+lhs" `isSuffixOf` name }) inp
|
|
|
|
writerBench :: Pandoc
|
|
-> (String, WriterOptions -> Pandoc -> String)
|
|
-> Benchmark
|
|
writerBench doc (name, writer) = bench (name ++ " writer") $ nf
|
|
(writer defaultWriterOptions{
|
|
writerWrapText = True
|
|
, writerLiterateHaskell = "+lhs" `isSuffixOf` name }) doc
|
|
|
|
normalizeBench :: Pandoc -> Benchmark
|
|
normalizeBench doc = bench "normalize" $ whnf normalize doc
|
|
|
|
main = do
|
|
inp <- readDataFile (Just ".") "README"
|
|
let ps = defaultParserState{ stateSmart = True }
|
|
let doc = readMarkdown ps inp
|
|
let readerBs = map (readerBench doc) readers
|
|
defaultMain $ map (writerBench doc) writers ++ readerBs ++ [normalizeBench doc]
|
|
|