2010-12-10 23:35:31 -08:00
|
|
|
import Text.Pandoc
|
|
|
|
import Criterion.Main
|
2012-07-27 11:13:18 -07:00
|
|
|
import Criterion.Config
|
|
|
|
import System.Environment (getArgs)
|
|
|
|
import Data.Monoid
|
2010-12-10 23:35:31 -08:00
|
|
|
|
|
|
|
-- | Build the benchmark for a single reader.
--
-- The input text is produced by rendering @doc@ with the writer that is
-- registered in 'writers' under the same format name as the reader; the
-- benchmark then times the reader on that text.  If no entry exists for
-- the name, or the entry is not a 'PureStringWriter', this calls 'error'.
readerBench :: Pandoc
            -> (String, ReaderOptions -> String -> IO Pandoc)
            -> Benchmark
readerBench doc (name, reader) =
  bench (name ++ " reader") (whnfIO (fmap blockCount parsed))
  where
    -- Writer used to serialise @doc@ into this reader's input format.
    render = case lookup name writers of
      Just (PureStringWriter w) -> w
      _ -> error ("Could not find writer for " ++ name)

    -- Text handed to the reader on every benchmark run.
    source = render def{ writerWrapText = True } doc

    -- The action being timed: one parse of @source@.
    parsed = reader def{ readerSmart = True } source

    -- we compute the length to force full evaluation
    blockCount (Pandoc (Meta _) blocks) = length blocks
|
2010-12-10 23:35:31 -08:00
|
|
|
|
|
|
|
-- | Build the benchmark for a single writer: render @doc@ with the given
-- writer (text wrapping enabled) and evaluate the resulting 'String' with
-- 'nf'.
writerBench :: Pandoc
            -> (String, WriterOptions -> Pandoc -> String)
            -> Benchmark
writerBench doc (name, writer) = bench label (nf render doc)
  where
    label  = name ++ " writer"
    render = writer def{ writerWrapText = True }
|
2010-12-10 23:35:31 -08:00
|
|
|
|
2012-07-26 10:02:00 -07:00
|
|
|
-- | Entry point: assemble one benchmark document and run every writer and
-- reader benchmark against it.
--
-- The Criterion configuration starts from 'defaultConfig' with
-- @cfgSamples@ set to 20 and is then passed through 'parseArgs' together
-- with the command-line arguments.
main :: IO ()
main = do
  cliArgs <- getArgs
  let baseConfig = defaultConfig{ cfgSamples = Last (Just 20) }
  (conf, _) <- parseArgs baseConfig defaultOptions cliArgs
  readme <- readFile "README"
  suite <- readFile "tests/testsuite.txt"
  let -- The first three lines of the test suite are skipped
      -- (presumably its title block -- TODO confirm).
      suiteBody = unlines (drop 3 (lines suite))
      -- README followed by the test suite, parsed once as markdown.
      doc = readMarkdown def{ readerSmart = True } (readme ++ suiteBody)
      -- Every registered reader except "haddock".
      readerBs = [ readerBench doc r | r@(n, _) <- readers, n /= "haddock" ]
      -- Only writers wrapped in 'PureStringWriter' are benchmarked.
      writerBs = [ writerBench doc (n, w) | (n, PureStringWriter w) <- writers ]
  defaultMainWith conf (return ()) (writerBs ++ readerBs)
|
2010-12-10 23:35:31 -08:00
|
|
|
|