2018-03-18 19:45:25 +01:00
|
|
|
{-# LANGUAGE NoImplicitPrelude #-}
|
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884.
+ Use pandoc-types 1.20 and texmath 0.12.
+ Text is now used instead of String, with a few exceptions.
+ In the MediaBag module, some of the types using Strings
were switched to use FilePath instead (not Text).
+ In the Parsing module, new parsers `manyChar`, `many1Char`,
`manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`,
`manyUntilChar` have been added: these are like their
unsuffixed counterparts but pack some or all of their output.
+ `glob` in Text.Pandoc.Class still takes String since it seems
to be intended as an interface to Glob, which uses strings.
It seems to be used only once in the package, in the EPUB writer,
so that is not hard to change.
2019-11-04 22:12:37 +01:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2014-05-08 21:50:20 +02:00
|
|
|
{-
|
2019-02-04 22:52:31 +01:00
|
|
|
Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu>
|
2014-05-08 21:50:20 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
2018-03-18 19:45:25 +01:00
|
|
|
import Prelude
|
2010-12-11 08:35:31 +01:00
|
|
|
import Text.Pandoc
|
2019-08-25 22:58:29 +02:00
|
|
|
import Text.Pandoc.MIME
|
2020-07-07 05:10:45 +02:00
|
|
|
import Control.Monad.Except (throwError, liftIO)
|
2017-06-10 20:57:38 +02:00
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
2017-01-27 11:15:45 +01:00
|
|
|
import qualified Data.ByteString as B
|
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884.
+ Use pandoc-types 1.20 and texmath 0.12.
+ Text is now used instead of String, with a few exceptions.
+ In the MediaBag module, some of the types using Strings
were switched to use FilePath instead (not Text).
+ In the Parsing module, new parsers `manyChar`, `many1Char`,
`manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`,
`manyUntilChar` have been added: these are like their
unsuffixed counterparts but pack some or all of their output.
+ `glob` in Text.Pandoc.Class still takes String since it seems
to be intended as an interface to Glob, which uses strings.
It seems to be used only once in the package, in the EPUB writer,
so that is not hard to change.
2019-11-04 22:12:37 +01:00
|
|
|
import qualified Data.Text as T
|
2010-12-11 08:35:31 +01:00
|
|
|
import Criterion.Main
|
2015-10-10 01:01:08 +02:00
|
|
|
import Criterion.Types (Config(..))
|
2017-12-30 23:25:38 +01:00
|
|
|
import Data.List (intersect)
|
2020-07-07 05:10:45 +02:00
|
|
|
import Data.Maybe (mapMaybe, catMaybes)
|
2017-01-27 11:29:26 +01:00
|
|
|
import System.Environment (getArgs)
|
2019-08-25 22:58:29 +02:00
|
|
|
import qualified Data.ByteString.Lazy as BL
|
2010-12-11 08:35:31 +01:00
|
|
|
|
2020-07-07 05:10:45 +02:00
|
|
|
-- | Input handed to a reader under benchmark: strict text for text-based
-- readers, or a lazy bytestring for readers that consume binary input.
--
-- The field selectors are partial; each is only safe on a value built
-- with the matching constructor.
data Input
  = InputText
      { unInputText :: T.Text  -- ^ payload for a 'TextReader'
      }
  | InputBS
      { unInputBS :: BL.ByteString  -- ^ payload for a 'ByteStringReader'
      }
|
|
|
|
|
2010-12-11 08:35:31 +01:00
|
|
|
-- | Build a criterion benchmark for the reader of the format @name@.
--
-- The reader's input is produced by rendering @doc@ with the writer of the
-- same format, so a format can only be benchmarked if it has both a reader
-- and a writer of the same kind (text or bytestring).
--
-- Returns 'Nothing' when preparing the input fails (for example, no writer
-- with that name, or a text/bytestring mismatch between reader and writer).
-- An unknown reader name is reported through 'error' when the reader is
-- first demanded.
readerBench :: Pandoc
            -> T.Text
            -> IO (Maybe Benchmark)
readerBench doc name = do
  -- The reader is resolved purely so that the benchmarked function can be
  -- run with 'runPure'; the binding is lazy, so a lookup failure only
  -- surfaces once 'rdr' or 'rexts' is actually used.
  let (rdr, rexts) = either (error . show) id . runPure $ getReader name
  res <- runIO $ do
    (wtr, wexts) <- getWriter name
    case (rdr, wtr) of
      (TextReader r, TextWriter w) -> do
        setResourcePath ["./test"]
        inp <- w def{ writerWrapText = WrapAuto
                    , writerExtensions = wexts } doc
        return (r def{ readerExtensions = rexts } . unInputText, InputText inp)
      (ByteStringReader r, ByteStringWriter w) -> do
        setResourcePath ["./test"]
        tmpl <- Just <$> compileDefaultTemplate name
        inp <- w def{ writerWrapText = WrapAuto
                    , writerExtensions = wexts
                    , writerTemplate = tmpl } doc
        -- The generated document is kept in memory only; it is not dumped
        -- to a hard-coded temporary file, so preparing the benchmark has no
        -- file-system side effects and does not depend on /tmp existing.
        return (r def{ readerExtensions = rexts } . unInputBS, InputBS inp)
      _ -> throwError $ PandocSomeError $
             "text/bytestring format mismatch: " <> name
  return $ case res of
    Right (readerFun, inp) ->
      Just $ bench (T.unpack $ name <> " reader")
           $ nf (\i -> either (error . show) id $ runPure (readerFun i)) inp
    -- Formats whose input could not be generated are silently skipped.
    Left _ -> Nothing
|
2010-12-11 08:35:31 +01:00
|
|
|
|
2019-08-25 22:58:29 +02:00
|
|
|
-- | Read the image fixtures that the writer benchmarks insert into the
-- media bag.
--
-- Each entry is @(media path, MIME type, file contents)@. The files are
-- read from the @test@ directory relative to the current working
-- directory, so the benchmark has to be started from the repository root.
getImages :: IO [(FilePath, MimeType, BL.ByteString)]
getImages = mapM loadJpeg ["lalune.jpg", "movie.jpg"]
  where
    -- Read one fixture and tag it with the registered JPEG media type
    -- ("image/jpeg"; "image/jpg" is not a registered type).
    loadJpeg fileName = do
      contents <- BL.readFile ("test/" ++ fileName)
      return (fileName, "image/jpeg", contents)
|
|
|
|
|
2010-12-11 08:35:31 +01:00
|
|
|
-- | Build a criterion benchmark for the text writer of the format @name@,
-- rendering @doc@.
--
-- Returns 'Nothing' when no writer with that name can be obtained or when
-- the writer is not a 'TextWriter'. Rendering errors inside the benchmark
-- are raised with 'error'.
writerBench :: Pandoc
            -> T.Text
            -> Maybe Benchmark
writerBench doc name = either (const Nothing) (Just . toBenchmark) lookupWriter
  where
    -- Resolve the writer purely and pre-apply its options.
    lookupWriter = runPure $ do
      (writer, exts) <- getWriter name
      case writer of
        TextWriter render -> return $ render def{ writerExtensions = exts }
        _ -> throwError $ PandocSomeError
               $ "could not get text writer for " <> name

    -- The images are loaded once through criterion's 'env' and inserted
    -- into the media bag on every run before the document is rendered.
    toBenchmark render =
      env getImages $ \images ->
        bench (T.unpack $ name <> " writer")
          $ nf (either (error . show) id . runPure . renderWithMedia images) doc
      where
        renderWithMedia images d = do
          mapM_ (\(path, mime, contents) ->
                   insertMedia path (Just mime) contents)
                images
          render d
|
2010-12-11 08:35:31 +01:00
|
|
|
|
2012-07-26 19:02:00 +02:00
|
|
|
-- | Entry point: select formats from the command line, build the reader
-- and writer benchmarks, and run them with criterion.
--
-- Arguments starting with @-@ are ignored for format selection. With no
-- remaining arguments every format is benchmarked; otherwise a reader
-- (writer) is selected when both the word @reader@ (@writer@) and the
-- format name appear among the arguments.
main :: IO ()
main = do
  rawArgs <- getArgs
  let args = [a | a <- map T.pack rawArgs, T.take 1 a /= "-"]
  print args
  let -- A format of the given kind ("reader" or "writer") is selected when
      -- no arguments were given, or when both the kind and the format
      -- name were named.
      selected kind fmt =
        null args || (kind `elem` args && fmt `elem` args)
      allWriters =
        [n | (n, _) <- (writers :: [(T.Text, Writer PandocPure)])]
      matchedReaders =
        [ n
        | (n, _) <- (readers :: [(T.Text, Reader PandocPure)])
        , selected "reader" n
        ]
      -- Only text writers are benchmarked; entries with any other
      -- constructor fail the pattern and are skipped.
      matchedWriters =
        [ n
        | (n, TextWriter _) <- (writers :: [(T.Text, Writer PandocPure)])
        , selected "writer" n
        ]
  source <- UTF8.toText <$> B.readFile "test/testsuite.txt"
  let doc = either (error . show) id (runPure (readMarkdown def source))
      -- We need the corresponding writer to generate input for the
      -- reader, hence the intersection with the writer names.
      -- NOTE(review): "haddock" is excluded explicitly; the reason is not
      -- visible here.
      readerFormats =
        filter (/= "haddock") (matchedReaders `intersect` allWriters)
  readerBs <- catMaybes <$> mapM (readerBench doc) readerFormats
  let writerBs = mapMaybe (writerBench doc) matchedWriters
  defaultMainWith defaultConfig{ timeLimit = 6.0 }
    (writerBs ++ readerBs)
|