Reorganize App to make it easier to limit IO in main loop.
Previously we used liftIO fairly liberally. The code has been restructured to avoid this. A small behavior change is that pandoc will now fall back to latin1 encoding for inputs that can't be read as UTF-8. This is what it did previously for content fetched from the web and not marked as to content type. It makes sense to do the same for local files.
This commit is contained in:
parent
c39ddeb8f8
commit
fd7c140cde
1 changed files with 99 additions and 84 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
{-# LANGUAGE TupleSections #-}
|
||||||
{-# LANGUAGE LambdaCase #-}
|
{-# LANGUAGE LambdaCase #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
|
@ -27,7 +28,7 @@ module Text.Pandoc.App (
|
||||||
import qualified Control.Exception as E
|
import qualified Control.Exception as E
|
||||||
import Control.Monad ( (>=>), when )
|
import Control.Monad ( (>=>), when )
|
||||||
import Control.Monad.Trans ( MonadIO(..) )
|
import Control.Monad.Trans ( MonadIO(..) )
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError, catchError)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import qualified Data.ByteString.Char8 as B8
|
import qualified Data.ByteString.Char8 as B8
|
||||||
import qualified Data.ByteString.Lazy as BL
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
@ -38,6 +39,7 @@ import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Data.Text.Lazy as TL
|
import qualified Data.Text.Lazy as TL
|
||||||
import qualified Data.Text.Lazy.Encoding as TE
|
import qualified Data.Text.Lazy.Encoding as TE
|
||||||
|
import qualified Data.Text.Encoding as TSE
|
||||||
import qualified Data.Text.Encoding.Error as TE
|
import qualified Data.Text.Encoding.Error as TE
|
||||||
import qualified Data.Text.Encoding.Error as TSE
|
import qualified Data.Text.Encoding.Error as TSE
|
||||||
import Network.URI (URI (..), parseURI)
|
import Network.URI (URI (..), parseURI)
|
||||||
|
@ -48,7 +50,7 @@ import System.IO (nativeNewline, stdout)
|
||||||
import qualified System.IO as IO (Newline (..))
|
import qualified System.IO as IO (Newline (..))
|
||||||
import Text.Pandoc
|
import Text.Pandoc
|
||||||
import Text.Pandoc.Builder (setMeta)
|
import Text.Pandoc.Builder (setMeta)
|
||||||
import Text.Pandoc.MIME (getCharset)
|
import Text.Pandoc.MIME (getCharset, MimeType)
|
||||||
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
|
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
|
||||||
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
|
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
|
||||||
IpynbOutput (..))
|
IpynbOutput (..))
|
||||||
|
@ -69,6 +71,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
import System.Posix.IO (stdOutput)
|
import System.Posix.IO (stdOutput)
|
||||||
import System.Posix.Terminal (queryTerminal)
|
import System.Posix.Terminal (queryTerminal)
|
||||||
#endif
|
#endif
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
convertWithOpts :: Opt -> IO ()
|
convertWithOpts :: Opt -> IO ()
|
||||||
convertWithOpts opts = do
|
convertWithOpts opts = do
|
||||||
|
@ -94,14 +97,14 @@ convertWithOpts opts = do
|
||||||
let sources = case optInputFiles opts of
|
let sources = case optInputFiles opts of
|
||||||
Just xs | not (optIgnoreArgs opts) -> xs
|
Just xs | not (optIgnoreArgs opts) -> xs
|
||||||
_ -> ["-"]
|
_ -> ["-"]
|
||||||
|
|
||||||
#ifdef _WINDOWS
|
#ifdef _WINDOWS
|
||||||
let istty = True
|
let istty = True
|
||||||
#else
|
#else
|
||||||
istty <- liftIO $ queryTerminal stdOutput
|
istty <- liftIO $ queryTerminal stdOutput
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(output, reports) <- runIOorExplode $ do
|
res <- runIO $ do
|
||||||
|
|
||||||
setTrace (optTrace opts)
|
setTrace (optTrace opts)
|
||||||
setVerbosity verbosity
|
setVerbosity verbosity
|
||||||
setUserDataDir datadir
|
setUserDataDir datadir
|
||||||
|
@ -110,6 +113,8 @@ convertWithOpts opts = do
|
||||||
setInputFiles (fromMaybe ["-"] (optInputFiles opts))
|
setInputFiles (fromMaybe ["-"] (optInputFiles opts))
|
||||||
setOutputFile (optOutputFile opts)
|
setOutputFile (optOutputFile opts)
|
||||||
|
|
||||||
|
inputs <- readSources sources
|
||||||
|
|
||||||
-- assign reader and writer based on options and filenames
|
-- assign reader and writer based on options and filenames
|
||||||
readerName <- case optFrom opts of
|
readerName <- case optFrom opts of
|
||||||
Just f -> return f
|
Just f -> return f
|
||||||
|
@ -135,20 +140,6 @@ convertWithOpts opts = do
|
||||||
|
|
||||||
(reader, readerExts) <- getReader readerName
|
(reader, readerExts) <- getReader readerName
|
||||||
|
|
||||||
let convertTabs = tabFilter (if optPreserveTabs opts ||
|
|
||||||
readerNameBase == "t2t" ||
|
|
||||||
readerNameBase == "man"
|
|
||||||
then 0
|
|
||||||
else optTabStop opts)
|
|
||||||
|
|
||||||
|
|
||||||
let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
|
|
||||||
readSources srcs =
|
|
||||||
mapM (\fp -> do
|
|
||||||
t <- readSource fp
|
|
||||||
return (if fp == "-" then "" else fp, convertTabs t)) srcs
|
|
||||||
|
|
||||||
|
|
||||||
outputSettings <- optToOutputSettings opts
|
outputSettings <- optToOutputSettings opts
|
||||||
let format = outputFormat outputSettings
|
let format = outputFormat outputSettings
|
||||||
let writer = outputWriter outputSettings
|
let writer = outputWriter outputSettings
|
||||||
|
@ -236,20 +227,11 @@ convertWithOpts opts = do
|
||||||
_ -> Format format) :))
|
_ -> Format format) :))
|
||||||
$ []
|
$ []
|
||||||
|
|
||||||
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
|
let convertTabs = tabFilter (if optPreserveTabs opts ||
|
||||||
sourceToDoc sources' =
|
readerNameBase == "t2t" ||
|
||||||
case reader of
|
readerNameBase == "man"
|
||||||
TextReader r
|
then 0
|
||||||
| readerNameBase == "json" ->
|
else optTabStop opts)
|
||||||
mconcat <$> mapM (readSource >=> r readerOpts) sources'
|
|
||||||
| optFileScope opts ->
|
|
||||||
-- Read source and convert tabs (see #6709)
|
|
||||||
let readSource' = fmap convertTabs . readSource
|
|
||||||
in mconcat <$> mapM (readSource' >=> r readerOpts) sources'
|
|
||||||
| otherwise ->
|
|
||||||
readSources sources' >>= r readerOpts
|
|
||||||
ByteStringReader r ->
|
|
||||||
mconcat <$> mapM (readFile' >=> r readerOpts) sources'
|
|
||||||
|
|
||||||
|
|
||||||
when (readerNameBase == "markdown_github" ||
|
when (readerNameBase == "markdown_github" ||
|
||||||
|
@ -275,7 +257,22 @@ convertWithOpts opts = do
|
||||||
maybe id (setMeta "citation-abbreviations")
|
maybe id (setMeta "citation-abbreviations")
|
||||||
(optCitationAbbreviations opts) $ mempty
|
(optCitationAbbreviations opts) $ mempty
|
||||||
|
|
||||||
doc <- sourceToDoc sources >>=
|
doc <- (case reader of
|
||||||
|
TextReader r
|
||||||
|
| readerNameBase == "json" ->
|
||||||
|
mconcat <$>
|
||||||
|
mapM (inputToText convertTabs
|
||||||
|
>=> r readerOpts . (:[])) inputs
|
||||||
|
| optFileScope opts ->
|
||||||
|
mconcat <$> mapM
|
||||||
|
(inputToText convertTabs
|
||||||
|
>=> r readerOpts . (:[]))
|
||||||
|
inputs
|
||||||
|
| otherwise -> mapM (inputToText convertTabs) inputs
|
||||||
|
>>= r readerOpts
|
||||||
|
ByteStringReader r ->
|
||||||
|
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
|
||||||
|
>>=
|
||||||
( (if isJust (optExtractMedia opts)
|
( (if isJust (optExtractMedia opts)
|
||||||
then fillMediaBag
|
then fillMediaBag
|
||||||
else return)
|
else return)
|
||||||
|
@ -310,6 +307,9 @@ convertWithOpts opts = do
|
||||||
reports <- getLog
|
reports <- getLog
|
||||||
return (output, reports)
|
return (output, reports)
|
||||||
|
|
||||||
|
case res of
|
||||||
|
Left e -> E.throwIO e
|
||||||
|
Right (output, reports) -> do
|
||||||
case optLogFile opts of
|
case optLogFile opts of
|
||||||
Nothing -> return ()
|
Nothing -> return ()
|
||||||
Just logfile -> BL.writeFile logfile (encodeLogMessages reports)
|
Just logfile -> BL.writeFile logfile (encodeLogMessages reports)
|
||||||
|
@ -344,49 +344,64 @@ adjustMetadata f (Pandoc meta bs) = Pandoc (f meta) bs
|
||||||
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
|
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
|
||||||
applyTransforms transforms d = return $ foldr ($) d transforms
|
applyTransforms transforms d = return $ foldr ($) d transforms
|
||||||
|
|
||||||
readSource :: FilePath -> PandocIO Text
|
readSources :: (PandocMonad m, MonadIO m)
|
||||||
readSource src = case parseURI src of
|
=> [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))]
|
||||||
Just u | uriScheme u `elem` ["http:","https:"] ->
|
readSources srcs =
|
||||||
readURI src
|
mapM (\fp -> do t <- readSource fp
|
||||||
| uriScheme u == "file:" -> liftIO $
|
return (if fp == "-" then "" else fp, t)) srcs
|
||||||
readTextFile (uriPathToPath $ T.pack $ uriPath u)
|
|
||||||
_ -> liftIO $ readTextFile src
|
|
||||||
where readTextFile :: FilePath -> IO Text
|
|
||||||
readTextFile fp = do
|
|
||||||
bs <- if src == "-"
|
|
||||||
then BS.getContents
|
|
||||||
else BS.readFile fp
|
|
||||||
E.catch (return $! UTF8.toText bs)
|
|
||||||
(\e -> E.throwIO $ case e of
|
|
||||||
TSE.DecodeError _ (Just w) ->
|
|
||||||
case BS.elemIndex w bs of
|
|
||||||
Just offset ->
|
|
||||||
PandocUTF8DecodingError (T.pack fp) offset w
|
|
||||||
_ -> PandocUTF8DecodingError (T.pack fp) 0 w
|
|
||||||
_ -> PandocAppError (tshow e))
|
|
||||||
|
|
||||||
readURI :: FilePath -> PandocIO Text
|
readSource :: (PandocMonad m, MonadIO m)
|
||||||
readURI src = do
|
=> FilePath -> m (BS.ByteString, Maybe MimeType)
|
||||||
(bs, mt) <- openURL (T.pack src)
|
readSource "-" = (,Nothing) <$> readStdinStrict
|
||||||
|
readSource src =
|
||||||
|
case parseURI src of
|
||||||
|
Just u | uriScheme u `elem` ["http:","https:"] -> openURL (T.pack src)
|
||||||
|
| uriScheme u == "file:" ->
|
||||||
|
(,Nothing) <$>
|
||||||
|
readFileStrict (uriPathToPath $ T.pack $ uriPath u)
|
||||||
|
_ -> (,Nothing) <$> readFileStrict src
|
||||||
|
|
||||||
|
utf8ToText :: PandocMonad m => FilePath -> BS.ByteString -> m Text
|
||||||
|
utf8ToText fp bs =
|
||||||
|
case TSE.decodeUtf8' . dropBOM $ bs of
|
||||||
|
Left (TSE.DecodeError _ (Just w)) ->
|
||||||
|
case BS.elemIndex w bs of
|
||||||
|
Just offset -> throwError $ PandocUTF8DecodingError (T.pack fp) offset w
|
||||||
|
Nothing -> throwError $ PandocUTF8DecodingError (T.pack fp) 0 w
|
||||||
|
Left e -> throwError $ PandocAppError (tshow e)
|
||||||
|
Right t -> return t
|
||||||
|
where
|
||||||
|
dropBOM bs' =
|
||||||
|
if "\xEF\xBB\xBF" `BS.isPrefixOf` bs'
|
||||||
|
then BS.drop 3 bs'
|
||||||
|
else bs'
|
||||||
|
|
||||||
|
|
||||||
|
inputToText :: PandocMonad m
|
||||||
|
=> (Text -> Text)
|
||||||
|
-> (FilePath, (BS.ByteString, Maybe MimeType))
|
||||||
|
-> m (FilePath, Text)
|
||||||
|
inputToText convTabs (fp, (bs,mt)) =
|
||||||
|
(fp,) . convTabs . T.filter (/='\r') <$>
|
||||||
case mt >>= getCharset of
|
case mt >>= getCharset of
|
||||||
Just "UTF-8" -> return $ UTF8.toText bs
|
Just "UTF-8" -> utf8ToText fp bs
|
||||||
Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
|
Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
|
||||||
Just charset -> throwError $ PandocUnsupportedCharsetError charset
|
Just charset -> throwError $ PandocUnsupportedCharsetError charset
|
||||||
Nothing -> liftIO $ -- try first as UTF-8, then as latin1
|
Nothing -> catchError
|
||||||
E.catch (return $! UTF8.toText bs)
|
(utf8ToText fp bs)
|
||||||
(\case
|
(\case
|
||||||
TSE.DecodeError{} ->
|
PandocUTF8DecodingError{} ->
|
||||||
return $ T.pack $ B8.unpack bs
|
return $ T.pack $ B8.unpack bs
|
||||||
e -> E.throwIO e)
|
e -> throwError e)
|
||||||
|
|
||||||
readFile' :: MonadIO m => FilePath -> m BL.ByteString
|
inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType))
|
||||||
readFile' "-" = liftIO BL.getContents
|
-> BL.ByteString
|
||||||
readFile' f = liftIO $ BL.readFile f
|
inputToLazyByteString (_, (bs,_)) = BL.fromStrict bs
|
||||||
|
|
||||||
writeFnBinary :: MonadIO m => FilePath -> BL.ByteString -> m ()
|
writeFnBinary :: FilePath -> BL.ByteString -> IO ()
|
||||||
writeFnBinary "-" = liftIO . BL.putStr
|
writeFnBinary "-" = BL.putStr
|
||||||
writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
|
writeFnBinary f = BL.writeFile (UTF8.encodePath f)
|
||||||
|
|
||||||
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
|
writerFn :: IO.Newline -> FilePath -> Text -> IO ()
|
||||||
writerFn eol "-" = liftIO . UTF8.putStrWith eol
|
writerFn eol "-" = UTF8.putStrWith eol
|
||||||
writerFn eol f = liftIO . UTF8.writeFileWith eol f
|
writerFn eol f = UTF8.writeFileWith eol f
|
||||||
|
|
Loading…
Reference in a new issue