Reorganize App to make it easier to limit IO in main loop.
Previously we used liftIO fairly liberally. The code has been restructured to avoid this. A small behavior change is that pandoc will now fall back to latin1 encoding for inputs that can't be read as UTF-8. This is what it did previously for content fetched from the web and not marked as to content type. It makes sense to do the same for local files.
This commit is contained in:
parent
c39ddeb8f8
commit
fd7c140cde
1 changed files with 99 additions and 84 deletions
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE TupleSections #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
@ -27,7 +28,7 @@ module Text.Pandoc.App (
|
|||
import qualified Control.Exception as E
|
||||
import Control.Monad ( (>=>), when )
|
||||
import Control.Monad.Trans ( MonadIO(..) )
|
||||
import Control.Monad.Except (throwError)
|
||||
import Control.Monad.Except (throwError, catchError)
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
|
@ -38,6 +39,7 @@ import Data.Text (Text)
|
|||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TE
|
||||
import qualified Data.Text.Encoding as TSE
|
||||
import qualified Data.Text.Encoding.Error as TE
|
||||
import qualified Data.Text.Encoding.Error as TSE
|
||||
import Network.URI (URI (..), parseURI)
|
||||
|
@ -48,7 +50,7 @@ import System.IO (nativeNewline, stdout)
|
|||
import qualified System.IO as IO (Newline (..))
|
||||
import Text.Pandoc
|
||||
import Text.Pandoc.Builder (setMeta)
|
||||
import Text.Pandoc.MIME (getCharset)
|
||||
import Text.Pandoc.MIME (getCharset, MimeType)
|
||||
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
|
||||
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
|
||||
IpynbOutput (..))
|
||||
|
@ -69,6 +71,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
|
|||
import System.Posix.IO (stdOutput)
|
||||
import System.Posix.Terminal (queryTerminal)
|
||||
#endif
|
||||
import Debug.Trace
|
||||
|
||||
convertWithOpts :: Opt -> IO ()
|
||||
convertWithOpts opts = do
|
||||
|
@ -94,14 +97,14 @@ convertWithOpts opts = do
|
|||
let sources = case optInputFiles opts of
|
||||
Just xs | not (optIgnoreArgs opts) -> xs
|
||||
_ -> ["-"]
|
||||
|
||||
#ifdef _WINDOWS
|
||||
let istty = True
|
||||
#else
|
||||
istty <- liftIO $ queryTerminal stdOutput
|
||||
#endif
|
||||
|
||||
(output, reports) <- runIOorExplode $ do
|
||||
res <- runIO $ do
|
||||
|
||||
setTrace (optTrace opts)
|
||||
setVerbosity verbosity
|
||||
setUserDataDir datadir
|
||||
|
@ -110,6 +113,8 @@ convertWithOpts opts = do
|
|||
setInputFiles (fromMaybe ["-"] (optInputFiles opts))
|
||||
setOutputFile (optOutputFile opts)
|
||||
|
||||
inputs <- readSources sources
|
||||
|
||||
-- assign reader and writer based on options and filenames
|
||||
readerName <- case optFrom opts of
|
||||
Just f -> return f
|
||||
|
@ -135,20 +140,6 @@ convertWithOpts opts = do
|
|||
|
||||
(reader, readerExts) <- getReader readerName
|
||||
|
||||
let convertTabs = tabFilter (if optPreserveTabs opts ||
|
||||
readerNameBase == "t2t" ||
|
||||
readerNameBase == "man"
|
||||
then 0
|
||||
else optTabStop opts)
|
||||
|
||||
|
||||
let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
|
||||
readSources srcs =
|
||||
mapM (\fp -> do
|
||||
t <- readSource fp
|
||||
return (if fp == "-" then "" else fp, convertTabs t)) srcs
|
||||
|
||||
|
||||
outputSettings <- optToOutputSettings opts
|
||||
let format = outputFormat outputSettings
|
||||
let writer = outputWriter outputSettings
|
||||
|
@ -236,20 +227,11 @@ convertWithOpts opts = do
|
|||
_ -> Format format) :))
|
||||
$ []
|
||||
|
||||
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
|
||||
sourceToDoc sources' =
|
||||
case reader of
|
||||
TextReader r
|
||||
| readerNameBase == "json" ->
|
||||
mconcat <$> mapM (readSource >=> r readerOpts) sources'
|
||||
| optFileScope opts ->
|
||||
-- Read source and convert tabs (see #6709)
|
||||
let readSource' = fmap convertTabs . readSource
|
||||
in mconcat <$> mapM (readSource' >=> r readerOpts) sources'
|
||||
| otherwise ->
|
||||
readSources sources' >>= r readerOpts
|
||||
ByteStringReader r ->
|
||||
mconcat <$> mapM (readFile' >=> r readerOpts) sources'
|
||||
let convertTabs = tabFilter (if optPreserveTabs opts ||
|
||||
readerNameBase == "t2t" ||
|
||||
readerNameBase == "man"
|
||||
then 0
|
||||
else optTabStop opts)
|
||||
|
||||
|
||||
when (readerNameBase == "markdown_github" ||
|
||||
|
@ -275,7 +257,22 @@ convertWithOpts opts = do
|
|||
maybe id (setMeta "citation-abbreviations")
|
||||
(optCitationAbbreviations opts) $ mempty
|
||||
|
||||
doc <- sourceToDoc sources >>=
|
||||
doc <- (case reader of
|
||||
TextReader r
|
||||
| readerNameBase == "json" ->
|
||||
mconcat <$>
|
||||
mapM (inputToText convertTabs
|
||||
>=> r readerOpts . (:[])) inputs
|
||||
| optFileScope opts ->
|
||||
mconcat <$> mapM
|
||||
(inputToText convertTabs
|
||||
>=> r readerOpts . (:[]))
|
||||
inputs
|
||||
| otherwise -> mapM (inputToText convertTabs) inputs
|
||||
>>= r readerOpts
|
||||
ByteStringReader r ->
|
||||
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
|
||||
>>=
|
||||
( (if isJust (optExtractMedia opts)
|
||||
then fillMediaBag
|
||||
else return)
|
||||
|
@ -310,19 +307,22 @@ convertWithOpts opts = do
|
|||
reports <- getLog
|
||||
return (output, reports)
|
||||
|
||||
case optLogFile opts of
|
||||
Nothing -> return ()
|
||||
Just logfile -> BL.writeFile logfile (encodeLogMessages reports)
|
||||
let isWarning msg = messageVerbosity msg == WARNING
|
||||
when (optFailIfWarnings opts && any isWarning reports) $
|
||||
E.throwIO PandocFailOnWarningError
|
||||
let eol = case optEol opts of
|
||||
CRLF -> IO.CRLF
|
||||
LF -> IO.LF
|
||||
Native -> nativeNewline
|
||||
case output of
|
||||
TextOutput t -> writerFn eol outputFile t
|
||||
BinaryOutput bs -> writeFnBinary outputFile bs
|
||||
case res of
|
||||
Left e -> E.throwIO e
|
||||
Right (output, reports) -> do
|
||||
case optLogFile opts of
|
||||
Nothing -> return ()
|
||||
Just logfile -> BL.writeFile logfile (encodeLogMessages reports)
|
||||
let isWarning msg = messageVerbosity msg == WARNING
|
||||
when (optFailIfWarnings opts && any isWarning reports) $
|
||||
E.throwIO PandocFailOnWarningError
|
||||
let eol = case optEol opts of
|
||||
CRLF -> IO.CRLF
|
||||
LF -> IO.LF
|
||||
Native -> nativeNewline
|
||||
case output of
|
||||
TextOutput t -> writerFn eol outputFile t
|
||||
BinaryOutput bs -> writeFnBinary outputFile bs
|
||||
|
||||
data PandocOutput = TextOutput Text | BinaryOutput BL.ByteString
|
||||
deriving (Show)
|
||||
|
@ -344,49 +344,64 @@ adjustMetadata f (Pandoc meta bs) = Pandoc (f meta) bs
|
|||
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
|
||||
applyTransforms transforms d = return $ foldr ($) d transforms
|
||||
|
||||
readSource :: FilePath -> PandocIO Text
|
||||
readSource src = case parseURI src of
|
||||
Just u | uriScheme u `elem` ["http:","https:"] ->
|
||||
readURI src
|
||||
| uriScheme u == "file:" -> liftIO $
|
||||
readTextFile (uriPathToPath $ T.pack $ uriPath u)
|
||||
_ -> liftIO $ readTextFile src
|
||||
where readTextFile :: FilePath -> IO Text
|
||||
readTextFile fp = do
|
||||
bs <- if src == "-"
|
||||
then BS.getContents
|
||||
else BS.readFile fp
|
||||
E.catch (return $! UTF8.toText bs)
|
||||
(\e -> E.throwIO $ case e of
|
||||
TSE.DecodeError _ (Just w) ->
|
||||
case BS.elemIndex w bs of
|
||||
Just offset ->
|
||||
PandocUTF8DecodingError (T.pack fp) offset w
|
||||
_ -> PandocUTF8DecodingError (T.pack fp) 0 w
|
||||
_ -> PandocAppError (tshow e))
|
||||
readSources :: (PandocMonad m, MonadIO m)
|
||||
=> [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))]
|
||||
readSources srcs =
|
||||
mapM (\fp -> do t <- readSource fp
|
||||
return (if fp == "-" then "" else fp, t)) srcs
|
||||
|
||||
readURI :: FilePath -> PandocIO Text
|
||||
readURI src = do
|
||||
(bs, mt) <- openURL (T.pack src)
|
||||
readSource :: (PandocMonad m, MonadIO m)
|
||||
=> FilePath -> m (BS.ByteString, Maybe MimeType)
|
||||
readSource "-" = (,Nothing) <$> readStdinStrict
|
||||
readSource src =
|
||||
case parseURI src of
|
||||
Just u | uriScheme u `elem` ["http:","https:"] -> openURL (T.pack src)
|
||||
| uriScheme u == "file:" ->
|
||||
(,Nothing) <$>
|
||||
readFileStrict (uriPathToPath $ T.pack $ uriPath u)
|
||||
_ -> (,Nothing) <$> readFileStrict src
|
||||
|
||||
utf8ToText :: PandocMonad m => FilePath -> BS.ByteString -> m Text
|
||||
utf8ToText fp bs =
|
||||
case TSE.decodeUtf8' . dropBOM $ bs of
|
||||
Left (TSE.DecodeError _ (Just w)) ->
|
||||
case BS.elemIndex w bs of
|
||||
Just offset -> throwError $ PandocUTF8DecodingError (T.pack fp) offset w
|
||||
Nothing -> throwError $ PandocUTF8DecodingError (T.pack fp) 0 w
|
||||
Left e -> throwError $ PandocAppError (tshow e)
|
||||
Right t -> return t
|
||||
where
|
||||
dropBOM bs' =
|
||||
if "\xEF\xBB\xBF" `BS.isPrefixOf` bs'
|
||||
then BS.drop 3 bs'
|
||||
else bs'
|
||||
|
||||
|
||||
inputToText :: PandocMonad m
|
||||
=> (Text -> Text)
|
||||
-> (FilePath, (BS.ByteString, Maybe MimeType))
|
||||
-> m (FilePath, Text)
|
||||
inputToText convTabs (fp, (bs,mt)) =
|
||||
(fp,) . convTabs . T.filter (/='\r') <$>
|
||||
case mt >>= getCharset of
|
||||
Just "UTF-8" -> return $ UTF8.toText bs
|
||||
Just "UTF-8" -> utf8ToText fp bs
|
||||
Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
|
||||
Just charset -> throwError $ PandocUnsupportedCharsetError charset
|
||||
Nothing -> liftIO $ -- try first as UTF-8, then as latin1
|
||||
E.catch (return $! UTF8.toText bs)
|
||||
(\case
|
||||
TSE.DecodeError{} ->
|
||||
Nothing -> catchError
|
||||
(utf8ToText fp bs)
|
||||
(\case
|
||||
PandocUTF8DecodingError{} ->
|
||||
return $ T.pack $ B8.unpack bs
|
||||
e -> E.throwIO e)
|
||||
e -> throwError e)
|
||||
|
||||
readFile' :: MonadIO m => FilePath -> m BL.ByteString
|
||||
readFile' "-" = liftIO BL.getContents
|
||||
readFile' f = liftIO $ BL.readFile f
|
||||
inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType))
|
||||
-> BL.ByteString
|
||||
inputToLazyByteString (_, (bs,_)) = BL.fromStrict bs
|
||||
|
||||
writeFnBinary :: MonadIO m => FilePath -> BL.ByteString -> m ()
|
||||
writeFnBinary "-" = liftIO . BL.putStr
|
||||
writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
|
||||
writeFnBinary :: FilePath -> BL.ByteString -> IO ()
|
||||
writeFnBinary "-" = BL.putStr
|
||||
writeFnBinary f = BL.writeFile (UTF8.encodePath f)
|
||||
|
||||
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
|
||||
writerFn eol "-" = liftIO . UTF8.putStrWith eol
|
||||
writerFn eol f = liftIO . UTF8.writeFileWith eol f
|
||||
writerFn :: IO.Newline -> FilePath -> Text -> IO ()
|
||||
writerFn eol "-" = UTF8.putStrWith eol
|
||||
writerFn eol f = UTF8.writeFileWith eol f
|
||||
|
|
Loading…
Reference in a new issue