Factor out convertWithOpts' from convertWithOpts.

This runs in any PandocMonad, MonadIO, MonadMask instance.
This commit is contained in:
John MacFarlane 2022-08-13 17:07:46 -07:00
parent 4dfc30ca10
commit 34e00caee5

View file

@ -28,6 +28,7 @@ module Text.Pandoc.App (
import qualified Control.Exception as E
import Control.Monad ( (>=>), when, forM_ )
import Control.Monad.Trans ( MonadIO(..) )
import Control.Monad.Catch ( MonadMask )
import Control.Monad.Except (throwError, catchError)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
@ -77,6 +78,7 @@ import System.Posix.Terminal (queryTerminal)
convertWithOpts :: Opt -> IO ()
convertWithOpts opts = do
let outputFile = fromMaybe "-" (optOutputFile opts)
datadir <- case optDataDir opts of
Nothing -> do
d <- defaultUserDataDir
@ -86,276 +88,19 @@ convertWithOpts opts = do
else Nothing
Just _ -> return $ optDataDir opts
let outputFile = fromMaybe "-" (optOutputFile opts)
let filters = optFilters opts
let verbosity = optVerbosity opts
when (optDumpArgs opts) $
do UTF8.hPutStrLn stdout (T.pack outputFile)
mapM_ (UTF8.hPutStrLn stdout . T.pack)
(fromMaybe ["-"] $ optInputFiles opts)
exitSuccess
let sources = case optInputFiles opts of
Just xs | not (optIgnoreArgs opts) -> xs
_ -> ["-"]
#ifdef _WINDOWS
let istty = True
#else
istty <- liftIO $ queryTerminal stdOutput
#endif
res <- runIO $ do
setTrace (optTrace opts)
setVerbosity verbosity
setUserDataDir datadir
setResourcePath (optResourcePath opts)
setInputFiles (fromMaybe ["-"] (optInputFiles opts))
setOutputFile (optOutputFile opts)
-- assign reader and writer based on options and filenames
readerName <- case optFrom opts of
Just f -> return f
Nothing -> case formatFromFilePaths sources of
Just f' -> return f'
Nothing | sources == ["-"] -> return "markdown"
| any (isURI . T.pack) sources -> return "html"
| otherwise -> do
report $ CouldNotDeduceFormat
(map (T.pack . takeExtension) sources) "markdown"
return "markdown"
let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
let makeSandboxed pureReader =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
maybe id (:) (optEpubCoverImage opts) .
maybe id (:) (optCSL opts) .
maybe id (:) (optCitationAbbreviations opts) $
optEpubFonts opts ++
optBibliography opts
in case pureReader of
TextReader r -> TextReader $ \o t -> sandbox files (r o t)
ByteStringReader r
-> ByteStringReader $ \o t -> sandbox files (r o t)
(reader, readerExts) <-
if ".lua" `T.isSuffixOf` readerName
then return (TextReader (readCustom (T.unpack readerName)), mempty)
else if optSandbox opts
then case runPure (getReader readerName) of
Left e -> throwError e
Right (r, rexts) -> return (makeSandboxed r, rexts)
else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
let writer = outputWriter outputSettings
let writerName = outputWriterName outputSettings
let writerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') writerName
let writerOptions = outputWriterOptions outputSettings
let pdfOutput = isJust $ outputPdfProgram outputSettings
let bibOutput = writerNameBase == "bibtex" ||
writerNameBase == "biblatex" ||
writerNameBase == "csljson"
let standalone = optStandalone opts ||
not (isTextFormat format) ||
pdfOutput ||
bibOutput
when (pdfOutput && readerNameBase == "latex") $
case optInputFiles opts of
Just (inputFile:_) -> report $ UnusualConversion $ T.pack $
"to convert a .tex file to PDF, you get better results by using pdflatex "
<> "(or lualatex or xelatex) directly, try `pdflatex " <> inputFile
<> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
_ -> return ()
-- We don't want to send output to the terminal if the user
-- does 'pandoc -t docx input.txt'; though we allow them to
-- force this with '-o -'. On posix systems, we detect
-- when stdout is being piped and allow output to stdout
-- in that case, but on Windows we can't.
when ((pdfOutput || not (isTextFormat format)) &&
istty && isNothing ( optOutputFile opts)) $
throwError $ PandocAppError $
"Cannot write " <> (if pdfOutput then "pdf" else format) <>
" output to terminal.\n" <>
"Specify an output file using the -o option, or " <>
"use '-o -' to force output to stdout."
abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
case optAbbreviations opts of
Nothing -> readDataFile "abbreviations"
Just f -> readFileStrict f
case lookupMetaString "lang" (optMetadata opts) of
"" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
l -> case parseLang l of
Left _ -> report $ InvalidLang l
Right l' -> setTranslations l'
let readerOpts = def{
readerStandalone = standalone
, readerColumns = optColumns opts
, readerTabStop = optTabStop opts
, readerIndentedCodeClasses = optIndentedCodeClasses opts
, readerDefaultImageExtension =
optDefaultImageExtension opts
, readerTrackChanges = optTrackChanges opts
, readerAbbreviations = abbrevs
, readerExtensions = readerExts
, readerStripComments = optStripComments opts
}
metadataFromFile <-
case optMetadataFiles opts of
[] -> return mempty
paths -> do
-- If format is markdown or commonmark, use the enabled extensions,
-- otherwise treat metadata as pandoc markdown (see #7926, #6832)
let readerOptsMeta =
if readerNameBase == "markdown" || readerNameBase == "commonmark"
then readerOpts
else readerOpts{ readerExtensions = pandocExtensions }
mconcat <$> mapM
(\path -> do raw <- readMetadataFile path
yamlToMeta readerOptsMeta (Just path) raw) paths
let transforms = (case optShiftHeadingLevelBy opts of
0 -> id
x -> (headerShift x :)) .
(if optStripEmptyParagraphs opts
then (stripEmptyParagraphs :)
else id) .
(if extensionEnabled Ext_east_asian_line_breaks
readerExts &&
not (extensionEnabled Ext_east_asian_line_breaks
(writerExtensions writerOptions) &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
else id) .
(case optIpynbOutput opts of
_ | readerNameBase /= "ipynb" -> id
IpynbOutputAll -> id
IpynbOutputNone -> (filterIpynbOutput Nothing :)
IpynbOutputBest -> (filterIpynbOutput (Just $
if htmlFormat format
then Format "html"
else
case format of
"latex" -> Format "latex"
"beamer" -> Format "latex"
_ -> Format format) :))
$ []
let convertTabs = tabFilter (if optPreserveTabs opts ||
readerNameBase == "t2t" ||
readerNameBase == "man" ||
readerNameBase == "tsv"
then 0
else optTabStop opts)
when (readerNameBase == "markdown_github" ||
writerNameBase == "markdown_github") $
report $ Deprecated "markdown_github" "Use gfm instead."
mapM_ (uncurry setRequestHeader) (optRequestHeaders opts)
setNoCheckCertificate (optNoCheckCertificate opts)
let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
isPandocCiteproc _ = False
when (any isPandocCiteproc filters) $
report $ Deprecated "pandoc-citeproc filter"
"Use --citeproc instead."
let cslMetadata =
maybe id (setMeta "csl") (optCSL opts) .
(case optBibliography opts of
[] -> id
xs -> setMeta "bibliography" xs) .
maybe id (setMeta "citation-abbreviations")
(optCitationAbbreviations opts) $ mempty
let filterEnv = Environment readerOpts writerOptions
inputs <- readSources sources
doc <- (case reader of
TextReader r
| readerNameBase == "json" ->
mconcat <$>
mapM (inputToText convertTabs
>=> r readerOpts . (:[])) inputs
| optFileScope opts ->
mconcat <$> mapM
(inputToText convertTabs
>=> r readerOpts . (:[]))
inputs
| otherwise -> mapM (inputToText convertTabs) inputs
>>= r readerOpts
ByteStringReader r ->
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
>>= ( return . adjustMetadata (metadataFromFile <>)
>=> return . adjustMetadata (<> optMetadata opts)
>=> return . adjustMetadata (<> cslMetadata)
>=> applyTransforms transforms
>=> applyFilters filterEnv filters [T.unpack format]
>=> (if not (optSandbox opts) &&
(isJust (optExtractMedia opts)
|| writerNameBase == "docx") -- for fallback pngs
then fillMediaBag
else return)
>=> maybe return extractMedia (optExtractMedia opts)
)
when (writerNameBase == "docx" && not (optSandbox opts)) $ do
-- create fallback pngs for svgs
items <- mediaItems <$> getMediaBag
forM_ items $ \(fp, mt, bs) ->
case T.takeWhile (/=';') mt of
"image/svg+xml" -> do
res <- svgToPng (writerDpi writerOptions) bs
case res of
Right bs' -> do
let fp' = fp <> ".png"
insertMedia fp' (Just "image/png") bs'
Left e -> report $ CouldNotConvertImage (T.pack fp) (tshow e)
_ -> return ()
output <- case writer of
ByteStringWriter f -> BinaryOutput <$> f writerOptions doc
TextWriter f -> case outputPdfProgram outputSettings of
Just pdfProg -> do
res <- makePDF pdfProg (optPdfEngineOpts opts) f
writerOptions doc
case res of
Right pdf -> return $ BinaryOutput pdf
Left err' -> throwError $ PandocPDFError $
TL.toStrict (TE.decodeUtf8With TE.lenientDecode err')
Nothing -> do
let ensureNl t
| standalone = t
| T.null t || T.last t /= '\n' = t <> T.singleton '\n'
| otherwise = t
textOutput <- ensureNl <$> f writerOptions doc
if (optSelfContained opts || optEmbedResources opts) && htmlFormat format
then TextOutput <$> makeSelfContained textOutput
else return $ TextOutput textOutput
reports <- getLog
return (output, reports)
res <- runIO $ convertWithOpts' istty datadir opts
case res of
Left e -> E.throwIO e
Right (output, reports) -> do
@ -373,6 +118,268 @@ convertWithOpts opts = do
TextOutput t -> writerFn eol outputFile t
BinaryOutput bs -> writeFnBinary outputFile bs
convertWithOpts' :: (PandocMonad m, MonadIO m, MonadMask m)
=> Bool
-> Maybe FilePath
-> Opt
-> m (PandocOutput, [LogMessage])
convertWithOpts' istty datadir opts = do
let outputFile = fromMaybe "-" (optOutputFile opts)
let filters = optFilters opts
let verbosity = optVerbosity opts
let sources = case optInputFiles opts of
Just xs | not (optIgnoreArgs opts) -> xs
_ -> ["-"]
setTrace (optTrace opts)
setVerbosity verbosity
setUserDataDir datadir
setResourcePath (optResourcePath opts)
setInputFiles (fromMaybe ["-"] (optInputFiles opts))
setOutputFile (optOutputFile opts)
-- assign reader and writer based on options and filenames
readerName <- case optFrom opts of
Just f -> return f
Nothing -> case formatFromFilePaths sources of
Just f' -> return f'
Nothing | sources == ["-"] -> return "markdown"
| any (isURI . T.pack) sources -> return "html"
| otherwise -> do
report $ CouldNotDeduceFormat
(map (T.pack . takeExtension) sources) "markdown"
return "markdown"
let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName
let makeSandboxed pureReader =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
maybe id (:) (optEpubCoverImage opts) .
maybe id (:) (optCSL opts) .
maybe id (:) (optCitationAbbreviations opts) $
optEpubFonts opts ++
optBibliography opts
in case pureReader of
TextReader r -> TextReader $ \o t -> sandbox files (r o t)
ByteStringReader r
-> ByteStringReader $ \o t -> sandbox files (r o t)
(reader, readerExts) <-
if ".lua" `T.isSuffixOf` readerName
then return (TextReader (readCustom (T.unpack readerName)), mempty)
else if optSandbox opts
then case runPure (getReader readerName) of
Left e -> throwError e
Right (r, rexts) -> return (makeSandboxed r, rexts)
else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
let writer = outputWriter outputSettings
let writerName = outputWriterName outputSettings
let writerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') writerName
let writerOptions = outputWriterOptions outputSettings
let pdfOutput = isJust $ outputPdfProgram outputSettings
let bibOutput = writerNameBase == "bibtex" ||
writerNameBase == "biblatex" ||
writerNameBase == "csljson"
let standalone = optStandalone opts ||
not (isTextFormat format) ||
pdfOutput ||
bibOutput
when (pdfOutput && readerNameBase == "latex") $
case optInputFiles opts of
Just (inputFile:_) -> report $ UnusualConversion $ T.pack $
"to convert a .tex file to PDF, you get better results by using pdflatex "
<> "(or lualatex or xelatex) directly, try `pdflatex " <> inputFile
<> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
_ -> return ()
-- We don't want to send output to the terminal if the user
-- does 'pandoc -t docx input.txt'; though we allow them to
-- force this with '-o -'. On posix systems, we detect
-- when stdout is being piped and allow output to stdout
-- in that case, but on Windows we can't.
when ((pdfOutput || not (isTextFormat format)) &&
istty && isNothing ( optOutputFile opts)) $
throwError $ PandocAppError $
"Cannot write " <> (if pdfOutput then "pdf" else format) <>
" output to terminal.\n" <>
"Specify an output file using the -o option, or " <>
"use '-o -' to force output to stdout."
abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$>
case optAbbreviations opts of
Nothing -> readDataFile "abbreviations"
Just f -> readFileStrict f
case lookupMetaString "lang" (optMetadata opts) of
"" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
l -> case parseLang l of
Left _ -> report $ InvalidLang l
Right l' -> setTranslations l'
let readerOpts = def{
readerStandalone = standalone
, readerColumns = optColumns opts
, readerTabStop = optTabStop opts
, readerIndentedCodeClasses = optIndentedCodeClasses opts
, readerDefaultImageExtension =
optDefaultImageExtension opts
, readerTrackChanges = optTrackChanges opts
, readerAbbreviations = abbrevs
, readerExtensions = readerExts
, readerStripComments = optStripComments opts
}
metadataFromFile <-
case optMetadataFiles opts of
[] -> return mempty
paths -> do
-- If format is markdown or commonmark, use the enabled extensions,
-- otherwise treat metadata as pandoc markdown (see #7926, #6832)
let readerOptsMeta =
if readerNameBase == "markdown" || readerNameBase == "commonmark"
then readerOpts
else readerOpts{ readerExtensions = pandocExtensions }
mconcat <$> mapM
(\path -> do raw <- readMetadataFile path
yamlToMeta readerOptsMeta (Just path) raw) paths
let transforms = (case optShiftHeadingLevelBy opts of
0 -> id
x -> (headerShift x :)) .
(if optStripEmptyParagraphs opts
then (stripEmptyParagraphs :)
else id) .
(if extensionEnabled Ext_east_asian_line_breaks
readerExts &&
not (extensionEnabled Ext_east_asian_line_breaks
(writerExtensions writerOptions) &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
else id) .
(case optIpynbOutput opts of
_ | readerNameBase /= "ipynb" -> id
IpynbOutputAll -> id
IpynbOutputNone -> (filterIpynbOutput Nothing :)
IpynbOutputBest -> (filterIpynbOutput (Just $
if htmlFormat format
then Format "html"
else
case format of
"latex" -> Format "latex"
"beamer" -> Format "latex"
_ -> Format format) :))
$ []
let convertTabs = tabFilter (if optPreserveTabs opts ||
readerNameBase == "t2t" ||
readerNameBase == "man" ||
readerNameBase == "tsv"
then 0
else optTabStop opts)
when (readerNameBase == "markdown_github" ||
writerNameBase == "markdown_github") $
report $ Deprecated "markdown_github" "Use gfm instead."
mapM_ (uncurry setRequestHeader) (optRequestHeaders opts)
setNoCheckCertificate (optNoCheckCertificate opts)
let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
isPandocCiteproc _ = False
when (any isPandocCiteproc filters) $
report $ Deprecated "pandoc-citeproc filter"
"Use --citeproc instead."
let cslMetadata =
maybe id (setMeta "csl") (optCSL opts) .
(case optBibliography opts of
[] -> id
xs -> setMeta "bibliography" xs) .
maybe id (setMeta "citation-abbreviations")
(optCitationAbbreviations opts) $ mempty
let filterEnv = Environment readerOpts writerOptions
inputs <- readSources sources
doc <- (case reader of
TextReader r
| readerNameBase == "json" ->
mconcat <$>
mapM (inputToText convertTabs
>=> r readerOpts . (:[])) inputs
| optFileScope opts ->
mconcat <$> mapM
(inputToText convertTabs
>=> r readerOpts . (:[]))
inputs
| otherwise -> mapM (inputToText convertTabs) inputs
>>= r readerOpts
ByteStringReader r ->
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
>>= ( return . adjustMetadata (metadataFromFile <>)
>=> return . adjustMetadata (<> optMetadata opts)
>=> return . adjustMetadata (<> cslMetadata)
>=> applyTransforms transforms
>=> applyFilters filterEnv filters [T.unpack format]
>=> (if not (optSandbox opts) &&
(isJust (optExtractMedia opts)
|| writerNameBase == "docx") -- for fallback pngs
then fillMediaBag
else return)
>=> maybe return extractMedia (optExtractMedia opts)
)
when (writerNameBase == "docx" && not (optSandbox opts)) $ do
-- create fallback pngs for svgs
items <- mediaItems <$> getMediaBag
forM_ items $ \(fp, mt, bs) ->
case T.takeWhile (/=';') mt of
"image/svg+xml" -> do
res <- svgToPng (writerDpi writerOptions) bs
case res of
Right bs' -> do
let fp' = fp <> ".png"
insertMedia fp' (Just "image/png") bs'
Left e -> report $ CouldNotConvertImage (T.pack fp) (tshow e)
_ -> return ()
output <- case writer of
ByteStringWriter f -> BinaryOutput <$> f writerOptions doc
TextWriter f -> case outputPdfProgram outputSettings of
Just pdfProg -> do
res <- makePDF pdfProg (optPdfEngineOpts opts) f
writerOptions doc
case res of
Right pdf -> return $ BinaryOutput pdf
Left err' -> throwError $ PandocPDFError $
TL.toStrict (TE.decodeUtf8With TE.lenientDecode err')
Nothing -> do
let ensureNl t
| standalone = t
| T.null t || T.last t /= '\n' = t <> T.singleton '\n'
| otherwise = t
textOutput <- ensureNl <$> f writerOptions doc
if (optSelfContained opts || optEmbedResources opts) && htmlFormat format
then TextOutput <$> makeSelfContained textOutput
else return $ TextOutput textOutput
reports <- getLog
return (output, reports)
data PandocOutput = TextOutput Text | BinaryOutput BL.ByteString
deriving (Show)