Support for PDF generation via weasyprint and prince (#3909)

* Rename --latex-engine to --pdf-engine
* In `Text.Pandoc.Options.WriterOptions`, rename `writerLaTeXEngine` to `writerPdfEngine` and `writerLaTeXArgs` to `writerPdfArgs`.
 * Add support for `weasyprint` and `prince`, in addition to `wkhtmltopdf`, for PDF generation via HTML (closes #3906).
* `Text.Pandoc.PDF.html2pdf`: use stdin instead of intermediate HTML file
This commit is contained in:
Mauro Bieg 2017-09-12 05:18:42 +02:00 committed by John MacFarlane
parent ddecd72783
commit c7e3c1ec17
6 changed files with 127 additions and 76 deletions

View file

@ -186,12 +186,12 @@ Creating a PDF
--------------
To produce a PDF, specify an output file with a `.pdf` extension.
By default, pandoc will use LaTeX to convert it to PDF:
By default, pandoc will use LaTeX to create the PDF:
pandoc test.txt -o test.pdf
Production of a PDF requires that a LaTeX engine be installed (see
`--latex-engine`, below), and assumes that the following LaTeX packages
`--pdf-engine`, below), and assumes that the following LaTeX packages
are available: [`amsfonts`], [`amsmath`], [`lm`], [`unicode-math`],
[`ifxetex`], [`ifluatex`], [`listings`] (if the
`--listings` option is used), [`fancyvrb`], [`longtable`],
@ -210,16 +210,19 @@ if added to the template or included in any header file. The
optionally be used for [citation rendering]. These are included
with all recent versions of [TeX Live].
Alternatively, pandoc can use ConTeXt, `wkhtmltopdf`, or
`pdfroff` to create a PDF. To do this, specify an output file
with a `.pdf` extension, as before, but add `-t context`, `-t
html5`, or `-t ms` to the command line.
Alternatively, pandoc can use [ConTeXt], `pdfroff`, or any of the
following HTML/CSS-to-PDF-engines, to create a PDF: [`wkhtmltopdf`],
[`weasyprint`] or `prince`.
To do this, specify an output file with a `.pdf` extension, as before,
but add the `--pdf-engine` option or `-t context`, `-t html`, or `-t ms`
to the command line (`-t html` defaults to `--pdf-engine=wkhtmltopdf`).
PDF output can be controlled using [variables for LaTeX] (if
LaTeX is used) and [variables for ConTeXt] (if ConTeXt is used).
When using an HTML/CSS-to-PDF-engine, `--css` affects the output.
If `wkhtmltopdf` is used, then the variables `margin-left`,
`margin-right`, `margin-top`, `margin-bottom`, and `papersize`
will affect the output, as will `--css`.
will affect the output.
[`amsfonts`]: https://ctan.org/pkg/amsfonts
[`amsmath`]: https://ctan.org/pkg/amsmath
@ -251,6 +254,8 @@ will affect the output, as will `--css`.
[`bibtex`]: https://ctan.org/pkg/bibtex
[`biber`]: https://ctan.org/pkg/biber
[TeX Live]: http://www.tug.org/texlive/
[`wkhtmltopdf`]: https://wkhtmltopdf.org
[`weasyprint`]: http://weasyprint.org
Options
=======
@ -990,15 +995,15 @@ Options affecting specific writers
the EPUB-specific contents. The default is `EPUB`. To put
the EPUB contents in the top level, use an empty string.
`--latex-engine=pdflatex`|`lualatex`|`xelatex`
`--pdf-engine=pdflatex`|`lualatex`|`xelatex`|`wkhtmltopdf`|`weasyprint`|`prince`|`context`|`pdfroff`
: Use the specified LaTeX engine when producing PDF output.
: Use the specified engine when producing PDF output.
The default is `pdflatex`. If the engine is not in your PATH,
the full path of the engine may be specified here.
`--latex-engine-opt=`*STRING*
`--pdf-engine-opt=`*STRING*
: Use the given string as a command-line argument to the `latex-engine`.
: Use the given string as a command-line argument to the `pdf-engine`.
If used multiple times, the arguments are provided with spaces between
them. Note that no check for duplicate options is done.
@ -1300,7 +1305,7 @@ Language variables
[Unicode Bidirectional Algorithm].
When using LaTeX for bidirectional documents, only the `xelatex` engine
is fully supported (use `--latex-engine=xelatex`).
is fully supported (use `--pdf-engine=xelatex`).
[BCP 47]: https://tools.ietf.org/html/bcp47
[Unicode Bidirectional Algorithm]: http://www.w3.org/International/articles/inline-bidi-markup/uba-basics

View file

@ -29,8 +29,8 @@ _pandoc()
COMPREPLY=( $(compgen -W "references javascript none" -- ${cur}) )
return 0
;;
--latex-engine)
COMPREPLY=( $(compgen -W "pdflatex lualatex xelatex" -- ${cur}) )
--pdf-engine)
COMPREPLY=( $(compgen -W "pdflatex lualatex xelatex wkhtmltopdf weasyprint prince context pdfroff" -- ${cur}) )
return 0
;;
--print-default-data-file)

View file

@ -121,6 +121,53 @@ parseOptions options' defaults = do
opts <- foldl (>>=) (return defaults) actions
return (opts{ optInputFiles = args })
latexEngines :: [String]
latexEngines = ["pdflatex", "lualatex", "xelatex"]
htmlEngines :: [String]
htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"]
pdfEngines :: [String]
pdfEngines = latexEngines ++ htmlEngines ++ ["context", "pdfroff"]
pdfWriterAndProg :: Maybe String -- ^ user-specified writer name
-> Maybe String -- ^ user-specified pdf-engine
-> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg)
pdfWriterAndProg mWriter mEngine = do
let panErr msg = liftIO $ E.throwIO $ PandocAppError msg
case go mWriter mEngine of
(Right writ, Right prog) -> return (writ, Just prog)
(Left err, _) -> panErr err
(_, Left err) -> panErr err
where
go Nothing Nothing = (Right "latex", Right $ head latexEngines)
go (Just writer) Nothing = (Right writer, engineForWriter writer)
go Nothing (Just engine) = (writerForEngine engine, Right engine)
go (Just writer) (Just engine) =
let (Right shouldFormat) = writerForEngine engine
userFormat = case map toLower writer of
"html5" -> "html"
x -> x
in if userFormat == shouldFormat
then (Right writer, Right engine)
else (Left $ "pdf-engine " ++ engine ++ " is not compatible with output format "
++ writer ++ ", please use `-t " ++ shouldFormat ++ "`", Left "")
writerForEngine "context" = Right "context"
writerForEngine "pdfroff" = Right "ms"
writerForEngine en
| takeBaseName en `elem` latexEngines = Right "latex"
| takeBaseName en `elem` htmlEngines = Right "html"
writerForEngine _ = Left "pdf-engine not known"
engineForWriter "context" = Right "context"
engineForWriter "ms" = Right "pdfroff"
engineForWriter "latex" = Right $ head latexEngines
engineForWriter format
| format `elem` ["html", "html5"] = Right $ head htmlEngines
| otherwise = Left $ "cannot produce pdf output with output format " ++ format
convertWithOpts :: Opt -> IO ()
convertWithOpts opts = do
let args = optInputFiles opts
@ -171,18 +218,16 @@ convertWithOpts opts = do
else "markdown") sources
Just x -> map toLower x
let writerName = case optWriter opts of
Nothing -> defaultWriterName outputFile
Just x -> map toLower x
let format = takeWhile (`notElem` ['+','-'])
$ takeFileName writerName -- in case path to lua script
let nonPdfWriterName Nothing = defaultWriterName outputFile
nonPdfWriterName (Just x) = map toLower x
let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
(writerName, maybePdfProg) <- if pdfOutput
then pdfWriterAndProg (optWriter opts) (optPdfEngine opts)
else return (nonPdfWriterName $ optWriter opts, Nothing)
let laTeXOutput = format `elem` ["latex", "beamer"]
let conTeXtOutput = format == "context"
let html5Output = format == "html5" || format == "html"
let msOutput = format == "ms"
let format = takeWhile (`notElem` ['+','-'])
$ takeFileName writerName -- in case path to lua script
-- disabling the custom writer for now
(writer, writerExts) <-
@ -417,7 +462,7 @@ convertWithOpts opts = do
, writerEpubChapterLevel = optEpubChapterLevel opts
, writerTOCDepth = optTOCDepth opts
, writerReferenceDoc = optReferenceDoc opts
, writerLaTeXArgs = optLaTeXEngineArgs opts
, writerPdfArgs = optPdfEngineArgs opts
, writerSyntaxMap = syntaxMap
}
@ -475,27 +520,14 @@ convertWithOpts opts = do
case writer of
ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile
TextWriter f
| pdfOutput -> do
-- make sure writer is latex, beamer, context, html5 or ms
unless (laTeXOutput || conTeXtOutput || html5Output ||
msOutput) $
liftIO $ E.throwIO $ PandocAppError $
"cannot produce pdf output with " ++ format ++ " writer"
let pdfprog = case () of
_ | conTeXtOutput -> "context"
| html5Output -> "wkhtmltopdf"
| html5Output -> "wkhtmltopdf"
| msOutput -> "pdfroff"
| otherwise -> optLaTeXEngine opts
res <- makePDF pdfprog f writerOptions verbosity media doc
TextWriter f -> case maybePdfProg of
Just pdfProg -> do
res <- makePDF pdfProg f writerOptions verbosity media doc
case res of
Right pdf -> writeFnBinary outputFile pdf
Left err' -> liftIO $
E.throwIO $ PandocPDFError (UTF8.toStringLazy err')
| otherwise -> do
Nothing -> do
let htmlFormat = format `elem`
["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"]
handleEntities = if (htmlFormat ||
@ -605,8 +637,8 @@ data Opt = Opt
, optDataDir :: Maybe FilePath
, optCiteMethod :: CiteMethod -- ^ Method to output cites
, optListings :: Bool -- ^ Use listings package for code blocks
, optLaTeXEngine :: String -- ^ Program to use for latex -> pdf
, optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine
, optPdfEngine :: Maybe String -- ^ Program to use for latex/html -> pdf
, optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine
, optSlideLevel :: Maybe Int -- ^ Header level that creates slides
, optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
, optAscii :: Bool -- ^ Use ascii characters only in html
@ -681,8 +713,8 @@ defaultOpts = Opt
, optDataDir = Nothing
, optCiteMethod = Citeproc
, optListings = False
, optLaTeXEngine = "pdflatex"
, optLaTeXEngineArgs = []
, optPdfEngine = Nothing
, optPdfEngineArgs = []
, optSlideLevel = Nothing
, optSetextHeaders = True
, optAscii = False
@ -778,7 +810,6 @@ defaultWriterName x =
".org" -> "org"
".asciidoc" -> "asciidoc"
".adoc" -> "asciidoc"
".pdf" -> "latex"
".fb2" -> "fb2"
".opml" -> "opml"
".icml" -> "icml"
@ -1314,23 +1345,24 @@ options =
"NUMBER")
"" -- "Header level at which to split chapters in EPUB"
, Option "" ["latex-engine"]
, Option "" ["pdf-engine"]
(ReqArg
(\arg opt -> do
let b = takeBaseName arg
if b `elem` ["pdflatex", "lualatex", "xelatex"]
then return opt { optLaTeXEngine = arg }
else E.throwIO $ PandocOptionError "latex-engine must be pdflatex, lualatex, or xelatex.")
if b `elem` pdfEngines
then return opt { optPdfEngine = Just arg }
else E.throwIO $ PandocOptionError $ "pdf-engine must be one of "
++ intercalate ", " pdfEngines)
"PROGRAM")
"" -- "Name of latex program to use in generating PDF"
"" -- "Name of program to use in generating PDF"
, Option "" ["latex-engine-opt"]
, Option "" ["pdf-engine-opt"]
(ReqArg
(\arg opt -> do
let oldArgs = optLaTeXEngineArgs opt
return opt { optLaTeXEngineArgs = arg : oldArgs })
let oldArgs = optPdfEngineArgs opt
return opt { optPdfEngineArgs = arg : oldArgs })
"STRING")
"" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used"
"" -- "Flags to pass to the PDF-engine, all instances of this option are accumulated and used"
, Option "" ["bibliography"]
(ReqArg
@ -1590,6 +1622,10 @@ handleUnrecognizedOption "--old-dashes" =
("--old-dashes has been removed. Use +old_dashes extension instead." :)
handleUnrecognizedOption "--no-wrap" =
("--no-wrap has been removed. Use --wrap=none instead." :)
handleUnrecognizedOption "--latex-engine" =
("--latex-engine has been removed. Use --pdf-engine instead." :)
handleUnrecognizedOption "--latex-engine-opt" =
("--latex-engine-opt has been removed. Use --pdf-engine-opt instead." :)
handleUnrecognizedOption "--chapters" =
("--chapters has been removed. Use --top-level-division=chapter instead." :)
handleUnrecognizedOption "--reference-docx" =

View file

@ -96,7 +96,7 @@ handleError (Left e) =
PandocSyntaxMapError s -> err 67 s
PandocFailOnWarningError -> err 3 "Failing because there were warnings."
PandocPDFProgramNotFoundError pdfprog -> err 47 $
pdfprog ++ " not found. " ++ pdfprog ++ " is needed for pdf output."
pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog
PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg
PandocFilterError filtername msg -> err 83 $ "Error running filter " ++
filtername ++ ":\n" ++ msg

View file

@ -216,7 +216,7 @@ data WriterOptions = WriterOptions
, writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
, writerTOCDepth :: Int -- ^ Number of levels to include in TOC
, writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
, writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
, writerPdfArgs :: [String] -- ^ Flags to pass to pdf-engine
, writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
, writerSyntaxMap :: SyntaxMap
} deriving (Show, Data, Typeable, Generic)
@ -252,7 +252,7 @@ instance Default WriterOptions where
, writerEpubChapterLevel = 1
, writerTOCDepth = 3
, writerReferenceDoc = Nothing
, writerLaTeXArgs = []
, writerPdfArgs = []
, writerReferenceLocation = EndOfDocument
, writerSyntaxMap = defaultSyntaxMap
}

View file

@ -37,6 +37,7 @@ import qualified Control.Exception as E
import Control.Monad (unless, when)
import Control.Monad.Trans (MonadIO (..))
import qualified Data.Text as T
import qualified Data.Text.IO as TextIO
import Data.Text (Text)
import qualified Data.ByteString as BS
import Data.ByteString.Lazy (ByteString)
@ -78,8 +79,8 @@ changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
#endif
makePDF :: String -- ^ pdf creator (pdflatex, lualatex,
-- xelatex, context, wkhtmltopdf, pdfroff)
makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex,
-- wkhtmltopdf, weasyprint, prince, context, pdfroff)
-> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer
-> WriterOptions -- ^ options
-> Verbosity -- ^ verbosity level
@ -94,7 +95,7 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do
_ -> []
meta' <- metaToJSON opts (return . stringify) (return . stringify) meta
let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd
let args = mathArgs ++
let args = writerPdfArgs opts ++ mathArgs ++
concatMap toArgs
[("page-size", getField "papersize" meta')
,("title", getField "title" meta')
@ -108,11 +109,19 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do
(getField "margin-left" meta'))
]
source <- writer opts doc
liftIO $ html2pdf verbosity args source
liftIO $ html2pdf verbosity "wkhtmltopdf" args source
makePDF "weasyprint" writer opts verbosity _ doc = do
let args = writerPdfArgs opts
source <- writer opts doc
liftIO $ html2pdf verbosity "weasyprint" args source
makePDF "prince" writer opts verbosity _ doc = do
let args = writerPdfArgs opts
source <- writer opts doc
liftIO $ html2pdf verbosity "prince" args source
makePDF "pdfroff" writer opts verbosity _mediabag doc = do
source <- writer opts doc
let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i",
"--no-toc-relocation"]
"--no-toc-relocation"] ++ writerPdfArgs opts
liftIO $ ms2pdf verbosity args source
makePDF program writer opts verbosity mediabag doc = do
let withTemp = if takeBaseName program == "context"
@ -124,7 +133,7 @@ makePDF program writer opts verbosity mediabag doc = do
source <- runIOorExplode $ do
setVerbosity verbosity
writer opts doc'
let args = writerLaTeXArgs opts
let args = writerPdfArgs opts
case takeBaseName program of
"context" -> context2pdf verbosity tmpdir source
prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
@ -212,7 +221,7 @@ tex2pdf' verbosity args tmpDir program source = do
case logmsg of
x | "! Package inputenc Error" `BC.isPrefixOf` x
&& program /= "xelatex"
-> "\nTry running pandoc with --latex-engine=xelatex."
-> "\nTry running pandoc with --pdf-engine=xelatex."
_ -> ""
return $ Left $ logmsg <> extramsg
(ExitSuccess, Nothing) -> return $ Left ""
@ -347,32 +356,33 @@ ms2pdf verbosity args source = do
ExitSuccess -> Right out
html2pdf :: Verbosity -- ^ Verbosity level
-> [String] -- ^ Args to wkhtmltopdf
-> String -- ^ Program (wkhtmltopdf, weasyprint or prince)
-> [String] -- ^ Args to program
-> Text -- ^ HTML5 source
-> IO (Either ByteString ByteString)
html2pdf verbosity args source = do
file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
html2pdf verbosity program args source = do
pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
BS.writeFile file $ UTF8.fromText source
let programArgs = args ++ [file, pdfFile]
let pdfFileArgName = if program == "prince"
then ["-o"]
else []
let programArgs = args ++ ["-"] ++ pdfFileArgName ++ [pdfFile]
env' <- getEnvironment
when (verbosity >= INFO) $ do
putStrLn "[makePDF] Command line:"
putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
putStrLn $ program ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn "[makePDF] Environment:"
mapM_ print env'
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
BL.readFile file >>= BL.putStr
putStrLn $ "[makePDF] Contents of intermediate HTML:"
TextIO.putStr source
putStr "\n"
(exit, out) <- E.catch
(pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty)
(pipeProcess (Just env') program programArgs $ BL.fromStrict $ UTF8.fromText source)
(\(e :: IOError) -> if isDoesNotExistError e
then E.throwIO $
PandocPDFProgramNotFoundError "wkhtml2pdf"
PandocPDFProgramNotFoundError program
else E.throwIO e)
removeFile file
when (verbosity >= INFO) $ do
BL.hPutStr stdout out
putStr "\n"