Added preliminary support for PDF creation via wkhtmltopdf.

To use this:

    pandoc -t html5 -o result.pdf

(and add `--mathjax` if you have math.)
This commit is contained in:
John MacFarlane 2015-12-21 17:22:12 -08:00
parent 32d27896cd
commit 0a768f1cc5
3 changed files with 78 additions and 20 deletions

11
README
View file

@ -24,7 +24,7 @@ markup], [Haddock markup], [OPML], [Emacs Org mode], [DocBook],
[FictionBook2], [Textile], [groff man] pages, [Emacs Org mode],
[AsciiDoc], [InDesign ICML], and [Slidy], [Slideous], [DZSlides],
[reveal.js] or [S5] HTML slide shows. It can also produce [PDF] output
on systems where LaTeX or ConTeXt is installed.
on systems where LaTeX, ConTeXt, or `wkhtmltopdf` is installed.
Pandoc's enhanced version of Markdown includes syntax for [footnotes],
[tables], flexible [ordered lists], [definition lists], [fenced code blocks],
@ -189,12 +189,13 @@ any header file. The [`natbib`], [`biblatex`], [`bibtex`], and [`biber`]
packages can optionally be used for [citation rendering]. These are
included with all recent versions of [TeX Live].
Alternatively, pandoc can use ConTeXt to create a PDF.
Alternatively, pandoc can use ConTeXt or `wkhtmltopdf` to create a PDF.
To do this, specify an output file with a `.pdf` extension,
as before, but add `-t context` to the command line.
as before, but add `-t context` or `-t html5` to the command line.
PDF output can be controlled using [variables for LaTeX]
or [variables for ConTeXt].
PDF output can be controlled using [variables for LaTeX] (if
LaTeX is used), [variables for ConTeXt] (if ConTeXt is used),
and `--css` (if `wkhtmltopdf` is used).
[`amsfonts`]: https://ctan.org/pkg/amsfonts
[`amsmath`]: https://ctan.org/pkg/amsmath

View file

@ -1168,6 +1168,7 @@ convertWithOpts opts args = do
let laTeXOutput = "latex" `isPrefixOf` writerName' ||
"beamer" `isPrefixOf` writerName'
let conTeXtOutput = "context" `isPrefixOf` writerName'
let html5Output = "html5" `isPrefixOf` writerName'
let laTeXInput = "latex" `isPrefixOf` readerName' ||
"beamer" `isPrefixOf` readerName'
@ -1367,27 +1368,28 @@ convertWithOpts opts args = do
IOByteStringWriter f -> f writerOptions doc' >>= writeBinary
PureStringWriter f
| pdfOutput -> do
-- make sure writer is latex or beamer
unless (laTeXOutput || conTeXtOutput) $
-- make sure writer is latex or beamer or context or html5
unless (laTeXOutput || conTeXtOutput || html5Output) $
err 47 $ "cannot produce pdf output with " ++ writerName' ++
" writer"
let texprog = if conTeXtOutput
then "context"
else latexEngine
-- check for latex program
mbLatex <- findExecutable texprog
when (isNothing mbLatex) $
err 41 $ texprog ++ " not found. " ++
texprog ++ " is needed for pdf output."
let pdfprog = case () of
_ | conTeXtOutput -> "context"
_ | html5Output -> "wkhtmltopdf"
_ -> latexEngine
-- check for pdf creating program
mbPdfProg <- findExecutable pdfprog
when (isNothing mbPdfProg) $
err 41 $ pdfprog ++ " not found. " ++
pdfprog ++ " is needed for pdf output."
res <- makePDF texprog f writerOptions doc'
res <- makePDF pdfprog f writerOptions doc'
case res of
Right pdf -> writeBinary pdf
Left err' -> do
B.hPutStr stderr err'
B.hPut stderr $ B.pack [10]
err 43 "Error producing PDF from TeX source"
err 43 "Error producing PDF"
| otherwise -> selfcontain (f writerOptions doc' ++
['\n' | not standalone'])
>>= writerFn outputFile . handleEntities

View file

@ -38,6 +38,7 @@ import Text.Pandoc.Compat.Monoid ((<>))
import System.Exit (ExitCode (..))
import System.FilePath
import System.IO (stderr, stdout)
import System.IO.Temp (withTempFile)
import System.Directory
import Data.Digest.Pure.SHA (showDigest, sha1)
import System.Environment
@ -49,7 +50,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory)
import Text.Pandoc.Options (WriterOptions(..))
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
import Text.Pandoc.Process (pipeProcess)
import qualified Data.ByteString.Lazy as BL
@ -63,18 +64,29 @@ changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
#endif
makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
makePDF :: String -- ^ pdf creator (pdflatex, lualatex,
-- xelatex, context, wkhtmltopdf)
-> (WriterOptions -> Pandoc -> String) -- ^ writer
-> WriterOptions -- ^ options
-> Pandoc -- ^ document
-> IO (Either ByteString ByteString)
makePDF "wkhtmltopdf" writer opts doc =
wkhtml2pdf (writerVerbose opts) args source
where args = case writerHTMLMathMethod opts of
-- with MathJax, wait til all math is rendered:
MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
"--window-status", "mathjax_loaded"]
_ -> []
source = writer opts doc
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
args = writerLaTeXArgs opts
case program of
"context" -> context2pdf (writerVerbose opts) tmpdir source
_ -> tex2pdf' (writerVerbose opts) args tmpdir program source
_ | program `elem` ["pdflatex", "lualatex", "xelatex"]
-> tex2pdf' (writerVerbose opts) args tmpdir program source
_ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
@ -235,6 +247,49 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do
else return Nothing
return (exit, out <> err, pdf)
wkhtml2pdf :: Bool -- ^ Verbose output
-> [String] -- ^ Args to wkhtmltopdf
-> String -- ^ HTML5 source
-> IO (Either ByteString ByteString)
wkhtml2pdf verbose args source = do
file <- withTempFile "." "wkhtml2pdf.html" $ \fp _ -> return fp
pdfFile <- withTempFile "." "wkhtml2pdf.pdf" $ \fp _ -> return fp
UTF8.writeFile file source
let programArgs = args ++ [file, pdfFile]
env' <- getEnvironment
when verbose $ do
putStrLn "[makePDF] Command line:"
putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn "[makePDF] Environment:"
mapM_ print env'
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
B.readFile file >>= B.putStr
putStr "\n"
(exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf"
programArgs BL.empty
removeFile file
when verbose $ do
B.hPutStr stdout out
B.hPutStr stderr err
putStr "\n"
pdfExists <- doesFileExist pdfFile
mbPdf <- if pdfExists
-- We read PDF as a strict bytestring to make sure that the
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
then do
res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
removeFile pdfFile
return res
else return Nothing
let log' = out <> err
return $ case (exit, mbPdf) of
(ExitFailure _, _) -> Left log'
(ExitSuccess, Nothing) -> Left ""
(ExitSuccess, Just pdf) -> Right pdf
context2pdf :: Bool -- ^ Verbose output
-> FilePath -- ^ temp directory for output
-> String -- ^ ConTeXt source