From 0a768f1cc568722973302554265efa414d097da3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 21 Dec 2015 17:22:12 -0800 Subject: [PATCH] Added preliminary support for PDF creation via wkhtmltopdf. To use this: pandoc -t html5 -o result.pdf (and add `--mathjax` if you have math.) --- README | 11 ++++---- pandoc.hs | 26 +++++++++--------- src/Text/Pandoc/PDF.hs | 61 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 20 deletions(-) diff --git a/README b/README index ebc8f58d4..ea6226d38 100644 --- a/README +++ b/README @@ -24,7 +24,7 @@ markup], [Haddock markup], [OPML], [Emacs Org mode], [DocBook], [FictionBook2], [Textile], [groff man] pages, [Emacs Org mode], [AsciiDoc], [InDesign ICML], and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can also produce [PDF] output -on systems where LaTeX or ConTeXt is installed. +on systems where LaTeX, ConTeXt, or `wkhtmltopdf` is installed. Pandoc's enhanced version of Markdown includes syntax for [footnotes], [tables], flexible [ordered lists], [definition lists], [fenced code blocks], @@ -189,12 +189,13 @@ any header file. The [`natbib`], [`biblatex`], [`bibtex`], and [`biber`] packages can optionally be used for [citation rendering]. These are included with all recent versions of [TeX Live]. -Alternatively, pandoc can use ConTeXt to create a PDF. +Alternatively, pandoc can use ConTeXt or `wkhtmltopdf` to create a PDF. To do this, specify an output file with a `.pdf` extension, -as before, but add `-t context` to the command line. +as before, but add `-t context` or `-t html5` to the command line. -PDF output can be controlled using [variables for LaTeX] -or [variables for ConTeXt]. +PDF output can be controlled using [variables for LaTeX] (if +LaTeX is used), [variables for ConTeXt] (if ConTeXt is used), +and `--css` (if `wkhtmltopdf` is used). [`amsfonts`]: https://ctan.org/pkg/amsfonts [`amsmath`]: https://ctan.org/pkg/amsmath diff --git a/pandoc.hs b/pandoc.hs index cfb9adc1c..a4906382a 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1168,6 +1168,7 @@ convertWithOpts opts args = do let laTeXOutput = "latex" `isPrefixOf` writerName' || "beamer" `isPrefixOf` writerName' let conTeXtOutput = "context" `isPrefixOf` writerName' + let html5Output = "html5" `isPrefixOf` writerName' let laTeXInput = "latex" `isPrefixOf` readerName' || "beamer" `isPrefixOf` readerName' @@ -1367,27 +1368,28 @@ convertWithOpts opts args = do IOByteStringWriter f -> f writerOptions doc' >>= writeBinary PureStringWriter f | pdfOutput -> do - -- make sure writer is latex or beamer - unless (laTeXOutput || conTeXtOutput) $ + -- make sure writer is latex or beamer or context or html5 + unless (laTeXOutput || conTeXtOutput || html5Output) $ err 47 $ "cannot produce pdf output with " ++ writerName' ++ " writer" - let texprog = if conTeXtOutput - then "context" - else latexEngine - -- check for latex program - mbLatex <- findExecutable texprog - when (isNothing mbLatex) $ - err 41 $ texprog ++ " not found. " ++ - texprog ++ " is needed for pdf output." + let pdfprog = case () of + _ | conTeXtOutput -> "context" + _ | html5Output -> "wkhtmltopdf" + _ -> latexEngine + -- check for pdf creating program + mbPdfProg <- findExecutable pdfprog + when (isNothing mbPdfProg) $ + err 41 $ pdfprog ++ " not found. " ++ + pdfprog ++ " is needed for pdf output." - res <- makePDF texprog f writerOptions doc' + res <- makePDF pdfprog f writerOptions doc' case res of Right pdf -> writeBinary pdf Left err' -> do B.hPutStr stderr err' B.hPut stderr $ B.pack [10] - err 43 "Error producing PDF from TeX source" + err 43 "Error producing PDF" | otherwise -> selfcontain (f writerOptions doc' ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 0e533ede8..72b563044 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -38,6 +38,7 @@ import Text.Pandoc.Compat.Monoid ((<>)) import System.Exit (ExitCode (..)) import System.FilePath import System.IO (stderr, stdout) +import System.IO.Temp (withTempFile) import System.Directory import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment @@ -49,7 +50,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) import Text.Pandoc.Shared (fetchItem', warn, withTempDir, inDirectory) -import Text.Pandoc.Options (WriterOptions(..)) +import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Process (pipeProcess) import qualified Data.ByteString.Lazy as BL @@ -63,18 +64,29 @@ changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories #endif -makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, + -- xelatex, context, wkhtmltopdf) -> (WriterOptions -> Pandoc -> String) -- ^ writer -> WriterOptions -- ^ options -> Pandoc -- ^ document -> IO (Either ByteString ByteString) +makePDF "wkhtmltopdf" writer opts doc = + wkhtml2pdf (writerVerbose opts) args source + where args = case writerHTMLMathMethod opts of + -- with MathJax, wait til all math is rendered: + MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", + "--window-status", "mathjax_loaded"] + _ -> [] + source = writer opts doc makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do doc' <- handleImages opts tmpdir doc let source = writer opts doc' args = writerLaTeXArgs opts case program of "context" -> context2pdf (writerVerbose opts) tmpdir source - _ -> tex2pdf' (writerVerbose opts) args tmpdir program source + _ | program `elem` ["pdflatex", "lualatex", "xelatex"] + -> tex2pdf' (writerVerbose opts) args tmpdir program source + _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images @@ -235,6 +247,49 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do else return Nothing return (exit, out <> err, pdf) +wkhtml2pdf :: Bool -- ^ Verbose output + -> [String] -- ^ Args to wkhtmltopdf + -> String -- ^ HTML5 source + -> IO (Either ByteString ByteString) +wkhtml2pdf verbose args source = do + file <- withTempFile "." "wkhtml2pdf.html" $ \fp _ -> return fp + pdfFile <- withTempFile "." "wkhtml2pdf.pdf" $ \fp _ -> return fp + UTF8.writeFile file source + let programArgs = args ++ [file, pdfFile] + env' <- getEnvironment + when verbose $ do + putStrLn "[makePDF] Command line:" + putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs) + putStr "\n" + putStrLn "[makePDF] Environment:" + mapM_ print env' + putStr "\n" + putStrLn $ "[makePDF] Contents of " ++ file ++ ":" + B.readFile file >>= B.putStr + putStr "\n" + (exit, out, err) <- pipeProcess (Just env') "wkhtmltopdf" + programArgs BL.empty + removeFile file + when verbose $ do + B.hPutStr stdout out + B.hPutStr stderr err + putStr "\n" + pdfExists <- doesFileExist pdfFile + mbPdf <- if pdfExists + -- We read PDF as a strict bytestring to make sure that the + -- temp directory is removed on Windows. + -- See https://github.com/jgm/pandoc/issues/1192. + then do + res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile + removeFile pdfFile + return res + else return Nothing + let log' = out <> err + return $ case (exit, mbPdf) of + (ExitFailure _, _) -> Left log' + (ExitSuccess, Nothing) -> Left "" + (ExitSuccess, Just pdf) -> Right pdf + context2pdf :: Bool -- ^ Verbose output -> FilePath -- ^ temp directory for output -> String -- ^ ConTeXt source