2013-04-26 20:33:15 -07:00
|
|
|
{-# LANGUAGE OverloadedStrings, CPP #-}
|
2012-01-20 18:57:16 -08:00
|
|
|
{-
|
|
|
|
Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.PDF
|
|
|
|
Copyright : Copyright (C) 2012 John MacFarlane
|
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Conversion of LaTeX documents to PDF.
|
|
|
|
-}
|
2013-07-20 12:14:43 -07:00
|
|
|
module Text.Pandoc.PDF ( makePDF ) where
|
2012-01-20 18:57:16 -08:00
|
|
|
|
|
|
|
import System.IO.Temp
|
|
|
|
import Data.ByteString.Lazy (ByteString)
|
|
|
|
import qualified Data.ByteString.Lazy as B
|
|
|
|
import qualified Data.ByteString.Lazy.Char8 as BC
|
2013-07-20 12:14:43 -07:00
|
|
|
import qualified Data.ByteString as BS
|
2012-01-20 19:04:08 -08:00
|
|
|
import System.Exit (ExitCode (..))
|
2012-01-20 18:57:16 -08:00
|
|
|
import System.FilePath
|
|
|
|
import System.Directory
|
2013-07-18 21:51:23 -07:00
|
|
|
import System.Environment
|
2012-02-06 12:12:23 -08:00
|
|
|
import Control.Monad (unless)
|
2012-02-07 22:00:22 -08:00
|
|
|
import Data.List (isInfixOf)
|
2013-12-19 21:07:09 -05:00
|
|
|
import Data.Maybe (fromMaybe)
|
2013-07-20 12:14:43 -07:00
|
|
|
import qualified Data.ByteString.Base64 as B64
|
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|
|
|
import Text.Pandoc.Definition
|
2013-08-10 18:45:00 -07:00
|
|
|
import Text.Pandoc.Walk (walkM)
|
2013-07-20 12:14:43 -07:00
|
|
|
import Text.Pandoc.Shared (fetchItem, warn)
|
|
|
|
import Text.Pandoc.Options (WriterOptions(..))
|
|
|
|
import Text.Pandoc.MIME (extensionFromMimeType)
|
2013-08-08 15:15:20 -07:00
|
|
|
import Text.Pandoc.Process (pipeProcess)
|
|
|
|
import qualified Data.ByteString.Lazy as BL
|
2012-01-20 18:57:16 -08:00
|
|
|
|
2013-04-26 20:33:15 -07:00
|
|
|
withTempDir :: String -> (FilePath -> IO a) -> IO a
|
|
|
|
withTempDir =
|
|
|
|
#ifdef _WINDOWS
|
|
|
|
withTempDirectory "."
|
|
|
|
#else
|
|
|
|
withSystemTempDirectory
|
|
|
|
#endif
|
|
|
|
|
2013-07-20 12:14:43 -07:00
|
|
|
makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
|
|
|
|
-> (WriterOptions -> Pandoc -> String) -- ^ writer
|
|
|
|
-> WriterOptions -- ^ options
|
|
|
|
-> Pandoc -- ^ document
|
2012-01-20 19:11:35 -08:00
|
|
|
-> IO (Either ByteString ByteString)
|
2013-07-20 12:14:43 -07:00
|
|
|
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
|
Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe).
Previously we used to store the directory of the first input file,
even if it was local, and used this as a base directory for
finding images in ODT, EPUB, Docx, and PDF.
This has been confusing to many users. It seems better to look for
images relative to the current working directory, even if the first
file argument is in another directory.
writerSourceURL is set to 'Just url' when the first command-line
argument is an absolute URL. (So, relative links will be resolved
in relation to the first page.) Otherwise, 'Nothing'.
The ODT, EPUB, Docx, and PDF writers have been modified accordingly.
Note that this change may break some existing workflows. If you
have been assuming that relative links will be interpreted relative
to the directory of the first file argument, you'll need to
make that the current directory before running pandoc.
Closes #942.
2013-08-11 15:58:09 -07:00
|
|
|
doc' <- handleImages (writerSourceURL opts) tmpdir doc
|
2013-07-20 12:14:43 -07:00
|
|
|
let source = writer opts doc'
|
2012-01-20 19:04:08 -08:00
|
|
|
tex2pdf' tmpdir program source
|
2012-01-20 18:57:16 -08:00
|
|
|
|
Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe).
Previously we used to store the directory of the first input file,
even if it was local, and used this as a base directory for
finding images in ODT, EPUB, Docx, and PDF.
This has been confusing to many users. It seems better to look for
images relative to the current working directory, even if the first
file argument is in another directory.
writerSourceURL is set to 'Just url' when the first command-line
argument is an absolute URL. (So, relative links will be resolved
in relation to the first page.) Otherwise, 'Nothing'.
The ODT, EPUB, Docx, and PDF writers have been modified accordingly.
Note that this change may break some existing workflows. If you
have been assuming that relative links will be interpreted relative
to the directory of the first file argument, you'll need to
make that the current directory before running pandoc.
Closes #942.
2013-08-11 15:58:09 -07:00
|
|
|
handleImages :: Maybe String -- ^ source base URL
|
2013-07-20 12:14:43 -07:00
|
|
|
-> FilePath -- ^ temp dir to store images
|
|
|
|
-> Pandoc -- ^ document
|
|
|
|
-> IO Pandoc
|
2013-08-10 18:45:00 -07:00
|
|
|
handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir)
|
2013-07-20 12:14:43 -07:00
|
|
|
|
Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe).
Previously we used to store the directory of the first input file,
even if it was local, and used this as a base directory for
finding images in ODT, EPUB, Docx, and PDF.
This has been confusing to many users. It seems better to look for
images relative to the current working directory, even if the first
file argument is in another directory.
writerSourceURL is set to 'Just url' when the first command-line
argument is an absolute URL. (So, relative links will be resolved
in relation to the first page.) Otherwise, 'Nothing'.
The ODT, EPUB, Docx, and PDF writers have been modified accordingly.
Note that this change may break some existing workflows. If you
have been assuming that relative links will be interpreted relative
to the directory of the first file argument, you'll need to
make that the current directory before running pandoc.
Closes #942.
2013-08-11 15:58:09 -07:00
|
|
|
handleImage' :: Maybe String
|
2013-07-20 12:14:43 -07:00
|
|
|
-> FilePath
|
|
|
|
-> Inline
|
|
|
|
-> IO Inline
|
|
|
|
handleImage' baseURL tmpdir (Image ils (src,tit)) = do
|
|
|
|
exists <- doesFileExist src
|
|
|
|
if exists
|
|
|
|
then return $ Image ils (src,tit)
|
|
|
|
else do
|
|
|
|
res <- fetchItem baseURL src
|
|
|
|
case res of
|
|
|
|
Right (contents, Just mime) -> do
|
2013-12-19 21:07:09 -05:00
|
|
|
let ext = fromMaybe (takeExtension src) $
|
2013-07-20 12:14:43 -07:00
|
|
|
extensionFromMimeType mime
|
|
|
|
let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
|
2014-03-09 21:26:25 -07:00
|
|
|
-- note: we want / even on Windows, for TexLive:
|
|
|
|
let fname = tmpdir ++ "/" ++ basename <.> ext
|
2013-07-20 12:14:43 -07:00
|
|
|
BS.writeFile fname contents
|
|
|
|
return $ Image ils (fname,tit)
|
|
|
|
_ -> do
|
|
|
|
warn $ "Could not find image `" ++ src ++ "', skipping..."
|
|
|
|
return $ Image ils (src,tit)
|
|
|
|
handleImage' _ _ x = return x
|
|
|
|
|
2012-01-20 19:04:08 -08:00
|
|
|
tex2pdf' :: FilePath -- ^ temp directory for output
|
2012-01-20 21:36:04 -08:00
|
|
|
-> String -- ^ tex program
|
2012-01-20 18:57:16 -08:00
|
|
|
-> String -- ^ tex source
|
2012-01-20 19:11:35 -08:00
|
|
|
-> IO (Either ByteString ByteString)
|
2012-01-20 19:04:08 -08:00
|
|
|
tex2pdf' tmpDir program source = do
|
2012-02-07 22:00:22 -08:00
|
|
|
let numruns = if "\\tableofcontents" `isInfixOf` source
|
2012-03-19 09:25:33 -07:00
|
|
|
then 3 -- to get page numbers
|
|
|
|
else 2 -- 1 run won't give you PDF bookmarks
|
2012-02-06 12:12:23 -08:00
|
|
|
(exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
|
2013-10-17 13:36:43 -07:00
|
|
|
let msg = "Error producing PDF from TeX source.\n"
|
2012-01-20 18:57:16 -08:00
|
|
|
case (exit, mbPdf) of
|
2013-08-11 16:16:24 -07:00
|
|
|
(ExitFailure _, _) -> do
|
|
|
|
let logmsg = extractMsg log'
|
|
|
|
let extramsg =
|
|
|
|
case logmsg of
|
|
|
|
x | "! Package inputenc Error" `BC.isPrefixOf` x ->
|
|
|
|
"\nTry running pandoc with --latex-engine=xelatex."
|
|
|
|
_ -> ""
|
2013-10-17 13:36:43 -07:00
|
|
|
return $ Left $ msg <> logmsg <> extramsg
|
2012-01-20 19:52:56 -08:00
|
|
|
(ExitSuccess, Nothing) -> return $ Left msg
|
2012-01-20 18:57:16 -08:00
|
|
|
(ExitSuccess, Just pdf) -> return $ Right pdf
|
|
|
|
|
2012-01-20 19:52:56 -08:00
|
|
|
(<>) :: ByteString -> ByteString -> ByteString
|
|
|
|
(<>) = B.append
|
|
|
|
|
2012-01-20 18:57:16 -08:00
|
|
|
-- parsing output
|
|
|
|
|
|
|
|
extractMsg :: ByteString -> ByteString
|
2012-01-20 19:04:08 -08:00
|
|
|
extractMsg log' = do
|
|
|
|
let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log'
|
2012-01-20 18:57:16 -08:00
|
|
|
let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg'
|
|
|
|
let lineno = take 1 rest
|
2012-01-21 14:18:36 -08:00
|
|
|
if null msg'
|
|
|
|
then log'
|
|
|
|
else BC.unlines (msg'' ++ lineno)
|
2012-01-20 18:57:16 -08:00
|
|
|
|
|
|
|
-- running tex programs
|
|
|
|
|
|
|
|
-- Run a TeX program on an input bytestring and return (exit code,
|
|
|
|
-- contents of stdout, contents of produced PDF if any). Rerun
|
2012-02-06 12:12:23 -08:00
|
|
|
-- a fixed number of times to resolve references.
|
2012-01-20 21:36:04 -08:00
|
|
|
runTeXProgram :: String -> Int -> FilePath -> String
|
2012-01-20 18:57:16 -08:00
|
|
|
-> IO (ExitCode, ByteString, Maybe ByteString)
|
|
|
|
runTeXProgram program runsLeft tmpDir source = do
|
2014-03-09 21:26:25 -07:00
|
|
|
let file = tmpDir ++ "/input.tex"
|
2012-02-06 12:12:23 -08:00
|
|
|
exists <- doesFileExist file
|
|
|
|
unless exists $ UTF8.writeFile file source
|
2012-01-20 18:57:16 -08:00
|
|
|
let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
|
|
|
|
"-output-directory", tmpDir, file]
|
2013-07-18 21:51:23 -07:00
|
|
|
env' <- getEnvironment
|
2014-02-23 20:35:07 -08:00
|
|
|
#ifdef _WINDOWS
|
|
|
|
let sep = ";"
|
|
|
|
#else
|
|
|
|
let sep = ":"
|
|
|
|
#endif
|
|
|
|
let texinputs = maybe (tmpDir ++ sep) ((tmpDir ++ sep) ++)
|
2013-07-18 21:51:23 -07:00
|
|
|
$ lookup "TEXINPUTS" env'
|
|
|
|
let env'' = ("TEXINPUTS", texinputs) :
|
|
|
|
[(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
|
2013-08-08 15:15:20 -07:00
|
|
|
(exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty
|
2012-02-06 12:12:23 -08:00
|
|
|
if runsLeft > 1
|
2012-01-20 18:57:16 -08:00
|
|
|
then runTeXProgram program (runsLeft - 1) tmpDir source
|
2012-02-06 12:12:23 -08:00
|
|
|
else do
|
|
|
|
let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
|
|
|
|
pdfExists <- doesFileExist pdfFile
|
|
|
|
pdf <- if pdfExists
|
|
|
|
then Just `fmap` B.readFile pdfFile
|
|
|
|
else return Nothing
|
|
|
|
return (exit, out <> err, pdf)
|
2012-01-20 18:57:16 -08:00
|
|
|
|