Added Text.Pandoc.PDF module, 'pdf' as new output option.

The module calls pdflatex to produce the PDF, and is basically shell scripting in haskell. But this is better than the existing markdown2pdf script, which is limited to POSIX. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1394 788f1e2b-df1e-0410-8736-df70ead52e1b
2008-08-10 17:33:20 +00:00 · 2008-08-10 17:33:20 +00:00 · 0f4bdc6b7d
commit 0f4bdc6b7d
parent f435d14f28
4 changed files with 176 additions and 160 deletions
--- a/Main.hs
+++ b/Main.hs
@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 {- |
   Module      : Main
   Copyright   : Copyright (C) 2006-8 John MacFarlane
-   License     : GNU GPL, version 2 or above 
+   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley@edu>
-   Stability   : alpha 
+   Stability   : alpha
   Portability : portable

 Parses command-line options and calls the appropriate readers and
@ -32,6 +32,7 @@ writers.
 module Main where
 import Text.Pandoc
 import Text.Pandoc.ODT
+import Text.Pandoc.PDF
 import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
 import Text.Pandoc.Highlighting ( languages )
 import System.Environment ( getArgs, getProgName, getEnvironment )
@ -98,7 +99,7 @@ readers = [("native"   , readPandoc)
 -- | Reader for native Pandoc format.
 readPandoc :: ParserState -> String -> Pandoc
 readPandoc _ input = read input
-    
+
 -- | Association list of formats and pairs of writers and default headers.
 writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
 writers = [("native"       , (writeDoc, ""))
@ -108,6 +109,7 @@ writers = [("native"       , (writeDoc, ""))
          ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
          ,("odt"          , (writeOpenDocument, defaultOpenDocumentHeader))
          ,("latex"        , (writeLaTeX, defaultLaTeXHeader))
+          ,("pdf"          , (writeLaTeX, defaultLaTeXHeader))
          ,("context"      , (writeConTeXt, defaultConTeXtHeader))
          ,("texinfo"      , (writeTexinfo, ""))
          ,("man"          , (writeMan, ""))
@ -118,8 +120,7 @@ writers = [("native"       , (writeDoc, ""))
          ]

 isNonTextOutput :: String -> Bool
-isNonTextOutput "odt" = True
-isNonTextOutput _     = False
+isNonTextOutput = (`elem` ["odt", "pdf"])

 -- | Writer for Pandoc native format.
 writeDoc :: WriterOptions -> Pandoc -> String
@ -205,7 +206,7 @@ options =
                  (\arg opt -> return opt { optWriter = map toLower arg })
                  "FORMAT")
                 "" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")")
-    
+
    , Option "s" ["standalone"]
                 (NoArg
                  (\opt -> return opt { optStandalone = True }))
@ -250,7 +251,7 @@ options =

    , Option "m" ["asciimathml"]
                 (OptArg
-                  (\arg opt -> return opt { optHTMLMathMethod = 
+                  (\arg opt -> return opt { optHTMLMathMethod =
                                               ASCIIMathML arg })
                  "URL")
                 "" -- "Use ASCIIMathML script in html output"
@ -290,13 +291,13 @@ options =
    , Option "" ["toc", "table-of-contents"]
                (NoArg
                 (\opt -> return opt { optTableOfContents = True }))
-               "" -- "Include table of contents" 
+               "" -- "Include table of contents"

    , Option "c" ["css"]
                 (ReqArg
-                  (\arg opt -> do 
+                  (\arg opt -> do
                     let old = optCSS opt
-                     return opt { optCSS = old ++ [arg], 
+                     return opt { optCSS = old ++ [arg],
                                  optStandalone = True })
                  "CSS")
                 "" -- "Link to CSS style sheet"
@ -340,17 +341,17 @@ options =

    , Option "T" ["title-prefix"]
                 (ReqArg
-                  (\arg opt -> return opt { optTitlePrefix = arg, 
+                  (\arg opt -> return opt { optTitlePrefix = arg,
                                            optStandalone = True })
                  "STRING")
                 "" -- "String to prefix to HTML window title"
-                 
+
    , Option "D" ["print-default-header"]
                 (ReqArg
                  (\arg _ -> do
                     let header = case (lookup arg writers) of
                           Just (_, h) -> h
-                           Nothing     -> error ("Unknown reader: " ++ arg) 
+                           Nothing     -> error ("Unknown reader: " ++ arg)
                     hPutStr stdout header
                     exitWith ExitSuccess)
                  "FORMAT")
@ -376,7 +377,7 @@ options =
                 (NoArg
                  (\opt -> return opt { optIgnoreArgs = True }))
                 "" -- "Ignore command-line arguments."
-    
+
    , Option "v" ["version"]
                 (NoArg
                  (\_ -> do
@ -398,15 +399,15 @@ options =
 -- Returns usage message
 usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
 usageMessage programName opts = usageInfo
-  (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats:  " ++ 
-  (joinWithSep ", " $ map fst readers) ++ "\nOutput formats:  " ++ 
+  (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats:  " ++
+  (joinWithSep ", " $ map fst readers) ++ "\nOutput formats:  " ++
  (joinWithSep ", " $ map fst writers) ++ "\nOptions:")
  opts
- 
+
 -- Determine default reader based on source file extensions
 defaultReaderName :: [FilePath] -> String
 defaultReaderName [] = "markdown"
-defaultReaderName (x:xs) = 
+defaultReaderName (x:xs) =
  case takeExtension (map toLower x) of
    ".xhtml"    -> "html"
    ".html"     -> "html"
@ -441,6 +442,7 @@ defaultWriterName x =
    ".texinfo"  -> "texinfo"
    ".db"       -> "docbook"
    ".odt"      -> "odt"
+    ".pdf"      -> "pdf"
    ['.',y] | y `elem` ['1'..'9'] -> "man"
    _          -> "html"

@ -464,7 +466,7 @@ main = do
    else
      return ()

-  let defaultOpts' = if compatMode 
+  let defaultOpts' = if compatMode
                       then defaultOpts { optReader = "markdown"
                                        , optWriter = "html"
                                        , optStrict = True }
@ -513,11 +515,11 @@ main = do
  let sources = if ignoreArgs then [] else args

  -- assign reader and writer based on options and filenames
-  let readerName' = if null readerName 
+  let readerName' = if null readerName
                      then defaultReaderName sources
                      else readerName

-  let writerName' = if null writerName 
+  let writerName' = if null writerName
                      then defaultWriterName outputFile
                      else writerName

@ -539,74 +541,76 @@ main = do
                                      -- remove DOS line endings
      tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs)
      tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs)
-      tabFilter spsToNextStop ('\t':xs) = 
+      tabFilter spsToNextStop ('\t':xs) =
        if preserveTabs
-           then '\t':(tabFilter tabStop xs) 
-           else replicate spsToNextStop ' ' ++ tabFilter tabStop xs 
-      tabFilter 1 (x:xs) = 
+           then '\t':(tabFilter tabStop xs)
+           else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
+      tabFilter 1 (x:xs) =
        x:(tabFilter tabStop xs)
-      tabFilter spsToNextStop (x:xs) = 
+      tabFilter spsToNextStop (x:xs) =
        x:(tabFilter (spsToNextStop - 1) xs)

-  let standalone' = (standalone && not strict) || writerName' == "odt"
+  let standalone' = (standalone && not strict) || isNonTextOutput writerName'

 #ifdef _CITEPROC
  refs <- if null modsFile then return [] else readModsColletionFile modsFile
 #endif

-  let startParserState = 
+  let startParserState =
         defaultParserState { stateParseRaw     = parseRaw,
-                              stateTabStop      = tabStop, 
+                              stateTabStop      = tabStop,
                              stateSanitizeHTML = sanitize,
                              stateStandalone   = standalone',
 #ifdef _CITEPROC
                              stateCitations    = map citeKey refs,
 #endif
-                              stateSmart        = smart || writerName' `elem` 
+                              stateSmart        = smart || writerName' `elem`
                                                           ["latex", "context"],
                              stateColumns      = columns,
                              stateStrict       = strict }
  let csslink = if null css
                   then ""
-                   else concatMap 
+                   else concatMap
                        (\f -> "<link rel=\"stylesheet\" href=\"" ++
                               f ++ "\" type=\"text/css\" media=\"all\" />\n")
                        css
-  let header = (if (customHeader == "DEFAULT") 
+  let header = (if (customHeader == "DEFAULT")
                   then defaultHeader
                   else customHeader) ++ csslink ++ includeHeader
  let writerOptions = WriterOptions { writerStandalone     = standalone',
-                                      writerHeader         = header, 
+                                      writerHeader         = header,
                                      writerTitlePrefix    = titlePrefix,
-                                      writerTabStop        = tabStop, 
+                                      writerTabStop        = tabStop,
                                      writerTableOfContents = toc &&
                                                              (not strict) &&
                                                              writerName' /= "s5",
                                      writerHTMLMathMethod = mathMethod,
                                      writerS5             = (writerName' == "s5"),
                                      writerIgnoreNotes    = False,
-                                      writerIncremental    = incremental, 
+                                      writerIncremental    = incremental,
                                      writerNumberSections = numberSections,
-                                      writerIncludeBefore  = includeBefore, 
+                                      writerIncludeBefore  = includeBefore,
                                      writerIncludeAfter   = includeAfter,
                                      writerStrictMarkdown = strict,
                                      writerReferenceLinks = referenceLinks,
                                      writerWrapText       = wrap }

-  let writeOutput = if writerName' == "odt"
-                       then if outputFile == "-"
-                               then \_ -> do
-                                 hPutStrLn stderr ("Error:  Cannot write " ++ writerName ++ 
-                                     " output to stdout.\n" ++
-                                     "Specify an output file using the -o option.")
-                                 exitWith $ ExitFailure 5
-                               else let sourceDirRelative = if null sources
-                                                               then ""
-                                                               else takeDirectory (head sources)
-                                    in  saveOpenDocumentAsODT outputFile sourceDirRelative
-                       else if outputFile == "-"
-                               then putStrLn
-                               else writeFile outputFile . (++ "\n")
+  if isNonTextOutput writerName' && outputFile == "-"
+     then do hPutStrLn stderr ("Error:  Cannot write " ++ writerName ++ " output to stdout.\n" ++
+                               "Specify an output file using the -o option.")
+             exitWith $ ExitFailure 5
+     else return ()
+
+  let sourceDirRelative = if null sources
+                             then ""
+                             else takeDirectory (head sources)
+
+  let writeOutput = case writerName' of
+                          "odt"   -> saveOpenDocumentAsODT outputFile sourceDirRelative
+                          "pdf"   -> saveLaTeXAsPDF outputFile sourceDirRelative
+                          _       -> if outputFile == "-"
+                                        then putStrLn
+                                        else writeFile outputFile . (++ "\n")

  fmap (reader startParserState . tabFilter tabStop . joinWithSep "\n")
       (readSources sources) >>=
@ -615,7 +619,7 @@ main = do
 #endif
        writeOutput . writer writerOptions

-  where 
+  where
    readSources [] = mapM readSource ["-"]
    readSources sources = mapM readSource sources
    readSource "-" = getContents
--- a/27
+++ b/27
@ -82,11 +82,28 @@ To convert `hello.html` from html to markdown:
 	pandoc -f html -t markdown hello.html

 Supported output formats include `markdown`, `latex`, `context`
-(ConTeXt), `html`, `rtf` (rich text format), `rst` (reStructuredText),
-`docbook` (DocBook XML), `opendocument` (OpenDocument XML), `odt`
-(OpenOffice text document), `texinfo`, (GNU Texinfo), `mediawiki`
-(MediaWiki markup), `man` (groff man), and `s5` (which produces an
-HTML file that acts like powerpoint).
+(ConTeXt), `pdf`, `html`, `rtf` (rich text format), `rst`
+(reStructuredText), `docbook` (DocBook XML), `opendocument`
+(OpenDocument XML), `odt` (OpenOffice text document), `texinfo`, (GNU
+Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5`
+(which produces an HTML file that acts like powerpoint).
+
+Notes:
+
+- For `odt` output, you must have `zip` in the path. If you
+  don't have it installed, you can get the free [Info-ZIP].
+
+- For `pdf` output, you must have `pdflatex` and `bibtex` in the
+  path. You should also have the following LaTeX packages installed:
+  `unicode`, `fancyhdr` (if you have verbatim text in footnotes),
+  `graphicx` (if you use images), `array` (if you use tables), and
+  `ulem` (if you use strikeout text). If they are not already included
+  in your LaTeX distribution, you can get them from [CTAN].  A full
+  [TeX Live] or [MacTeX] distribution will have all of these packages.
+
+  [Info-ZIP]: http://www.info-zip.org/Zip.html
+  [TeX Live]: http://www.tug.org/texlive/
+  [MacTeX]:   http://www.tug.org/mactex/

 Supported input formats include `markdown`, `html`, `latex`, and `rst`.
 Note that the `rst` reader only parses a subset of reStructuredText
--- a/Text/Pandoc/PDF.hs
+++ b/Text/Pandoc/PDF.hs
@ -1,4 +1,4 @@
-{-# LANGUAGE TemplateHaskell, CPP #-}
+{-# LANGUAGE CPP #-}
 {-
 Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>

@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 -}

 {- |
-   Module      : Text.Pandoc.ODT
+   Module      : Text.Pandoc.PDF
   Copyright   : Copyright (C) 2006-7 John MacFarlane
   License     : GNU GPL, version 2 or above

@ -26,119 +26,113 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   Stability   : alpha
   Portability : portable

-Functions for producing an ODT file from OpenDocument XML.
+Functions for producing a PDF file from LaTeX.
 -}
-module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Text.Pandoc.TH ( binaryContentsOf )
-import Data.Maybe ( fromJust )
-import Data.List ( partition, intersperse )
+module Text.Pandoc.PDF ( saveLaTeXAsPDF ) where
+import Data.List ( isInfixOf )
 import System.Directory
-import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
+import System.FilePath ( (</>), (<.>), takeBaseName )
 import System.Process ( runProcess, waitForProcess )
 import System.Exit
-import Text.XML.Light
-import Text.XML.Light.Cursor
+import System.Environment ( getEnvironment )
 import Text.Pandoc.Shared ( withTempDir )
-import Network.URI ( isURI )
-import qualified Data.ByteString as B ( writeFile, pack )
-import Data.ByteString.Internal ( c2w )
-import Prelude hiding ( writeFile, readFile )
-import System.IO ( stderr )
+import Prelude hiding ( writeFile, readFile, putStrLn )
+import System.IO ( stderr, openFile, IOMode (..) )
 #ifdef _UTF8STRING
 import System.IO.UTF8
 #else
 import Text.Pandoc.UTF8
 #endif

-- | Produce an ODT file from OpenDocument XML.
-saveOpenDocumentAsODT :: FilePath    -- ^ Pathname of ODT file to be produced.
-                      -> FilePath    -- ^ Relative directory of source file.
-                      -> String      -- ^ OpenDocument XML contents.
-                      -> IO ()
-saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
-  let zipCmd = "zip"
-  -- check for zip in path:
-  zipPathMaybe <- findExecutable zipCmd
-  let zipPath = case zipPathMaybe of
-                  Nothing -> error $ "The '" ++ zipCmd ++
-                                     "' command, which is needed to build an ODT file, was not found.\n" ++
-                                     "It can be obtained from http://www.info-zip.org/Zip.html\n" ++
-                                     "Debian (and Debian-based) linux: apt-get install zip\n" ++
-                                     "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
+-- | Produce an PDF file from LaTeX.
+saveLaTeXAsPDF :: FilePath    -- ^ Pathname of PDF file to be produced.
+                -> FilePath    -- ^ Relative directory of source file.
+                -> String      -- ^ LaTeX document.
+                -> IO ()
+saveLaTeXAsPDF destinationPDFPath sourceDirRelative latex = do
+  -- check for pdflatex and bibtex in path:
+  latexPathMaybe <- findExecutable "pdflatex"
+  bibtexPathMaybe <- findExecutable "bibtex"
+  let latexPath = case latexPathMaybe of
+                  Nothing -> error $ "The 'pdflatex' command, which is needed to build an PDF file, was not found."
                  Just x  -> x
-  withTempDir "pandoc-odt" $ \tempDir -> do
-    let tempODT = tempDir </> "reference.odt"
-    B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt")
-    xml' <- handlePictures tempODT sourceDirRelative xml
-    writeFile (tempDir </> "content.xml") xml'
-    ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"]
-            (Just tempDir) Nothing Nothing Nothing (Just stderr)
-    ec <- waitForProcess ph   -- requires compilation with -threaded
-    case ec of
-         ExitSuccess -> copyFile tempODT destinationODTPath
-         _           -> error "Error creating ODT." >> exitWith ec
+  let bibtexPath = case bibtexPathMaybe of
+                  Nothing -> error $ "The 'bibtex' command, which is needed to build an PDF file, was not found."
+                  Just x  -> x
+  sourceDirAbsolute <- getCurrentDirectory >>= return . (</> sourceDirRelative) >>= canonicalizePath
+  withTempDir "pandoc-pdf" $ \tempDir -> do
+    env <- getEnvironment
+    let env' = ("TEXINPUTS", ".:" ++ sourceDirAbsolute ++ ":") : env
+    let baseName = "input"
+    writeFile (tempDir </> baseName <.> "tex") latex
+    let runLatex = runProgram latexPath ["-interaction=nonstopmode", baseName] tempDir env'
+    let runBibtex = runProgram bibtexPath [baseName] tempDir env'
+    messages1 <- runLatex
+    let logPath = tempDir </> baseName <.> "log"
+    tocExists <- doesFileExist (tempDir </> baseName <.> "toc")
+    logContents <- readFile logPath
+    let undefinedRefs = "There were undefined references" `isInfixOf` logContents
+    let needsBibtex = "itation" `isInfixOf` logContents
+    if needsBibtex
+       then runBibtex >>= hPutStr stderr . unlines
+       else return ()
+    if tocExists || undefinedRefs
+       then do
+         messages2 <- runLatex
+         logContents' <- readFile logPath
+         let stillUndefinedRefs = "There were undefined references" `isInfixOf` logContents'
+         if stillUndefinedRefs
+            then runLatex >>= hPutStr stderr . unlines
+            else hPutStr stderr $ unlines messages2
+       else
+         hPutStr stderr $ unlines messages1
+    let pdfPath = tempDir </> baseName <.> "pdf"
+    pdfExists <- doesFileExist pdfPath
+    if pdfExists
+       then copyFile pdfPath destinationPDFPath
+       else error "The PDF could not be created."

-- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
-- the zip file.  If filename is a URL, attempt to download it.  Modify xlink:href attributes
-- to point to the new locations in Pictures/.  Return modified XML.
-handlePictures :: FilePath     -- ^ Path of ODT file in temp directory
-               -> FilePath     -- ^ Directory (relative) containing source file
-               -> String       -- ^ OpenDocument XML string
-               -> IO String    -- ^ Modified XML
-handlePictures tempODT sourceDirRelative xml = do
-  let parsed = case parseXMLDoc xml of
-                Nothing -> error "Could not parse OpenDocument XML."
-                Just x  -> x
-  let cursor = case (fromForest $ elContent parsed) of
-                   Nothing -> error "ODT appears empty"
-                   Just x  -> x
-  cursor' <- scanPictures tempODT sourceDirRelative cursor
-  let modified = parsed { elContent = toForest $ root cursor' }
-  return $ showTopElement modified
-
-scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
-scanPictures tempODT sourceDirRelative cursor = do
-  cursor' <- handleTree tempODT sourceDirRelative cursor
-  case right cursor' of
-       Just n  -> scanPictures tempODT sourceDirRelative n
-       Nothing -> return cursor'
-
-handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
-handleTree tempODT sourceDirRelative cursor = do
-  case firstChild cursor of
-       Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
-       Just n  -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
-
-- | If content is an image link, handle it appropriately.
-- Otherwise, handle children if any.
-handleContent :: FilePath -> FilePath -> Content -> IO Content
-handleContent tempODT sourceDirRelative content@(Elem el) = do
-  if qName (elName el) == "image"
-     then do
-        let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
-        let href = case hrefs of
-                          []     -> error $ "No href found in " ++ show el
-                          [x]    -> x
-                          _      -> error $ "Multiple hrefs found in " ++ show el
-        if isURI $ attrVal href
-           then return content
-           else do  -- treat as filename
-             let oldLoc = sourceDirRelative </> attrVal href
-             fileExists <- doesFileExist oldLoc
-             if fileExists
-                then do
-                   let pref = take 230 $ concat $ intersperse "_" $
-                                splitDirectories $ takeDirectory $ attrVal href
-                   let newLoc = "Pictures" </> pref ++ "_" ++ (takeFileName $ attrVal href)
-                   let tempDir = takeDirectory tempODT
-                   createDirectoryIfMissing False $ tempDir </> takeDirectory newLoc
-                   copyFile oldLoc $ tempDir </> newLoc
-                   let newAttrs = (href { attrVal = newLoc }) : rest
-                   return $ Elem (el { elAttribs = newAttrs })
-                else do
-                   hPutStrLn stderr $ "Warning:  Unable to find image at " ++ oldLoc ++ " - ignoring."
-                   return content
-     else return content
- 
-handleContent _ _ c = return c  -- not Element
+runProgram :: FilePath             -- ^ pathname of executable
+           -> [String]             -- ^ arguments
+           -> FilePath             -- ^ working directory
+           -> [(String, String)]   -- ^ environment
+           -> IO [String]
+runProgram cmdPath arguments workingDir env = do
+   let runOutputPath = workingDir </> "output" <.> "tmp"
+   runOutput <- openFile runOutputPath WriteMode
+   ph <- runProcess cmdPath arguments (Just workingDir) (Just env) Nothing (Just runOutput) (Just runOutput)
+   ec <- waitForProcess ph   -- requires compilation with -threaded
+   case ec of
+         ExitSuccess -> return []
+         _           -> do
+                          output <- readFile runOutputPath
+                          if (takeBaseName cmdPath) == "bibtex"
+                             then return $! lines output
+                             else do
+                               return $!
+                                   (if "`ucs.sty' not found" `isInfixOf` output
+                                       then ["Please install the 'unicode' package from CTAN:",
+                                             "  http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"]
+                                       else []) ++
+                                   (if "`ulem.sty' not found" `isInfixOf` output
+                                       then ["Please install the 'ulem' package from CTAN:",
+                                             "  http://www.ctan.org/tex-archive/macros/latex/contrib/misc/"]
+                                       else []) ++
+                                   (if "`graphicx.sty' not found" `isInfixOf` output
+                                       then ["Please install the 'graphicx' package from CTAN:",
+                                             "  http://www.ctan.org/tex-archive/macros/latex/required/graphics/"]
+                                       else []) ++
+                                   (if "`fancyhdr.sty' not found" `isInfixOf` output
+                                       then ["Please install the 'fancyhdr' package from CTAN:",
+                                             "  http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/"]
+                                       else []) ++
+                                   (if "`array.sty' not found" `isInfixOf` output
+                                       then ["Please install the 'array' package from CTAN:",
+                                             "  http://www.ctan.org/tex-archive/macros/latex/required/tools/"]
+                                       else []) ++
+                                   (filter isUseful $ lines output)
+                                     where isUseful ln = take 1 ln == "!"  ||
+                                                         take 2 ln == "l." ||
+                                                         "Error"   `isInfixOf` ln ||
+                                                         "error"   `isInfixOf` ln

--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -165,6 +165,7 @@ Library
                   Text.Pandoc.CharacterReferences,
                   Text.Pandoc.Shared,
                   Text.Pandoc.ODT,
+                   Text.Pandoc.PDF,
                   Text.Pandoc.ASCIIMathML,
                   Text.Pandoc.DefaultHeaders,
                   Text.Pandoc.Highlighting,