Added Text.Pandoc.PDF module, 'pdf' as new output option.
The module calls pdflatex to produce the PDF, and is basically shell scripting in haskell. But this is better than the existing markdown2pdf script, which is limited to POSIX. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1394 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
f435d14f28
commit
0f4bdc6b7d
4 changed files with 176 additions and 160 deletions
106
Main.hs
106
Main.hs
|
@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
{- |
|
{- |
|
||||||
Module : Main
|
Module : Main
|
||||||
Copyright : Copyright (C) 2006-8 John MacFarlane
|
Copyright : Copyright (C) 2006-8 John MacFarlane
|
||||||
License : GNU GPL, version 2 or above
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
Maintainer : John MacFarlane <jgm@berkeley@edu>
|
Maintainer : John MacFarlane <jgm@berkeley@edu>
|
||||||
Stability : alpha
|
Stability : alpha
|
||||||
Portability : portable
|
Portability : portable
|
||||||
|
|
||||||
Parses command-line options and calls the appropriate readers and
|
Parses command-line options and calls the appropriate readers and
|
||||||
|
@ -32,6 +32,7 @@ writers.
|
||||||
module Main where
|
module Main where
|
||||||
import Text.Pandoc
|
import Text.Pandoc
|
||||||
import Text.Pandoc.ODT
|
import Text.Pandoc.ODT
|
||||||
|
import Text.Pandoc.PDF
|
||||||
import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
|
import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
|
||||||
import Text.Pandoc.Highlighting ( languages )
|
import Text.Pandoc.Highlighting ( languages )
|
||||||
import System.Environment ( getArgs, getProgName, getEnvironment )
|
import System.Environment ( getArgs, getProgName, getEnvironment )
|
||||||
|
@ -98,7 +99,7 @@ readers = [("native" , readPandoc)
|
||||||
-- | Reader for native Pandoc format.
|
-- | Reader for native Pandoc format.
|
||||||
readPandoc :: ParserState -> String -> Pandoc
|
readPandoc :: ParserState -> String -> Pandoc
|
||||||
readPandoc _ input = read input
|
readPandoc _ input = read input
|
||||||
|
|
||||||
-- | Association list of formats and pairs of writers and default headers.
|
-- | Association list of formats and pairs of writers and default headers.
|
||||||
writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
|
writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
|
||||||
writers = [("native" , (writeDoc, ""))
|
writers = [("native" , (writeDoc, ""))
|
||||||
|
@ -108,6 +109,7 @@ writers = [("native" , (writeDoc, ""))
|
||||||
,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
|
,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
|
||||||
,("odt" , (writeOpenDocument, defaultOpenDocumentHeader))
|
,("odt" , (writeOpenDocument, defaultOpenDocumentHeader))
|
||||||
,("latex" , (writeLaTeX, defaultLaTeXHeader))
|
,("latex" , (writeLaTeX, defaultLaTeXHeader))
|
||||||
|
,("pdf" , (writeLaTeX, defaultLaTeXHeader))
|
||||||
,("context" , (writeConTeXt, defaultConTeXtHeader))
|
,("context" , (writeConTeXt, defaultConTeXtHeader))
|
||||||
,("texinfo" , (writeTexinfo, ""))
|
,("texinfo" , (writeTexinfo, ""))
|
||||||
,("man" , (writeMan, ""))
|
,("man" , (writeMan, ""))
|
||||||
|
@ -118,8 +120,7 @@ writers = [("native" , (writeDoc, ""))
|
||||||
]
|
]
|
||||||
|
|
||||||
isNonTextOutput :: String -> Bool
|
isNonTextOutput :: String -> Bool
|
||||||
isNonTextOutput "odt" = True
|
isNonTextOutput = (`elem` ["odt", "pdf"])
|
||||||
isNonTextOutput _ = False
|
|
||||||
|
|
||||||
-- | Writer for Pandoc native format.
|
-- | Writer for Pandoc native format.
|
||||||
writeDoc :: WriterOptions -> Pandoc -> String
|
writeDoc :: WriterOptions -> Pandoc -> String
|
||||||
|
@ -205,7 +206,7 @@ options =
|
||||||
(\arg opt -> return opt { optWriter = map toLower arg })
|
(\arg opt -> return opt { optWriter = map toLower arg })
|
||||||
"FORMAT")
|
"FORMAT")
|
||||||
"" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")")
|
"" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")")
|
||||||
|
|
||||||
, Option "s" ["standalone"]
|
, Option "s" ["standalone"]
|
||||||
(NoArg
|
(NoArg
|
||||||
(\opt -> return opt { optStandalone = True }))
|
(\opt -> return opt { optStandalone = True }))
|
||||||
|
@ -250,7 +251,7 @@ options =
|
||||||
|
|
||||||
, Option "m" ["asciimathml"]
|
, Option "m" ["asciimathml"]
|
||||||
(OptArg
|
(OptArg
|
||||||
(\arg opt -> return opt { optHTMLMathMethod =
|
(\arg opt -> return opt { optHTMLMathMethod =
|
||||||
ASCIIMathML arg })
|
ASCIIMathML arg })
|
||||||
"URL")
|
"URL")
|
||||||
"" -- "Use ASCIIMathML script in html output"
|
"" -- "Use ASCIIMathML script in html output"
|
||||||
|
@ -290,13 +291,13 @@ options =
|
||||||
, Option "" ["toc", "table-of-contents"]
|
, Option "" ["toc", "table-of-contents"]
|
||||||
(NoArg
|
(NoArg
|
||||||
(\opt -> return opt { optTableOfContents = True }))
|
(\opt -> return opt { optTableOfContents = True }))
|
||||||
"" -- "Include table of contents"
|
"" -- "Include table of contents"
|
||||||
|
|
||||||
, Option "c" ["css"]
|
, Option "c" ["css"]
|
||||||
(ReqArg
|
(ReqArg
|
||||||
(\arg opt -> do
|
(\arg opt -> do
|
||||||
let old = optCSS opt
|
let old = optCSS opt
|
||||||
return opt { optCSS = old ++ [arg],
|
return opt { optCSS = old ++ [arg],
|
||||||
optStandalone = True })
|
optStandalone = True })
|
||||||
"CSS")
|
"CSS")
|
||||||
"" -- "Link to CSS style sheet"
|
"" -- "Link to CSS style sheet"
|
||||||
|
@ -340,17 +341,17 @@ options =
|
||||||
|
|
||||||
, Option "T" ["title-prefix"]
|
, Option "T" ["title-prefix"]
|
||||||
(ReqArg
|
(ReqArg
|
||||||
(\arg opt -> return opt { optTitlePrefix = arg,
|
(\arg opt -> return opt { optTitlePrefix = arg,
|
||||||
optStandalone = True })
|
optStandalone = True })
|
||||||
"STRING")
|
"STRING")
|
||||||
"" -- "String to prefix to HTML window title"
|
"" -- "String to prefix to HTML window title"
|
||||||
|
|
||||||
, Option "D" ["print-default-header"]
|
, Option "D" ["print-default-header"]
|
||||||
(ReqArg
|
(ReqArg
|
||||||
(\arg _ -> do
|
(\arg _ -> do
|
||||||
let header = case (lookup arg writers) of
|
let header = case (lookup arg writers) of
|
||||||
Just (_, h) -> h
|
Just (_, h) -> h
|
||||||
Nothing -> error ("Unknown reader: " ++ arg)
|
Nothing -> error ("Unknown reader: " ++ arg)
|
||||||
hPutStr stdout header
|
hPutStr stdout header
|
||||||
exitWith ExitSuccess)
|
exitWith ExitSuccess)
|
||||||
"FORMAT")
|
"FORMAT")
|
||||||
|
@ -376,7 +377,7 @@ options =
|
||||||
(NoArg
|
(NoArg
|
||||||
(\opt -> return opt { optIgnoreArgs = True }))
|
(\opt -> return opt { optIgnoreArgs = True }))
|
||||||
"" -- "Ignore command-line arguments."
|
"" -- "Ignore command-line arguments."
|
||||||
|
|
||||||
, Option "v" ["version"]
|
, Option "v" ["version"]
|
||||||
(NoArg
|
(NoArg
|
||||||
(\_ -> do
|
(\_ -> do
|
||||||
|
@ -398,15 +399,15 @@ options =
|
||||||
-- Returns usage message
|
-- Returns usage message
|
||||||
usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
|
usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
|
||||||
usageMessage programName opts = usageInfo
|
usageMessage programName opts = usageInfo
|
||||||
(programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
|
(programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
|
||||||
(joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++
|
(joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++
|
||||||
(joinWithSep ", " $ map fst writers) ++ "\nOptions:")
|
(joinWithSep ", " $ map fst writers) ++ "\nOptions:")
|
||||||
opts
|
opts
|
||||||
|
|
||||||
-- Determine default reader based on source file extensions
|
-- Determine default reader based on source file extensions
|
||||||
defaultReaderName :: [FilePath] -> String
|
defaultReaderName :: [FilePath] -> String
|
||||||
defaultReaderName [] = "markdown"
|
defaultReaderName [] = "markdown"
|
||||||
defaultReaderName (x:xs) =
|
defaultReaderName (x:xs) =
|
||||||
case takeExtension (map toLower x) of
|
case takeExtension (map toLower x) of
|
||||||
".xhtml" -> "html"
|
".xhtml" -> "html"
|
||||||
".html" -> "html"
|
".html" -> "html"
|
||||||
|
@ -441,6 +442,7 @@ defaultWriterName x =
|
||||||
".texinfo" -> "texinfo"
|
".texinfo" -> "texinfo"
|
||||||
".db" -> "docbook"
|
".db" -> "docbook"
|
||||||
".odt" -> "odt"
|
".odt" -> "odt"
|
||||||
|
".pdf" -> "pdf"
|
||||||
['.',y] | y `elem` ['1'..'9'] -> "man"
|
['.',y] | y `elem` ['1'..'9'] -> "man"
|
||||||
_ -> "html"
|
_ -> "html"
|
||||||
|
|
||||||
|
@ -464,7 +466,7 @@ main = do
|
||||||
else
|
else
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
let defaultOpts' = if compatMode
|
let defaultOpts' = if compatMode
|
||||||
then defaultOpts { optReader = "markdown"
|
then defaultOpts { optReader = "markdown"
|
||||||
, optWriter = "html"
|
, optWriter = "html"
|
||||||
, optStrict = True }
|
, optStrict = True }
|
||||||
|
@ -513,11 +515,11 @@ main = do
|
||||||
let sources = if ignoreArgs then [] else args
|
let sources = if ignoreArgs then [] else args
|
||||||
|
|
||||||
-- assign reader and writer based on options and filenames
|
-- assign reader and writer based on options and filenames
|
||||||
let readerName' = if null readerName
|
let readerName' = if null readerName
|
||||||
then defaultReaderName sources
|
then defaultReaderName sources
|
||||||
else readerName
|
else readerName
|
||||||
|
|
||||||
let writerName' = if null writerName
|
let writerName' = if null writerName
|
||||||
then defaultWriterName outputFile
|
then defaultWriterName outputFile
|
||||||
else writerName
|
else writerName
|
||||||
|
|
||||||
|
@ -539,74 +541,76 @@ main = do
|
||||||
-- remove DOS line endings
|
-- remove DOS line endings
|
||||||
tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs)
|
tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs)
|
||||||
tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs)
|
tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs)
|
||||||
tabFilter spsToNextStop ('\t':xs) =
|
tabFilter spsToNextStop ('\t':xs) =
|
||||||
if preserveTabs
|
if preserveTabs
|
||||||
then '\t':(tabFilter tabStop xs)
|
then '\t':(tabFilter tabStop xs)
|
||||||
else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
|
else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
|
||||||
tabFilter 1 (x:xs) =
|
tabFilter 1 (x:xs) =
|
||||||
x:(tabFilter tabStop xs)
|
x:(tabFilter tabStop xs)
|
||||||
tabFilter spsToNextStop (x:xs) =
|
tabFilter spsToNextStop (x:xs) =
|
||||||
x:(tabFilter (spsToNextStop - 1) xs)
|
x:(tabFilter (spsToNextStop - 1) xs)
|
||||||
|
|
||||||
let standalone' = (standalone && not strict) || writerName' == "odt"
|
let standalone' = (standalone && not strict) || isNonTextOutput writerName'
|
||||||
|
|
||||||
#ifdef _CITEPROC
|
#ifdef _CITEPROC
|
||||||
refs <- if null modsFile then return [] else readModsColletionFile modsFile
|
refs <- if null modsFile then return [] else readModsColletionFile modsFile
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
let startParserState =
|
let startParserState =
|
||||||
defaultParserState { stateParseRaw = parseRaw,
|
defaultParserState { stateParseRaw = parseRaw,
|
||||||
stateTabStop = tabStop,
|
stateTabStop = tabStop,
|
||||||
stateSanitizeHTML = sanitize,
|
stateSanitizeHTML = sanitize,
|
||||||
stateStandalone = standalone',
|
stateStandalone = standalone',
|
||||||
#ifdef _CITEPROC
|
#ifdef _CITEPROC
|
||||||
stateCitations = map citeKey refs,
|
stateCitations = map citeKey refs,
|
||||||
#endif
|
#endif
|
||||||
stateSmart = smart || writerName' `elem`
|
stateSmart = smart || writerName' `elem`
|
||||||
["latex", "context"],
|
["latex", "context"],
|
||||||
stateColumns = columns,
|
stateColumns = columns,
|
||||||
stateStrict = strict }
|
stateStrict = strict }
|
||||||
let csslink = if null css
|
let csslink = if null css
|
||||||
then ""
|
then ""
|
||||||
else concatMap
|
else concatMap
|
||||||
(\f -> "<link rel=\"stylesheet\" href=\"" ++
|
(\f -> "<link rel=\"stylesheet\" href=\"" ++
|
||||||
f ++ "\" type=\"text/css\" media=\"all\" />\n")
|
f ++ "\" type=\"text/css\" media=\"all\" />\n")
|
||||||
css
|
css
|
||||||
let header = (if (customHeader == "DEFAULT")
|
let header = (if (customHeader == "DEFAULT")
|
||||||
then defaultHeader
|
then defaultHeader
|
||||||
else customHeader) ++ csslink ++ includeHeader
|
else customHeader) ++ csslink ++ includeHeader
|
||||||
let writerOptions = WriterOptions { writerStandalone = standalone',
|
let writerOptions = WriterOptions { writerStandalone = standalone',
|
||||||
writerHeader = header,
|
writerHeader = header,
|
||||||
writerTitlePrefix = titlePrefix,
|
writerTitlePrefix = titlePrefix,
|
||||||
writerTabStop = tabStop,
|
writerTabStop = tabStop,
|
||||||
writerTableOfContents = toc &&
|
writerTableOfContents = toc &&
|
||||||
(not strict) &&
|
(not strict) &&
|
||||||
writerName' /= "s5",
|
writerName' /= "s5",
|
||||||
writerHTMLMathMethod = mathMethod,
|
writerHTMLMathMethod = mathMethod,
|
||||||
writerS5 = (writerName' == "s5"),
|
writerS5 = (writerName' == "s5"),
|
||||||
writerIgnoreNotes = False,
|
writerIgnoreNotes = False,
|
||||||
writerIncremental = incremental,
|
writerIncremental = incremental,
|
||||||
writerNumberSections = numberSections,
|
writerNumberSections = numberSections,
|
||||||
writerIncludeBefore = includeBefore,
|
writerIncludeBefore = includeBefore,
|
||||||
writerIncludeAfter = includeAfter,
|
writerIncludeAfter = includeAfter,
|
||||||
writerStrictMarkdown = strict,
|
writerStrictMarkdown = strict,
|
||||||
writerReferenceLinks = referenceLinks,
|
writerReferenceLinks = referenceLinks,
|
||||||
writerWrapText = wrap }
|
writerWrapText = wrap }
|
||||||
|
|
||||||
let writeOutput = if writerName' == "odt"
|
if isNonTextOutput writerName' && outputFile == "-"
|
||||||
then if outputFile == "-"
|
then do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
|
||||||
then \_ -> do
|
"Specify an output file using the -o option.")
|
||||||
hPutStrLn stderr ("Error: Cannot write " ++ writerName ++
|
exitWith $ ExitFailure 5
|
||||||
" output to stdout.\n" ++
|
else return ()
|
||||||
"Specify an output file using the -o option.")
|
|
||||||
exitWith $ ExitFailure 5
|
let sourceDirRelative = if null sources
|
||||||
else let sourceDirRelative = if null sources
|
then ""
|
||||||
then ""
|
else takeDirectory (head sources)
|
||||||
else takeDirectory (head sources)
|
|
||||||
in saveOpenDocumentAsODT outputFile sourceDirRelative
|
let writeOutput = case writerName' of
|
||||||
else if outputFile == "-"
|
"odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative
|
||||||
then putStrLn
|
"pdf" -> saveLaTeXAsPDF outputFile sourceDirRelative
|
||||||
else writeFile outputFile . (++ "\n")
|
_ -> if outputFile == "-"
|
||||||
|
then putStrLn
|
||||||
|
else writeFile outputFile . (++ "\n")
|
||||||
|
|
||||||
fmap (reader startParserState . tabFilter tabStop . joinWithSep "\n")
|
fmap (reader startParserState . tabFilter tabStop . joinWithSep "\n")
|
||||||
(readSources sources) >>=
|
(readSources sources) >>=
|
||||||
|
@ -615,7 +619,7 @@ main = do
|
||||||
#endif
|
#endif
|
||||||
writeOutput . writer writerOptions
|
writeOutput . writer writerOptions
|
||||||
|
|
||||||
where
|
where
|
||||||
readSources [] = mapM readSource ["-"]
|
readSources [] = mapM readSource ["-"]
|
||||||
readSources sources = mapM readSource sources
|
readSources sources = mapM readSource sources
|
||||||
readSource "-" = getContents
|
readSource "-" = getContents
|
||||||
|
|
27
README
27
README
|
@ -82,11 +82,28 @@ To convert `hello.html` from html to markdown:
|
||||||
pandoc -f html -t markdown hello.html
|
pandoc -f html -t markdown hello.html
|
||||||
|
|
||||||
Supported output formats include `markdown`, `latex`, `context`
|
Supported output formats include `markdown`, `latex`, `context`
|
||||||
(ConTeXt), `html`, `rtf` (rich text format), `rst` (reStructuredText),
|
(ConTeXt), `pdf`, `html`, `rtf` (rich text format), `rst`
|
||||||
`docbook` (DocBook XML), `opendocument` (OpenDocument XML), `odt`
|
(reStructuredText), `docbook` (DocBook XML), `opendocument`
|
||||||
(OpenOffice text document), `texinfo`, (GNU Texinfo), `mediawiki`
|
(OpenDocument XML), `odt` (OpenOffice text document), `texinfo`, (GNU
|
||||||
(MediaWiki markup), `man` (groff man), and `s5` (which produces an
|
Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5`
|
||||||
HTML file that acts like powerpoint).
|
(which produces an HTML file that acts like powerpoint).
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- For `odt` output, you must have `zip` in the path. If you
|
||||||
|
don't have it installed, you can get the free [Info-ZIP].
|
||||||
|
|
||||||
|
- For `pdf` output, you must have `pdflatex` and `bibtex` in the
|
||||||
|
path. You should also have the following LaTeX packages installed:
|
||||||
|
`unicode`, `fancyhdr` (if you have verbatim text in footnotes),
|
||||||
|
`graphicx` (if you use images), `array` (if you use tables), and
|
||||||
|
`ulem` (if you use strikeout text). If they are not already included
|
||||||
|
in your LaTeX distribution, you can get them from [CTAN]. A full
|
||||||
|
[TeX Live] or [MacTeX] distribution will have all of these packages.
|
||||||
|
|
||||||
|
[Info-ZIP]: http://www.info-zip.org/Zip.html
|
||||||
|
[TeX Live]: http://www.tug.org/texlive/
|
||||||
|
[MacTeX]: http://www.tug.org/mactex/
|
||||||
|
|
||||||
Supported input formats include `markdown`, `html`, `latex`, and `rst`.
|
Supported input formats include `markdown`, `html`, `latex`, and `rst`.
|
||||||
Note that the `rst` reader only parses a subset of reStructuredText
|
Note that the `rst` reader only parses a subset of reStructuredText
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
{-# LANGUAGE TemplateHaskell, CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
{-
|
{-
|
||||||
Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
|
Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{- |
|
{- |
|
||||||
Module : Text.Pandoc.ODT
|
Module : Text.Pandoc.PDF
|
||||||
Copyright : Copyright (C) 2006-7 John MacFarlane
|
Copyright : Copyright (C) 2006-7 John MacFarlane
|
||||||
License : GNU GPL, version 2 or above
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
@ -26,119 +26,113 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
Stability : alpha
|
Stability : alpha
|
||||||
Portability : portable
|
Portability : portable
|
||||||
|
|
||||||
Functions for producing an ODT file from OpenDocument XML.
|
Functions for producing a PDF file from LaTeX.
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
|
module Text.Pandoc.PDF ( saveLaTeXAsPDF ) where
|
||||||
import Text.Pandoc.TH ( binaryContentsOf )
|
import Data.List ( isInfixOf )
|
||||||
import Data.Maybe ( fromJust )
|
|
||||||
import Data.List ( partition, intersperse )
|
|
||||||
import System.Directory
|
import System.Directory
|
||||||
import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories )
|
import System.FilePath ( (</>), (<.>), takeBaseName )
|
||||||
import System.Process ( runProcess, waitForProcess )
|
import System.Process ( runProcess, waitForProcess )
|
||||||
import System.Exit
|
import System.Exit
|
||||||
import Text.XML.Light
|
import System.Environment ( getEnvironment )
|
||||||
import Text.XML.Light.Cursor
|
|
||||||
import Text.Pandoc.Shared ( withTempDir )
|
import Text.Pandoc.Shared ( withTempDir )
|
||||||
import Network.URI ( isURI )
|
import Prelude hiding ( writeFile, readFile, putStrLn )
|
||||||
import qualified Data.ByteString as B ( writeFile, pack )
|
import System.IO ( stderr, openFile, IOMode (..) )
|
||||||
import Data.ByteString.Internal ( c2w )
|
|
||||||
import Prelude hiding ( writeFile, readFile )
|
|
||||||
import System.IO ( stderr )
|
|
||||||
#ifdef _UTF8STRING
|
#ifdef _UTF8STRING
|
||||||
import System.IO.UTF8
|
import System.IO.UTF8
|
||||||
#else
|
#else
|
||||||
import Text.Pandoc.UTF8
|
import Text.Pandoc.UTF8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
-- | Produce an ODT file from OpenDocument XML.
|
-- | Produce an PDF file from LaTeX.
|
||||||
saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
|
saveLaTeXAsPDF :: FilePath -- ^ Pathname of PDF file to be produced.
|
||||||
-> FilePath -- ^ Relative directory of source file.
|
-> FilePath -- ^ Relative directory of source file.
|
||||||
-> String -- ^ OpenDocument XML contents.
|
-> String -- ^ LaTeX document.
|
||||||
-> IO ()
|
-> IO ()
|
||||||
saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
|
saveLaTeXAsPDF destinationPDFPath sourceDirRelative latex = do
|
||||||
let zipCmd = "zip"
|
-- check for pdflatex and bibtex in path:
|
||||||
-- check for zip in path:
|
latexPathMaybe <- findExecutable "pdflatex"
|
||||||
zipPathMaybe <- findExecutable zipCmd
|
bibtexPathMaybe <- findExecutable "bibtex"
|
||||||
let zipPath = case zipPathMaybe of
|
let latexPath = case latexPathMaybe of
|
||||||
Nothing -> error $ "The '" ++ zipCmd ++
|
Nothing -> error $ "The 'pdflatex' command, which is needed to build an PDF file, was not found."
|
||||||
"' command, which is needed to build an ODT file, was not found.\n" ++
|
|
||||||
"It can be obtained from http://www.info-zip.org/Zip.html\n" ++
|
|
||||||
"Debian (and Debian-based) linux: apt-get install zip\n" ++
|
|
||||||
"Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm"
|
|
||||||
Just x -> x
|
Just x -> x
|
||||||
withTempDir "pandoc-odt" $ \tempDir -> do
|
let bibtexPath = case bibtexPathMaybe of
|
||||||
let tempODT = tempDir </> "reference.odt"
|
Nothing -> error $ "The 'bibtex' command, which is needed to build an PDF file, was not found."
|
||||||
B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt")
|
Just x -> x
|
||||||
xml' <- handlePictures tempODT sourceDirRelative xml
|
sourceDirAbsolute <- getCurrentDirectory >>= return . (</> sourceDirRelative) >>= canonicalizePath
|
||||||
writeFile (tempDir </> "content.xml") xml'
|
withTempDir "pandoc-pdf" $ \tempDir -> do
|
||||||
ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"]
|
env <- getEnvironment
|
||||||
(Just tempDir) Nothing Nothing Nothing (Just stderr)
|
let env' = ("TEXINPUTS", ".:" ++ sourceDirAbsolute ++ ":") : env
|
||||||
ec <- waitForProcess ph -- requires compilation with -threaded
|
let baseName = "input"
|
||||||
case ec of
|
writeFile (tempDir </> baseName <.> "tex") latex
|
||||||
ExitSuccess -> copyFile tempODT destinationODTPath
|
let runLatex = runProgram latexPath ["-interaction=nonstopmode", baseName] tempDir env'
|
||||||
_ -> error "Error creating ODT." >> exitWith ec
|
let runBibtex = runProgram bibtexPath [baseName] tempDir env'
|
||||||
|
messages1 <- runLatex
|
||||||
|
let logPath = tempDir </> baseName <.> "log"
|
||||||
|
tocExists <- doesFileExist (tempDir </> baseName <.> "toc")
|
||||||
|
logContents <- readFile logPath
|
||||||
|
let undefinedRefs = "There were undefined references" `isInfixOf` logContents
|
||||||
|
let needsBibtex = "itation" `isInfixOf` logContents
|
||||||
|
if needsBibtex
|
||||||
|
then runBibtex >>= hPutStr stderr . unlines
|
||||||
|
else return ()
|
||||||
|
if tocExists || undefinedRefs
|
||||||
|
then do
|
||||||
|
messages2 <- runLatex
|
||||||
|
logContents' <- readFile logPath
|
||||||
|
let stillUndefinedRefs = "There were undefined references" `isInfixOf` logContents'
|
||||||
|
if stillUndefinedRefs
|
||||||
|
then runLatex >>= hPutStr stderr . unlines
|
||||||
|
else hPutStr stderr $ unlines messages2
|
||||||
|
else
|
||||||
|
hPutStr stderr $ unlines messages1
|
||||||
|
let pdfPath = tempDir </> baseName <.> "pdf"
|
||||||
|
pdfExists <- doesFileExist pdfPath
|
||||||
|
if pdfExists
|
||||||
|
then copyFile pdfPath destinationPDFPath
|
||||||
|
else error "The PDF could not be created."
|
||||||
|
|
||||||
-- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in
|
runProgram :: FilePath -- ^ pathname of executable
|
||||||
-- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes
|
-> [String] -- ^ arguments
|
||||||
-- to point to the new locations in Pictures/. Return modified XML.
|
-> FilePath -- ^ working directory
|
||||||
handlePictures :: FilePath -- ^ Path of ODT file in temp directory
|
-> [(String, String)] -- ^ environment
|
||||||
-> FilePath -- ^ Directory (relative) containing source file
|
-> IO [String]
|
||||||
-> String -- ^ OpenDocument XML string
|
runProgram cmdPath arguments workingDir env = do
|
||||||
-> IO String -- ^ Modified XML
|
let runOutputPath = workingDir </> "output" <.> "tmp"
|
||||||
handlePictures tempODT sourceDirRelative xml = do
|
runOutput <- openFile runOutputPath WriteMode
|
||||||
let parsed = case parseXMLDoc xml of
|
ph <- runProcess cmdPath arguments (Just workingDir) (Just env) Nothing (Just runOutput) (Just runOutput)
|
||||||
Nothing -> error "Could not parse OpenDocument XML."
|
ec <- waitForProcess ph -- requires compilation with -threaded
|
||||||
Just x -> x
|
case ec of
|
||||||
let cursor = case (fromForest $ elContent parsed) of
|
ExitSuccess -> return []
|
||||||
Nothing -> error "ODT appears empty"
|
_ -> do
|
||||||
Just x -> x
|
output <- readFile runOutputPath
|
||||||
cursor' <- scanPictures tempODT sourceDirRelative cursor
|
if (takeBaseName cmdPath) == "bibtex"
|
||||||
let modified = parsed { elContent = toForest $ root cursor' }
|
then return $! lines output
|
||||||
return $ showTopElement modified
|
else do
|
||||||
|
return $!
|
||||||
scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
|
(if "`ucs.sty' not found" `isInfixOf` output
|
||||||
scanPictures tempODT sourceDirRelative cursor = do
|
then ["Please install the 'unicode' package from CTAN:",
|
||||||
cursor' <- handleTree tempODT sourceDirRelative cursor
|
" http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"]
|
||||||
case right cursor' of
|
else []) ++
|
||||||
Just n -> scanPictures tempODT sourceDirRelative n
|
(if "`ulem.sty' not found" `isInfixOf` output
|
||||||
Nothing -> return cursor'
|
then ["Please install the 'ulem' package from CTAN:",
|
||||||
|
" http://www.ctan.org/tex-archive/macros/latex/contrib/misc/"]
|
||||||
handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor
|
else []) ++
|
||||||
handleTree tempODT sourceDirRelative cursor = do
|
(if "`graphicx.sty' not found" `isInfixOf` output
|
||||||
case firstChild cursor of
|
then ["Please install the 'graphicx' package from CTAN:",
|
||||||
Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor
|
" http://www.ctan.org/tex-archive/macros/latex/required/graphics/"]
|
||||||
Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent
|
else []) ++
|
||||||
|
(if "`fancyhdr.sty' not found" `isInfixOf` output
|
||||||
-- | If content is an image link, handle it appropriately.
|
then ["Please install the 'fancyhdr' package from CTAN:",
|
||||||
-- Otherwise, handle children if any.
|
" http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/"]
|
||||||
handleContent :: FilePath -> FilePath -> Content -> IO Content
|
else []) ++
|
||||||
handleContent tempODT sourceDirRelative content@(Elem el) = do
|
(if "`array.sty' not found" `isInfixOf` output
|
||||||
if qName (elName el) == "image"
|
then ["Please install the 'array' package from CTAN:",
|
||||||
then do
|
" http://www.ctan.org/tex-archive/macros/latex/required/tools/"]
|
||||||
let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el
|
else []) ++
|
||||||
let href = case hrefs of
|
(filter isUseful $ lines output)
|
||||||
[] -> error $ "No href found in " ++ show el
|
where isUseful ln = take 1 ln == "!" ||
|
||||||
[x] -> x
|
take 2 ln == "l." ||
|
||||||
_ -> error $ "Multiple hrefs found in " ++ show el
|
"Error" `isInfixOf` ln ||
|
||||||
if isURI $ attrVal href
|
"error" `isInfixOf` ln
|
||||||
then return content
|
|
||||||
else do -- treat as filename
|
|
||||||
let oldLoc = sourceDirRelative </> attrVal href
|
|
||||||
fileExists <- doesFileExist oldLoc
|
|
||||||
if fileExists
|
|
||||||
then do
|
|
||||||
let pref = take 230 $ concat $ intersperse "_" $
|
|
||||||
splitDirectories $ takeDirectory $ attrVal href
|
|
||||||
let newLoc = "Pictures" </> pref ++ "_" ++ (takeFileName $ attrVal href)
|
|
||||||
let tempDir = takeDirectory tempODT
|
|
||||||
createDirectoryIfMissing False $ tempDir </> takeDirectory newLoc
|
|
||||||
copyFile oldLoc $ tempDir </> newLoc
|
|
||||||
let newAttrs = (href { attrVal = newLoc }) : rest
|
|
||||||
return $ Elem (el { elAttribs = newAttrs })
|
|
||||||
else do
|
|
||||||
hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring."
|
|
||||||
return content
|
|
||||||
else return content
|
|
||||||
|
|
||||||
handleContent _ _ c = return c -- not Element
|
|
||||||
|
|
||||||
|
|
|
@ -165,6 +165,7 @@ Library
|
||||||
Text.Pandoc.CharacterReferences,
|
Text.Pandoc.CharacterReferences,
|
||||||
Text.Pandoc.Shared,
|
Text.Pandoc.Shared,
|
||||||
Text.Pandoc.ODT,
|
Text.Pandoc.ODT,
|
||||||
|
Text.Pandoc.PDF,
|
||||||
Text.Pandoc.ASCIIMathML,
|
Text.Pandoc.ASCIIMathML,
|
||||||
Text.Pandoc.DefaultHeaders,
|
Text.Pandoc.DefaultHeaders,
|
||||||
Text.Pandoc.Highlighting,
|
Text.Pandoc.Highlighting,
|
||||||
|
|
Loading…
Add table
Reference in a new issue