From 0bbea0cc7650af61870b310ebbf6fb8a9fec09a7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 5 Feb 2017 21:58:45 +0100 Subject: [PATCH] Split pandoc.hs into a module, Text.Pandoc.App, and a small program. The App module provides a function that does a pandoc conversion, based on option settings. The program (pandoc.hs) now does nothing more than parse options and pass them to this function, which can easily be used by other applications (e.g. a GUI wrapper). The Opt structure has been further simplified. API changes: * New exposed module Text.Pandoc.App * Text.Pandoc.Highlighting has been exposed. * highlightingStyles has been moved to Text.Pandoc.Highlighting. --- pandoc.cabal | 11 +- pandoc.hs | 850 ++++---------------------------- src/Text/Pandoc/App.hs | 720 +++++++++++++++++++++++++++ src/Text/Pandoc/Highlighting.hs | 14 +- 4 files changed, 822 insertions(+), 773 deletions(-) create mode 100644 src/Text/Pandoc/App.hs diff --git a/pandoc.cabal b/pandoc.cabal index 9ed1eb47d..bb95c5453 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -297,6 +297,10 @@ Library cmark >= 0.5 && < 0.6, doctemplates >= 0.1 && < 0.2, free >= 4 + if os(windows) + Cpp-options: -D_WINDOWS + else + Build-Depends: unix >= 2.4 && < 2.8 if flag(old-locale) Build-Depends: old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5 @@ -330,6 +334,7 @@ Library Other-Modules: Prelude Exposed-Modules: Text.Pandoc, + Text.Pandoc.App, Text.Pandoc.Options, Text.Pandoc.Extensions, Text.Pandoc.Pretty, @@ -386,6 +391,7 @@ Library Text.Pandoc.Templates, Text.Pandoc.XML, Text.Pandoc.SelfContained, + Text.Pandoc.Highlighting, Text.Pandoc.Process, Text.Pandoc.MIME, Text.Pandoc.Class @@ -421,7 +427,6 @@ Library Text.Pandoc.UUID, Text.Pandoc.ImageSize, Text.Pandoc.Slides, - Text.Pandoc.Highlighting, Text.Pandoc.Compat.Time, Paths_pandoc @@ -448,10 +453,6 @@ Executable pandoc Build-Depends: network >= 2 && < 2.6 Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -fprof-auto-exported -rtsopts -with-rtsopts=-K16m - if os(windows) - Cpp-options: -D_WINDOWS - else - Build-Depends: unix >= 2.4 && < 2.8 Default-Language: Haskell98 Other-Extensions: PatternGuards, OverloadedStrings, diff --git a/pandoc.hs b/pandoc.hs index a7407a279..bc2379289 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -30,51 +30,31 @@ Parses command-line options and calls the appropriate readers and writers. -} module Main where +import Text.Pandoc.App (defaultOpts, convertWithOpts, Opt(..)) import Text.Pandoc -import Text.Pandoc.Builder (setMeta) -import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.Walk (walk) -import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, - safeRead, headerShift, err, openURL ) -import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) -import Text.Pandoc.XML ( toEntities ) -import Text.Pandoc.SelfContained ( makeSelfContained ) -import Text.Pandoc.Process (pipeProcess) -import Skylighting ( defaultSyntaxMap, Syntax(..), Style, tango, pygments, - espresso, zenburn, kate, haddock, breezeDark, monochrome ) -import System.Environment ( getArgs, getProgName, getEnvironment ) -import System.Exit ( ExitCode (..), exitSuccess ) -import System.FilePath +import Text.Pandoc.Shared +import Text.Pandoc.Class (PandocIO) +import Text.Pandoc.Highlighting (highlightingStyles) +import Skylighting (Syntax(..), defaultSyntaxMap) +import Data.List (sort, intercalate) +import Data.Char (toUpper) import System.Console.GetOpt -import Data.Char ( toLower, toUpper ) -import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort ) -import System.Directory ( getAppUserDataDirectory, findExecutable, - doesFileExist, Permissions(..), getPermissions ) -import System.IO ( stdout, stderr ) -import System.IO.Error ( isDoesNotExistError ) -import qualified Control.Exception as E -import Control.Exception.Extensible ( throwIO ) +import Control.Monad import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad (when, unless, (>=>)) -import Data.Maybe (fromMaybe, isNothing, isJust) -import Data.Foldable (foldrM) -import Network.URI (parseURI, isURI, URI(..)) -import qualified Data.ByteString.Lazy as B -import qualified Data.ByteString as BS -import Data.Aeson (eitherDecode', encode) -import qualified Data.Map as M -import Data.Yaml (decode) -import qualified Data.Yaml as Yaml +import System.Environment +import System.FilePath (takeBaseName) +import Data.Maybe (fromMaybe) +import Text.Pandoc.Shared (err) import qualified Data.Text as T +import qualified Control.Exception as E +import qualified Data.ByteString as BS +import qualified Data.Map as M +import System.Exit +import System.IO (stdout) import Control.Applicative ((<|>)) +import System.Directory (getAppUserDataDirectory) import Paths_pandoc (getDataDir) import Text.Printf (printf) -#ifndef _WINDOWS -import System.Posix.Terminal (queryTerminal) -import System.Posix.IO (stdOutput) -#endif -import Control.Monad.Trans -import Text.Pandoc.Class (withMediaBag, PandocIO, getLog, setVerbosity) main :: IO () main = do @@ -95,549 +75,6 @@ main = do opts <- foldl (>>=) (return defaultOpts) actions convertWithOpts opts args -addDeprecationNote :: String -> [String] -> [String] -addDeprecationNote "--smart" = - (("--smart has been removed. Use +smart or -smart extension instead.\n" ++ - "For example: pandoc -f markdown+smart -t markdown-smart.") :) -addDeprecationNote "-S" = addDeprecationNote "--smart" -addDeprecationNote "--old-dashes" = - ("--old-dashes has been removed. Use +old_dashes extension instead." :) -addDeprecationNote "--no-wrap" = - ("--no-wrap has been removed. Use --wrap=none instead." :) -addDeprecationNote "--chapters" = - ("--chapters has been removed. Use --top-level-division=chapter instead." :) -addDeprecationNote "--reference-docx" = - ("--reference-docx has been removed. Use --reference-doc instead." :) -addDeprecationNote "--reference-odt" = - ("--reference-odt has been removed. Use --reference-doc instead." :) -addDeprecationNote x = - (("Unknown option " ++ x ++ ".") :) - -convertWithOpts :: Opt -> [FilePath] -> IO () -convertWithOpts opts args = do - let outputFile = optOutputFile opts - let filters = optFilters opts - let verbosity = optVerbosity opts - - when (optDumpArgs opts) $ - do UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) args - exitSuccess - - epubStylesheet <- case optEpubStylesheet opts of - Nothing -> return Nothing - Just fp -> Just <$> UTF8.readFile fp - - epubMetadata <- case optEpubMetadata opts of - Nothing -> return Nothing - Just fp -> Just <$> UTF8.readFile fp - - let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" - let mathMethod = - case (optKaTeXJS opts, optKaTeXStylesheet opts) of - (Nothing, _) -> optHTMLMathMethod opts - (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) - - - -- --bibliography implies -F pandoc-citeproc for backwards compatibility: - let needsCiteproc = isJust (M.lookup "bibliography" (optMetadata opts)) && - optCiteMethod opts `notElem` [Natbib, Biblatex] && - "pandoc-citeproc" `notElem` map takeBaseName filters - let filters' = if needsCiteproc then "pandoc-citeproc" : filters - else filters - - let sources = case args of - [] -> ["-"] - xs | optIgnoreArgs opts -> ["-"] - | otherwise -> xs - - datadir <- case optDataDir opts of - Nothing -> E.catch - (Just <$> getAppUserDataDirectory "pandoc") - (\e -> let _ = (e :: E.SomeException) - in return Nothing) - Just _ -> return $ optDataDir opts - - -- assign reader and writer based on options and filenames - let readerName = case optReader opts of - Nothing -> defaultReaderName - (if any isURI sources - then "html" - else "markdown") sources - Just x -> map toLower x - - let writerName = case optWriter opts of - Nothing -> defaultWriterName outputFile - Just x -> map toLower x - let format = takeWhile (`notElem` ['+','-']) - $ takeFileName writerName -- in case path to lua script - - let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - - let laTeXOutput = format `elem` ["latex", "beamer"] - let conTeXtOutput = format == "context" - let html5Output = format == "html5" || format == "html" - - -- disabling the custom writer for now - writer <- if ".lua" `isSuffixOf` format - -- note: use non-lowercased version writerName - then error "custom writers disabled for now" - else case getWriter writerName of - Left e -> err 9 $ - if format == "pdf" - then e ++ - "\nTo create a pdf with pandoc, use " ++ - "the latex or beamer writer and specify\n" ++ - "an output file with .pdf extension " ++ - "(pandoc -t latex -o filename.pdf)." - else e - Right w -> return (w :: Writer PandocIO) - - -- TODO: we have to get the input and the output into the state for - -- the sake of the text2tags reader. - reader <- case getReader readerName of - Right r -> return (r :: Reader PandocIO) - Left e -> err 7 e' - where e' = case readerName of - "pdf" -> e ++ - "\nPandoc can convert to PDF, but not from PDF." - "doc" -> e ++ - "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." - _ -> e - - let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput - - templ <- case optTemplate opts of - _ | not standalone -> return Nothing - Nothing -> do - deftemp <- getDefaultTemplate datadir format - case deftemp of - Left e -> throwIO e - Right t -> return (Just t) - Just tp -> do - -- strip off extensions - let tp' = case takeExtension tp of - "" -> tp <.> format - _ -> tp - Just <$> E.catch (UTF8.readFile tp') - (\e -> if isDoesNotExistError e - then E.catch - (readDataFileUTF8 datadir - ("templates" tp')) - (\e' -> let _ = (e' :: E.SomeException) - in throwIO e') - else throwIO e) - - let addStringAsVariable varname s vars = return $ (varname, s) : vars - - let addContentsAsVariable varname fp vars = do - s <- UTF8.readFile fp - return $ (varname, s) : vars - - -- note: this reverses the list constructed in option parsing, - -- which in turn was reversed from the command-line order, - -- so we end up with the correct order in the variable list: - let withList _ [] vars = return vars - withList f (x:xs) vars = f x vars >>= withList f xs - - variables <- return (optVariables opts) - >>= - withList (addContentsAsVariable "include-before") - (optIncludeBeforeBody opts) - >>= - withList (addContentsAsVariable "include-after") - (optIncludeAfterBody opts) - >>= - withList (addContentsAsVariable "header-includes") - (optIncludeInHeader opts) - >>= - withList (addStringAsVariable "css") (optCss opts) - >>= - maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) - >>= - maybe return (addStringAsVariable "epub-cover-image") - (optEpubCoverImage opts) - >>= - (\vars -> case mathMethod of - LaTeXMathML Nothing -> do - s <- readDataFileUTF8 datadir "LaTeXMathML.js" - return $ ("mathml-script", s) : vars - _ -> return vars) - >>= - (\vars -> if format == "dzslides" - then do - dztempl <- readDataFileUTF8 datadir - ("dzslides" "template.html") - let dzline = "