diff --git a/Main.hs b/Main.hs index 0e0b4ba26..26057ff58 100644 --- a/Main.hs +++ b/Main.hs @@ -32,22 +32,24 @@ writers. module Main where import Text.Pandoc import Text.Pandoc.ODT -import Text.Pandoc.Shared ( HTMLMathMethod (..) ) +import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy ) import Text.Pandoc.Highlighting ( languages ) import System.Environment ( getArgs, getProgName, getEnvironment ) import System.Exit ( exitWith, ExitCode (..) ) -import System.FilePath ( takeExtension, takeDirectory ) +import System.FilePath import System.Console.GetOpt import Data.Maybe ( fromMaybe ) import Data.Char ( toLower ) import Data.List ( intercalate, isSuffixOf ) -import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) +import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) import System.IO ( stdout, stderr ) import System.IO.UTF8 #ifdef _CITEPROC import Text.CSL import Text.Pandoc.Biblio #endif +import Text.Pandoc.Plugins (getPlugin) +import Control.Monad (foldM) copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++ @@ -153,6 +155,7 @@ data Opt = Opt , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text , optSanitizeHTML :: Bool -- ^ Sanitize HTML + , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply #ifdef _CITEPROC , optBiblioFile :: String , optBiblioFormat :: String @@ -187,6 +190,7 @@ defaultOpts = Opt , optReferenceLinks = False , optWrapText = True , optSanitizeHTML = False + , optPlugins = [] #ifdef _CITEPROC , optBiblioFile = [] , optBiblioFormat = [] @@ -348,6 +352,15 @@ options = "FILENAME") "" -- "File to use for custom header (implies -s)" + , Option "P" ["plugins"] + (ReqArg + (\arg opt -> do + let pluginModules = splitBy ',' arg + plugins <- mapM getPlugin pluginModules + return opt { optPlugins = plugins }) + "MODULE[,MODULE...]") + "" -- "Haskell modules" + , Option "T" ["title-prefix"] (ReqArg (\arg opt -> return opt { optTitlePrefix = arg, @@ -519,6 +532,7 @@ main = do , optReferenceLinks = referenceLinks , optWrapText = wrap , optSanitizeHTML = sanitize + , optPlugins = plugins #ifdef _CITEPROC , optBiblioFile = biblioFile , optBiblioFormat = biblioFormat @@ -630,21 +644,26 @@ main = do then "" else takeDirectory (head sources) - let writeOutput = case writerName' of - "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative - _ -> if outputFile == "-" - then putStrLn - else writeFile outputFile . (++ "\n") + let readSources [] = mapM readSource ["-"] + readSources srcs = mapM readSource srcs + readSource "-" = getContents + readSource src = readFile src - fmap (reader startParserState . tabFilter tabStop . intercalate "\n") - (readSources sources) >>= + doc <- fmap (reader startParserState . tabFilter tabStop . intercalate "\n") (readSources sources) + + doc' <- do #ifdef _CITEPROC - processBiblio cslFile refs >>= + processBiblio cslFile refs doc +#else + return doc #endif - writeOutput . writer writerOptions - where - readSources [] = mapM readSource ["-"] - readSources sources = mapM readSource sources - readSource "-" = getContents - readSource source = readFile source + doc'' <- foldM (flip ($)) doc' plugins + + let writerOutput = writer writerOptions doc'' ++ "\n" + + case writerName' of + "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput + _ -> if outputFile == "-" + then putStr writerOutput + else writeFile outputFile writerOutput diff --git a/README b/README index 86c52ac2b..4746ffae0 100644 --- a/README +++ b/README @@ -309,6 +309,14 @@ For further documentation, see the `pandoc(1)` man page. repeatedly to include multiple files. They will be included in the order specified. +`-P` *MODULE[,MODULE...]*, `--plugins`*=MODULE[,MODULE...]* +: specifies plugins to load, by module name or source file pathname. + Plugins should export a function `transform` of type `a -> a` + or `a -> IO a`, where `a` is `Inline`, `Block`, `Pandoc`, + `[Inline]`, or `[Block]`. This function will be used to transform + the pandoc document after it is parsed by the reader and before it + is written out by the writer. (See below on [Plugins](#plugins).) + `-T` or `--title-prefix` *string* : includes *string* as a prefix at the beginning of the title that appears in the HTML header (but not in the title as it appears at @@ -1108,3 +1116,165 @@ ordinary HTML (without bird tracks). writes HTML with the Haskell code in bird tracks, so it can be copied and pasted as literate Haskell source. +Plugins +======= + +Pandoc's plugin system allows users to modify pandoc's behavior by writing +short Haskell programs. A plugin is a Haskell module that exports a function +`transform`, of type `a -> a` or `a -> IO a`, where `a` is `Pandoc`, +`Block`, `Inline`, `[Block]`, or `[Inline]`. The `transform` function will +be used to transform the pandoc document generated by the reader, before +it is transformed by the writer. + +An example will help make this clearer. Suppose we want to use pandoc with +the WordPress blog engine. WordPress provides support for LaTeX math, but +instead of `$e = mc^2$`, WordPress wants `$LaTeX e = mc^2$`. Prior to plugins, +there was no good way to make pandoc do this. We could have tried using +regex replacements on the markdown input or HTML output, but this would have +been error-prone: we'd have to make sure we weren't capturing non-math text +between dollar signs (for example, text inside a code block). Besides, +pandoc's markdown reader has already identified the math bits; why not +make use of that? By writing a plugin, we can: + +~~~ {.haskell} +-- WordPressPlugin.hs +module WordPressPlugin (transform) where +import Text.Pandoc + +transform :: Inline -> Inline +transform (Math x y) = Math x $ "LaTeX " ++ y +transform x = x +~~~ + +This is a Haskell program, but a very short one. The lines + +~~~ {.haskell} +module WordPressPlugin (transform) where +import Text.Pandoc +~~~ + +just define the name of the module (`WordPressPlugin`), the names of any +exported functions (for a plugin, this will always just be `transform`), +and the modules that will be used in the program itself (`Text.Pandoc`). +The real meat of the program is the three-line definition of `transform`: + +~~~ {.haskell} +transform :: Inline -> Inline +transform (Math x y) = Math x $ "LaTeX " ++ y +transform x = x +~~~ + +The first line defines the type of the function: it is a function that +takes an `Inline` element and returns an `Inline` element. (For the definition +of `Inline`, see the module `Text.Pandoc.Definition`.) The next line says +that when the input matches the pattern `Math x y`, the string `LaTeX ` +should be inserted at the beginning of `y`. (`x` just specifies whether the +math element is inline or display math, so we leave it alone.) The last +line says, in effect, that the `transform` function has no effect on any +other kind of `Inline` element -- it just passes it through. When the plugin +is applied, this transformation will be used on every `Inline` element in +the document, and `LaTeX ` will be inserted where needed in math elements. + +To use this plugin, we just specify the module (or alternatively the filename) +with the `--plugins` option: + + % echo "Hello, $e=mc^2$." | pandoc -m --plugins=WordPressPlugin.hs + <p + >Hello, <span class="LaTeX" + >$LaTeX e=mc^2$</span + >.</p + > + +Let's look at a more complex example, involving IO. Suppose we want to include +some graphviz diagrams in our document. Of course, we could use a Makefile to +generate the diagrams, then use regular images in our document. But wouldn't it +be nicer just to include the graphviz code in the document itself, perhaps in +a specially marked delimited code block? + + ~~~ {.dot name="diagram1"} + digraph G {Hello->World} + ~~~ + +This can be accomplished by a plugin: + +~~~ {.haskell} +-- DotPlugin.hs +module DotPlugin (transform) where +import Text.Pandoc +import Text.Pandoc.Shared +import System.Process (readProcess) +import Data.Char (ord) +-- from the utf8-string package on HackageDB: +import Data.ByteString.Lazy.UTF8 (fromString) +-- from the SHA package on HackageDB: +import Data.Digest.Pure.SHA + +transform :: Block -> IO Block +transform (CodeBlock (id, classes, namevals) contents) | "dot" `elem` classes = do + let (name, outfile) = case lookup "name" namevals of + Just fn -> ([Str fn], fn ++ ".png") + Nothing -> ([], uniqueName contents ++ ".png") + result <- readProcess "dot" ["-Tpng"] contents + writeFile outfile result + return $ Para [Image name (outfile, "")] +transform x = return x + +-- | Generate a unique filename given the file's contents. +uniqueName :: String -> String +uniqueName = showDigest . sha1 . fromString +~~~ + +The heart of this plugin is the `transform` function, which converts a `Block` +to a `Block`. Again, there are two clauses, one for code blocks that are marked +with the "dot" class, one for all other blocks. Code blocks with ".dot" are +replaced with links to an image file; this file is generated by running +`dot -Tpng` on the contents of the code block. + +Because `transform` performs file reads and writes, it needs to be in the +IO monad, hence the type: `Block -> IO Block`. + +One more example. Suppose we want emphasized text to be CAPITALIZED +instead of italicized. We could use a plugin: + +~~~ {.haskell} +module CapitalizeEmphasisPlugin (transform) where +import Text.Pandoc +import Data.Char (toUpper) + +transform :: [Inline] -> [Inline] +transform (Emph xs : ys) = processIn capStr xs ++ transform ys +transform (x : ys) = x : transform ys +transform [] = [] + +capStr :: Inline -> Inline +capStr (Str x) = Str (map toUpper x) +capStr x = x +~~~ + +Here `transform` converts a whole list of `Inline` elements to another +such list. The key clause is + +~~~ {.haskell} +transform (Emph xs : ys) = processIn capStr xs ++ transform ys +~~~ + +This applies the `capStr` function recursively to all inlines in the +list of emphasized inlines and puts the transformed list in place +of the original. `capStr` is a simple `Inline` transformation that +capitalizes `Str` elements and leaves everything else alone. The +function `processIn`, defined in `Text.Pandoc.Definition`, uses some +`Data.Generics` magic to apply its argument (here `capStr`) to every +`Inline` element in a list, including elements that are deeply buried in +other elements. Thus + + processIn captStr [Str "one", Strong [Str "two", Space]] ==> + [Str "ONE", Strong [Str "TWO", Space]] + +There are other sample plugins in the `plugins` subdirectory of the +pandoc source code. + +**Note:** Do not attempt to use plugins when running pandoc in the +directory containing pandoc's source code. The interpreter will try to +load the files directly from the source code, rather than reading the compiled +versions, and pandoc will hang. + diff --git a/Text/Pandoc/Definition.hs b/Text/Pandoc/Definition.hs index 3c783b1b9..ed58f6b75 100644 --- a/Text/Pandoc/Definition.hs +++ b/Text/Pandoc/Definition.hs @@ -128,10 +128,23 @@ data Inline | Note [Block] -- ^ Footnote or endnote deriving (Show, Eq, Read, Typeable, Data) --- | Applies a transformation to matching elements in a Pandoc document. -processPandoc :: Typeable a => (a -> a) -> Pandoc -> Pandoc -processPandoc f = everywhere (mkT f) +-- | Applies a transformation on @a@s to matching elements in a @b@. +processIn :: (Data a, Data b) => (a -> a) -> b -> b +processIn f = everywhere (mkT f) + +-- | Like 'processIn', but with monadic transformations. +processInM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b +processInM f = everywhereM (mkM f) + +-- | Runs a query on matching @a@ elements in a @c@. +queryIn :: (Data a, Data c) => (a -> [b]) -> c -> [b] +queryIn f = everything (++) ([] `mkQ` f) + +{-# DEPRECATED processPandoc "Use processIn instead" #-} +processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc +processPandoc = processIn + +{-# DEPRECATED queryPandoc "Use queryIn instead" #-} +queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b] +queryPandoc = queryIn --- | Runs a query on matching elements in a Pandoc document. -queryPandoc :: Typeable a => (a -> [b]) -> Pandoc -> [b] -queryPandoc f = everything (++) ([] `mkQ` f) diff --git a/Text/Pandoc/Plugins.hs b/Text/Pandoc/Plugins.hs new file mode 100644 index 000000000..5ba333faa --- /dev/null +++ b/Text/Pandoc/Plugins.hs @@ -0,0 +1,69 @@ +{- +Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Pluigns + Copyright : Copyright (C) 2008 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Support for plugins. +-} + +module Text.Pandoc.Plugins (getPlugin) +where + +import Language.Haskell.Interpreter +import Text.Pandoc +import Control.Monad (unless, liftM) +import Control.Monad.Error (throwError) +import Data.List (isInfixOf) + +-- | Returns the function named @transform@ in the specified +-- module. The module may be identified either by module name +-- or by path name. The @transform@ function should have type +-- @a -> a@ or @a -> IO a@, where @a@ is an instance of 'Data': +-- for example, @Pandoc -> Pandoc@, @Inline -> IO Inline@, +-- @Block -> Block@, or @[Inline] -> IO [Inline]@. +getPlugin :: String -> IO (Pandoc -> IO Pandoc) +getPlugin modsrc = do + res <- runInterpreter (evaluatePlugin modsrc) + case res of + Right func -> return func + Left (WontCompile xs) -> error $ "WontCompile error for plugin '" ++ modsrc ++ "'\n" ++ unlines (map errMsg xs) + Left (NotAllowed x) -> error $ "NotAllowed error for plugin '" ++ modsrc ++ "'\n" ++ x + Left (UnknownError x) -> error $ "UnknownError for plugin '" ++ modsrc ++ "'\n" ++ x + Left (GhcException x) -> error $ "GhcException for plugin '" ++ modsrc ++ "'\n" ++ x + +evaluatePlugin :: String -> Interpreter (Pandoc -> IO Pandoc) +evaluatePlugin modsrc = do + set [installedModulesInScope := False] + loadModules [modsrc] + modnames <- getLoadedModules + setTopLevelModules modnames + setImports ["Prelude", "Text.Pandoc", "Text.Pandoc.Definition"] + exports <- liftM concat $ mapM getModuleExports modnames + unless ((Fun "transform") `elem` exports) $ + throwError $ UnknownError $ "The plugin module must define a function 'transform'." + transformType <- typeOf "transform" + if "-> IO" `isInfixOf` transformType + then interpret "processInM transform" (as :: Pandoc -> IO Pandoc) + else interpret "return . (processIn transform)" (as :: Pandoc -> IO Pandoc) diff --git a/man/man1/pandoc.1.md b/man/man1/pandoc.1.md index 91c3a8537..a418e608b 100644 --- a/man/man1/pandoc.1.md +++ b/man/man1/pandoc.1.md @@ -173,6 +173,14 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`. `context`, `docbook`, `man`, `markdown`, `opendocument`, `rst`, `rtf`). +-P *MODULE[,MODULE...]*, \--plugins=*MODULE[,MODULE...]* +: Specify plugins to load, by module name or source file pathname. + Plugins should export a function `transform` of type `a -> a` + or `a -> IO a`, where `a` is `Inline`, `Block`, `Pandoc`, + `[Inline]`, or `[Block]`. This function will be used to transform + the pandoc document after it is parsed by the reader and before it + is written out by the writer. + -T *STRING*, \--title-prefix=*STRING* : Specify *STRING* as a prefix to the HTML window title. diff --git a/pandoc.cabal b/pandoc.cabal index 20968719d..8ff5d8b87 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -70,6 +70,12 @@ Extra-Source-Files: data/odt-styles/Thumbnails/thumbnail.png, data/odt-styles/styles.xml, data/odt-styles/mimetype, + -- sample plugins + plugins/WordPressPlugin.hs, + plugins/CapitalizeEmphasisPlugin.hs, + plugins/IncludeFilePlugin.hs, + plugins/DotPlugin.hs, + plugins/ListLinksPlugin.hs, -- tests tests/bodybg.gif, tests/writer.latex, @@ -139,7 +145,7 @@ Library mtl >= 1.1, network >= 2, filepath >= 1.1, process >= 1, directory >= 1, template-haskell >= 2.2, bytestring >= 0.9, zip-archive >= 0.1.1, - utf8-string >= 0.3, old-time >= 1 + utf8-string >= 0.3, old-time >= 1, hint >= 0.3.0.0 if impl(ghc >= 6.10) Build-depends: base >= 4, syb else @@ -158,6 +164,7 @@ Library Text.Pandoc.CharacterReferences, Text.Pandoc.Shared, Text.Pandoc.ODT, + Text.Pandoc.Plugins, Text.Pandoc.LaTeXMathML, Text.Pandoc.DefaultHeaders, Text.Pandoc.Highlighting, diff --git a/plugins/CapitalizeEmphasisPlugin.hs b/plugins/CapitalizeEmphasisPlugin.hs new file mode 100644 index 000000000..31cb4403a --- /dev/null +++ b/plugins/CapitalizeEmphasisPlugin.hs @@ -0,0 +1,14 @@ +module CapitalizeEmphasisPlugin (transform) where +import Text.Pandoc +import Data.Char (toUpper) + +-- This plugin changes emphasized text into CAPITALIZED TEXT. + +transform :: [Inline] -> [Inline] +transform (Emph x : ys) = processIn capStr x ++ transform ys +transform (x : ys) = x : transform ys +transform [] = [] + +capStr :: Inline -> Inline +capStr (Str x) = Str (map toUpper x) +capStr x = x diff --git a/plugins/DotPlugin.hs b/plugins/DotPlugin.hs new file mode 100644 index 000000000..db1a02e1c --- /dev/null +++ b/plugins/DotPlugin.hs @@ -0,0 +1,30 @@ +module DotPlugin (transform) where +import Text.Pandoc +import Text.Pandoc.Shared +import System.Process (readProcess) +import Data.Char (ord) +-- from the utf8-string package on HackageDB: +import Data.ByteString.Lazy.UTF8 (fromString) +-- from the SHA package on HackageDB: +import Data.Digest.Pure.SHA + +-- This plugin allows you to include a graphviz "dot" diagram +-- in a document like this: +-- +-- ~~~ {.dot name="diagram1"} +-- digraph G {Hello->World} +-- ~~~ + +transform :: Block -> IO Block +transform (CodeBlock (id, classes, namevals) contents) | "dot" `elem` classes = do + let (name, outfile) = case lookup "name" namevals of + Just fn -> ([Str fn], fn ++ ".png") + Nothing -> ([], uniqueName contents ++ ".png") + result <- readProcess "dot" ["-Tpng"] contents + writeFile outfile result + return $ Para [Image name (outfile, "")] +transform x = return x + +-- | Generate a unique filename given the file's contents. +uniqueName :: String -> String +uniqueName = showDigest . sha1 . fromString diff --git a/plugins/IncludeFilePlugin.hs b/plugins/IncludeFilePlugin.hs new file mode 100644 index 000000000..40a8ce34d --- /dev/null +++ b/plugins/IncludeFilePlugin.hs @@ -0,0 +1,19 @@ +module IncludeFilePlugin (transform) where +import Text.Pandoc +import Text.Pandoc.Shared +import Control.Monad + +-- This plugin allows you to include the contents of an +-- external file in a delimited code block like this: +-- +-- ~~~ {include="filename"} +-- ~~~ +-- +-- Trailing newlines are trimmed. + +transform :: Block -> IO Block +transform cb@(CodeBlock (id, classes, namevals) contents) = + case lookup "include" namevals of + Just f -> return . (CodeBlock (id, classes, namevals) . stripTrailingNewlines) =<< readFile f + Nothing -> return cb +transform x = return x diff --git a/plugins/ListLinksPlugin.hs b/plugins/ListLinksPlugin.hs new file mode 100644 index 000000000..88c1553b1 --- /dev/null +++ b/plugins/ListLinksPlugin.hs @@ -0,0 +1,15 @@ +module ListLinksPlugin (transform) where +import Text.Pandoc + +-- This plugin returns an empty document and prints a list +-- of the URLs linked to in the source document. + +transform :: Pandoc -> IO Pandoc +transform p = do + let urls = queryIn findURLs p + putStrLn $ unlines urls + return $ Pandoc (Meta [] [] []) [] + +findURLs :: Inline -> [String] +findURLs (Link label (url, title)) = [url] +findURLs x = [] diff --git a/plugins/WordPressPlugin.hs b/plugins/WordPressPlugin.hs new file mode 100644 index 000000000..85b7ca72b --- /dev/null +++ b/plugins/WordPressPlugin.hs @@ -0,0 +1,10 @@ +module WordPressPlugin (transform) where +import Text.Pandoc + +-- This plugin (when used with -m) prints LaTeX math in the +-- format required by WordPress blogs. $e=mc^2$ becomes +-- $LaTeX e=mc^2$. + +transform :: Inline -> Inline +transform (Math x y) = Math x $ "LaTeX " ++ y +transform x = x