Added a plugin system, based on hint.

+ In Text.Pandoc.Definition, added processIn, processInM,
  and queryIn, and deprecated processPandoc and queryPandoc
  for these more general functions, which are useful in writing
  plugins.

+ Added module Text.Pandoc.Plugins.

+ Added a --plugins option to Main, and code to run the parsed pandoc
  document through all the plugins.

+ Provided five sample plugin files in the plugins/ directory.

+ Documented --plugin in the pandoc man page and README.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1519 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2009-01-24 19:58:06 +00:00
parent 243008242d
commit 874c3e0dea
11 changed files with 398 additions and 24 deletions

53
Main.hs
View file

@ -32,22 +32,24 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
import Text.Pandoc.Shared ( HTMLMathMethod (..) )
import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy )
import Text.Pandoc.Highlighting ( languages )
import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath ( takeExtension, takeDirectory )
import System.FilePath
import System.Console.GetOpt
import Data.Maybe ( fromMaybe )
import Data.Char ( toLower )
import Data.List ( intercalate, isSuffixOf )
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
import System.IO ( stdout, stderr )
import System.IO.UTF8
#ifdef _CITEPROC
import Text.CSL
import Text.Pandoc.Biblio
#endif
import Text.Pandoc.Plugins (getPlugin)
import Control.Monad (foldM)
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++
@ -153,6 +155,7 @@ data Opt = Opt
, optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, optWrapText :: Bool -- ^ Wrap text
, optSanitizeHTML :: Bool -- ^ Sanitize HTML
, optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
#ifdef _CITEPROC
, optBiblioFile :: String
, optBiblioFormat :: String
@ -187,6 +190,7 @@ defaultOpts = Opt
, optReferenceLinks = False
, optWrapText = True
, optSanitizeHTML = False
, optPlugins = []
#ifdef _CITEPROC
, optBiblioFile = []
, optBiblioFormat = []
@ -348,6 +352,15 @@ options =
"FILENAME")
"" -- "File to use for custom header (implies -s)"
, Option "P" ["plugins"]
(ReqArg
(\arg opt -> do
let pluginModules = splitBy ',' arg
plugins <- mapM getPlugin pluginModules
return opt { optPlugins = plugins })
"MODULE[,MODULE...]")
"" -- "Haskell modules"
, Option "T" ["title-prefix"]
(ReqArg
(\arg opt -> return opt { optTitlePrefix = arg,
@ -519,6 +532,7 @@ main = do
, optReferenceLinks = referenceLinks
, optWrapText = wrap
, optSanitizeHTML = sanitize
, optPlugins = plugins
#ifdef _CITEPROC
, optBiblioFile = biblioFile
, optBiblioFormat = biblioFormat
@ -630,21 +644,26 @@ main = do
then ""
else takeDirectory (head sources)
let writeOutput = case writerName' of
"odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative
_ -> if outputFile == "-"
then putStrLn
else writeFile outputFile . (++ "\n")
let readSources [] = mapM readSource ["-"]
readSources srcs = mapM readSource srcs
readSource "-" = getContents
readSource src = readFile src
fmap (reader startParserState . tabFilter tabStop . intercalate "\n")
(readSources sources) >>=
doc <- fmap (reader startParserState . tabFilter tabStop . intercalate "\n") (readSources sources)
doc' <- do
#ifdef _CITEPROC
processBiblio cslFile refs >>=
processBiblio cslFile refs doc
#else
return doc
#endif
writeOutput . writer writerOptions
where
readSources [] = mapM readSource ["-"]
readSources sources = mapM readSource sources
readSource "-" = getContents
readSource source = readFile source
doc'' <- foldM (flip ($)) doc' plugins
let writerOutput = writer writerOptions doc'' ++ "\n"
case writerName' of
"odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput
_ -> if outputFile == "-"
then putStr writerOutput
else writeFile outputFile writerOutput

170
README
View file

@ -309,6 +309,14 @@ For further documentation, see the `pandoc(1)` man page.
repeatedly to include multiple files. They will be included in the
order specified.
`-P` *MODULE[,MODULE...]*, `--plugins`*=MODULE[,MODULE...]*
: specifies plugins to load, by module name or source file pathname.
Plugins should export a function `transform` of type `a -> a`
or `a -> IO a`, where `a` is `Inline`, `Block`, `Pandoc`,
`[Inline]`, or `[Block]`. This function will be used to transform
the pandoc document after it is parsed by the reader and before it
is written out by the writer. (See below on [Plugins](#plugins).)
`-T` or `--title-prefix` *string*
: includes *string* as a prefix at the beginning of the title that
appears in the HTML header (but not in the title as it appears at
@ -1108,3 +1116,165 @@ ordinary HTML (without bird tracks).
writes HTML with the Haskell code in bird tracks, so it can be copied
and pasted as literate Haskell source.
Plugins
=======
Pandoc's plugin system allows users to modify pandoc's behavior by writing
short Haskell programs. A plugin is a Haskell module that exports a function
`transform`, of type `a -> a` or `a -> IO a`, where `a` is `Pandoc`,
`Block`, `Inline`, `[Block]`, or `[Inline]`. The `transform` function will
be used to transform the pandoc document generated by the reader, before
it is transformed by the writer.
An example will help make this clearer. Suppose we want to use pandoc with
the WordPress blog engine. WordPress provides support for LaTeX math, but
instead of `$e = mc^2$`, WordPress wants `$LaTeX e = mc^2$`. Prior to plugins,
there was no good way to make pandoc do this. We could have tried using
regex replacements on the markdown input or HTML output, but this would have
been error-prone: we'd have to make sure we weren't capturing non-math text
between dollar signs (for example, text inside a code block). Besides,
pandoc's markdown reader has already identified the math bits; why not
make use of that? By writing a plugin, we can:
~~~ {.haskell}
-- WordPressPlugin.hs
module WordPressPlugin (transform) where
import Text.Pandoc
transform :: Inline -> Inline
transform (Math x y) = Math x $ "LaTeX " ++ y
transform x = x
~~~
This is a Haskell program, but a very short one. The lines
~~~ {.haskell}
module WordPressPlugin (transform) where
import Text.Pandoc
~~~
just define the name of the module (`WordPressPlugin`), the names of any
exported functions (for a plugin, this will always just be `transform`),
and the modules that will be used in the program itself (`Text.Pandoc`).
The real meat of the program is the three-line definition of `transform`:
~~~ {.haskell}
transform :: Inline -> Inline
transform (Math x y) = Math x $ "LaTeX " ++ y
transform x = x
~~~
The first line defines the type of the function: it is a function that
takes an `Inline` element and returns an `Inline` element. (For the definition
of `Inline`, see the module `Text.Pandoc.Definition`.) The next line says
that when the input matches the pattern `Math x y`, the string `LaTeX `
should be inserted at the beginning of `y`. (`x` just specifies whether the
math element is inline or display math, so we leave it alone.) The last
line says, in effect, that the `transform` function has no effect on any
other kind of `Inline` element -- it just passes it through. When the plugin
is applied, this transformation will be used on every `Inline` element in
the document, and `LaTeX ` will be inserted where needed in math elements.
To use this plugin, we just specify the module (or alternatively the filename)
with the `--plugins` option:
% echo "Hello, $e=mc^2$." | pandoc -m --plugins=WordPressPlugin.hs
<p
>Hello, <span class="LaTeX"
>$LaTeX e=mc^2$</span
>.</p
>
Let's look at a more complex example, involving IO. Suppose we want to include
some graphviz diagrams in our document. Of course, we could use a Makefile to
generate the diagrams, then use regular images in our document. But wouldn't it
be nicer just to include the graphviz code in the document itself, perhaps in
a specially marked delimited code block?
~~~ {.dot name="diagram1"}
digraph G {Hello->World}
~~~
This can be accomplished by a plugin:
~~~ {.haskell}
-- DotPlugin.hs
module DotPlugin (transform) where
import Text.Pandoc
import Text.Pandoc.Shared
import System.Process (readProcess)
import Data.Char (ord)
-- from the utf8-string package on HackageDB:
import Data.ByteString.Lazy.UTF8 (fromString)
-- from the SHA package on HackageDB:
import Data.Digest.Pure.SHA
transform :: Block -> IO Block
transform (CodeBlock (id, classes, namevals) contents) | "dot" `elem` classes = do
let (name, outfile) = case lookup "name" namevals of
Just fn -> ([Str fn], fn ++ ".png")
Nothing -> ([], uniqueName contents ++ ".png")
result <- readProcess "dot" ["-Tpng"] contents
writeFile outfile result
return $ Para [Image name (outfile, "")]
transform x = return x
-- | Generate a unique filename given the file's contents.
uniqueName :: String -> String
uniqueName = showDigest . sha1 . fromString
~~~
The heart of this plugin is the `transform` function, which converts a `Block`
to a `Block`. Again, there are two clauses, one for code blocks that are marked
with the "dot" class, one for all other blocks. Code blocks with ".dot" are
replaced with links to an image file; this file is generated by running
`dot -Tpng` on the contents of the code block.
Because `transform` performs file reads and writes, it needs to be in the
IO monad, hence the type: `Block -> IO Block`.
One more example. Suppose we want emphasized text to be CAPITALIZED
instead of italicized. We could use a plugin:
~~~ {.haskell}
module CapitalizeEmphasisPlugin (transform) where
import Text.Pandoc
import Data.Char (toUpper)
transform :: [Inline] -> [Inline]
transform (Emph xs : ys) = processIn capStr xs ++ transform ys
transform (x : ys) = x : transform ys
transform [] = []
capStr :: Inline -> Inline
capStr (Str x) = Str (map toUpper x)
capStr x = x
~~~
Here `transform` converts a whole list of `Inline` elements to another
such list. The key clause is
~~~ {.haskell}
transform (Emph xs : ys) = processIn capStr xs ++ transform ys
~~~
This applies the `capStr` function recursively to all inlines in the
list of emphasized inlines and puts the transformed list in place
of the original. `capStr` is a simple `Inline` transformation that
capitalizes `Str` elements and leaves everything else alone. The
function `processIn`, defined in `Text.Pandoc.Definition`, uses some
`Data.Generics` magic to apply its argument (here `capStr`) to every
`Inline` element in a list, including elements that are deeply buried in
other elements. Thus
processIn captStr [Str "one", Strong [Str "two", Space]] ==>
[Str "ONE", Strong [Str "TWO", Space]]
There are other sample plugins in the `plugins` subdirectory of the
pandoc source code.
**Note:** Do not attempt to use plugins when running pandoc in the
directory containing pandoc's source code. The interpreter will try to
load the files directly from the source code, rather than reading the compiled
versions, and pandoc will hang.

View file

@ -128,10 +128,23 @@ data Inline
| Note [Block] -- ^ Footnote or endnote
deriving (Show, Eq, Read, Typeable, Data)
-- | Applies a transformation to matching elements in a Pandoc document.
processPandoc :: Typeable a => (a -> a) -> Pandoc -> Pandoc
processPandoc f = everywhere (mkT f)
-- | Applies a transformation on @a@s to matching elements in a @b@.
processIn :: (Data a, Data b) => (a -> a) -> b -> b
processIn f = everywhere (mkT f)
-- | Like 'processIn', but with monadic transformations.
processInM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b
processInM f = everywhereM (mkM f)
-- | Runs a query on matching @a@ elements in a @c@.
queryIn :: (Data a, Data c) => (a -> [b]) -> c -> [b]
queryIn f = everything (++) ([] `mkQ` f)
{-# DEPRECATED processPandoc "Use processIn instead" #-}
processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc
processPandoc = processIn
{-# DEPRECATED queryPandoc "Use queryIn instead" #-}
queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b]
queryPandoc = queryIn
-- | Runs a query on matching elements in a Pandoc document.
queryPandoc :: Typeable a => (a -> [b]) -> Pandoc -> [b]
queryPandoc f = everything (++) ([] `mkQ` f)

69
Text/Pandoc/Plugins.hs Normal file
View file

@ -0,0 +1,69 @@
{-
Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Pluigns
Copyright : Copyright (C) 2008 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Support for plugins.
-}
module Text.Pandoc.Plugins (getPlugin)
where
import Language.Haskell.Interpreter
import Text.Pandoc
import Control.Monad (unless, liftM)
import Control.Monad.Error (throwError)
import Data.List (isInfixOf)
-- | Returns the function named @transform@ in the specified
-- module. The module may be identified either by module name
-- or by path name. The @transform@ function should have type
-- @a -> a@ or @a -> IO a@, where @a@ is an instance of 'Data':
-- for example, @Pandoc -> Pandoc@, @Inline -> IO Inline@,
-- @Block -> Block@, or @[Inline] -> IO [Inline]@.
getPlugin :: String -> IO (Pandoc -> IO Pandoc)
getPlugin modsrc = do
res <- runInterpreter (evaluatePlugin modsrc)
case res of
Right func -> return func
Left (WontCompile xs) -> error $ "WontCompile error for plugin '" ++ modsrc ++ "'\n" ++ unlines (map errMsg xs)
Left (NotAllowed x) -> error $ "NotAllowed error for plugin '" ++ modsrc ++ "'\n" ++ x
Left (UnknownError x) -> error $ "UnknownError for plugin '" ++ modsrc ++ "'\n" ++ x
Left (GhcException x) -> error $ "GhcException for plugin '" ++ modsrc ++ "'\n" ++ x
evaluatePlugin :: String -> Interpreter (Pandoc -> IO Pandoc)
evaluatePlugin modsrc = do
set [installedModulesInScope := False]
loadModules [modsrc]
modnames <- getLoadedModules
setTopLevelModules modnames
setImports ["Prelude", "Text.Pandoc", "Text.Pandoc.Definition"]
exports <- liftM concat $ mapM getModuleExports modnames
unless ((Fun "transform") `elem` exports) $
throwError $ UnknownError $ "The plugin module must define a function 'transform'."
transformType <- typeOf "transform"
if "-> IO" `isInfixOf` transformType
then interpret "processInM transform" (as :: Pandoc -> IO Pandoc)
else interpret "return . (processIn transform)" (as :: Pandoc -> IO Pandoc)

View file

@ -173,6 +173,14 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`.
`context`, `docbook`, `man`, `markdown`, `opendocument`,
`rst`, `rtf`).
-P *MODULE[,MODULE...]*, \--plugins=*MODULE[,MODULE...]*
: Specify plugins to load, by module name or source file pathname.
Plugins should export a function `transform` of type `a -> a`
or `a -> IO a`, where `a` is `Inline`, `Block`, `Pandoc`,
`[Inline]`, or `[Block]`. This function will be used to transform
the pandoc document after it is parsed by the reader and before it
is written out by the writer.
-T *STRING*, \--title-prefix=*STRING*
: Specify *STRING* as a prefix to the HTML window title.

View file

@ -70,6 +70,12 @@ Extra-Source-Files:
data/odt-styles/Thumbnails/thumbnail.png,
data/odt-styles/styles.xml,
data/odt-styles/mimetype,
-- sample plugins
plugins/WordPressPlugin.hs,
plugins/CapitalizeEmphasisPlugin.hs,
plugins/IncludeFilePlugin.hs,
plugins/DotPlugin.hs,
plugins/ListLinksPlugin.hs,
-- tests
tests/bodybg.gif,
tests/writer.latex,
@ -139,7 +145,7 @@ Library
mtl >= 1.1, network >= 2, filepath >= 1.1,
process >= 1, directory >= 1, template-haskell >= 2.2,
bytestring >= 0.9, zip-archive >= 0.1.1,
utf8-string >= 0.3, old-time >= 1
utf8-string >= 0.3, old-time >= 1, hint >= 0.3.0.0
if impl(ghc >= 6.10)
Build-depends: base >= 4, syb
else
@ -158,6 +164,7 @@ Library
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.ODT,
Text.Pandoc.Plugins,
Text.Pandoc.LaTeXMathML,
Text.Pandoc.DefaultHeaders,
Text.Pandoc.Highlighting,

View file

@ -0,0 +1,14 @@
module CapitalizeEmphasisPlugin (transform) where
import Text.Pandoc
import Data.Char (toUpper)
-- This plugin changes emphasized text into CAPITALIZED TEXT.
transform :: [Inline] -> [Inline]
transform (Emph x : ys) = processIn capStr x ++ transform ys
transform (x : ys) = x : transform ys
transform [] = []
capStr :: Inline -> Inline
capStr (Str x) = Str (map toUpper x)
capStr x = x

30
plugins/DotPlugin.hs Normal file
View file

@ -0,0 +1,30 @@
module DotPlugin (transform) where
import Text.Pandoc
import Text.Pandoc.Shared
import System.Process (readProcess)
import Data.Char (ord)
-- from the utf8-string package on HackageDB:
import Data.ByteString.Lazy.UTF8 (fromString)
-- from the SHA package on HackageDB:
import Data.Digest.Pure.SHA
-- This plugin allows you to include a graphviz "dot" diagram
-- in a document like this:
--
-- ~~~ {.dot name="diagram1"}
-- digraph G {Hello->World}
-- ~~~
transform :: Block -> IO Block
transform (CodeBlock (id, classes, namevals) contents) | "dot" `elem` classes = do
let (name, outfile) = case lookup "name" namevals of
Just fn -> ([Str fn], fn ++ ".png")
Nothing -> ([], uniqueName contents ++ ".png")
result <- readProcess "dot" ["-Tpng"] contents
writeFile outfile result
return $ Para [Image name (outfile, "")]
transform x = return x
-- | Generate a unique filename given the file's contents.
uniqueName :: String -> String
uniqueName = showDigest . sha1 . fromString

View file

@ -0,0 +1,19 @@
module IncludeFilePlugin (transform) where
import Text.Pandoc
import Text.Pandoc.Shared
import Control.Monad
-- This plugin allows you to include the contents of an
-- external file in a delimited code block like this:
--
-- ~~~ {include="filename"}
-- ~~~
--
-- Trailing newlines are trimmed.
transform :: Block -> IO Block
transform cb@(CodeBlock (id, classes, namevals) contents) =
case lookup "include" namevals of
Just f -> return . (CodeBlock (id, classes, namevals) . stripTrailingNewlines) =<< readFile f
Nothing -> return cb
transform x = return x

View file

@ -0,0 +1,15 @@
module ListLinksPlugin (transform) where
import Text.Pandoc
-- This plugin returns an empty document and prints a list
-- of the URLs linked to in the source document.
transform :: Pandoc -> IO Pandoc
transform p = do
let urls = queryIn findURLs p
putStrLn $ unlines urls
return $ Pandoc (Meta [] [] []) []
findURLs :: Inline -> [String]
findURLs (Link label (url, title)) = [url]
findURLs x = []

View file

@ -0,0 +1,10 @@
module WordPressPlugin (transform) where
import Text.Pandoc
-- This plugin (when used with -m) prints LaTeX math in the
-- format required by WordPress blogs. $e=mc^2$ becomes
-- $LaTeX e=mc^2$.
transform :: Inline -> Inline
transform (Math x y) = Math x $ "LaTeX " ++ y
transform x = x