Added --filter option.

This makes it easier to use JSON filters.  Instead of
doing

    pandoc -t json | ./filter | pandoc -f json

you can just do

    pandoc --filter ./filter
This commit is contained in:
John MacFarlane 2013-07-23 22:52:15 -07:00
parent 5592666ca4
commit 85dacbb282
3 changed files with 30 additions and 4 deletions

8
README
View file

@ -259,6 +259,14 @@ Reader options
require different kinds of images. Currently this option only affects
the markdown and LaTeX readers.
`--filter=`*PATH*
: Specify an executable to be used as a filter transforming the
Pandoc AST after the input is parsed and before the output is
written. The executable should read JSON from stdin and write
JSON to stdout. The JSON must be formatted like pandoc's own
JSON input and output. Filters may be most easily created in Haskell,
using the utility function `toJsonFilter` from `Text.Pandoc`.
`--normalize`
: Normalize the document after reading: merge adjacent
`Str` or `Emph` elements, for example, and remove repeated `Space`s.

View file

@ -360,6 +360,7 @@ Executable pandoc
extensible-exceptions >= 0.1 && < 0.2,
highlighting-kate >= 0.5.5 && < 0.6,
HTTP >= 4000.0.5 && < 4000.3,
process >= 1 && < 1.2,
citeproc-hs >= 0.3.7 && < 0.4
if impl(ghc >= 7.0.1)
Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind

View file

@ -43,6 +43,7 @@ import System.Environment ( getArgs, getProgName )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
import System.Process (readProcess)
import Data.Char ( toLower )
import Data.List ( intercalate, isPrefixOf, sort )
import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
@ -53,6 +54,7 @@ import Control.Exception.Extensible ( throwIO )
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Text.CSL as CSL
import Control.Monad (when, unless, liftM)
import Data.Foldable (foldrM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI, URI(..))
import qualified Data.ByteString.Lazy as B
@ -86,6 +88,12 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
isTextFormat :: String -> Bool
isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"]
externalFilter :: FilePath -> Pandoc -> IO Pandoc
externalFilter f d = E.catch
(readJSON def `fmap` readProcess f [] (writeJSON def d))
(\e -> let _ = (e :: E.SomeException)
in err 83 $ "Error running filter `" ++ f ++ "'")
-- | Data structure for command line options.
data Opt = Opt
{ optTabStop :: Int -- ^ Number of spaces per tab
@ -272,6 +280,13 @@ options =
"STRING")
"" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
, Option "" ["filter"]
(ReqArg
(\arg opt -> return opt { optPlugins = externalFilter arg :
optPlugins opt })
"PROGRAM")
"" -- "External JSON filter"
, Option "" ["normalize"]
(NoArg
(\opt -> return opt { optTransforms =
@ -876,6 +891,7 @@ main = do
, optReferenceLinks = referenceLinks
, optWrapText = wrap
, optColumns = columns
, optPlugins = plugins
, optEmailObfuscation = obfuscationMethod
, optIdentifierPrefix = idPrefix
, optIndentedCodeClasses = codeBlockClasses
@ -1099,6 +1115,7 @@ main = do
reader readerOpts
let doc0 = foldr ($) doc transforms
doc1 <- foldrM ($) doc0 plugins
let writeBinary :: B.ByteString -> IO ()
writeBinary = B.writeFile (UTF8.encodePath outputFile)
@ -1109,15 +1126,15 @@ main = do
case getWriter writerName' of
Left e -> err 9 e
Right (IOStringWriter f) -> f writerOptions doc0 >>= writerFn outputFile
Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary
Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile
Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary
Right (PureStringWriter f)
| pdfOutput -> do
res <- makePDF latexEngine f writerOptions doc0
res <- makePDF latexEngine f writerOptions doc1
case res of
Right pdf -> writeBinary pdf
Left err' -> err 43 $ UTF8.toStringLazy err'
| otherwise -> selfcontain (f writerOptions doc0 ++
| otherwise -> selfcontain (f writerOptions doc1 ++
['\n' | not standalone'])
>>= writerFn outputFile . handleEntities
where htmlFormat = writerName' `elem`