From 2677e8466311e2becdd7f65f1f6c23d559db14aa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Aug 2013 11:09:00 -0700
Subject: [PATCH] Revert "Revert "Added `--filter` option.""

This reverts commit 2e5edbb27837372f658b1abbe05371be57415847.
---
 README       |  8 ++++++++
 pandoc.cabal |  1 +
 pandoc.hs    | 25 +++++++++++++++++++++----
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/README b/README
index bc5d5619d..e85ca1905 100644
--- a/README
+++ b/README
@@ -259,6 +259,14 @@ Reader options
     require different kinds of images.  Currently this option only affects
     the markdown and LaTeX readers.
 
+`--filter=`*PATH*
+:   Specify an executable to be used as a filter transforming the
+    Pandoc AST after the input is parsed and before the output is
+    written.  The executable should read JSON from stdin and write
+    JSON to stdout.  The JSON must be formatted like  pandoc's own
+    JSON input and output.  Filters may be most easily created in Haskell,
+    using the utility function `toJsonFilter` from `Text.Pandoc`.
+
 `--normalize`
 :   Normalize the document after reading:  merge adjacent
     `Str` or `Emph` elements, for example, and remove repeated `Space`s.
diff --git a/pandoc.cabal b/pandoc.cabal
index 19f8c14ef..a8dd528ad 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -352,6 +352,7 @@ Executable pandoc
                  extensible-exceptions >= 0.1 && < 0.2,
                  highlighting-kate >= 0.5.5 && < 0.6,
                  HTTP >= 4000.0.5 && < 4000.3,
+                 process >= 1 && < 1.2,
                  citeproc-hs >= 0.3.7 && < 0.4
   Ghc-Options:   -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind
   Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m
diff --git a/pandoc.hs b/pandoc.hs
index 79bade221..94d206103 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -43,6 +43,7 @@ import System.Environment ( getArgs, getProgName )
 import System.Exit ( exitWith, ExitCode (..) )
 import System.FilePath
 import System.Console.GetOpt
+import System.Process (readProcess)
 import Data.Char ( toLower )
 import Data.List ( intercalate, isPrefixOf, sort )
 import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
@@ -53,6 +54,7 @@ import Control.Exception.Extensible ( throwIO )
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.CSL as CSL
 import Control.Monad (when, unless, liftM)
+import Data.Foldable (foldrM)
 import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
 import Network.URI (parseURI, isURI, URI(..))
 import qualified Data.ByteString.Lazy as B
@@ -86,6 +88,12 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
 isTextFormat :: String -> Bool
 isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"]
 
+externalFilter :: FilePath -> Pandoc -> IO Pandoc
+externalFilter f d = E.catch
+  (readJSON def `fmap` readProcess f [] (writeJSON def d))
+  (\e -> let _ = (e :: E.SomeException)
+         in err 83 $ "Error running filter `" ++ f ++ "'")
+
 -- | Data structure for command line options.
 data Opt = Opt
     { optTabStop           :: Int     -- ^ Number of spaces per tab
@@ -272,6 +280,13 @@ options =
                    "STRING")
                   "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
 
+    , Option "" ["filter"]
+                 (ReqArg
+                  (\arg opt -> return opt { optPlugins = externalFilter arg :
+                                               optPlugins opt })
+                  "PROGRAM")
+                 "" -- "External JSON filter"
+
     , Option "" ["normalize"]
                  (NoArg
                   (\opt -> return opt { optTransforms =
@@ -876,6 +891,7 @@ main = do
               , optReferenceLinks        = referenceLinks
               , optWrapText              = wrap
               , optColumns               = columns
+              , optPlugins               = plugins
               , optEmailObfuscation      = obfuscationMethod
               , optIdentifierPrefix      = idPrefix
               , optIndentedCodeClasses   = codeBlockClasses
@@ -1099,6 +1115,7 @@ main = do
            reader readerOpts
 
   let doc0 = foldr ($) doc transforms
+  doc1 <- foldrM ($) doc0 plugins
 
   let writeBinary :: B.ByteString -> IO ()
       writeBinary = B.writeFile (UTF8.encodePath outputFile)
@@ -1109,15 +1126,15 @@ main = do
 
   case getWriter writerName' of
     Left e -> err 9 e
-    Right (IOStringWriter f) -> f writerOptions doc0 >>= writerFn outputFile
-    Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary
+    Right (IOStringWriter f) -> f writerOptions doc1 >>= writerFn outputFile
+    Right (IOByteStringWriter f) -> f writerOptions doc1 >>= writeBinary
     Right (PureStringWriter f)
       | pdfOutput -> do
-              res <- makePDF latexEngine f writerOptions doc0
+              res <- makePDF latexEngine f writerOptions doc1
               case res of
                    Right pdf -> writeBinary pdf
                    Left err' -> err 43 $ UTF8.toStringLazy err'
-      | otherwise -> selfcontain (f writerOptions doc0 ++
+      | otherwise -> selfcontain (f writerOptions doc1 ++
                                   ['\n' | not standalone'])
                       >>= writerFn outputFile . handleEntities
           where htmlFormat = writerName' `elem`