From 6174b5bea5e8c4c35c191bd62f1f42e4d7fce69e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 11 Nov 2017 11:01:38 -0500
Subject: [PATCH] Add lua filter functions to walk inline and block elements.

Refactored some code from Text.Pandoc.Lua.PandocModule
into new internal module Text.Pandoc.Lua.Filter.

Add `walk_inline` and `walk_block` in pandoc lua module.
---
 doc/lua-filters.md                  |  49 +++++++-
 pandoc.cabal                        |   1 +
 src/Text/Pandoc/Lua.hs              | 150 +------------------------
 src/Text/Pandoc/Lua/Filter.hs       | 168 ++++++++++++++++++++++++++++
 src/Text/Pandoc/Lua/PandocModule.hs |  22 +++-
 5 files changed, 241 insertions(+), 149 deletions(-)
 create mode 100644 src/Text/Pandoc/Lua/Filter.hs

diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index 8c8268c20..1e0b988ba 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -165,6 +165,8 @@ those elements accessible through the filter function parameter.
 
 Some pandoc functions have been made available in lua:
 
+- `walk_block` and `walk_inline` allow filters to be applied
+  inside specific block or inline elements.
 - `read` allows filters to parse strings into pandoc documents
 - `pipe` runs an external command with input from and output to
   strings
@@ -333,6 +335,20 @@ will output:
 </dl>
 ```
 
+## Uppercasing text inside all headers
+
+This filter uses `walk_block` to transform inline elements
+inside headers, converting all their text into uppercase.
+
+``` lua
+function Header(el)
+    return pandoc.walk_block(el, {
+        Str = function(el)
+            return pandoc.Str(el.text:upper())
+        end })
+end
+```
+
 ## Converting ABC code to music notation
 
 This filter replaces code blocks with class `abc` with
@@ -1070,6 +1086,38 @@ Lua functions for pandoc scripts.
 
 ## Helper Functions
 
+[`walk_block (element, filter)`]{#walk_block}
+
+:   Apply a filter inside a block element, walking its
+    contents.
+
+    Parameters:
+
+    `element`:
+    :   the block element
+
+    `filter`:
+    :   a lua filter (table of functions) to be applied
+        within the block element
+
+    Returns: the transformed block element
+
+[`walk_inline (element, filter)`]{#walk_inline}
+
+:   Apply a filter inside an inline element, walking its
+    contents.
+
+    Parameters:
+
+    `element`:
+    :   the inline element
+
+    `filter`:
+    :   a lua filter (table of functions) to be applied
+        within the inline element
+
+    Returns: the transformed inline element
+
 [`read (markup[, format])`]{#read}
 
 :   Parse the given string into a Pandoc document.
@@ -1142,7 +1190,6 @@ Lua functions for pandoc scripts.
 
         local output = pandoc.pipe("sed", {"-e","s/a/b/"}, "abc")
 
-
 # Submodule mediabag
 
 The submodule `mediabag` allows accessing pandoc's media
diff --git a/pandoc.cabal b/pandoc.cabal
index 19dfde40e..7522304e5 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -478,6 +478,7 @@ library
                    Text.Pandoc.Readers.Org.Parsing,
                    Text.Pandoc.Readers.Org.Shared,
                    Text.Pandoc.Lua.PandocModule,
+                   Text.Pandoc.Lua.Filter,
                    Text.Pandoc.Lua.StackInstances,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.CSS,
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index 091deab8c..355a5baf1 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -33,25 +33,18 @@ Pandoc lua utils.
 -}
 module Text.Pandoc.Lua (LuaException (..), pushPandocModule, runLuaFilter) where
 
-import Control.Monad (mplus, unless, when, (>=>))
+import Control.Monad (when, (>=>))
 import Control.Monad.Identity (Identity)
 import Control.Monad.Trans (MonadIO (..))
-import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
-                  showConstr, toConstr, tyconUQname)
-import Data.Foldable (foldrM)
 import Data.IORef (IORef, newIORef, readIORef)
-import Data.Map (Map)
-import Data.Maybe (isJust)
-import Foreign.Lua (FromLuaStack (peek), Lua, LuaException (..), StackIndex,
+import Foreign.Lua (FromLuaStack (peek), Lua, LuaException (..),
                     Status (OK), ToLuaStack (push))
 import Text.Pandoc.Class (CommonState, PandocIO, getCommonState, getMediaBag,
                           setMediaBag)
 import Text.Pandoc.Definition
 import Text.Pandoc.Lua.PandocModule (pushMediaBagModule, pushPandocModule)
+import Text.Pandoc.Lua.Filter (LuaFilter, walkMWithLuaFilter)
 import Text.Pandoc.MediaBag (MediaBag)
-import Text.Pandoc.Walk (walkM)
-
-import qualified Data.Map as Map
 import qualified Foreign.Lua as Lua
 
 runLuaFilter :: Maybe FilePath -> FilePath -> String
@@ -109,142 +102,5 @@ pushGlobalFilter = do
 runAll :: [LuaFilter] -> Pandoc -> Lua Pandoc
 runAll = foldr ((>=>) . walkMWithLuaFilter) return
 
-walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc
-walkMWithLuaFilter (LuaFilter fnMap) =
-  walkInlines >=> walkBlocks >=> walkMeta >=> walkPandoc
- where
-    walkInlines :: Pandoc -> Lua Pandoc
-    walkInlines =
-      if hasOneOf inlineFilterNames
-      then walkM (mconcatMapM (tryFilter fnMap :: Inline -> Lua [Inline]))
-      else return
-
-    walkBlocks :: Pandoc -> Lua Pandoc
-    walkBlocks =
-      if hasOneOf blockFilterNames
-      then walkM (mconcatMapM (tryFilter fnMap :: Block -> Lua [Block]))
-      else return
-
-    walkMeta :: Pandoc -> Lua Pandoc
-    walkMeta =
-      case Map.lookup "Meta" fnMap of
-        Just fn -> walkM (\(Pandoc meta blocks) -> do
-                             meta' <- runFilterFunction fn meta *> singleElement meta
-                             return $ Pandoc meta' blocks)
-        Nothing -> return
-
-    walkPandoc :: Pandoc -> Lua Pandoc
-    walkPandoc =
-      case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
-        Just fn -> \x -> runFilterFunction fn x *> singleElement x
-        Nothing -> return
-
-    mconcatMapM f = fmap mconcat . mapM f
-    hasOneOf = any (\k -> isJust (Map.lookup k fnMap))
-
-constructorsFor :: DataType -> [String]
-constructorsFor x = map show (dataTypeConstrs x)
-
-inlineFilterNames :: [String]
-inlineFilterNames = "Inline" : constructorsFor (dataTypeOf (Str []))
-
-blockFilterNames :: [String]
-blockFilterNames = "Block" : constructorsFor (dataTypeOf (Para []))
-
-metaFilterName :: String
-metaFilterName = "Meta"
-
-pandocFilterNames :: [String]
-pandocFilterNames = ["Pandoc", "Doc"]
-
-type FunctionMap = Map String LuaFilterFunction
-newtype LuaFilter = LuaFilter FunctionMap
-newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int }
-
--- | Try running a filter for the given element
-tryFilter :: (Data a, FromLuaStack a, ToLuaStack a)
-          => FunctionMap -> a -> Lua [a]
-tryFilter fnMap x =
-  let filterFnName = showConstr (toConstr x)
-      catchAllName = tyconUQname $ dataTypeName (dataTypeOf x)
-  in
-  case Map.lookup filterFnName fnMap `mplus` Map.lookup catchAllName fnMap of
-    Just fn -> runFilterFunction fn x *> elementOrList x
-    Nothing -> return [x]
-
-instance FromLuaStack LuaFilter where
-  peek idx =
-    let constrs = metaFilterName : pandocFilterNames
-                  ++ blockFilterNames
-                  ++ inlineFilterNames
-        fn c acc = do
-          Lua.getfield idx c
-          filterFn <- Lua.tryLua (peek (-1))
-          Lua.pop 1
-          return $ case filterFn of
-            Left _  -> acc
-            Right f -> (c, f) : acc
-    in LuaFilter . Map.fromList <$> foldrM fn [] constrs
-
--- | Push a value to the stack via a lua filter function. The filter function is
--- called with given element as argument and is expected to return an element.
--- Alternatively, the function can return nothing or nil, in which case the
--- element is left unchanged.
-runFilterFunction :: ToLuaStack a => LuaFilterFunction -> a -> Lua ()
-runFilterFunction lf x = do
-  pushFilterFunction lf
-  push x
-  z <- Lua.pcall 1 1 Nothing
-  when (z /= OK) $ do
-    let addPrefix = ("Error while running filter function: " ++)
-    Lua.throwTopMessageAsError' addPrefix
-
-elementOrList :: FromLuaStack a => a -> Lua [a]
-elementOrList x = do
-  let topOfStack = Lua.StackIndex (-1)
-  elementUnchanged <- Lua.isnil topOfStack
-  if elementUnchanged
-    then [x] <$ Lua.pop 1
-    else do
-       mbres <- Lua.peekEither topOfStack
-       case mbres of
-         Right res -> [res] <$ Lua.pop 1
-         Left _    -> Lua.toList topOfStack <* Lua.pop 1
-
-singleElement :: FromLuaStack a => a -> Lua a
-singleElement x = do
-  elementUnchanged <- Lua.isnil (-1)
-  if elementUnchanged
-    then x <$ Lua.pop 1
-    else do
-    mbres <- Lua.peekEither (-1)
-    case mbres of
-      Right res -> res <$ Lua.pop 1
-      Left err  -> do
-        Lua.pop 1
-        Lua.throwLuaError $
-          "Error while trying to get a filter's return " ++
-          "value from lua stack.\n" ++ err
-
--- | Push the filter function to the top of the stack.
-pushFilterFunction :: LuaFilterFunction -> Lua ()
-pushFilterFunction lf =
-  -- The function is stored in a lua registry table, retrieve it from there.
-  Lua.rawgeti Lua.registryindex (functionIndex lf)
-
-registerFilterFunction :: StackIndex -> Lua LuaFilterFunction
-registerFilterFunction idx = do
-  isFn <- Lua.isfunction idx
-  unless isFn . Lua.throwLuaError $ "Not a function at index " ++ show idx
-  Lua.pushvalue idx
-  refIdx <- Lua.ref Lua.registryindex
-  return $ LuaFilterFunction refIdx
-
 instance (FromLuaStack a) => FromLuaStack (Identity a) where
   peek = fmap return . peek
-
-instance ToLuaStack LuaFilterFunction where
-  push = pushFilterFunction
-
-instance FromLuaStack LuaFilterFunction where
-  peek = registerFilterFunction
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
new file mode 100644
index 000000000..8db31e7fa
--- /dev/null
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -0,0 +1,168 @@
+{-# LANGUAGE FlexibleContexts #-}
+
+module Text.Pandoc.Lua.Filter ( LuaFilterFunction
+                              , LuaFilter
+                              , tryFilter
+                              , runFilterFunction
+                              , walkMWithLuaFilter
+                              , walkInlines
+                              , walkBlocks
+                              , blockElementNames
+                              , inlineElementNames
+                              ) where
+import Control.Monad (mplus, unless, when, (>=>))
+import Text.Pandoc.Definition
+import Data.Foldable (foldrM)
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Foreign.Lua as Lua
+import Foreign.Lua (FromLuaStack (peek), Lua, StackIndex,
+                    Status (OK), ToLuaStack (push))
+import Text.Pandoc.Walk (walkM, Walkable)
+import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
+                  showConstr, toConstr, tyconUQname)
+import Text.Pandoc.Lua.StackInstances()
+
+type FunctionMap = Map String LuaFilterFunction
+
+newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int }
+
+instance ToLuaStack LuaFilterFunction where
+  push = pushFilterFunction
+
+instance FromLuaStack LuaFilterFunction where
+  peek = registerFilterFunction
+
+newtype LuaFilter = LuaFilter FunctionMap
+
+instance FromLuaStack LuaFilter where
+  peek idx =
+    let constrs = metaFilterName : pandocFilterNames
+                  ++ blockElementNames
+                  ++ inlineElementNames
+        fn c acc = do
+          Lua.getfield idx c
+          filterFn <- Lua.tryLua (peek (-1))
+          Lua.pop 1
+          return $ case filterFn of
+            Left _  -> acc
+            Right f -> (c, f) : acc
+    in LuaFilter . Map.fromList <$> foldrM fn [] constrs
+
+-- | Push the filter function to the top of the stack.
+pushFilterFunction :: LuaFilterFunction -> Lua ()
+pushFilterFunction lf =
+  -- The function is stored in a lua registry table, retrieve it from there.
+  Lua.rawgeti Lua.registryindex (functionIndex lf)
+
+registerFilterFunction :: StackIndex -> Lua LuaFilterFunction
+registerFilterFunction idx = do
+  isFn <- Lua.isfunction idx
+  unless isFn . Lua.throwLuaError $ "Not a function at index " ++ show idx
+  Lua.pushvalue idx
+  refIdx <- Lua.ref Lua.registryindex
+  return $ LuaFilterFunction refIdx
+
+elementOrList :: FromLuaStack a => a -> Lua [a]
+elementOrList x = do
+  let topOfStack = Lua.StackIndex (-1)
+  elementUnchanged <- Lua.isnil topOfStack
+  if elementUnchanged
+    then [x] <$ Lua.pop 1
+    else do
+       mbres <- Lua.peekEither topOfStack
+       case mbres of
+         Right res -> [res] <$ Lua.pop 1
+         Left _    -> Lua.toList topOfStack <* Lua.pop 1
+
+-- | Try running a filter for the given element
+tryFilter :: (Data a, FromLuaStack a, ToLuaStack a)
+          => LuaFilter -> a -> Lua [a]
+tryFilter (LuaFilter fnMap) x =
+  let filterFnName = showConstr (toConstr x)
+      catchAllName = tyconUQname $ dataTypeName (dataTypeOf x)
+  in
+  case Map.lookup filterFnName fnMap `mplus` Map.lookup catchAllName fnMap of
+    Just fn -> runFilterFunction fn x *> elementOrList x
+    Nothing -> return [x]
+
+-- | Push a value to the stack via a lua filter function. The filter function is
+-- called with given element as argument and is expected to return an element.
+-- Alternatively, the function can return nothing or nil, in which case the
+-- element is left unchanged.
+runFilterFunction :: ToLuaStack a => LuaFilterFunction -> a -> Lua ()
+runFilterFunction lf x = do
+  pushFilterFunction lf
+  push x
+  z <- Lua.pcall 1 1 Nothing
+  when (z /= OK) $ do
+    let addPrefix = ("Error while running filter function: " ++)
+    Lua.throwTopMessageAsError' addPrefix
+
+walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc
+walkMWithLuaFilter f =
+  walkInlines f >=> walkBlocks f >=> walkMeta f >=> walkPandoc f
+
+mconcatMapM :: Monad m => (a -> m [a]) -> [a] -> m [a]
+mconcatMapM f = fmap mconcat . mapM f
+
+hasOneOf :: LuaFilter -> [String] -> Bool
+hasOneOf (LuaFilter fnMap) = any (\k -> Map.member k fnMap)
+
+walkInlines :: Walkable [Inline] a => LuaFilter -> a -> Lua a
+walkInlines f =
+  if f `hasOneOf` inlineElementNames
+     then walkM (mconcatMapM (tryFilter f :: Inline -> Lua [Inline]))
+     else return
+
+walkBlocks :: Walkable [Block] a => LuaFilter -> a -> Lua a
+walkBlocks f =
+  if f `hasOneOf` blockElementNames
+     then walkM (mconcatMapM (tryFilter f :: Block -> Lua [Block]))
+     else return
+
+walkMeta :: LuaFilter -> Pandoc -> Lua Pandoc
+walkMeta (LuaFilter fnMap) =
+  case Map.lookup "Meta" fnMap of
+    Just fn -> walkM (\(Pandoc meta blocks) -> do
+                         meta' <- runFilterFunction fn meta *> singleElement meta
+                         return $ Pandoc meta' blocks)
+    Nothing -> return
+
+walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
+walkPandoc (LuaFilter fnMap) =
+  case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
+    Just fn -> \x -> runFilterFunction fn x *> singleElement x
+    Nothing -> return
+
+constructorsFor :: DataType -> [String]
+constructorsFor x = map show (dataTypeConstrs x)
+
+inlineElementNames :: [String]
+inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str []))
+
+blockElementNames :: [String]
+blockElementNames = "Block" : constructorsFor (dataTypeOf (Para []))
+
+metaFilterName :: String
+metaFilterName = "Meta"
+
+pandocFilterNames :: [String]
+pandocFilterNames = ["Pandoc", "Doc"]
+
+singleElement :: FromLuaStack a => a -> Lua a
+singleElement x = do
+  elementUnchanged <- Lua.isnil (-1)
+  if elementUnchanged
+    then x <$ Lua.pop 1
+    else do
+    mbres <- Lua.peekEither (-1)
+    case mbres of
+      Right res -> res <$ Lua.pop 1
+      Left err  -> do
+        Lua.pop 1
+        Lua.throwLuaError $
+          "Error while trying to get a filter's return " ++
+          "value from lua stack.\n" ++ err
+
+
diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs
index c42e180c6..ac7839d0f 100644
--- a/src/Text/Pandoc/Lua/PandocModule.hs
+++ b/src/Text/Pandoc/Lua/PandocModule.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-
 Copyright © 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 
@@ -38,13 +40,15 @@ import Data.Digest.Pure.SHA (sha1, showDigest)
 import Data.IORef
 import Data.Maybe (fromMaybe)
 import Data.Text (pack)
-import Foreign.Lua (FromLuaStack, Lua, NumResults, liftIO)
+import Foreign.Lua (ToLuaStack, FromLuaStack, Lua, NumResults, liftIO)
 import Foreign.Lua.FunctionCalling (ToHaskellFunction)
 import System.Exit (ExitCode (..))
 import Text.Pandoc.Class (CommonState (..), fetchItem, putCommonState,
                           readDataFile, runIO, runIOorExplode, setMediaBag,
                           setUserDataDir)
 import Text.Pandoc.Lua.StackInstances ()
+import Text.Pandoc.Definition (Block, Inline)
+import Text.Pandoc.Walk (Walkable)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (readerExtensions))
 import Text.Pandoc.Process (pipeProcess)
@@ -53,6 +57,7 @@ import Text.Pandoc.Readers (Reader (..), getReader)
 import qualified Data.ByteString.Lazy as BL
 import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.MediaBag as MB
+import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter)
 
 -- | Push the "pandoc" on the lua stack.
 pushPandocModule :: Maybe FilePath -> Lua ()
@@ -63,12 +68,27 @@ pushPandocModule datadir = do
   addFunction "_pipe" pipeFn
   addFunction "_read" readDoc
   addFunction "sha1" sha1HashFn
+  addFunction "walk_block" walkBlock
+  addFunction "walk_inline" walkInline
 
 -- | Get the string representation of the pandoc module
 pandocModuleScript :: Maybe FilePath -> IO String
 pandocModuleScript datadir = unpack <$>
   runIOorExplode (setUserDataDir datadir >> readDataFile "pandoc.lua")
 
+walkElement :: (ToLuaStack a, Walkable [Inline] a, Walkable [Block] a)
+            => a -> LuaFilter -> Lua NumResults
+walkElement x f = do
+  x' <- walkInlines f x >>= walkBlocks f
+  Lua.push x'
+  return 1
+
+walkInline :: Inline -> LuaFilter -> Lua NumResults
+walkInline = walkElement
+
+walkBlock :: Block -> LuaFilter -> Lua NumResults
+walkBlock = walkElement
+
 readDoc :: String -> String -> Lua NumResults
 readDoc formatSpec content = do
   case getReader formatSpec of