Lua filters: allow filtering of element lists (#6040)

Lists of Inline and Block elements can now be filtered via `Inlines` and
`Blocks` functions, respectively. This is helpful if a filter conversion
depends on the order of elements rather than a single element.

For example, the following filter can be used to remove all spaces
before a citation:

    function isSpaceBeforeCite (spc, cite)
      return spc and spc.t == 'Space'
       and cite and cite.t == 'Cite'
    end

    function Inlines (inlines)
      for i = #inlines-1,1,-1 do
        if isSpaceBeforeCite(inlines[i], inlines[i+1]) then
          inlines:remove(i)
        end
      end
      return inlines
    end

Closes: #6038
This commit is contained in:
Albert Krewinkel 2020-01-15 23:26:00 +01:00 committed by John MacFarlane
parent 400b29d10e
commit 672a4bdd1d
9 changed files with 243 additions and 32 deletions

View file

@ -136,13 +136,52 @@ Elements without matching functions are left untouched.
See [module documentation](#module-pandoc) for a list of pandoc
elements.
## Execution order
## Filters on element sequences
For some filtering tasks, the it is necessary to know the order
in which elements occur in the document. It is not enough then to
inspect a single element at a time.
There are two special function names, which can be used to define
filters on lists of blocks or lists of inlines.
[`Inlines (inlines)`]{#inlines-filter}
: If present in a filter, this function will be called on all
lists of inline elements, like the content of a [Para]
(paragraph) block, or the description of an [Image]. The
`inlines` argument passed to the function will be a [List] of
[Inlines] for each call.
[`Blocks (blocks)`]{#blocks-filter}
: If present in a filter, this function will be called on all
lists of block elements, like the content of a [MetaBlocks]
meta element block, on each item of a list, and the main
content of the [Pandoc] document. The `blocks` argument
passed to the function will be a [List] of [Inlines] for each
call.
These filter functions are special in that the result must either
be nil, in which case the list is left unchanged, or must be a
list of the correct type, i.e., the same type as the input
argument. Single elements are **not** allowed as return values,
as a single element in this context usually hints at a bug.
See ["Remove spaces before normal citations"][Inlines filter
example] for an example.
This functionality has been added in pandoc 2.9.2.
[Inlines filter example]: #remove-spaces-before-citations
## Execution Order
Element filter functions within a filter set are called in a
fixed order, skipping any which are not present:
1. functions for [*Inline* elements](#type-inline),
2. the [`Inlines`](#inlines-filter) filter function,
2. functions for [*Block* elements](#type-block) ,
2. the [`Blocks`](#inlines-filter) filter function,
3. the [`Meta`](#type-meta) filter function, and last
4. the [`Pandoc`](#type-pandoc) filter function.
@ -368,6 +407,34 @@ function Doc (blocks, meta)
end
```
## Remove spaces before citations
This filter removes all spaces preceding an "author-in-text"
citation. In Markdown, author-in-text citations (e.g.,
`@citekey`), must be preceded by a space. If these spaces are
undesired, they must be removed with a filter.
``` lua
local function is_space_before_author_in_text(spc, cite)
return spc and spc.t == 'Space'
and cite and cite.t == 'Cite'
-- there must be only a single citation, and it must have
-- mode 'AuthorInText'
and #cite.citations == 1
and cite.citations[1].mode == 'AuthorInText'
end
function Inlines (inlines)
-- Go from end to start to avoid problems with shifting indices.
for i = #inlines-1, 1, -1 do
if is_space_before_author_in_text(inlines[i], inlines[i+1]) then
inlines:remove(i)
end
end
return inlines
end
```
## Replacing placeholders with their metadata value
Lua filter functions are run in the order
@ -1650,15 +1717,18 @@ Usage:
[Citation]: #type-citation
[Citations]: #type-citation
[CommonState]: #type-commonstate
[Image]: #type-image
[Inline]: #type-inline
[Inlines]: #type-inline
[List]: #type-list
[ListAttributes]: #type-listattributes
[Meta]: #type-meta
[MetaBlocks]: #type-metablocks
[MetaValue]: #type-metavalue
[MetaValues]: #type-metavalue
[LogMessage]: #type-logmessage
[Pandoc]: #type-pandoc
[Para]: #type-para
[Version]: #type-version
[`pandoc.utils.equals`]: #pandoc.utils.equals

View file

@ -605,9 +605,10 @@ library
Text.Pandoc.Lua.Marshaling.AST,
Text.Pandoc.Lua.Marshaling.AnyValue,
Text.Pandoc.Lua.Marshaling.CommonState,
Text.Pandoc.Lua.Marshaling.Context,
Text.Pandoc.Lua.Marshaling.List,
Text.Pandoc.Lua.Marshaling.MediaBag,
Text.Pandoc.Lua.Marshaling.ReaderOptions,
Text.Pandoc.Lua.Marshaling.Context,
Text.Pandoc.Lua.Marshaling.Version,
Text.Pandoc.Lua.Module.MediaBag,
Text.Pandoc.Lua.Module.Pandoc,
@ -838,4 +839,3 @@ benchmark benchmark-pandoc
-Widentities
-Werror=missing-home-modules
-fhide-source-paths

View file

@ -2,8 +2,8 @@
{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Lua.Filter
Copyright : © 20122019 John MacFarlane,
© 2017-2019 Albert Krewinkel
Copyright : © 20122020 John MacFarlane,
© 2017-2020 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Stability : alpha
@ -13,25 +13,23 @@ Types and functions for running Lua filters.
module Text.Pandoc.Lua.Filter ( LuaFilterFunction
, LuaFilter
, runFilterFile
, tryFilter
, runFilterFunction
, walkMWithLuaFilter
, walkInlines
, walkBlocks
, blockElementNames
, inlineElementNames
, module Text.Pandoc.Lua.Walk
) where
import Prelude
import Control.Applicative ((<|>))
import Control.Monad (mplus, (>=>))
import Control.Monad.Catch (finally)
import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
showConstr, toConstr, tyconUQname)
import Data.Foldable (foldrM)
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Foreign.Lua (Lua, Peekable, Pushable)
import Text.Pandoc.Definition
import Text.Pandoc.Lua.Marshaling ()
import Text.Pandoc.Lua.Marshaling.List (List (..))
import Text.Pandoc.Lua.Walk (SingletonsList (..))
import Text.Pandoc.Walk (Walkable (walkM))
@ -67,7 +65,9 @@ newtype LuaFilter = LuaFilter (Map String LuaFilterFunction)
instance Peekable LuaFilter where
peek idx = do
let constrs = metaFilterName
let constrs = listOfInlinesFilterName
: listOfBlocksFilterName
: metaFilterName
: pandocFilterNames
++ blockElementNames
++ inlineElementNames
@ -109,22 +109,34 @@ elementOrList x = do
Right res -> [res] <$ Lua.pop 1
Left _ -> Lua.peekList topOfStack `finally` Lua.pop 1
-- | Try running a filter for the given element
tryFilter :: (Data a, Peekable a, Pushable a)
=> LuaFilter -> a -> Lua [a]
tryFilter (LuaFilter fnMap) x =
let filterFnName = showConstr (toConstr x)
catchAllName = tyconUQname $ dataTypeName (dataTypeOf x)
in
case Map.lookup filterFnName fnMap `mplus` Map.lookup catchAllName fnMap of
Just fn -> runFilterFunction fn x *> elementOrList x
Nothing -> return [x]
-- | Pop and return a value from the stack; if the value at the top of
-- the stack is @nil@, return the fallback element.
popOption :: Peekable a => a -> Lua a
popOption fallback = fromMaybe fallback . Lua.fromOptional <$> Lua.popValue
-- | Apply filter on a sequence of AST elements.
-- | Apply filter on a sequence of AST elements. Both lists and single
-- value are accepted as filter function return values.
runOnSequence :: (Data a, Peekable a, Pushable a)
=> LuaFilter -> SingletonsList a -> Lua (SingletonsList a)
runOnSequence lf (SingletonsList xs) =
SingletonsList <$> mconcatMapM (tryFilter lf) xs
runOnSequence (LuaFilter fnMap) (SingletonsList xs) =
SingletonsList <$> mconcatMapM tryFilter xs
where
tryFilter :: (Data a, Peekable a, Pushable a) => a -> Lua [a]
tryFilter x =
let filterFnName = showConstr (toConstr x)
catchAllName = tyconUQname $ dataTypeName (dataTypeOf x)
in case Map.lookup filterFnName fnMap <|> Map.lookup catchAllName fnMap of
Just fn -> runFilterFunction fn x *> elementOrList x
Nothing -> return [x]
-- | Try filtering the given value without type error corrections on
-- the return value.
runOnValue :: (Data a, Peekable a, Pushable a)
=> String -> LuaFilter -> a -> Lua a
runOnValue filterFnName (LuaFilter fnMap) x =
case Map.lookup filterFnName fnMap of
Just fn -> runFilterFunction fn x *> popOption x
Nothing -> return x
-- | Push a value to the stack via a lua filter function. The filter function is
-- called with given element as argument and is expected to return an element.
@ -138,7 +150,12 @@ runFilterFunction lf x = do
walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc
walkMWithLuaFilter f =
walkInlines f >=> walkBlocks f >=> walkMeta f >=> walkPandoc f
walkInlines f
>=> walkInlineLists f
>=> walkBlocks f
>=> walkBlockLists f
>=> walkMeta f
>=> walkPandoc f
mconcatMapM :: (Monad m) => (a -> m [a]) -> [a] -> m [a]
mconcatMapM f = fmap mconcat . mapM f
@ -146,6 +163,9 @@ mconcatMapM f = fmap mconcat . mapM f
hasOneOf :: LuaFilter -> [String] -> Bool
hasOneOf (LuaFilter fnMap) = any (\k -> Map.member k fnMap)
contains :: LuaFilter -> String -> Bool
contains (LuaFilter fnMap) = (`Map.member` fnMap)
walkInlines :: Walkable (SingletonsList Inline) a => LuaFilter -> a -> Lua a
walkInlines lf =
let f :: SingletonsList Inline -> Lua (SingletonsList Inline)
@ -154,6 +174,14 @@ walkInlines lf =
then walkM f
else return
walkInlineLists :: Walkable (List Inline) a => LuaFilter -> a -> Lua a
walkInlineLists lf =
let f :: List Inline -> Lua (List Inline)
f = runOnValue listOfInlinesFilterName lf
in if lf `contains` listOfInlinesFilterName
then walkM f
else return
walkBlocks :: Walkable (SingletonsList Block) a => LuaFilter -> a -> Lua a
walkBlocks lf =
let f :: SingletonsList Block -> Lua (SingletonsList Block)
@ -162,13 +190,18 @@ walkBlocks lf =
then walkM f
else return
walkBlockLists :: Walkable (List Block) a => LuaFilter -> a -> Lua a
walkBlockLists lf =
let f :: List Block -> Lua (List Block)
f = runOnValue listOfBlocksFilterName lf
in if lf `contains` listOfBlocksFilterName
then walkM f
else return
walkMeta :: LuaFilter -> Pandoc -> Lua Pandoc
walkMeta (LuaFilter fnMap) =
case Map.lookup "Meta" fnMap of
Just fn -> walkM (\(Pandoc meta blocks) -> do
meta' <- runFilterFunction fn meta *> singleElement meta
return $ Pandoc meta' blocks)
Nothing -> return
walkMeta lf (Pandoc m bs) = do
m' <- runOnValue "Meta" lf m
return $ Pandoc m' bs
walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
walkPandoc (LuaFilter fnMap) =
@ -185,6 +218,12 @@ inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str mempty))
blockElementNames :: [String]
blockElementNames = "Block" : constructorsFor (dataTypeOf (Para []))
listOfInlinesFilterName :: String
listOfInlinesFilterName = "Inlines"
listOfBlocksFilterName :: String
listOfBlocksFilterName = "Blocks"
metaFilterName :: String
metaFilterName = "Meta"

View file

@ -119,6 +119,7 @@ putConstructorsInRegistry = do
constrsToReg $ Pandoc.Citation mempty mempty mempty Pandoc.AuthorInText 0 0
putInReg "Attr" -- used for Attr type alias
putInReg "ListAttributes" -- used for ListAttributes type alias
putInReg "List" -- pandoc.List
where
constrsToReg :: Data a => a -> Lua ()
constrsToReg = mapM_ (putInReg . showConstr) . dataTypeConstrs . dataTypeOf

View file

@ -0,0 +1,45 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE UndecidableInstances #-}
{- |
Module : Text.Pandoc.Lua.Marshaling.List
Copyright : © 2012-2020 John MacFarlane
© 2017-2020 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Stability : alpha
Marshaling/unmarshaling instances for @pandoc.List@s.
-}
module Text.Pandoc.Lua.Marshaling.List
( List (..)
) where
import Prelude
import Data.Data (Data)
import Foreign.Lua (Peekable, Pushable)
import Text.Pandoc.Walk (Walkable (..))
import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor)
import qualified Foreign.Lua as Lua
-- | List wrapper which is marshalled as @pandoc.List@.
newtype List a = List { fromList :: [a] }
deriving (Data, Eq, Show)
instance Pushable a => Pushable (List a) where
push (List xs) =
pushViaConstructor "List" xs
instance Peekable a => Peekable (List a) where
peek idx = defineHowTo "get List" $ do
xs <- Lua.peek idx
return $ List xs
-- List is just a wrapper, so we can reuse the walk instance for
-- unwrapped Hasekll lists.
instance Walkable [a] b => Walkable (List a) b where
walkM f = walkM (fmap fromList . f . List)
query f = query (f . List)

View file

@ -23,7 +23,8 @@ import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder (bulletList, definitionList, displayMath, divWith,
doc, doubleQuoted, emph, header, lineBlock,
linebreak, math, orderedList, para, plain, rawBlock,
singleQuoted, space, str, strong)
singleQuoted, space, str, strong,
HasMeta (setMeta))
import Text.Pandoc.Class (runIOorExplode, setUserDataDir)
import Text.Pandoc.Definition (Block (BlockQuote, Div, Para), Inline (Emph, Str),
Attr, Meta, Pandoc, pandocTypesVersion)
@ -129,6 +130,28 @@ tests = map (localOption (QuickCheckTests 20))
(doc $ divWith ("", [], kv_before) (para "nil"))
(doc $ divWith ("", [], kv_after) (para "nil"))
, testCase "Filter list of inlines" $
assertFilterConversion "List of inlines"
"inlines-filter.lua"
(doc $ para ("Hello," <> linebreak <> "World! Wassup?"))
(doc $ para "Hello, World! Wassup?")
, testCase "Filter list of blocks" $
assertFilterConversion "List of blocks"
"blocks-filter.lua"
(doc $ para "one." <> para "two." <> para "three.")
(doc $ plain "3")
, testCase "Filter Meta" $
let setMetaBefore = setMeta "old" ("old" :: T.Text)
. setMeta "bool" False
setMetaAfter = setMeta "new" ("new" :: T.Text)
. setMeta "bool" True
in assertFilterConversion "Meta filtering"
"meta.lua"
(setMetaBefore . doc $ mempty)
(setMetaAfter . doc $ mempty)
, testCase "Script filename is set" $
assertFilterConversion "unexpected script name"
"script-name.lua"

View file

@ -0,0 +1,8 @@
function Blocks (blks)
-- verify that this looks like a `pandoc.List`
if not blks.find or not blks.map or not blks.filter then
error("table doesn't seem to be an instance of pandoc.List")
end
-- return plain block containing the number of elements in the list
return {pandoc.Plain {pandoc.Str(tostring(#blks))}}
end

View file

@ -0,0 +1,19 @@
function isWorldAfterSpace (fst, snd)
return fst and fst.t == 'LineBreak'
and snd and snd.t == 'Str' and snd.text == 'World!'
end
function Inlines (inlns)
-- verify that this looks like a `pandoc.List`
if not inlns.find or not inlns.map or not inlns.filter then
error("table doesn't seem to be an instance of pandoc.List")
end
-- Remove spaces before string "World"
for i = #inlns-1,1,-1 do
if isWorldAfterSpace(inlns[i], inlns[i+1]) then
inlns[i] = pandoc.Space()
end
end
return inlns
end

6
test/lua/meta.lua Normal file
View file

@ -0,0 +1,6 @@
function Meta (meta)
meta.old = nil
meta.new = "new"
meta.bool = (meta.bool == false)
return meta
end