Lua filters (#3514)

* Add `--lua-filter` option.  This works like `--filter` but takes pathnames of special lua filters and uses the lua interpreter baked into pandoc, so that no external interpreter is needed.  Note that lua filters are all applied after regular filters, regardless of their position on the command line.
* Add Text.Pandoc.Lua, exporting `runLuaFilter`.  Add `pandoc.lua` to data files.
* Add private module Text.Pandoc.Lua.PandocModule to supply the default lua module.
* Add Tests.Lua to tests.
* Add data/pandoc.lua, the lua module pandoc imports when processing its lua filters.
* Document in MANUAL.txt.
This commit is contained in:
Albert Krewinkel 2017-03-20 15:17:03 +01:00 committed by John MacFarlane
parent b010a8c5e7
commit f2f6851713
13 changed files with 528 additions and 1 deletions

View file

@ -457,6 +457,31 @@ Reader options
3. `$PATH` (executable only)
`--lua-filter=`*SCRIPT*
: Transform the document in a similar fashion as JSON filters (see
`--filter`), but use pandoc's build-in lua filtering system. The given
lua script is expected to return a list of lua filters which will be
applied in order. Each lua filter must contain element-transforming
functions indexed by the name of the AST element on which the filter
function should be applied.
The `pandoc` lua module provides helper functions for element
creation. It is always loaded into the script's lua environment.
The following is an example lua script for macro-expansion:
function expand_hello_world(inline)
if inline.c == '{{helloworld}}' then
return pandoc.Emph{ pandoc.Str "Hello, World" }
else
return inline
end
end
return {{Str = expand_hello_world}}
`-M` *KEY*[`=`*VAL*], `--metadata=`*KEY*[`:`*VAL*]
: Set the metadata field *KEY* to the value *VAL*. A value specified

144
data/pandoc.lua Normal file
View file

@ -0,0 +1,144 @@
--[[
pandoc.lua
Copyright (c) 2017 Albert Krewinkel
Permission to use, copy, modify, and/or distribute this software for any purpose
with or without fee is hereby granted, provided that the above copyright notice
and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
]]
--- The module
local M = {
_version = "0.1.0"
}
--- Create a new set of attributes (Attr).
function M.Attributes(id, classes, key_values)
return {id, classes, key_values}
end
local Element = {}
--- Create a new element subtype
function Element:make_subtype(o)
o = o or {}
setmetatable(o, self)
self.__index = self
return o
end
--- Create a new element given its tag and arguments
function Element:new(tag, ...)
local element = { t = tag }
local content = {...}
-- special case for unary constructors
if #content == 1 then
element.c = content[1]
-- Don't set 'c' field if no further arguments were given. This is important
-- for nullary constructors like `Space` and `HorizontalRule`.
elseif #content > 0 then
element.c = content
end
setmetatable(element, self)
self.__index = self
return element
end
local function Doc(blocks, meta)
return {
["blocks"] = blocks,
["meta"] = meta,
["pandoc-api-version"] = {1,17,0,5},
}
end
local Inline = Element:make_subtype{}
local Block = Element:make_subtype{}
M.block_types = {
"BlockQuote",
"BulletList",
"CodeBlock",
"DefinitionList",
"Div",
"Header",
"HorizontalRule",
"HorizontalRule",
"LineBlock",
"Null",
"OrderedList",
"Para",
"Plain",
"RawBlock",
"Table",
}
M.inline_types = {
"Cite",
"Code",
"DisplayMath",
"DoubleQuoted",
"Emph",
"Image",
"InlineMath",
"LineBreak",
"Link",
"Math",
"Note",
"Quoted",
"RawInline",
"SingleQuoted",
"SmallCaps",
"SoftBreak",
"Space",
"Span",
"Str",
"Strikeout",
"Strong",
"Subscript",
"Superscript"
}
for _, block_type in pairs(M.block_types) do
M[block_type] = function(...)
return Block:new(block_type, ...)
end
end
for _, inline_type in pairs(M.inline_types) do
M[inline_type] = function(...)
return Inline:new(inline_type, ...)
end
end
--- Arrays to provide fast lookup of element types
local set_of_inline_types = {}
local set_of_block_types = {}
for i = 1, #M.inline_types do
set_of_inline_types[M.inline_types[i]] = true
end
for i = 1, #M.block_types do
set_of_block_types[M.block_types[i]] = true
end
function M.global_filter()
local res = {}
for k, v in pairs(_G) do
if set_of_inline_types[k] or set_of_block_types[k] or k == "Doc" then
res[k] = v
end
end
return res
end
M["Doc"] = Doc
return M

View file

@ -104,6 +104,8 @@ Data-Files:
data/abbreviations
-- sample lua custom writer
data/sample.lua
-- pandoc lua module
data/pandoc.lua
-- bash completion template
data/bash_completion.tpl
-- documentation
@ -232,6 +234,7 @@ Extra-Source-Files:
test/odt/odt/*.odt
test/odt/markdown/*.md
test/odt/native/*.native
test/lua/strmacro.lua
Source-repository head
type: git
location: git://github.com/jgm/pandoc.git
@ -282,6 +285,7 @@ Library
pandoc-types >= 1.17 && < 1.18,
aeson >= 0.7 && < 1.2,
aeson-pretty >= 0.8 && < 0.9,
hslua-aeson >= 0.1.0.2 && < 1,
tagsoup >= 0.13.7 && < 0.15,
base64-bytestring >= 0.1 && < 1.1,
zlib >= 0.5 && < 0.7,
@ -396,6 +400,7 @@ Library
Text.Pandoc.Writers.Muse,
Text.Pandoc.Writers.Math,
Text.Pandoc.Writers.Shared,
Text.Pandoc.Lua,
Text.Pandoc.PDF,
Text.Pandoc.UTF8,
Text.Pandoc.Templates,
@ -434,6 +439,7 @@ Library
Text.Pandoc.Readers.Org.ParserState,
Text.Pandoc.Readers.Org.Parsing,
Text.Pandoc.Readers.Org.Shared,
Text.Pandoc.Lua.PandocModule,
Text.Pandoc.CSS,
Text.Pandoc.UUID,
Text.Pandoc.Slides,
@ -522,6 +528,7 @@ Test-Suite test-pandoc
Other-Modules: Tests.Old
Tests.Command
Tests.Helpers
Tests.Lua
Tests.Shared
Tests.Readers.LaTeX
Tests.Readers.HTML

View file

@ -37,4 +37,3 @@ import Text.Pandoc.App (convertWithOpts, defaultOpts, options, parseOptions)
main :: IO ()
main = parseOptions options defaultOpts >>= convertWithOpts

View file

@ -69,6 +69,7 @@ import Text.Pandoc
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Class (PandocIO, getLog, withMediaBag)
import Text.Pandoc.Highlighting (highlightingStyles)
import Text.Pandoc.Lua ( runLuaFilter )
import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Process (pipeProcess)
@ -389,6 +390,7 @@ convertWithOpts opts = do
doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=>
return . flip (foldr addMetadata) (optMetadata opts) >=>
applyTransforms transforms >=>
applyLuaFilters datadir (optLuaFilters opts) [format] >=>
applyFilters datadir filters' [format]) doc
case writer of
@ -514,6 +516,7 @@ data Opt = Opt
, optWrapText :: WrapOption -- ^ Options for wrapping text
, optColumns :: Int -- ^ Line length in characters
, optFilters :: [FilePath] -- ^ Filters to apply
, optLuaFilters :: [FilePath] -- ^ Lua filters to apply
, optEmailObfuscation :: ObfuscationMethod
, optIdentifierPrefix :: String
, optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
@ -580,6 +583,7 @@ defaultOpts = Opt
, optWrapText = WrapAuto
, optColumns = 72
, optFilters = []
, optLuaFilters = []
, optEmailObfuscation = NoObfuscation
, optIdentifierPrefix = ""
, optIndentedCodeClasses = []
@ -725,6 +729,12 @@ expandFilterPath mbDatadir fp = liftIO $ do
else return fp
_ -> return fp
applyLuaFilters :: MonadIO m
=> Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
applyLuaFilters mbDatadir filters args d = do
expandedFilters <- mapM (expandFilterPath mbDatadir) filters
foldrM ($) d $ map (flip runLuaFilter args) expandedFilters
applyFilters :: MonadIO m
=> Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
applyFilters mbDatadir filters args d = do
@ -814,6 +824,12 @@ options =
"PROGRAM")
"" -- "External JSON filter"
, Option "" ["lua-filter"]
(ReqArg
(\arg opt -> return opt { optLuaFilters = arg : optLuaFilters opt })
"SCRIPTPATH")
"" -- "Lua filter"
, Option "p" ["preserve-tabs"]
(NoArg
(\opt -> return opt { optPreserveTabs = True }))

226
src/Text/Pandoc/Lua.hs Normal file
View file

@ -0,0 +1,226 @@
{-
Copyright © 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{- |
Module : Text.Pandoc.Lua
Copyright : Copyright © 2017 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Stability : alpha
Pandoc lua utils.
-}
module Text.Pandoc.Lua ( runLuaFilter ) where
import Control.Monad ( (>=>), when )
import Control.Monad.Trans ( MonadIO(..) )
import Data.Aeson ( FromJSON(..), ToJSON(..), Result(..), Value, fromJSON )
import Data.HashMap.Lazy ( HashMap )
import Data.Text ( Text, pack, unpack )
import Data.Text.Encoding ( decodeUtf8 )
import Scripting.Lua ( LuaState, StackValue(..) )
import Scripting.Lua.Aeson ()
import Text.Pandoc.Definition ( Block(..), Inline(..), Pandoc(..) )
import Text.Pandoc.Lua.PandocModule
import Text.Pandoc.Walk
import qualified Data.HashMap.Lazy as HashMap
import qualified Scripting.Lua as Lua
import qualified Scripting.Lua as LuaAeson
runLuaFilter :: (MonadIO m)
=> FilePath -> [String] -> Pandoc -> m Pandoc
runLuaFilter filterPath args pd = liftIO $ do
lua <- LuaAeson.newstate
Lua.openlibs lua
Lua.newtable lua
Lua.setglobal lua "PANDOC_FILTER_FUNCTIONS" -- hack, store functions here
pushPandocModule lua
Lua.setglobal lua "pandoc"
status <- Lua.loadfile lua filterPath
if (status /= 0)
then do
luaErrMsg <- unpack . decodeUtf8 <$> Lua.tostring lua 1
error luaErrMsg
else do
Lua.call lua 0 1
Just luaFilters <- Lua.peek lua (-1)
Lua.push lua (map pack args)
Lua.setglobal lua "PandocParameters"
doc <- runAll luaFilters >=> luaFilter lua "filter_doc" $ pd
Lua.close lua
return doc
runAll :: [LuaFilter] -> Pandoc -> IO Pandoc
runAll [] = return
runAll (x:xs) = walkMWithLuaFilter x >=> runAll xs
luaFilter :: Lua.LuaState -> String -> Pandoc -> IO Pandoc
luaFilter lua luaFn x = do
fnExists <- isLuaFunction lua luaFn
if fnExists
then walkM (Lua.callfunc lua luaFn :: Pandoc -> IO Pandoc) x
else return x
walkMWithLuaFilter :: LuaFilter -> Pandoc -> IO Pandoc
walkMWithLuaFilter (LuaFilter lua inlineFnMap blockFnMap docFnMap) =
walkM (execInlineLuaFilter lua inlineFnMap) >=>
walkM (execBlockLuaFilter lua blockFnMap) >=>
walkM (execDocLuaFilter lua docFnMap)
type InlineFunctionMap = HashMap Text (LuaFilterFunction Inline)
type BlockFunctionMap = HashMap Text (LuaFilterFunction Block)
type DocFunctionMap = HashMap Text (LuaFilterFunction Pandoc)
data LuaFilter =
LuaFilter LuaState InlineFunctionMap BlockFunctionMap DocFunctionMap
newtype LuaFilterFunction a = LuaFilterFunction { functionIndex :: Int }
execDocLuaFilter :: LuaState
-> HashMap Text (LuaFilterFunction Pandoc)
-> Pandoc -> IO Pandoc
execDocLuaFilter lua fnMap x = do
let docFnName = "Doc"
case HashMap.lookup docFnName fnMap of
Nothing -> return x
Just fn -> runLuaFilterFunction lua fn x
execBlockLuaFilter :: LuaState
-> HashMap Text (LuaFilterFunction Block)
-> Block -> IO Block
execBlockLuaFilter lua fnMap x = do
let filterOrId constr = case HashMap.lookup constr fnMap of
Nothing -> return x
Just fn -> runLuaFilterFunction lua fn x
case x of
Plain _ -> filterOrId "Plain"
Para _ -> filterOrId "Para"
LineBlock _ -> filterOrId "LineBlock"
CodeBlock _ _ -> filterOrId "CodeBlock"
RawBlock _ _ -> filterOrId "RawBlock"
BlockQuote _ -> filterOrId "BlockQuote"
OrderedList _ _ -> filterOrId "OrderedList"
BulletList _ -> filterOrId "BulletList"
DefinitionList _ -> filterOrId "DefinitionList"
Header _ _ _ -> filterOrId "Header"
HorizontalRule -> filterOrId "HorizontalRule"
Table _ _ _ _ _ -> filterOrId "Table"
Div _ _ -> filterOrId "Div"
Null -> filterOrId "Null"
execInlineLuaFilter :: LuaState
-> HashMap Text (LuaFilterFunction Inline)
-> Inline -> IO Inline
execInlineLuaFilter lua fnMap x = do
let filterOrId constr = case HashMap.lookup constr fnMap of
Nothing -> return x
Just fn -> runLuaFilterFunction lua fn x
case x of
Cite _ _ -> filterOrId "Cite"
Code _ _ -> filterOrId "Code"
Emph _ -> filterOrId "Emph"
Image _ _ _ -> filterOrId "Image"
LineBreak -> filterOrId "LineBreak"
Link _ _ _ -> filterOrId "Link"
Math _ _ -> filterOrId "Math"
Note _ -> filterOrId "Note"
Quoted _ _ -> filterOrId "Quoted"
RawInline _ _ -> filterOrId "RawInline"
SmallCaps _ -> filterOrId "SmallCaps"
SoftBreak -> filterOrId "SoftBreak"
Space -> filterOrId "Space"
Span _ _ -> filterOrId "Span"
Str _ -> filterOrId "Str"
Strikeout _ -> filterOrId "Strikeout"
Strong _ -> filterOrId "Strong"
Subscript _ -> filterOrId "Subscript"
Superscript _ -> filterOrId "Superscript"
instance StackValue LuaFilter where
valuetype _ = Lua.TTABLE
push = undefined
peek lua i = do
-- TODO: find a more efficient way of doing this in a typesafe manner.
inlineFnMap <- Lua.peek lua i
blockFnMap <- Lua.peek lua i
docFnMap <- Lua.peek lua i
return $ LuaFilter lua <$> inlineFnMap <*> blockFnMap <*> docFnMap
runLuaFilterFunction :: (StackValue a)
=> LuaState -> LuaFilterFunction a -> a -> IO a
runLuaFilterFunction lua lf inline = do
pushFilterFunction lua lf
Lua.push lua inline
Lua.call lua 1 1
Just res <- Lua.peek lua (-1)
Lua.pop lua 1
return res
-- FIXME: use registry
pushFilterFunction :: Lua.LuaState -> LuaFilterFunction a -> IO ()
pushFilterFunction lua lf = do
Lua.getglobal lua "PANDOC_FILTER_FUNCTIONS"
Lua.rawgeti lua (-1) (functionIndex lf)
Lua.remove lua (-2) -- remove global from stack
instance StackValue (LuaFilterFunction a) where
valuetype _ = Lua.TFUNCTION
push lua v = pushFilterFunction lua v
peek lua i = do
isFn <- Lua.isfunction lua i
when (not isFn) (error $ "Not a function at index " ++ (show i))
Lua.pushvalue lua i
Lua.getglobal lua "PANDOC_FILTER_FUNCTIONS"
len <- Lua.objlen lua (-1)
Lua.insert lua (-2)
Lua.rawseti lua (-2) (len + 1)
Lua.pop lua 1
return . Just $ LuaFilterFunction (len + 1)
isLuaFunction :: Lua.LuaState -> String -> IO Bool
isLuaFunction lua fnName = do
Lua.getglobal lua fnName
res <- Lua.isfunction lua (-1)
Lua.pop lua (-1)
return res
maybeFromJson :: (FromJSON a) => Maybe Value -> Maybe a
maybeFromJson mv = fromJSON <$> mv >>= \case
Success x -> Just x
_ -> Nothing
instance StackValue Pandoc where
push lua = Lua.push lua . toJSON
peek lua i = maybeFromJson <$> peek lua i
valuetype _ = Lua.TTABLE
instance StackValue Block where
push lua = Lua.push lua . toJSON
peek lua i = maybeFromJson <$> peek lua i
valuetype _ = Lua.TTABLE
instance StackValue Inline where
push lua = Lua.push lua . toJSON
peek lua i = maybeFromJson <$> peek lua i
valuetype _ = Lua.TTABLE

View file

@ -0,0 +1,47 @@
{-
Copyright © 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Lua.PandocModule
Copyright : Copyright © 2017 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Stability : alpha
Pandoc module for lua.
-}
module Text.Pandoc.Lua.PandocModule ( pushPandocModule ) where
import Data.ByteString.Char8 ( unpack )
import Scripting.Lua ( LuaState, loadstring, call)
import Text.Pandoc.Shared ( readDataFile )
-- | Push the "pandoc" on the lua stack.
pushPandocModule :: LuaState -> IO ()
pushPandocModule lua = do
script <- pandocModuleScript
status <- loadstring lua script "cn"
if (status /= 0)
then return ()
else do
call lua 0 1
-- | Get the string representation of the pandoc module
pandocModuleScript :: IO String
pandocModuleScript = unpack <$> readDataFile Nothing "pandoc.lua"

View file

@ -8,6 +8,7 @@ flags:
packages:
- '.'
extra-deps:
- hslua-aeson-0.1.0.2
- skylighting-0.3.1
- texmath-0.9.3
resolver: lts-8.4

34
test/Tests/Lua.hs Normal file
View file

@ -0,0 +1,34 @@
{-# Language OverloadedStrings #-}
module Tests.Lua ( tests ) where
import System.FilePath ((</>))
import Test.Tasty (TestTree)
import Test.Tasty.HUnit (Assertion, assertEqual, testCase)
import Text.Pandoc.Builder
import Text.Pandoc.Lua
tests :: [TestTree]
tests =
[ testCase "macro expansion via filter" $
assertFilterConversion "a '{{helloworld}}' string is expanded"
"strmacro.lua"
(doc . para $ str "{{helloworld}}")
(doc . para . emph $ str "Hello, World")
, testCase "convert all plains to paras" $
assertFilterConversion "plains become para"
"plain-to-para.lua"
(doc $ bulletList [plain (str "alfa"), plain (str "bravo")])
(doc $ bulletList [para (str "alfa"), para (str "bravo")])
, testCase "make hello world document" $
assertFilterConversion "Document contains 'Hello, World!'"
"hello-world-doc.lua"
(doc . para $ str "Hey!" <> linebreak <> str "What's up?")
(doc . para $ str "Hello," <> space <> str "World!")
]
assertFilterConversion :: String -> FilePath -> Pandoc -> Pandoc -> Assertion
assertFilterConversion msg filterPath docIn docExpected = do
docRes <- runLuaFilter ("lua" </> filterPath) [] docIn
assertEqual msg docExpected docRes

View file

@ -0,0 +1,10 @@
return {
{
Doc = function(doc)
local meta = {}
local hello = { pandoc.Str "Hello,", pandoc.Space(), pandoc.Str "World!" }
local blocks = { pandoc.Para(hello) }
return pandoc.Doc(blocks, meta)
end
}
}

View file

@ -0,0 +1,6 @@
return {
{ Plain = function (blk)
return pandoc.Para(blk.c)
end,
}
}

10
test/lua/strmacro.lua Normal file
View file

@ -0,0 +1,10 @@
return {
{ Str = function (inline)
if inline.c == "{{helloworld}}" then
return pandoc.Emph {pandoc.Str "Hello, World"}
else
return inline
end
end,
}
}

View file

@ -5,6 +5,7 @@ module Main where
import GHC.IO.Encoding
import Test.Tasty
import qualified Tests.Command
import qualified Tests.Lua
import qualified Tests.Old
import qualified Tests.Readers.Docx
import qualified Tests.Readers.EPUB
@ -61,6 +62,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
, testGroup "Txt2Tags" Tests.Readers.Txt2Tags.tests
, testGroup "EPUB" Tests.Readers.EPUB.tests
]
, testGroup "Lua filters" Tests.Lua.tests
]
main :: IO ()