Custom reader: pass list of sources instead of concatenated text
The first argument passed to Lua `Reader` functions is no longer a plain string but a richer data structure. The structure can easily be converted to a string by applying `tostring`, but is also a list with elements that contain each the *text* and *name* of each input source as a property of the respective name. A small example is added to the custom reader documentation, showcasing its use in a reader that creates a syntax-highlighted code block for each source code file passed as input. Existing readers must be updated.
This commit is contained in:
parent
bfb3118ebb
commit
83b5b79c0e
6 changed files with 104 additions and 15 deletions
|
@ -186,5 +186,5 @@ G = P{ "Doc",
|
||||||
}
|
}
|
||||||
|
|
||||||
function Reader(input, reader_options)
|
function Reader(input, reader_options)
|
||||||
return lpeg.match(G, input)
|
return lpeg.match(G, tostring(input))
|
||||||
end
|
end
|
||||||
|
|
|
@ -17,7 +17,7 @@ install any additional software to do this.
|
||||||
A custom reader is a Lua file that defines a function
|
A custom reader is a Lua file that defines a function
|
||||||
called `Reader`, which takes two arguments:
|
called `Reader`, which takes two arguments:
|
||||||
|
|
||||||
- a string, the raw input to be parsed
|
- the raw input to be parsed, as a list of sources
|
||||||
- optionally, a table of reader options, e.g.
|
- optionally, a table of reader options, e.g.
|
||||||
`{ columns = 62, standalone = true }`.
|
`{ columns = 62, standalone = true }`.
|
||||||
|
|
||||||
|
@ -27,6 +27,16 @@ which is automatically in scope. (Indeed, all of the utility
|
||||||
functions that are available for [Lua filters] are available
|
functions that are available for [Lua filters] are available
|
||||||
in custom readers, too.)
|
in custom readers, too.)
|
||||||
|
|
||||||
|
Each source item corresponds to a file or stream passed to pandoc
|
||||||
|
containing its text and name. E.g., if a single file `input.txt`
|
||||||
|
is passed to pandoc, then the list of sources will contain just a
|
||||||
|
single element `s`, where `s.name == 'input.txt'` and `s.text`
|
||||||
|
contains the file contents as a string.
|
||||||
|
|
||||||
|
The sources list, as well as each of its elements, can be
|
||||||
|
converted to a string via the Lua standard library function
|
||||||
|
`tostring`.
|
||||||
|
|
||||||
[Lua filters]: https://pandoc.org/lua-filters.html
|
[Lua filters]: https://pandoc.org/lua-filters.html
|
||||||
[`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc
|
[`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc
|
||||||
|
|
||||||
|
@ -34,12 +44,20 @@ A minimal example would be
|
||||||
|
|
||||||
```lua
|
```lua
|
||||||
function Reader(input)
|
function Reader(input)
|
||||||
return pandoc.Pandoc({ pandoc.CodeBlock(input) })
|
return pandoc.Pandoc({ pandoc.CodeBlock(tostring(input)) })
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
This just returns a document containing a big code block with
|
This just returns a document containing a big code block with all
|
||||||
all of the input.
|
of the input. Or, to create a separate code block for each input
|
||||||
|
file, one might write
|
||||||
|
|
||||||
|
``` lua
|
||||||
|
function Reader(input)
|
||||||
|
return pandoc.Pandoc(input:map(
|
||||||
|
function (s) return pandoc.CodeBlock(s.text) end))
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
In a nontrivial reader, you'll want to parse the input.
|
In a nontrivial reader, you'll want to parse the input.
|
||||||
You can do this using standard Lua library functions
|
You can do this using standard Lua library functions
|
||||||
|
@ -84,7 +102,7 @@ G = P{ "Pandoc",
|
||||||
}
|
}
|
||||||
|
|
||||||
function Reader(input)
|
function Reader(input)
|
||||||
return lpeg.match(G, input)
|
return lpeg.match(G, tostring(input))
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -277,7 +295,7 @@ function Reader(input, reader_options)
|
||||||
local refs = {}
|
local refs = {}
|
||||||
local thisref = {}
|
local thisref = {}
|
||||||
local ids = {}
|
local ids = {}
|
||||||
for line in string.gmatch(input, "[^\n]*") do
|
for line in string.gmatch(tostring(input), "[^\n]*") do
|
||||||
key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)")
|
key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)")
|
||||||
if key == "ER" then
|
if key == "ER" then
|
||||||
-- clean up fields
|
-- clean up fields
|
||||||
|
@ -550,7 +568,7 @@ G = P{ "Doc",
|
||||||
}
|
}
|
||||||
|
|
||||||
function Reader(input, reader_options)
|
function Reader(input, reader_options)
|
||||||
return lpeg.match(G, input)
|
return lpeg.match(G, tostring(input))
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -614,7 +632,7 @@ end
|
||||||
|
|
||||||
function Reader(input)
|
function Reader(input)
|
||||||
|
|
||||||
local parsed = json.decode(input)
|
local parsed = json.decode(tostring(input))
|
||||||
local blocks = {}
|
local blocks = {}
|
||||||
|
|
||||||
for _,entry in ipairs(parsed.data.children) do
|
for _,entry in ipairs(parsed.data.children) do
|
||||||
|
@ -636,3 +654,24 @@ Similar code can be used to consume JSON output from other APIs.
|
||||||
Note that the content of the text fields is markdown, so we
|
Note that the content of the text fields is markdown, so we
|
||||||
convert it using `pandoc.read()`.
|
convert it using `pandoc.read()`.
|
||||||
|
|
||||||
|
|
||||||
|
# Example: syntax-highlighted code files
|
||||||
|
|
||||||
|
This is a reader that puts the content of each input file into a
|
||||||
|
code block, sets the file's extension as the block's class to
|
||||||
|
enable code highlighting, and places the filename as a header
|
||||||
|
above each code block.
|
||||||
|
|
||||||
|
``` lua
|
||||||
|
function to_code_block (source)
|
||||||
|
local _, lang = pandoc.path.split_extension(source.name)
|
||||||
|
return pandoc.Div{
|
||||||
|
pandoc.Header(1, source.name == '' and '<stdin>' or source.name),
|
||||||
|
pandoc.CodeBlock(source.text, {class=lang}),
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
function Reader (input, opts)
|
||||||
|
return pandoc.Pandoc(input:map(to_code_block))
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
|
@ -694,6 +694,7 @@ library
|
||||||
Text.Pandoc.Lua.Marshal.Context,
|
Text.Pandoc.Lua.Marshal.Context,
|
||||||
Text.Pandoc.Lua.Marshal.PandocError,
|
Text.Pandoc.Lua.Marshal.PandocError,
|
||||||
Text.Pandoc.Lua.Marshal.ReaderOptions,
|
Text.Pandoc.Lua.Marshal.ReaderOptions,
|
||||||
|
Text.Pandoc.Lua.Marshal.Sources,
|
||||||
Text.Pandoc.Lua.Module.MediaBag,
|
Text.Pandoc.Lua.Module.MediaBag,
|
||||||
Text.Pandoc.Lua.Module.Pandoc,
|
Text.Pandoc.Lua.Module.Pandoc,
|
||||||
Text.Pandoc.Lua.Module.System,
|
Text.Pandoc.Lua.Module.System,
|
||||||
|
|
46
src/Text/Pandoc/Lua/Marshal/Sources.hs
Normal file
46
src/Text/Pandoc/Lua/Marshal/Sources.hs
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Lua.Marshaling.Sources
|
||||||
|
Copyright : © 2021 Albert Krewinkel
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
|
||||||
|
|
||||||
|
Marshal 'Sources'.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Lua.Marshal.Sources
|
||||||
|
( pushSources
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Text (Text)
|
||||||
|
import HsLua as Lua
|
||||||
|
import Text.Pandoc.Lua.Marshal.List (newListMetatable)
|
||||||
|
import Text.Pandoc.Sources (Sources (..))
|
||||||
|
import Text.Parsec (SourcePos, sourceName)
|
||||||
|
|
||||||
|
-- | Pushes the 'Sources' as a list of lazy Lua objects.
|
||||||
|
pushSources :: LuaError e => Pusher e Sources
|
||||||
|
pushSources (Sources srcs) = do
|
||||||
|
pushList (pushUD typeSource) srcs
|
||||||
|
newListMetatable "pandoc Sources" $ do
|
||||||
|
pushName "__tostring"
|
||||||
|
pushHaskellFunction $ do
|
||||||
|
sources <- forcePeek $ peekList (peekUD typeSource) (nthBottom 1)
|
||||||
|
pushText . mconcat $ map snd sources
|
||||||
|
return 1
|
||||||
|
rawset (nth 3)
|
||||||
|
setmetatable (nth 2)
|
||||||
|
|
||||||
|
-- | Source object type.
|
||||||
|
typeSource :: LuaError e => DocumentedType e (SourcePos, Text)
|
||||||
|
typeSource = deftype "pandoc input source"
|
||||||
|
[ operation Tostring $ lambda
|
||||||
|
### liftPure snd
|
||||||
|
<#> udparam typeSource "srcs" "Source to print in native format"
|
||||||
|
=#> functionResult pushText "string" "Haskell representation"
|
||||||
|
]
|
||||||
|
[ readonly "name" "source name"
|
||||||
|
(pushString, sourceName . fst)
|
||||||
|
, readonly "text" "source text"
|
||||||
|
(pushText, snd)
|
||||||
|
]
|
|
@ -22,7 +22,9 @@ import Text.Pandoc.Lua.Marshal.CommonState ()
|
||||||
import Text.Pandoc.Lua.Marshal.Context ()
|
import Text.Pandoc.Lua.Marshal.Context ()
|
||||||
import Text.Pandoc.Lua.Marshal.PandocError()
|
import Text.Pandoc.Lua.Marshal.PandocError()
|
||||||
import Text.Pandoc.Lua.Marshal.ReaderOptions ()
|
import Text.Pandoc.Lua.Marshal.ReaderOptions ()
|
||||||
|
import Text.Pandoc.Lua.Marshal.Sources (pushSources)
|
||||||
import Text.Pandoc.Lua.ErrorConversion ()
|
import Text.Pandoc.Lua.ErrorConversion ()
|
||||||
|
import Text.Pandoc.Sources (Sources)
|
||||||
|
|
||||||
instance Pushable Pandoc where
|
instance Pushable Pandoc where
|
||||||
push = pushPandoc
|
push = pushPandoc
|
||||||
|
@ -109,3 +111,6 @@ instance Peekable Version where
|
||||||
|
|
||||||
instance {-# OVERLAPPING #-} Peekable Attr where
|
instance {-# OVERLAPPING #-} Peekable Attr where
|
||||||
peek = forcePeek . peekAttr
|
peek = forcePeek . peekAttr
|
||||||
|
|
||||||
|
instance Pushable Sources where
|
||||||
|
push = pushSources
|
||||||
|
|
|
@ -17,7 +17,6 @@ Supports custom parsers written in Lua which produce a Pandoc AST.
|
||||||
module Text.Pandoc.Readers.Custom ( readCustom ) where
|
module Text.Pandoc.Readers.Custom ( readCustom ) where
|
||||||
import Control.Exception
|
import Control.Exception
|
||||||
import Control.Monad (when)
|
import Control.Monad (when)
|
||||||
import Data.Text (Text)
|
|
||||||
import HsLua as Lua hiding (Operation (Div), render)
|
import HsLua as Lua hiding (Operation (Div), render)
|
||||||
import HsLua.Class.Peekable (PeekError)
|
import HsLua.Class.Peekable (PeekError)
|
||||||
import Control.Monad.IO.Class (MonadIO)
|
import Control.Monad.IO.Class (MonadIO)
|
||||||
|
@ -26,13 +25,13 @@ import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
|
||||||
import Text.Pandoc.Lua.Util (dofileWithTraceback)
|
import Text.Pandoc.Lua.Util (dofileWithTraceback)
|
||||||
import Text.Pandoc.Options
|
import Text.Pandoc.Options
|
||||||
import Text.Pandoc.Class (PandocMonad)
|
import Text.Pandoc.Class (PandocMonad)
|
||||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
import Text.Pandoc.Sources (Sources, ToSources(..))
|
||||||
|
|
||||||
-- | Convert custom markup to Pandoc.
|
-- | Convert custom markup to Pandoc.
|
||||||
readCustom :: (PandocMonad m, MonadIO m, ToSources s)
|
readCustom :: (PandocMonad m, MonadIO m, ToSources s)
|
||||||
=> FilePath -> ReaderOptions -> s -> m Pandoc
|
=> FilePath -> ReaderOptions -> s -> m Pandoc
|
||||||
readCustom luaFile opts sources = do
|
readCustom luaFile opts srcs = do
|
||||||
let input = sourcesToText $ toSources sources
|
let input = toSources srcs
|
||||||
let globals = [ PANDOC_SCRIPT_FILE luaFile ]
|
let globals = [ PANDOC_SCRIPT_FILE luaFile ]
|
||||||
res <- runLua $ do
|
res <- runLua $ do
|
||||||
setGlobals globals
|
setGlobals globals
|
||||||
|
@ -47,8 +46,7 @@ readCustom luaFile opts sources = do
|
||||||
Right doc -> return doc
|
Right doc -> return doc
|
||||||
|
|
||||||
parseCustom :: forall e. PeekError e
|
parseCustom :: forall e. PeekError e
|
||||||
=> Text
|
=> Sources
|
||||||
-> ReaderOptions
|
-> ReaderOptions
|
||||||
-> LuaE e Pandoc
|
-> LuaE e Pandoc
|
||||||
parseCustom = invoke @e "Reader"
|
parseCustom = invoke @e "Reader"
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue