Custom reader: pass list of sources instead of concatenated text

The first argument passed to Lua `Reader` functions is no longer a plain
string but a richer data structure. The structure can easily be
converted to a string by applying `tostring`, but is also a list with
elements that contain each the *text* and *name* of each input source as
a property of the respective name.

A small example is added to the custom reader documentation, showcasing
its use in a reader that creates a syntax-highlighted code block for
each source code file passed as input.

Existing readers must be updated.
This commit is contained in:
Albert Krewinkel 2021-12-08 19:06:48 +01:00 committed by John MacFarlane
parent bfb3118ebb
commit 83b5b79c0e
6 changed files with 104 additions and 15 deletions

View file

@ -186,5 +186,5 @@ G = P{ "Doc",
} }
function Reader(input, reader_options) function Reader(input, reader_options)
return lpeg.match(G, input) return lpeg.match(G, tostring(input))
end end

View file

@ -17,7 +17,7 @@ install any additional software to do this.
A custom reader is a Lua file that defines a function A custom reader is a Lua file that defines a function
called `Reader`, which takes two arguments: called `Reader`, which takes two arguments:
- a string, the raw input to be parsed - the raw input to be parsed, as a list of sources
- optionally, a table of reader options, e.g. - optionally, a table of reader options, e.g.
`{ columns = 62, standalone = true }`. `{ columns = 62, standalone = true }`.
@ -27,6 +27,16 @@ which is automatically in scope. (Indeed, all of the utility
functions that are available for [Lua filters] are available functions that are available for [Lua filters] are available
in custom readers, too.) in custom readers, too.)
Each source item corresponds to a file or stream passed to pandoc
containing its text and name. E.g., if a single file `input.txt`
is passed to pandoc, then the list of sources will contain just a
single element `s`, where `s.name == 'input.txt'` and `s.text`
contains the file contents as a string.
The sources list, as well as each of its elements, can be
converted to a string via the Lua standard library function
`tostring`.
[Lua filters]: https://pandoc.org/lua-filters.html [Lua filters]: https://pandoc.org/lua-filters.html
[`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc [`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc
@ -34,12 +44,20 @@ A minimal example would be
```lua ```lua
function Reader(input) function Reader(input)
return pandoc.Pandoc({ pandoc.CodeBlock(input) }) return pandoc.Pandoc({ pandoc.CodeBlock(tostring(input)) })
end end
``` ```
This just returns a document containing a big code block with This just returns a document containing a big code block with all
all of the input. of the input. Or, to create a separate code block for each input
file, one might write
``` lua
function Reader(input)
return pandoc.Pandoc(input:map(
function (s) return pandoc.CodeBlock(s.text) end))
end
```
In a nontrivial reader, you'll want to parse the input. In a nontrivial reader, you'll want to parse the input.
You can do this using standard Lua library functions You can do this using standard Lua library functions
@ -84,7 +102,7 @@ G = P{ "Pandoc",
} }
function Reader(input) function Reader(input)
return lpeg.match(G, input) return lpeg.match(G, tostring(input))
end end
``` ```
@ -277,7 +295,7 @@ function Reader(input, reader_options)
local refs = {} local refs = {}
local thisref = {} local thisref = {}
local ids = {} local ids = {}
for line in string.gmatch(input, "[^\n]*") do for line in string.gmatch(tostring(input), "[^\n]*") do
key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)") key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)")
if key == "ER" then if key == "ER" then
-- clean up fields -- clean up fields
@ -550,7 +568,7 @@ G = P{ "Doc",
} }
function Reader(input, reader_options) function Reader(input, reader_options)
return lpeg.match(G, input) return lpeg.match(G, tostring(input))
end end
``` ```
@ -614,7 +632,7 @@ end
function Reader(input) function Reader(input)
local parsed = json.decode(input) local parsed = json.decode(tostring(input))
local blocks = {} local blocks = {}
for _,entry in ipairs(parsed.data.children) do for _,entry in ipairs(parsed.data.children) do
@ -636,3 +654,24 @@ Similar code can be used to consume JSON output from other APIs.
Note that the content of the text fields is markdown, so we Note that the content of the text fields is markdown, so we
convert it using `pandoc.read()`. convert it using `pandoc.read()`.
# Example: syntax-highlighted code files
This is a reader that puts the content of each input file into a
code block, sets the file's extension as the block's class to
enable code highlighting, and places the filename as a header
above each code block.
``` lua
function to_code_block (source)
local _, lang = pandoc.path.split_extension(source.name)
return pandoc.Div{
pandoc.Header(1, source.name == '' and '<stdin>' or source.name),
pandoc.CodeBlock(source.text, {class=lang}),
}
end
function Reader (input, opts)
return pandoc.Pandoc(input:map(to_code_block))
end
```

View file

@ -694,6 +694,7 @@ library
Text.Pandoc.Lua.Marshal.Context, Text.Pandoc.Lua.Marshal.Context,
Text.Pandoc.Lua.Marshal.PandocError, Text.Pandoc.Lua.Marshal.PandocError,
Text.Pandoc.Lua.Marshal.ReaderOptions, Text.Pandoc.Lua.Marshal.ReaderOptions,
Text.Pandoc.Lua.Marshal.Sources,
Text.Pandoc.Lua.Module.MediaBag, Text.Pandoc.Lua.Module.MediaBag,
Text.Pandoc.Lua.Module.Pandoc, Text.Pandoc.Lua.Module.Pandoc,
Text.Pandoc.Lua.Module.System, Text.Pandoc.Lua.Module.System,

View file

@ -0,0 +1,46 @@
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{- |
Module : Text.Pandoc.Lua.Marshaling.Sources
Copyright : © 2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Marshal 'Sources'.
-}
module Text.Pandoc.Lua.Marshal.Sources
( pushSources
) where
import Data.Text (Text)
import HsLua as Lua
import Text.Pandoc.Lua.Marshal.List (newListMetatable)
import Text.Pandoc.Sources (Sources (..))
import Text.Parsec (SourcePos, sourceName)
-- | Pushes the 'Sources' as a list of lazy Lua objects.
pushSources :: LuaError e => Pusher e Sources
pushSources (Sources srcs) = do
pushList (pushUD typeSource) srcs
newListMetatable "pandoc Sources" $ do
pushName "__tostring"
pushHaskellFunction $ do
sources <- forcePeek $ peekList (peekUD typeSource) (nthBottom 1)
pushText . mconcat $ map snd sources
return 1
rawset (nth 3)
setmetatable (nth 2)
-- | Source object type.
typeSource :: LuaError e => DocumentedType e (SourcePos, Text)
typeSource = deftype "pandoc input source"
[ operation Tostring $ lambda
### liftPure snd
<#> udparam typeSource "srcs" "Source to print in native format"
=#> functionResult pushText "string" "Haskell representation"
]
[ readonly "name" "source name"
(pushString, sourceName . fst)
, readonly "text" "source text"
(pushText, snd)
]

View file

@ -22,7 +22,9 @@ import Text.Pandoc.Lua.Marshal.CommonState ()
import Text.Pandoc.Lua.Marshal.Context () import Text.Pandoc.Lua.Marshal.Context ()
import Text.Pandoc.Lua.Marshal.PandocError() import Text.Pandoc.Lua.Marshal.PandocError()
import Text.Pandoc.Lua.Marshal.ReaderOptions () import Text.Pandoc.Lua.Marshal.ReaderOptions ()
import Text.Pandoc.Lua.Marshal.Sources (pushSources)
import Text.Pandoc.Lua.ErrorConversion () import Text.Pandoc.Lua.ErrorConversion ()
import Text.Pandoc.Sources (Sources)
instance Pushable Pandoc where instance Pushable Pandoc where
push = pushPandoc push = pushPandoc
@ -109,3 +111,6 @@ instance Peekable Version where
instance {-# OVERLAPPING #-} Peekable Attr where instance {-# OVERLAPPING #-} Peekable Attr where
peek = forcePeek . peekAttr peek = forcePeek . peekAttr
instance Pushable Sources where
push = pushSources

View file

@ -17,7 +17,6 @@ Supports custom parsers written in Lua which produce a Pandoc AST.
module Text.Pandoc.Readers.Custom ( readCustom ) where module Text.Pandoc.Readers.Custom ( readCustom ) where
import Control.Exception import Control.Exception
import Control.Monad (when) import Control.Monad (when)
import Data.Text (Text)
import HsLua as Lua hiding (Operation (Div), render) import HsLua as Lua hiding (Operation (Div), render)
import HsLua.Class.Peekable (PeekError) import HsLua.Class.Peekable (PeekError)
import Control.Monad.IO.Class (MonadIO) import Control.Monad.IO.Class (MonadIO)
@ -26,13 +25,13 @@ import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
import Text.Pandoc.Lua.Util (dofileWithTraceback) import Text.Pandoc.Lua.Util (dofileWithTraceback)
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Sources (Sources, ToSources(..))
-- | Convert custom markup to Pandoc. -- | Convert custom markup to Pandoc.
readCustom :: (PandocMonad m, MonadIO m, ToSources s) readCustom :: (PandocMonad m, MonadIO m, ToSources s)
=> FilePath -> ReaderOptions -> s -> m Pandoc => FilePath -> ReaderOptions -> s -> m Pandoc
readCustom luaFile opts sources = do readCustom luaFile opts srcs = do
let input = sourcesToText $ toSources sources let input = toSources srcs
let globals = [ PANDOC_SCRIPT_FILE luaFile ] let globals = [ PANDOC_SCRIPT_FILE luaFile ]
res <- runLua $ do res <- runLua $ do
setGlobals globals setGlobals globals
@ -47,8 +46,7 @@ readCustom luaFile opts sources = do
Right doc -> return doc Right doc -> return doc
parseCustom :: forall e. PeekError e parseCustom :: forall e. PeekError e
=> Text => Sources
-> ReaderOptions -> ReaderOptions
-> LuaE e Pandoc -> LuaE e Pandoc
parseCustom = invoke @e "Reader" parseCustom = invoke @e "Reader"