From 83b5b79c0e4f073198b5af11b9e8a0a4471fcd41 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 8 Dec 2021 19:06:48 +0100 Subject: [PATCH] Custom reader: pass list of sources instead of concatenated text The first argument passed to Lua `Reader` functions is no longer a plain string but a richer data structure. The structure can easily be converted to a string by applying `tostring`, but is also a list with elements that contain each the *text* and *name* of each input source as a property of the respective name. A small example is added to the custom reader documentation, showcasing its use in a reader that creates a syntax-highlighted code block for each source code file passed as input. Existing readers must be updated. --- data/creole.lua | 2 +- doc/custom-readers.md | 55 ++++++++++++++++++++++---- pandoc.cabal | 1 + src/Text/Pandoc/Lua/Marshal/Sources.hs | 46 +++++++++++++++++++++ src/Text/Pandoc/Lua/Orphans.hs | 5 +++ src/Text/Pandoc/Readers/Custom.hs | 10 ++--- 6 files changed, 104 insertions(+), 15 deletions(-) create mode 100644 src/Text/Pandoc/Lua/Marshal/Sources.hs diff --git a/data/creole.lua b/data/creole.lua index 5b7d7f554..590dfc871 100644 --- a/data/creole.lua +++ b/data/creole.lua @@ -186,5 +186,5 @@ G = P{ "Doc", } function Reader(input, reader_options) - return lpeg.match(G, input) + return lpeg.match(G, tostring(input)) end diff --git a/doc/custom-readers.md b/doc/custom-readers.md index afa0caa73..df2de2182 100644 --- a/doc/custom-readers.md +++ b/doc/custom-readers.md @@ -17,7 +17,7 @@ install any additional software to do this. A custom reader is a Lua file that defines a function called `Reader`, which takes two arguments: -- a string, the raw input to be parsed +- the raw input to be parsed, as a list of sources - optionally, a table of reader options, e.g. `{ columns = 62, standalone = true }`. @@ -27,6 +27,16 @@ which is automatically in scope. (Indeed, all of the utility functions that are available for [Lua filters] are available in custom readers, too.) +Each source item corresponds to a file or stream passed to pandoc +containing its text and name. E.g., if a single file `input.txt` +is passed to pandoc, then the list of sources will contain just a +single element `s`, where `s.name == 'input.txt'` and `s.text` +contains the file contents as a string. + +The sources list, as well as each of its elements, can be +converted to a string via the Lua standard library function +`tostring`. + [Lua filters]: https://pandoc.org/lua-filters.html [`pandoc` module]: https://pandoc.org/lua-filters.html#module-pandoc @@ -34,12 +44,20 @@ A minimal example would be ```lua function Reader(input) - return pandoc.Pandoc({ pandoc.CodeBlock(input) }) + return pandoc.Pandoc({ pandoc.CodeBlock(tostring(input)) }) end ``` -This just returns a document containing a big code block with -all of the input. +This just returns a document containing a big code block with all +of the input. Or, to create a separate code block for each input +file, one might write + +``` lua +function Reader(input) + return pandoc.Pandoc(input:map( + function (s) return pandoc.CodeBlock(s.text) end)) +end +``` In a nontrivial reader, you'll want to parse the input. You can do this using standard Lua library functions @@ -84,7 +102,7 @@ G = P{ "Pandoc", } function Reader(input) - return lpeg.match(G, input) + return lpeg.match(G, tostring(input)) end ``` @@ -277,7 +295,7 @@ function Reader(input, reader_options) local refs = {} local thisref = {} local ids = {} - for line in string.gmatch(input, "[^\n]*") do + for line in string.gmatch(tostring(input), "[^\n]*") do key, val = string.match(line, "([A-Z][A-Z0-9]) %- (.*)") if key == "ER" then -- clean up fields @@ -550,7 +568,7 @@ G = P{ "Doc", } function Reader(input, reader_options) - return lpeg.match(G, input) + return lpeg.match(G, tostring(input)) end ``` @@ -614,7 +632,7 @@ end function Reader(input) - local parsed = json.decode(input) + local parsed = json.decode(tostring(input)) local blocks = {} for _,entry in ipairs(parsed.data.children) do @@ -636,3 +654,24 @@ Similar code can be used to consume JSON output from other APIs. Note that the content of the text fields is markdown, so we convert it using `pandoc.read()`. + +# Example: syntax-highlighted code files + +This is a reader that puts the content of each input file into a +code block, sets the file's extension as the block's class to +enable code highlighting, and places the filename as a header +above each code block. + +``` lua +function to_code_block (source) + local _, lang = pandoc.path.split_extension(source.name) + return pandoc.Div{ + pandoc.Header(1, source.name == '' and '' or source.name), + pandoc.CodeBlock(source.text, {class=lang}), + } +end + +function Reader (input, opts) + return pandoc.Pandoc(input:map(to_code_block)) +end +``` diff --git a/pandoc.cabal b/pandoc.cabal index dcf12bf04..92513b911 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -694,6 +694,7 @@ library Text.Pandoc.Lua.Marshal.Context, Text.Pandoc.Lua.Marshal.PandocError, Text.Pandoc.Lua.Marshal.ReaderOptions, + Text.Pandoc.Lua.Marshal.Sources, Text.Pandoc.Lua.Module.MediaBag, Text.Pandoc.Lua.Module.Pandoc, Text.Pandoc.Lua.Module.System, diff --git a/src/Text/Pandoc/Lua/Marshal/Sources.hs b/src/Text/Pandoc/Lua/Marshal/Sources.hs new file mode 100644 index 000000000..7b5262ab5 --- /dev/null +++ b/src/Text/Pandoc/Lua/Marshal/Sources.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | +Module : Text.Pandoc.Lua.Marshaling.Sources +Copyright : © 2021 Albert Krewinkel +License : GNU GPL, version 2 or above +Maintainer : Albert Krewinkel + +Marshal 'Sources'. +-} +module Text.Pandoc.Lua.Marshal.Sources + ( pushSources + ) where + +import Data.Text (Text) +import HsLua as Lua +import Text.Pandoc.Lua.Marshal.List (newListMetatable) +import Text.Pandoc.Sources (Sources (..)) +import Text.Parsec (SourcePos, sourceName) + +-- | Pushes the 'Sources' as a list of lazy Lua objects. +pushSources :: LuaError e => Pusher e Sources +pushSources (Sources srcs) = do + pushList (pushUD typeSource) srcs + newListMetatable "pandoc Sources" $ do + pushName "__tostring" + pushHaskellFunction $ do + sources <- forcePeek $ peekList (peekUD typeSource) (nthBottom 1) + pushText . mconcat $ map snd sources + return 1 + rawset (nth 3) + setmetatable (nth 2) + +-- | Source object type. +typeSource :: LuaError e => DocumentedType e (SourcePos, Text) +typeSource = deftype "pandoc input source" + [ operation Tostring $ lambda + ### liftPure snd + <#> udparam typeSource "srcs" "Source to print in native format" + =#> functionResult pushText "string" "Haskell representation" + ] + [ readonly "name" "source name" + (pushString, sourceName . fst) + , readonly "text" "source text" + (pushText, snd) + ] diff --git a/src/Text/Pandoc/Lua/Orphans.hs b/src/Text/Pandoc/Lua/Orphans.hs index eef05bd27..d5b8f2c5d 100644 --- a/src/Text/Pandoc/Lua/Orphans.hs +++ b/src/Text/Pandoc/Lua/Orphans.hs @@ -22,7 +22,9 @@ import Text.Pandoc.Lua.Marshal.CommonState () import Text.Pandoc.Lua.Marshal.Context () import Text.Pandoc.Lua.Marshal.PandocError() import Text.Pandoc.Lua.Marshal.ReaderOptions () +import Text.Pandoc.Lua.Marshal.Sources (pushSources) import Text.Pandoc.Lua.ErrorConversion () +import Text.Pandoc.Sources (Sources) instance Pushable Pandoc where push = pushPandoc @@ -109,3 +111,6 @@ instance Peekable Version where instance {-# OVERLAPPING #-} Peekable Attr where peek = forcePeek . peekAttr + +instance Pushable Sources where + push = pushSources diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs index d7336012b..7b6c99ed8 100644 --- a/src/Text/Pandoc/Readers/Custom.hs +++ b/src/Text/Pandoc/Readers/Custom.hs @@ -17,7 +17,6 @@ Supports custom parsers written in Lua which produce a Pandoc AST. module Text.Pandoc.Readers.Custom ( readCustom ) where import Control.Exception import Control.Monad (when) -import Data.Text (Text) import HsLua as Lua hiding (Operation (Div), render) import HsLua.Class.Peekable (PeekError) import Control.Monad.IO.Class (MonadIO) @@ -26,13 +25,13 @@ import Text.Pandoc.Lua (Global (..), runLua, setGlobals) import Text.Pandoc.Lua.Util (dofileWithTraceback) import Text.Pandoc.Options import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import Text.Pandoc.Sources (Sources, ToSources(..)) -- | Convert custom markup to Pandoc. readCustom :: (PandocMonad m, MonadIO m, ToSources s) => FilePath -> ReaderOptions -> s -> m Pandoc -readCustom luaFile opts sources = do - let input = sourcesToText $ toSources sources +readCustom luaFile opts srcs = do + let input = toSources srcs let globals = [ PANDOC_SCRIPT_FILE luaFile ] res <- runLua $ do setGlobals globals @@ -47,8 +46,7 @@ readCustom luaFile opts sources = do Right doc -> return doc parseCustom :: forall e. PeekError e - => Text + => Sources -> ReaderOptions -> LuaE e Pandoc parseCustom = invoke @e "Reader" -