Lua: allow to pass custom reader options to pandoc.read

Reader options can now be passed as an optional third argument to
`pandoc.read`. The object can either be a table or a ReaderOptions value
like `PANDOC_READER_OPTIONS`. Creating new ReaderOptions objects is
possible through the new constructor `pandoc.ReaderOptions`.

Closes: #7656
This commit is contained in:
Albert Krewinkel 2021-11-06 11:00:26 +01:00 committed by John MacFarlane
parent ee2f0021f9
commit 6b462e5933
5 changed files with 178 additions and 45 deletions

View file

@ -2742,7 +2742,33 @@ format, and functions to filter and modify a subtree.
[`sha1`]{#pandoc.sha1} [`sha1`]{#pandoc.sha1}
: Alias for [`pandoc.utils.sha1`](#pandoc.utils.sha1) : Alias for [`pandoc.utils.sha1`](#pandoc.utils.sha1)
(DEPRECATED). (DEPRECATED, use `pandoc.utils.sha1` instead).
## Other constructors
[`ReaderOptions (opts)`]{#pandoc.readeroptions}
: Creates a new [ReaderOptions] value.
Parameters
`opts`:
: Either a table with a subset of the properties of a
[ReaderOptions] object, or another ReaderOptions object.
Uses the defaults specified in the manual for all
properties that are not explicitly specified. Throws an
error if a table contains properties which are not present
in a ReaderOptions object. ([ReaderOptions]|table)
Returns: new [ReaderOptions] object
Usage:
-- copy of the reader options that were defined on the command line.
local cli_opts = pandoc.ReaderOptions(PANDOC_READER_OPTIONS)
-- default reader options, but columns set to 66.
local short_colums_opts = pandoc.ReaderOptions {columns = 66}
## Helper functions ## Helper functions
@ -2815,17 +2841,23 @@ Returns: the transformed inline element
### read {#pandoc.read} ### read {#pandoc.read}
`read (markup[, format])` `read (markup[, format[, reader_options]])`
Parse the given string into a Pandoc document. Parse the given string into a Pandoc document.
Parameters: Parameters:
`markup`: `markup`:
: the markup to be parsed : the markup to be parsed (string)
`format`: `format`:
: format specification, defaults to `"markdown"`. : format specification, defaults to `"markdown"` (string)
`reader_options`:
: options passed to the reader; may be a ReaderOptions object or
a table with a subset of the keys and values of a
ReaderOptions object; defaults to the default values
documented in the manual. ([ReaderOptions]|table)
Returns: pandoc document Returns: pandoc document
@ -2838,6 +2870,8 @@ Usage:
-- The inline element in that block is an `Emph` -- The inline element in that block is an `Emph`
assert(block.content[1].t == "Emph") assert(block.content[1].t == "Emph")
[ReaderOptions]: #type-readeroptions
# Module pandoc.utils # Module pandoc.utils
This module exposes internal pandoc functions and utility This module exposes internal pandoc functions and utility

View file

@ -22,7 +22,7 @@ import Text.Pandoc.Definition (Pandoc (Pandoc), pandocTypesVersion)
import Text.Pandoc.Error (PandocError) import Text.Pandoc.Error (PandocError)
import Text.Pandoc.Lua.Marshaling () import Text.Pandoc.Lua.Marshaling ()
import Text.Pandoc.Lua.Marshaling.CommonState (pushCommonState) import Text.Pandoc.Lua.Marshaling.CommonState (pushCommonState)
import Text.Pandoc.Lua.Marshaling.ReaderOptions (pushReaderOptions) import Text.Pandoc.Lua.Marshaling.ReaderOptions (pushReaderOptionsReadonly)
import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Options (ReaderOptions)
import qualified Data.Text as Text import qualified Data.Text as Text
@ -55,7 +55,7 @@ setGlobal global = case global of
pushUD typePandocLazy doc pushUD typePandocLazy doc
Lua.setglobal "PANDOC_DOCUMENT" Lua.setglobal "PANDOC_DOCUMENT"
PANDOC_READER_OPTIONS ropts -> do PANDOC_READER_OPTIONS ropts -> do
pushReaderOptions ropts pushReaderOptionsReadonly ropts
Lua.setglobal "PANDOC_READER_OPTIONS" Lua.setglobal "PANDOC_READER_OPTIONS"
PANDOC_SCRIPT_FILE filePath -> do PANDOC_SCRIPT_FILE filePath -> do
Lua.push filePath Lua.push filePath

View file

@ -1,3 +1,4 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
@ -15,8 +16,10 @@ Marshaling instance for ReaderOptions and its components.
module Text.Pandoc.Lua.Marshaling.ReaderOptions module Text.Pandoc.Lua.Marshaling.ReaderOptions
( peekReaderOptions ( peekReaderOptions
, pushReaderOptions , pushReaderOptions
, pushReaderOptionsReadonly
) where ) where
import Data.Default (def)
import HsLua as Lua import HsLua as Lua
import Text.Pandoc.Lua.Marshaling.List (pushPandocList) import Text.Pandoc.Lua.Marshaling.List (pushPandocList)
import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Options (ReaderOptions (..))
@ -25,47 +28,103 @@ import Text.Pandoc.Options (ReaderOptions (..))
-- Reader Options -- Reader Options
-- --
-- | Retrieve a ReaderOptions value, either from a normal ReaderOptions
-- value, from a read-only object, or from a table with the same
-- keys as a ReaderOptions object.
peekReaderOptions :: LuaError e => Peeker e ReaderOptions peekReaderOptions :: LuaError e => Peeker e ReaderOptions
peekReaderOptions = peekUD typeReaderOptions peekReaderOptions = retrieving "ReaderOptions" . \idx ->
liftLua (ltype idx) >>= \case
TypeUserdata -> choice [ peekUD typeReaderOptions
, peekUD typeReaderOptionsReadonly
]
idx
TypeTable -> peekReaderOptionsTable idx
_ -> failPeek =<<
typeMismatchMessage "ReaderOptions userdata or table" idx
-- | Pushes a ReaderOptions value as userdata object.
pushReaderOptions :: LuaError e => Pusher e ReaderOptions pushReaderOptions :: LuaError e => Pusher e ReaderOptions
pushReaderOptions = pushUD typeReaderOptions pushReaderOptions = pushUD typeReaderOptions
typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions -- | Pushes a ReaderOptions object, but makes it read-only.
typeReaderOptions = deftype "pandoc ReaderOptions" pushReaderOptionsReadonly :: LuaError e => Pusher e ReaderOptions
[ operation Tostring luaShow pushReaderOptionsReadonly = pushUD typeReaderOptionsReadonly
-- | ReaderOptions object type for read-only values.
typeReaderOptionsReadonly :: LuaError e => DocumentedType e ReaderOptions
typeReaderOptionsReadonly = deftype "ReaderOptions (read-only)"
[ operation Tostring $ lambda
### liftPure show
<#> udparam typeReaderOptions "opts" "options to print in native format"
=#> functionResult pushString "string" "Haskell representation"
, operation Newindex $ lambda
### (failLua "This ReaderOptions value is read-only.")
=?> "Throws an error when called, i.e., an assignment is made."
] ]
[ readonly "extensions" "" readerOptionsMembers
( pushString . show
, readerExtensions) -- | 'ReaderOptions' object type.
, readonly "standalone" "" typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions
( pushBool typeReaderOptions = deftype "ReaderOptions"
, readerStandalone) [ operation Tostring $ lambda
, readonly "columns" "" ### liftPure show
( pushIntegral <#> udparam typeReaderOptions "opts" "options to print in native format"
, readerColumns) =#> functionResult pushString "string" "Haskell representation"
, readonly "tab_stop" "" ]
( pushIntegral readerOptionsMembers
, readerTabStop)
, readonly "indented_code_classes" "" -- | Member properties of 'ReaderOptions' Lua values.
( pushPandocList pushText readerOptionsMembers :: LuaError e
, readerIndentedCodeClasses) => [Member e (DocumentedFunction e) ReaderOptions]
, readonly "abbreviations" "" readerOptionsMembers =
( pushSet pushText [ property "abbreviations" ""
, readerAbbreviations) (pushSet pushText, readerAbbreviations)
, readonly "track_changes" "" (peekSet peekText, \opts x -> opts{ readerAbbreviations = x })
( pushString . show , property "columns" ""
, readerTrackChanges) (pushIntegral, readerColumns)
, readonly "strip_comments" "" (peekIntegral, \opts x -> opts{ readerColumns = x })
( pushBool , property "default_image_extension" ""
, readerStripComments) (pushText, readerDefaultImageExtension)
, readonly "default_image_extension" "" (peekText, \opts x -> opts{ readerDefaultImageExtension = x })
( pushText , property "extensions" ""
, readerDefaultImageExtension) (pushString . show, readerExtensions)
(peekRead, \opts x -> opts{ readerExtensions = x })
, property "indented_code_classes" ""
(pushPandocList pushText, readerIndentedCodeClasses)
(peekList peekText, \opts x -> opts{ readerIndentedCodeClasses = x })
, property "strip_comments" ""
(pushBool, readerStripComments)
(peekBool, \opts x -> opts{ readerStripComments = x })
, property "standalone" ""
(pushBool, readerStandalone)
(peekBool, \opts x -> opts{ readerStandalone = x })
, property "tab_stop" ""
(pushIntegral, readerTabStop)
(peekIntegral, \opts x -> opts{ readerTabStop = x })
, property "track_changes" ""
(pushString . show, readerTrackChanges)
(peekRead, \opts x -> opts{ readerTrackChanges = x })
] ]
luaShow :: LuaError e => DocumentedFunction e -- | Retrieves a 'ReaderOptions' object from a table on the stack, using
luaShow = defun "__tostring" -- the default values for all missing fields.
### liftPure show --
<#> udparam typeReaderOptions "state" "object to print in native format" -- Internally, this push the defaults reader options, sets each
=#> functionResult pushString "string" "Haskell representation" -- key/value pair of the table in the userdata value, then retrieves the
-- object again. This will update all fields and complain about unknown
-- keys.
peekReaderOptionsTable :: LuaError e => Peeker e ReaderOptions
peekReaderOptionsTable idx = retrieving "ReaderOptions (table)" $ do
liftLua $ do
absidx <- absindex idx
pushUD typeReaderOptions def
let setFields = do
next absidx >>= \case
False -> return () -- all fields were copied
True -> do
pushvalue (nth 2) *> insert (nth 2)
settable (nth 4) -- set in userdata object
setFields
pushnil -- first key
setFields
peekUD typeReaderOptions top

View file

@ -42,6 +42,8 @@ import Text.Pandoc.Lua.Marshaling.Attr (mkAttr, mkAttributeList)
import Text.Pandoc.Lua.Marshaling.List (List (..)) import Text.Pandoc.Lua.Marshaling.List (List (..))
import Text.Pandoc.Lua.Marshaling.ListAttributes ( mkListAttributes import Text.Pandoc.Lua.Marshaling.ListAttributes ( mkListAttributes
, peekListAttributes) , peekListAttributes)
import Text.Pandoc.Lua.Marshaling.ReaderOptions ( peekReaderOptions
, pushReaderOptions)
import Text.Pandoc.Lua.Marshaling.SimpleTable (mkSimpleTable) import Text.Pandoc.Lua.Marshaling.SimpleTable (mkSimpleTable)
import Text.Pandoc.Lua.Module.Utils (sha1) import Text.Pandoc.Lua.Module.Utils (sha1)
import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua,
@ -355,6 +357,12 @@ otherConstructors =
, mkAttributeList , mkAttributeList
, mkListAttributes , mkListAttributes
, mkSimpleTable , mkSimpleTable
, defun "ReaderOptions"
### liftPure id
<#> parameter peekReaderOptions "ReaderOptions|table" "opts" "reader options"
=#> functionResult pushReaderOptions "ReaderOptions" "new object"
#? "Creates a new ReaderOptions value."
] ]
stringConstants :: [Field e] stringConstants :: [Field e]
@ -405,10 +413,12 @@ functions =
=?> "output string, or error triple" =?> "output string, or error triple"
, defun "read" , defun "read"
### (\content mformatspec -> do ### (\content mformatspec mreaderOptions -> do
let formatSpec = fromMaybe "markdown" mformatspec let formatSpec = fromMaybe "markdown" mformatspec
readerOptions = fromMaybe def mreaderOptions
res <- Lua.liftIO . runIO $ getReader formatSpec >>= \case res <- Lua.liftIO . runIO $ getReader formatSpec >>= \case
(TextReader r, es) -> r def{ readerExtensions = es } content (TextReader r, es) -> r readerOptions{ readerExtensions = es }
content
_ -> throwError $ PandocSomeError _ -> throwError $ PandocSomeError
"Only textual formats are supported" "Only textual formats are supported"
case res of case res of
@ -422,6 +432,8 @@ functions =
throwM e) throwM e)
<#> parameter peekText "string" "content" "text to parse" <#> parameter peekText "string" "content" "text to parse"
<#> optionalParameter peekText "string" "formatspec" "format and extensions" <#> optionalParameter peekText "string" "formatspec" "format and extensions"
<#> optionalParameter peekReaderOptions "ReaderOptions" "reader_options"
"reader options"
=#> functionResult pushPandoc "Pandoc" "result document" =#> functionResult pushPandoc "Pandoc" "result document"
, sha1 , sha1

View file

@ -809,7 +809,25 @@ return {
) )
assert.are_same(expected_table, new_table) assert.are_same(expected_table, new_table)
end) end)
} },
group 'ReaderOptions' {
test('returns a userdata value', function ()
local opts = pandoc.ReaderOptions {}
assert.are_equal(type(opts), 'userdata')
end),
test('can construct from table', function ()
local opts = pandoc.ReaderOptions {columns = 66}
assert.are_equal(opts.columns, 66)
end),
test('can construct from other ReaderOptions value', function ()
local orig = pandoc.ReaderOptions{columns = 65}
local copy = pandoc.ReaderOptions(orig)
for k, v in pairs(orig) do
assert.are_same(copy[k], v)
end
assert.are_equal(copy.columns, 65)
end),
},
}, },
group 'clone' { group 'clone' {
@ -896,6 +914,16 @@ return {
'Extension empty_paragraphs not supported for gfm' 'Extension empty_paragraphs not supported for gfm'
) )
end), end),
test('read with other indented code classes', function()
local indented_code = ' return true'
local expected = pandoc.Pandoc({
pandoc.CodeBlock('return true', {class='foo'})
})
assert.are_same(
expected,
pandoc.read(indented_code, 'markdown', {indented_code_classes={'foo'}})
)
end),
test('failing read', function () test('failing read', function ()
assert.error_matches( assert.error_matches(
function () pandoc.read('foo', 'nosuchreader') end, function () pandoc.read('foo', 'nosuchreader') end,