Lua: allow to pass custom reader options to pandoc.read

Reader options can now be passed as an optional third argument to
`pandoc.read`. The object can either be a table or a ReaderOptions value
like `PANDOC_READER_OPTIONS`. Creating new ReaderOptions objects is
possible through the new constructor `pandoc.ReaderOptions`.

Closes: #7656
This commit is contained in:
Albert Krewinkel 2021-11-06 11:00:26 +01:00 committed by John MacFarlane
parent ee2f0021f9
commit 6b462e5933
5 changed files with 178 additions and 45 deletions

View file

@ -2742,7 +2742,33 @@ format, and functions to filter and modify a subtree.
[`sha1`]{#pandoc.sha1}
: Alias for [`pandoc.utils.sha1`](#pandoc.utils.sha1)
(DEPRECATED).
(DEPRECATED, use `pandoc.utils.sha1` instead).
## Other constructors
[`ReaderOptions (opts)`]{#pandoc.readeroptions}
: Creates a new [ReaderOptions] value.
Parameters
`opts`:
: Either a table with a subset of the properties of a
[ReaderOptions] object, or another ReaderOptions object.
Uses the defaults specified in the manual for all
properties that are not explicitly specified. Throws an
error if a table contains properties which are not present
in a ReaderOptions object. ([ReaderOptions]|table)
Returns: new [ReaderOptions] object
Usage:
-- copy of the reader options that were defined on the command line.
local cli_opts = pandoc.ReaderOptions(PANDOC_READER_OPTIONS)
-- default reader options, but columns set to 66.
local short_colums_opts = pandoc.ReaderOptions {columns = 66}
## Helper functions
@ -2815,17 +2841,23 @@ Returns: the transformed inline element
### read {#pandoc.read}
`read (markup[, format])`
`read (markup[, format[, reader_options]])`
Parse the given string into a Pandoc document.
Parameters:
`markup`:
: the markup to be parsed
: the markup to be parsed (string)
`format`:
: format specification, defaults to `"markdown"`.
: format specification, defaults to `"markdown"` (string)
`reader_options`:
: options passed to the reader; may be a ReaderOptions object or
a table with a subset of the keys and values of a
ReaderOptions object; defaults to the default values
documented in the manual. ([ReaderOptions]|table)
Returns: pandoc document
@ -2838,6 +2870,8 @@ Usage:
-- The inline element in that block is an `Emph`
assert(block.content[1].t == "Emph")
[ReaderOptions]: #type-readeroptions
# Module pandoc.utils
This module exposes internal pandoc functions and utility

View file

@ -22,7 +22,7 @@ import Text.Pandoc.Definition (Pandoc (Pandoc), pandocTypesVersion)
import Text.Pandoc.Error (PandocError)
import Text.Pandoc.Lua.Marshaling ()
import Text.Pandoc.Lua.Marshaling.CommonState (pushCommonState)
import Text.Pandoc.Lua.Marshaling.ReaderOptions (pushReaderOptions)
import Text.Pandoc.Lua.Marshaling.ReaderOptions (pushReaderOptionsReadonly)
import Text.Pandoc.Options (ReaderOptions)
import qualified Data.Text as Text
@ -55,7 +55,7 @@ setGlobal global = case global of
pushUD typePandocLazy doc
Lua.setglobal "PANDOC_DOCUMENT"
PANDOC_READER_OPTIONS ropts -> do
pushReaderOptions ropts
pushReaderOptionsReadonly ropts
Lua.setglobal "PANDOC_READER_OPTIONS"
PANDOC_SCRIPT_FILE filePath -> do
Lua.push filePath

View file

@ -1,3 +1,4 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
@ -15,8 +16,10 @@ Marshaling instance for ReaderOptions and its components.
module Text.Pandoc.Lua.Marshaling.ReaderOptions
( peekReaderOptions
, pushReaderOptions
, pushReaderOptionsReadonly
) where
import Data.Default (def)
import HsLua as Lua
import Text.Pandoc.Lua.Marshaling.List (pushPandocList)
import Text.Pandoc.Options (ReaderOptions (..))
@ -25,47 +28,103 @@ import Text.Pandoc.Options (ReaderOptions (..))
-- Reader Options
--
-- | Retrieve a ReaderOptions value, either from a normal ReaderOptions
-- value, from a read-only object, or from a table with the same
-- keys as a ReaderOptions object.
peekReaderOptions :: LuaError e => Peeker e ReaderOptions
peekReaderOptions = peekUD typeReaderOptions
peekReaderOptions = retrieving "ReaderOptions" . \idx ->
liftLua (ltype idx) >>= \case
TypeUserdata -> choice [ peekUD typeReaderOptions
, peekUD typeReaderOptionsReadonly
]
idx
TypeTable -> peekReaderOptionsTable idx
_ -> failPeek =<<
typeMismatchMessage "ReaderOptions userdata or table" idx
-- | Pushes a ReaderOptions value as userdata object.
pushReaderOptions :: LuaError e => Pusher e ReaderOptions
pushReaderOptions = pushUD typeReaderOptions
typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions
typeReaderOptions = deftype "pandoc ReaderOptions"
[ operation Tostring luaShow
-- | Pushes a ReaderOptions object, but makes it read-only.
pushReaderOptionsReadonly :: LuaError e => Pusher e ReaderOptions
pushReaderOptionsReadonly = pushUD typeReaderOptionsReadonly
-- | ReaderOptions object type for read-only values.
typeReaderOptionsReadonly :: LuaError e => DocumentedType e ReaderOptions
typeReaderOptionsReadonly = deftype "ReaderOptions (read-only)"
[ operation Tostring $ lambda
### liftPure show
<#> udparam typeReaderOptions "opts" "options to print in native format"
=#> functionResult pushString "string" "Haskell representation"
, operation Newindex $ lambda
### (failLua "This ReaderOptions value is read-only.")
=?> "Throws an error when called, i.e., an assignment is made."
]
[ readonly "extensions" ""
( pushString . show
, readerExtensions)
, readonly "standalone" ""
( pushBool
, readerStandalone)
, readonly "columns" ""
( pushIntegral
, readerColumns)
, readonly "tab_stop" ""
( pushIntegral
, readerTabStop)
, readonly "indented_code_classes" ""
( pushPandocList pushText
, readerIndentedCodeClasses)
, readonly "abbreviations" ""
( pushSet pushText
, readerAbbreviations)
, readonly "track_changes" ""
( pushString . show
, readerTrackChanges)
, readonly "strip_comments" ""
( pushBool
, readerStripComments)
, readonly "default_image_extension" ""
( pushText
, readerDefaultImageExtension)
readerOptionsMembers
-- | 'ReaderOptions' object type.
typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions
typeReaderOptions = deftype "ReaderOptions"
[ operation Tostring $ lambda
### liftPure show
<#> udparam typeReaderOptions "opts" "options to print in native format"
=#> functionResult pushString "string" "Haskell representation"
]
readerOptionsMembers
-- | Member properties of 'ReaderOptions' Lua values.
readerOptionsMembers :: LuaError e
=> [Member e (DocumentedFunction e) ReaderOptions]
readerOptionsMembers =
[ property "abbreviations" ""
(pushSet pushText, readerAbbreviations)
(peekSet peekText, \opts x -> opts{ readerAbbreviations = x })
, property "columns" ""
(pushIntegral, readerColumns)
(peekIntegral, \opts x -> opts{ readerColumns = x })
, property "default_image_extension" ""
(pushText, readerDefaultImageExtension)
(peekText, \opts x -> opts{ readerDefaultImageExtension = x })
, property "extensions" ""
(pushString . show, readerExtensions)
(peekRead, \opts x -> opts{ readerExtensions = x })
, property "indented_code_classes" ""
(pushPandocList pushText, readerIndentedCodeClasses)
(peekList peekText, \opts x -> opts{ readerIndentedCodeClasses = x })
, property "strip_comments" ""
(pushBool, readerStripComments)
(peekBool, \opts x -> opts{ readerStripComments = x })
, property "standalone" ""
(pushBool, readerStandalone)
(peekBool, \opts x -> opts{ readerStandalone = x })
, property "tab_stop" ""
(pushIntegral, readerTabStop)
(peekIntegral, \opts x -> opts{ readerTabStop = x })
, property "track_changes" ""
(pushString . show, readerTrackChanges)
(peekRead, \opts x -> opts{ readerTrackChanges = x })
]
luaShow :: LuaError e => DocumentedFunction e
luaShow = defun "__tostring"
### liftPure show
<#> udparam typeReaderOptions "state" "object to print in native format"
=#> functionResult pushString "string" "Haskell representation"
-- | Retrieves a 'ReaderOptions' object from a table on the stack, using
-- the default values for all missing fields.
--
-- Internally, this push the defaults reader options, sets each
-- key/value pair of the table in the userdata value, then retrieves the
-- object again. This will update all fields and complain about unknown
-- keys.
peekReaderOptionsTable :: LuaError e => Peeker e ReaderOptions
peekReaderOptionsTable idx = retrieving "ReaderOptions (table)" $ do
liftLua $ do
absidx <- absindex idx
pushUD typeReaderOptions def
let setFields = do
next absidx >>= \case
False -> return () -- all fields were copied
True -> do
pushvalue (nth 2) *> insert (nth 2)
settable (nth 4) -- set in userdata object
setFields
pushnil -- first key
setFields
peekUD typeReaderOptions top

View file

@ -42,6 +42,8 @@ import Text.Pandoc.Lua.Marshaling.Attr (mkAttr, mkAttributeList)
import Text.Pandoc.Lua.Marshaling.List (List (..))
import Text.Pandoc.Lua.Marshaling.ListAttributes ( mkListAttributes
, peekListAttributes)
import Text.Pandoc.Lua.Marshaling.ReaderOptions ( peekReaderOptions
, pushReaderOptions)
import Text.Pandoc.Lua.Marshaling.SimpleTable (mkSimpleTable)
import Text.Pandoc.Lua.Module.Utils (sha1)
import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua,
@ -355,6 +357,12 @@ otherConstructors =
, mkAttributeList
, mkListAttributes
, mkSimpleTable
, defun "ReaderOptions"
### liftPure id
<#> parameter peekReaderOptions "ReaderOptions|table" "opts" "reader options"
=#> functionResult pushReaderOptions "ReaderOptions" "new object"
#? "Creates a new ReaderOptions value."
]
stringConstants :: [Field e]
@ -405,10 +413,12 @@ functions =
=?> "output string, or error triple"
, defun "read"
### (\content mformatspec -> do
### (\content mformatspec mreaderOptions -> do
let formatSpec = fromMaybe "markdown" mformatspec
readerOptions = fromMaybe def mreaderOptions
res <- Lua.liftIO . runIO $ getReader formatSpec >>= \case
(TextReader r, es) -> r def{ readerExtensions = es } content
(TextReader r, es) -> r readerOptions{ readerExtensions = es }
content
_ -> throwError $ PandocSomeError
"Only textual formats are supported"
case res of
@ -422,6 +432,8 @@ functions =
throwM e)
<#> parameter peekText "string" "content" "text to parse"
<#> optionalParameter peekText "string" "formatspec" "format and extensions"
<#> optionalParameter peekReaderOptions "ReaderOptions" "reader_options"
"reader options"
=#> functionResult pushPandoc "Pandoc" "result document"
, sha1

View file

@ -809,7 +809,25 @@ return {
)
assert.are_same(expected_table, new_table)
end)
}
},
group 'ReaderOptions' {
test('returns a userdata value', function ()
local opts = pandoc.ReaderOptions {}
assert.are_equal(type(opts), 'userdata')
end),
test('can construct from table', function ()
local opts = pandoc.ReaderOptions {columns = 66}
assert.are_equal(opts.columns, 66)
end),
test('can construct from other ReaderOptions value', function ()
local orig = pandoc.ReaderOptions{columns = 65}
local copy = pandoc.ReaderOptions(orig)
for k, v in pairs(orig) do
assert.are_same(copy[k], v)
end
assert.are_equal(copy.columns, 65)
end),
},
},
group 'clone' {
@ -896,6 +914,16 @@ return {
'Extension empty_paragraphs not supported for gfm'
)
end),
test('read with other indented code classes', function()
local indented_code = ' return true'
local expected = pandoc.Pandoc({
pandoc.CodeBlock('return true', {class='foo'})
})
assert.are_same(
expected,
pandoc.read(indented_code, 'markdown', {indented_code_classes={'foo'}})
)
end),
test('failing read', function ()
assert.error_matches(
function () pandoc.read('foo', 'nosuchreader') end,