From 6b462e59332242c18ea38a721ae672b88f33d621 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 6 Nov 2021 11:00:26 +0100 Subject: [PATCH] Lua: allow to pass custom reader options to `pandoc.read` Reader options can now be passed as an optional third argument to `pandoc.read`. The object can either be a table or a ReaderOptions value like `PANDOC_READER_OPTIONS`. Creating new ReaderOptions objects is possible through the new constructor `pandoc.ReaderOptions`. Closes: #7656 --- doc/lua-filters.md | 42 +++++- src/Text/Pandoc/Lua/Global.hs | 4 +- .../Pandoc/Lua/Marshaling/ReaderOptions.hs | 131 +++++++++++++----- src/Text/Pandoc/Lua/Module/Pandoc.hs | 16 ++- test/lua/module/pandoc.lua | 30 +++- 5 files changed, 178 insertions(+), 45 deletions(-) diff --git a/doc/lua-filters.md b/doc/lua-filters.md index e9a121f50..ba5f58120 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -2742,7 +2742,33 @@ format, and functions to filter and modify a subtree. [`sha1`]{#pandoc.sha1} : Alias for [`pandoc.utils.sha1`](#pandoc.utils.sha1) - (DEPRECATED). + (DEPRECATED, use `pandoc.utils.sha1` instead). + +## Other constructors + +[`ReaderOptions (opts)`]{#pandoc.readeroptions} + +: Creates a new [ReaderOptions] value. + + Parameters + + `opts`: + : Either a table with a subset of the properties of a + [ReaderOptions] object, or another ReaderOptions object. + Uses the defaults specified in the manual for all + properties that are not explicitly specified. Throws an + error if a table contains properties which are not present + in a ReaderOptions object. ([ReaderOptions]|table) + + Returns: new [ReaderOptions] object + + Usage: + + -- copy of the reader options that were defined on the command line. + local cli_opts = pandoc.ReaderOptions(PANDOC_READER_OPTIONS) + + -- default reader options, but columns set to 66. 
+ local short_columns_opts = pandoc.ReaderOptions {columns = 66} ## Helper functions @@ -2815,17 +2841,23 @@ Returns: the transformed inline element ### read {#pandoc.read} -`read (markup[, format])` +`read (markup[, format[, reader_options]])` Parse the given string into a Pandoc document. Parameters: `markup`: -: the markup to be parsed +: the markup to be parsed (string) `format`: -: format specification, defaults to `"markdown"`. +: format specification, defaults to `"markdown"` (string) + +`reader_options`: +: options passed to the reader; may be a ReaderOptions object or + a table with a subset of the keys and values of a + ReaderOptions object; defaults to the default values + documented in the manual. ([ReaderOptions]|table) Returns: pandoc document @@ -2838,6 +2870,8 @@ Usage: -- The inline element in that block is an `Emph` assert(block.content[1].t == "Emph") +[ReaderOptions]: #type-readeroptions + # Module pandoc.utils This module exposes internal pandoc functions and utility diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs index 23b3a8284..05510f45d 100644 --- a/src/Text/Pandoc/Lua/Global.hs +++ b/src/Text/Pandoc/Lua/Global.hs @@ -22,7 +22,7 @@ import Text.Pandoc.Definition (Pandoc (Pandoc), pandocTypesVersion) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Marshaling () import Text.Pandoc.Lua.Marshaling.CommonState (pushCommonState) -import Text.Pandoc.Lua.Marshaling.ReaderOptions (pushReaderOptions) +import Text.Pandoc.Lua.Marshaling.ReaderOptions (pushReaderOptionsReadonly) import Text.Pandoc.Options (ReaderOptions) import qualified Data.Text as Text @@ -55,7 +55,7 @@ setGlobal global = case global of pushUD typePandocLazy doc Lua.setglobal "PANDOC_DOCUMENT" PANDOC_READER_OPTIONS ropts -> do - pushReaderOptions ropts + pushReaderOptionsReadonly ropts Lua.setglobal "PANDOC_READER_OPTIONS" PANDOC_SCRIPT_FILE filePath -> do Lua.push filePath diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs 
b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
index 2cc39ee3a..b19c209e8 100644
--- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
@@ -15,8 +16,10 @@ Marshaling instance for ReaderOptions and its components.
 module Text.Pandoc.Lua.Marshaling.ReaderOptions
   ( peekReaderOptions
   , pushReaderOptions
+  , pushReaderOptionsReadonly
   ) where
 
+import Data.Default (def)
 import HsLua as Lua
 import Text.Pandoc.Lua.Marshaling.List (pushPandocList)
 import Text.Pandoc.Options (ReaderOptions (..))
@@ -25,47 +28,103 @@ import Text.Pandoc.Options (ReaderOptions (..))
 -- Reader Options
 --
 
+-- | Retrieve a ReaderOptions value, either from a normal ReaderOptions
+-- value, from a read-only object, or from a table with the same
+-- keys as a ReaderOptions object.
 peekReaderOptions :: LuaError e => Peeker e ReaderOptions
-peekReaderOptions = peekUD typeReaderOptions
+peekReaderOptions = retrieving "ReaderOptions" . \idx -> -- dispatch on the Lua type found at idx
+  liftLua (ltype idx) >>= \case
+    TypeUserdata -> choice [ peekUD typeReaderOptions -- regular object, tried first
+                           , peekUD typeReaderOptionsReadonly -- read-only object, tried second
+                           ]
+                           idx
+    TypeTable -> peekReaderOptionsTable idx -- table: handled by peekReaderOptionsTable
+    _ -> failPeek =<<
+         typeMismatchMessage "ReaderOptions userdata or table" idx -- every other Lua type fails the peek
 
+-- | Pushes a ReaderOptions value as userdata object.
 pushReaderOptions :: LuaError e => Pusher e ReaderOptions
 pushReaderOptions = pushUD typeReaderOptions
 
-typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions
-typeReaderOptions = deftype "pandoc ReaderOptions"
-  [ operation Tostring luaShow
+-- | Pushes a ReaderOptions object, but makes it read-only.
+pushReaderOptionsReadonly :: LuaError e => Pusher e ReaderOptions
+pushReaderOptionsReadonly = pushUD typeReaderOptionsReadonly -- same data, pushed with the read-only type
+
+-- | ReaderOptions object type for read-only values.
+typeReaderOptionsReadonly :: LuaError e => DocumentedType e ReaderOptions
+typeReaderOptionsReadonly = deftype "ReaderOptions (read-only)"
+  [ operation Tostring $ lambda
+    ### liftPure show -- the argument below must be peeked as this read-only type, not as "ReaderOptions"
+    <#> udparam typeReaderOptionsReadonly "opts" "options to print in native format"
+    =#> functionResult pushString "string" "Haskell representation"
+  , operation Newindex $ lambda -- assignment hook: takes no parameters and always fails
+    ### (failLua "This ReaderOptions value is read-only.")
+    =?> "Throws an error when called, i.e., an assignment is made."
   ]
-  [ readonly "extensions" ""
-    ( pushString . show
-    , readerExtensions)
-  , readonly "standalone" ""
-    ( pushBool
-    , readerStandalone)
-  , readonly "columns" ""
-    ( pushIntegral
-    , readerColumns)
-  , readonly "tab_stop" ""
-    ( pushIntegral
-    , readerTabStop)
-  , readonly "indented_code_classes" ""
-    ( pushPandocList pushText
-    , readerIndentedCodeClasses)
-  , readonly "abbreviations" ""
-    ( pushSet pushText
-    , readerAbbreviations)
-  , readonly "track_changes" ""
-    ( pushString . show
-    , readerTrackChanges)
-  , readonly "strip_comments" ""
-    ( pushBool
-    , readerStripComments)
-  , readonly "default_image_extension" ""
-    ( pushText
-    , readerDefaultImageExtension)
+  readerOptionsMembers
+
+-- | 'ReaderOptions' object type.
+typeReaderOptions :: LuaError e => DocumentedType e ReaderOptions
+typeReaderOptions = deftype "ReaderOptions" -- mutable variant: defines no Newindex operation
+  [ operation Tostring $ lambda
+    ### liftPure show
+    <#> udparam typeReaderOptions "opts" "options to print in native format"
+    =#> functionResult pushString "string" "Haskell representation"
+  ]
+  readerOptionsMembers
+
+-- | Member properties of 'ReaderOptions' Lua values.
+readerOptionsMembers :: LuaError e
+  => [Member e (DocumentedFunction e) ReaderOptions]
+readerOptionsMembers = -- used by both typeReaderOptions and typeReaderOptionsReadonly
+  [ property "abbreviations" "" -- each entry: name, (empty) description, (pusher, getter), (peeker, setter)
+      (pushSet pushText, readerAbbreviations)
+      (peekSet peekText, \opts x -> opts{ readerAbbreviations = x })
+  , property "columns" ""
+      (pushIntegral, readerColumns)
+      (peekIntegral, \opts x -> opts{ readerColumns = x })
+  , property "default_image_extension" ""
+      (pushText, readerDefaultImageExtension)
+      (peekText, \opts x -> opts{ readerDefaultImageExtension = x })
+  , property "extensions" ""
+      (pushString . show, readerExtensions) -- pushed to Lua as the value's `show` string
+      (peekRead, \opts x -> opts{ readerExtensions = x }) -- presumably parsed back via Read; confirm against hslua's peekRead
+  , property "indented_code_classes" ""
+      (pushPandocList pushText, readerIndentedCodeClasses)
+      (peekList peekText, \opts x -> opts{ readerIndentedCodeClasses = x })
+  , property "strip_comments" ""
+      (pushBool, readerStripComments)
+      (peekBool, \opts x -> opts{ readerStripComments = x })
+  , property "standalone" ""
+      (pushBool, readerStandalone)
+      (peekBool, \opts x -> opts{ readerStandalone = x })
+  , property "tab_stop" ""
+      (pushIntegral, readerTabStop)
+      (peekIntegral, \opts x -> opts{ readerTabStop = x })
+  , property "track_changes" ""
+      (pushString . show, readerTrackChanges) -- pushed to Lua as the value's `show` string
+      (peekRead, \opts x -> opts{ readerTrackChanges = x })
   ]
 
-luaShow :: LuaError e => DocumentedFunction e
-luaShow = defun "__tostring"
-  ### liftPure show
-  <#> udparam typeReaderOptions "state" "object to print in native format"
-  =#> functionResult pushString "string" "Haskell representation"
+-- | Retrieves a 'ReaderOptions' object from a table on the stack, using
+-- the default values for all missing fields.
+--
+-- Internally, this pushes the default reader options, sets each
+-- key/value pair of the table in the userdata value, then retrieves the
+-- object again. This will update all fields and complain about unknown
+-- keys.
+peekReaderOptionsTable :: LuaError e => Peeker e ReaderOptions
+peekReaderOptionsTable idx = retrieving "ReaderOptions (table)" $ do
+  liftLua $ do
+    absidx <- absindex idx -- absolute index stays valid while the stack grows
+    pushUD typeReaderOptions def -- stack: ..., ud (userdata holding the defaults)
+    let setFields = do
+          next absidx >>= \case -- stack on True: ud, key, value
+            False -> return () -- all fields were copied
+            True -> do
+              pushvalue (nth 2) *> insert (nth 2) -- stack: ud, key, key, value (keeps a key for `next`)
+              settable (nth 4) -- set in userdata object
+              setFields
+    pushnil -- first key
+    setFields
+  peekUD typeReaderOptions top `lastly` pop 1 -- pop the temporary userdata so the peeker leaves the stack balanced
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 33432b4d8..8f42a2988 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -42,6 +42,8 @@ import Text.Pandoc.Lua.Marshaling.Attr (mkAttr, mkAttributeList)
 import Text.Pandoc.Lua.Marshaling.List (List (..))
 import Text.Pandoc.Lua.Marshaling.ListAttributes ( mkListAttributes
                                                  , peekListAttributes)
+import Text.Pandoc.Lua.Marshaling.ReaderOptions ( peekReaderOptions
+                                                , pushReaderOptions)
 import Text.Pandoc.Lua.Marshaling.SimpleTable (mkSimpleTable)
 import Text.Pandoc.Lua.Module.Utils (sha1)
 import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua,
@@ -355,6 +357,12 @@ otherConstructors =
   , mkAttributeList
   , mkListAttributes
   , mkSimpleTable
+
+  , defun "ReaderOptions"
+    ### liftPure id
+    <#> parameter peekReaderOptions "ReaderOptions|table" "opts" "reader options"
+    =#> functionResult pushReaderOptions "ReaderOptions" "new object"
+    #? "Creates a new ReaderOptions value."
   ]
 
 stringConstants :: [Field e]
@@ -405,10 +413,12 @@ functions =
     =?> "output string, or error triple"
 
   , defun "read"
-    ### (\content mformatspec -> do
+    ### (\content mformatspec mreaderOptions -> do
           let formatSpec = fromMaybe "markdown" mformatspec
+              readerOptions = fromMaybe def mreaderOptions
           res <- Lua.liftIO . 
runIO $ getReader formatSpec >>= \case - (TextReader r, es) -> r def{ readerExtensions = es } content + (TextReader r, es) -> r readerOptions{ readerExtensions = es } + content _ -> throwError $ PandocSomeError "Only textual formats are supported" case res of @@ -422,6 +432,8 @@ functions = throwM e) <#> parameter peekText "string" "content" "text to parse" <#> optionalParameter peekText "string" "formatspec" "format and extensions" + <#> optionalParameter peekReaderOptions "ReaderOptions" "reader_options" + "reader options" =#> functionResult pushPandoc "Pandoc" "result document" , sha1 diff --git a/test/lua/module/pandoc.lua b/test/lua/module/pandoc.lua index 4da663f07..5a58914ef 100644 --- a/test/lua/module/pandoc.lua +++ b/test/lua/module/pandoc.lua @@ -809,7 +809,25 @@ return { ) assert.are_same(expected_table, new_table) end) - } + }, + group 'ReaderOptions' { + test('returns a userdata value', function () + local opts = pandoc.ReaderOptions {} + assert.are_equal(type(opts), 'userdata') + end), + test('can construct from table', function () + local opts = pandoc.ReaderOptions {columns = 66} + assert.are_equal(opts.columns, 66) + end), + test('can construct from other ReaderOptions value', function () + local orig = pandoc.ReaderOptions{columns = 65} + local copy = pandoc.ReaderOptions(orig) + for k, v in pairs(orig) do + assert.are_same(copy[k], v) + end + assert.are_equal(copy.columns, 65) + end), + }, }, group 'clone' { @@ -896,6 +914,16 @@ return { 'Extension empty_paragraphs not supported for gfm' ) end), + test('read with other indented code classes', function() + local indented_code = ' return true' + local expected = pandoc.Pandoc({ + pandoc.CodeBlock('return true', {class='foo'}) + }) + assert.are_same( + expected, + pandoc.read(indented_code, 'markdown', {indented_code_classes={'foo'}}) + ) + end), test('failing read', function () assert.error_matches( function () pandoc.read('foo', 'nosuchreader') end,