Add interface for custom readers written in Lua. (#7671)
New module Text.Pandoc.Readers.Custom, exporting readCustom [API change]. Users can now do `-f myreader.lua` and pandoc will treat the script myreader.lua as a custom reader, which parses an input string to a pandoc AST, using the pandoc module defined for Lua filters. A sample custom reader can be found in data/reader.lua. Closes #7669.
This commit is contained in:
parent
bac6ae9607
commit
ee2f0021f9
5 changed files with 135 additions and 14 deletions
36
MANUAL.txt
36
MANUAL.txt
|
@ -266,6 +266,7 @@ header when requesting a document from a URL:
|
|||
- `tikiwiki` ([TikiWiki markup])
|
||||
- `twiki` ([TWiki markup])
|
||||
- `vimwiki` ([Vimwiki])
|
||||
- the path of a custom Lua reader, see [Custom readers and writers] below
|
||||
:::
|
||||
|
||||
Extensions can be individually enabled or disabled by
|
||||
|
@ -338,7 +339,7 @@ header when requesting a document from a URL:
|
|||
- `tei` ([TEI Simple])
|
||||
- `xwiki` ([XWiki markup])
|
||||
- `zimwiki` ([ZimWiki markup])
|
||||
- the path of a custom Lua writer, see [Custom writers] below
|
||||
- the path of a custom Lua writer, see [Custom readers and writers] below
|
||||
:::
|
||||
|
||||
Note that `odt`, `docx`, `epub`, and `pdf` output will not be directed
|
||||
|
@ -6574,19 +6575,35 @@ With these custom styles, you can use your input document as a
|
|||
reference-doc while creating docx output (see below), and maintain the
|
||||
same styles in your input and output files.
|
||||
|
||||
# Custom writers
|
||||
# Custom readers and writers
|
||||
|
||||
Pandoc can be extended with custom writers written in [Lua]. (Pandoc
|
||||
includes a Lua interpreter, so Lua need not be installed separately.)
|
||||
Pandoc can be extended with custom readers and writers written
|
||||
in [Lua]. (Pandoc includes a Lua interpreter, so Lua need not
|
||||
be installed separately.)
|
||||
|
||||
To use a custom writer, simply specify the path to the Lua script
|
||||
in place of the output format. For example:
|
||||
To use a custom reader or writer, simply specify the path to the
|
||||
Lua script in place of the input or output format. For example:
|
||||
|
||||
pandoc -t data/sample.lua
|
||||
pandoc -f my_custom_markup_language.lua -t latex -s
|
||||
|
||||
Creating a custom writer requires writing a Lua function for each
|
||||
possible element in a pandoc document. To get a documented example
|
||||
which you can modify according to your needs, do
|
||||
A custom reader is a Lua script that defines one function,
|
||||
Reader, which takes a string as input and returns a Pandoc
|
||||
AST. See the [Lua filters documentation] for documentation
|
||||
of the functions that are available for creating pandoc
|
||||
AST elements. For parsing, the [lpeg] parsing library
|
||||
is available by default. To see a sample custom reader:
|
||||
|
||||
pandoc --print-default-data-file reader.lua
|
||||
|
||||
Reader options are available via the global variable
|
||||
`PANDOC_READER_OPTIONS`, as expalined in the [Lua filters
|
||||
documentation].
|
||||
|
||||
A custom writer is a Lua script that defines a function
|
||||
that specifies how to render each element in a Pandoc AST.
|
||||
To see a documented example which you can modify according
|
||||
to your needs:
|
||||
|
||||
pandoc --print-default-data-file sample.lua
|
||||
|
||||
|
@ -6598,6 +6615,7 @@ default template with the name
|
|||
subdirectory of your user data directory (see [Templates]).
|
||||
|
||||
[Lua]: https://www.lua.org
|
||||
[lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/
|
||||
|
||||
# Reproducible builds
|
||||
|
||||
|
|
44
data/reader.lua
Normal file
44
data/reader.lua
Normal file
|
@ -0,0 +1,44 @@
|
|||
-- A sample custom reader for a very simple markup language.
|
||||
-- This parses a document into paragraphs separated by blank lines.
|
||||
-- This is _{italic} and this is *{boldface}
|
||||
-- This is an escaped special character: \_, \*, \{, \}
|
||||
-- == text makes a level-2 heading
|
||||
-- That's it!
|
||||
|
||||
-- For better performance we put these functions in local variables:
|
||||
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B =
|
||||
lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
|
||||
lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B
|
||||
|
||||
local whitespacechar = S(" \t\r\n")
|
||||
local specialchar = S("_*{}\\")
|
||||
local escapedchar = P"\\" * specialchar
|
||||
/ function (x) return string.sub(x,2) end
|
||||
local wordchar = (P(1) - (whitespacechar + specialchar)) + escapedchar
|
||||
local spacechar = S(" \t")
|
||||
local newline = P"\r"^-1 * P"\n"
|
||||
local blanklines = newline * spacechar^0 * newline^1
|
||||
local endline = newline - blanklines
|
||||
|
||||
-- Grammar
|
||||
G = P{ "Pandoc",
|
||||
Pandoc = blanklines^-1 * Ct(V"Block"^0) / pandoc.Pandoc;
|
||||
Block = V"Header" + V"Para";
|
||||
Para = Ct(V"Inline"^1) * blanklines^-1 / pandoc.Para;
|
||||
Header = Ct(Cg(P("=")^1 / function(x) return #x end, "length")
|
||||
* spacechar^1
|
||||
* Cg(Ct(V"Inline"^0), "contents")
|
||||
* blanklines^-1) /
|
||||
function(res) return pandoc.Header(res.length, res.contents) end;
|
||||
Inline = V"Emph" + V"Str" + V"Space" + V"SoftBreak" + V"Special" ;
|
||||
Str = wordchar^1 / pandoc.Str;
|
||||
Space = spacechar^1 / pandoc.Space;
|
||||
SoftBreak = endline / pandoc.SoftBreak;
|
||||
Emph = Ct(P"_{" * Cg(Ct((V"Inline" - P"}")^1), "contents") * P"}") /
|
||||
function(res) return pandoc.Emph(res.contents) end;
|
||||
Special = specialchar / pandoc.Str;
|
||||
}
|
||||
|
||||
function Reader(input)
|
||||
return lpeg.match(G, input)
|
||||
end
|
|
@ -642,6 +642,7 @@ library
|
|||
Text.Pandoc.Readers.Ipynb,
|
||||
Text.Pandoc.Readers.CSV,
|
||||
Text.Pandoc.Readers.RTF,
|
||||
Text.Pandoc.Readers.Custom,
|
||||
Text.Pandoc.Writers,
|
||||
Text.Pandoc.Writers.Native,
|
||||
Text.Pandoc.Writers.Docbook,
|
||||
|
|
|
@ -68,6 +68,7 @@ import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
|
|||
defaultUserDataDir, tshow)
|
||||
import Text.Pandoc.Writers.Shared (lookupMetaString)
|
||||
import Text.Pandoc.Readers.Markdown (yamlToMeta)
|
||||
import Text.Pandoc.Readers.Custom (readCustom)
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
#ifndef _WINDOWS
|
||||
import System.Posix.IO (stdOutput)
|
||||
|
@ -154,11 +155,13 @@ convertWithOpts opts = do
|
|||
-> ByteStringReader $ \o t -> sandbox files (r o t)
|
||||
|
||||
(reader, readerExts) <-
|
||||
if optSandbox opts
|
||||
then case runPure (getReader readerName) of
|
||||
Left e -> throwError e
|
||||
Right (r, rexts) -> return (makeSandboxed r, rexts)
|
||||
else getReader readerName
|
||||
if ".lua" `T.isSuffixOf` readerName
|
||||
then return (TextReader (readCustom (T.unpack readerName)), mempty)
|
||||
else if optSandbox opts
|
||||
then case runPure (getReader readerName) of
|
||||
Left e -> throwError e
|
||||
Right (r, rexts) -> return (makeSandboxed r, rexts)
|
||||
else getReader readerName
|
||||
|
||||
outputSettings <- optToOutputSettings opts
|
||||
let format = outputFormat outputSettings
|
||||
|
|
55
src/Text/Pandoc/Readers/Custom.hs
Normal file
55
src/Text/Pandoc/Readers/Custom.hs
Normal file
|
@ -0,0 +1,55 @@
|
|||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE TypeApplications #-}
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.Custom
|
||||
Copyright : Copyright (C) 2021 John MacFarlane
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Supports custom parsers written in Lua which produce a Pandoc AST.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Custom ( readCustom ) where
|
||||
import Control.Exception
|
||||
import Control.Monad (when)
|
||||
import Data.Text (Text)
|
||||
import HsLua as Lua hiding (Operation (Div), render)
|
||||
import HsLua.Class.Peekable (PeekError)
|
||||
import Control.Monad.IO.Class (MonadIO)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
|
||||
import Text.Pandoc.Lua.Util (dofileWithTraceback)
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Class (PandocMonad)
|
||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||
|
||||
-- | Convert custom markup to Pandoc.
|
||||
readCustom :: (PandocMonad m, MonadIO m, ToSources s)
|
||||
=> FilePath -> ReaderOptions -> s -> m Pandoc
|
||||
readCustom luaFile opts sources = do
|
||||
let input = sourcesToText $ toSources sources
|
||||
let globals = [ PANDOC_SCRIPT_FILE luaFile
|
||||
, PANDOC_READER_OPTIONS opts
|
||||
]
|
||||
res <- runLua $ do
|
||||
setGlobals globals
|
||||
stat <- dofileWithTraceback luaFile
|
||||
-- check for error in lua script (later we'll change the return type
|
||||
-- to handle this more gracefully):
|
||||
when (stat /= Lua.OK)
|
||||
Lua.throwErrorAsException
|
||||
parseCustom input
|
||||
case res of
|
||||
Left msg -> throw msg
|
||||
Right doc -> return doc
|
||||
|
||||
parseCustom :: forall e. PeekError e
|
||||
=> Text
|
||||
-> LuaE e Pandoc
|
||||
parseCustom = invoke @e "Reader"
|
||||
|
Loading…
Add table
Reference in a new issue