Replace old sample custom reader with a full-featured reader for creole.

This is better as an example.  And it is faster than pandoc's
regular creole parser, which shows that high-performance readers
can be developed this way.
This commit is contained in:
John MacFarlane 2021-11-07 14:33:18 -08:00
parent 213913f025
commit 881b45209e
4 changed files with 200 additions and 88 deletions

View file

@ -6594,7 +6594,7 @@ of the functions that are available for creating pandoc
AST elements. For parsing, the [lpeg] parsing library AST elements. For parsing, the [lpeg] parsing library
is available by default. To see a sample custom reader: is available by default. To see a sample custom reader:
pandoc --print-default-data-file reader.lua pandoc --print-default-data-file creole.lua
If you want your custom reader to have access to reader options If you want your custom reader to have access to reader options
(e.g. the tab stop setting), you give your Reader function a (e.g. the tab stop setting), you give your Reader function a

197
data/creole.lua Normal file
View file

@ -0,0 +1,197 @@
-- A sample custom reader for Creole 1.0 (common wiki markup)
-- http://www.wikicreole.org/wiki/CheatSheet
-- For better performance we put these functions in local variables:
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
local whitespacechar = S(" \t\r\n")
local specialchar = S("/*~[]\\{}|")
local wordchar = (1 - (whitespacechar + specialchar))
local spacechar = S(" \t")
local newline = P"\r"^-1 * P"\n"
local blankline = spacechar^0 * newline
local endline = newline * #-blankline
local endequals = spacechar^0 * P"="^0 * spacechar^0 * newline
local cellsep = spacechar^0 * P"|"
local function trim(s)
return (s:gsub("^%s*(.-)%s*$", "%1"))
end
local function ListItem(lev, ch)
local start
if ch == nil then
start = S"*#"
else
start = P(ch)
end
local subitem = function(c)
if lev < 6 then
return ListItem(lev + 1, c)
else
return (1 - 1) -- fails
end
end
local parser = spacechar^0
* start^lev
* #(- start)
* spacechar^0
* Ct((V"Inline" - (newline * spacechar^0 * S"*#"))^0)
* newline
* (Ct(subitem("*")^1) / pandoc.BulletList
+
Ct(subitem("#")^1) / pandoc.OrderedList
+
Cc(nil))
/ function (ils, sublist)
return { pandoc.Plain(ils), sublist }
end
return parser
end
local re = require're'
x = re.compile[[
listname <- {| {:tag: '' -> 'list':} (name s)* |}
name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
s <- ' '*
]]
-- Grammar
G = P{ "Doc",
Doc = Ct(V"Block"^0)
/ pandoc.Pandoc ;
Block = blankline^0
* ( V"Header"
+ V"HorizontalRule"
+ V"CodeBlock"
+ V"List"
+ V"Table"
+ V"Para") ;
Para = Ct(V"Inline"^1)
* newline
/ pandoc.Para ;
HorizontalRule = spacechar^0
* P"----"
* spacechar^0
* newline
/ pandoc.HorizontalRule;
Header = (P("=")^1 / string.len)
* spacechar^1
* Ct((V"Inline" - endequals)^1)
* endequals
/ pandoc.Header;
CodeBlock = P"{{{"
* blankline
* C((1 - (newline * P"}}}"))^0)
* newline
* P"}}}"
/ pandoc.CodeBlock;
Placeholder = P"<<<"
* C(P(1) - P">>>")^0
* P">>>"
/ function() return pandoc.Div({}) end;
List = V"BulletList"
+ V"OrderedList" ;
BulletList = Ct(ListItem(1,'*')^1)
/ pandoc.BulletList ;
OrderedList = Ct(ListItem(1,'#')^1)
/ pandoc.OrderedList ;
Table = (V"TableHeader" + Cc{})
* Ct(V"TableRow"^1)
/ function(headrow, bodyrows)
local numcolumns = #(bodyrows[1])
local aligns = {}
local widths = {}
for i = 1,numcolumns do
aligns[i] = pandoc.AlignDefault
widths[i] = 0
end
return pandoc.utils.from_simple_table(
pandoc.SimpleTable({}, aligns, widths, headrow, bodyrows))
end ;
TableHeader = Ct(V"HeaderCell"^1)
* cellsep^-1
* spacechar^0
* newline ;
TableRow = Ct(V"BodyCell"^1)
* cellsep^-1
* spacechar^0
* newline ;
HeaderCell = cellsep
* P"="
* spacechar^0
* Ct((V"Inline" - (newline + cellsep))^0)
/ function(ils) return { pandoc.Plain(ils) } end ;
BodyCell = cellsep
* spacechar^0
* Ct((V"Inline" - (newline + cellsep))^0)
/ function(ils) return { pandoc.Plain(ils) } end ;
Inline = V"Emph"
+ V"Strong"
+ V"LineBreak"
+ V"Link"
+ V"URL"
+ V"Image"
+ V"Str"
+ V"Space"
+ V"SoftBreak"
+ V"Escaped"
+ V"Placeholder"
+ V"Code"
+ V"Special" ;
Str = wordchar^1
/ pandoc.Str;
Escaped = P"~"
* C(P(1))
/ pandoc.Str ;
Special = specialchar
/ pandoc.Str;
Space = spacechar^1
/ pandoc.Space ;
SoftBreak = endline
* # -(V"HorizontalRule" + V"CodeBlock")
/ pandoc.SoftBreak ;
LineBreak = P"\\\\"
/ pandoc.LineBreak ;
Code = P"{{{"
* C((1 - P"}}}")^0)
* P"}}}"
/ trim / pandoc.Code ;
Link = P"[["
* C((1 - (P"]]" + P"|"))^0)
* (P"|" * Ct((V"Inline" - P"]]")^1))^-1 * P"]]"
/ function(url, desc)
local txt = desc or {pandoc.Str(url)}
return pandoc.Link(txt, url)
end ;
Image = P"{{"
* #-P"{"
* C((1 - (S"}"))^0)
* (P"|" * Ct((V"Inline" - P"}}")^1))^-1
* P"}}"
/ function(url, desc)
local txt = desc or ""
return pandoc.Image(txt, url)
end ;
URL = P"http"
* P"s"^-1
* P":"
* (1 - (whitespacechar + (S",.?!:;\"'" * #whitespacechar)))^1
/ function(url)
return pandoc.Link(pandoc.Str(url), url)
end ;
Emph = P"//"
* Ct((V"Inline" - P"//")^1)
* P"//"
/ pandoc.Emph ;
Strong = P"**"
* Ct((V"Inline" -P"**")^1)
* P"**"
/ pandoc.Strong ;
}
function Reader(input, reader_options)
return lpeg.match(G, input)
end

View file

@ -1,87 +0,0 @@
-- A sample custom reader for a very simple markup language.
-- This parses a document into paragraphs separated by blank lines.
-- This is /italic/ and this is *boldface* and this is `code`
-- and `code``with backtick` (doubled `` = ` inside backticks).
-- This is an escaped special character: \_, \*, \\
-- == text makes a level-2 heading
-- That's it!
-- For better performance we put these functions in local variables:
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
--- if item is a table, concatenate it to acc;
-- otherwise insert it at the end.
local function add_item(acc, item)
if acc == nil then
acc = {}
end
if type(item) == "table" then
for i = 1,#item do
add_item(acc, item[i])
end
else
acc[#acc + 1] = item
end
return acc
end
local function Many1(parser)
return Cf(Cc(nil) * parser^1 , add_item)
end
local function Many(parser)
return (Many1(parser) + Cc{})
end
local whitespacechar = S(" \t\r\n")
local specialchar = S("/*\\`")
local wordchar = (1 - (whitespacechar + specialchar))
local spacechar = S(" \t")
local newline = P"\r"^-1 * P"\n"
local blanklines = newline * (spacechar^0 * newline)^1
local endline = newline - blanklines
local function BetweenDelims(c, parser, constructor)
local starter = P(c) * #(- whitespacechar)
local ender = B(1 - whitespacechar) * P(c)
return starter * Many(parser - ender) * C(ender^-1) /
function(contents, ender)
if ender == "" then -- fallback
return { pandoc.Str(c) , contents }
else
return constructor(contents)
end
end
end
-- Grammar
G = P{ "Pandoc",
Pandoc = Many(V"Block") / pandoc.Pandoc;
Block = blanklines^0 * (V"Header" + V"Para") ;
Para = Many1(V"Inline") * blanklines^-1 / pandoc.Para;
Header = (P("=")^1 / string.len)
* spacechar^1
* Many(V"Inline" - (spacechar^0 * P("=")^0 * blanklines))
* spacechar^0
* P("=")^0
* blanklines^-1 /
function(lev, contents) return pandoc.Header(lev, contents) end;
Inline = V"Emph" + V"Strong" + V"Str" + V"Space" + V"SoftBreak" +
V"Code" + V"Escaped" + V"Special";
Str = wordchar^1 / pandoc.Str;
Escaped = "\\" * C(specialchar) / function(s) return pandoc.Str(s) end;
Space = spacechar^1 / pandoc.Space;
SoftBreak = endline / pandoc.SoftBreak;
Emph = BetweenDelims("/", V"Inline", pandoc.Emph);
Strong = BetweenDelims("*", V"Inline", pandoc.Strong);
Code = P"`" * Ct(( (P"``" / "`") + (C(1) - S"`"))^1) * P"`"
/ table.concat / pandoc.Code;
Special = S"`\\" / pandoc.Str;
}
function Reader(input, opts)
return lpeg.match(G, input)
end

View file

@ -174,6 +174,8 @@ data-files:
data/abbreviations data/abbreviations
-- sample lua custom writer -- sample lua custom writer
data/sample.lua data/sample.lua
-- sample lua custom reader
data/creole.lua
-- lua init script -- lua init script
data/init.lua data/init.lua
-- pandoc lua module -- pandoc lua module