Replace old sample custom reader with a full-featured reader for creole.
This is better as an example. And it is faster than pandoc's regular creole parser, which shows that high-performance readers can be developed this way.
This commit is contained in:
parent
213913f025
commit
881b45209e
4 changed files with 200 additions and 88 deletions
|
@ -6594,7 +6594,7 @@ of the functions that are available for creating pandoc
|
|||
AST elements. For parsing, the [lpeg] parsing library
|
||||
is available by default. To see a sample custom reader:
|
||||
|
||||
pandoc --print-default-data-file reader.lua
|
||||
pandoc --print-default-data-file creole.lua
|
||||
|
||||
If you want your custom reader to have access to reader options
|
||||
(e.g. the tab stop setting), you give your Reader function a
|
||||
|
|
197
data/creole.lua
Normal file
197
data/creole.lua
Normal file
|
@ -0,0 +1,197 @@
|
|||
-- A sample custom reader for Creole 1.0 (common wiki markup)
|
||||
-- http://www.wikicreole.org/wiki/CheatSheet
|
||||
|
||||
-- For better performance we put these functions in local variables:
|
||||
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
|
||||
lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
|
||||
lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
|
||||
|
||||
local whitespacechar = S(" \t\r\n")
|
||||
local specialchar = S("/*~[]\\{}|")
|
||||
local wordchar = (1 - (whitespacechar + specialchar))
|
||||
local spacechar = S(" \t")
|
||||
local newline = P"\r"^-1 * P"\n"
|
||||
local blankline = spacechar^0 * newline
|
||||
local endline = newline * #-blankline
|
||||
local endequals = spacechar^0 * P"="^0 * spacechar^0 * newline
|
||||
local cellsep = spacechar^0 * P"|"
|
||||
|
||||
local function trim(s)
|
||||
return (s:gsub("^%s*(.-)%s*$", "%1"))
|
||||
end
|
||||
|
||||
local function ListItem(lev, ch)
|
||||
local start
|
||||
if ch == nil then
|
||||
start = S"*#"
|
||||
else
|
||||
start = P(ch)
|
||||
end
|
||||
local subitem = function(c)
|
||||
if lev < 6 then
|
||||
return ListItem(lev + 1, c)
|
||||
else
|
||||
return (1 - 1) -- fails
|
||||
end
|
||||
end
|
||||
local parser = spacechar^0
|
||||
* start^lev
|
||||
* #(- start)
|
||||
* spacechar^0
|
||||
* Ct((V"Inline" - (newline * spacechar^0 * S"*#"))^0)
|
||||
* newline
|
||||
* (Ct(subitem("*")^1) / pandoc.BulletList
|
||||
+
|
||||
Ct(subitem("#")^1) / pandoc.OrderedList
|
||||
+
|
||||
Cc(nil))
|
||||
/ function (ils, sublist)
|
||||
return { pandoc.Plain(ils), sublist }
|
||||
end
|
||||
return parser
|
||||
end
|
||||
|
||||
local re = require're'
|
||||
x = re.compile[[
|
||||
listname <- {| {:tag: '' -> 'list':} (name s)* |}
|
||||
name <- {| {:tag: '' -> 'id':} {[a-z][a-z]*} |}
|
||||
s <- ' '*
|
||||
]]
|
||||
|
||||
-- Grammar
|
||||
G = P{ "Doc",
|
||||
Doc = Ct(V"Block"^0)
|
||||
/ pandoc.Pandoc ;
|
||||
Block = blankline^0
|
||||
* ( V"Header"
|
||||
+ V"HorizontalRule"
|
||||
+ V"CodeBlock"
|
||||
+ V"List"
|
||||
+ V"Table"
|
||||
+ V"Para") ;
|
||||
Para = Ct(V"Inline"^1)
|
||||
* newline
|
||||
/ pandoc.Para ;
|
||||
HorizontalRule = spacechar^0
|
||||
* P"----"
|
||||
* spacechar^0
|
||||
* newline
|
||||
/ pandoc.HorizontalRule;
|
||||
Header = (P("=")^1 / string.len)
|
||||
* spacechar^1
|
||||
* Ct((V"Inline" - endequals)^1)
|
||||
* endequals
|
||||
/ pandoc.Header;
|
||||
CodeBlock = P"{{{"
|
||||
* blankline
|
||||
* C((1 - (newline * P"}}}"))^0)
|
||||
* newline
|
||||
* P"}}}"
|
||||
/ pandoc.CodeBlock;
|
||||
Placeholder = P"<<<"
|
||||
* C(P(1) - P">>>")^0
|
||||
* P">>>"
|
||||
/ function() return pandoc.Div({}) end;
|
||||
List = V"BulletList"
|
||||
+ V"OrderedList" ;
|
||||
BulletList = Ct(ListItem(1,'*')^1)
|
||||
/ pandoc.BulletList ;
|
||||
OrderedList = Ct(ListItem(1,'#')^1)
|
||||
/ pandoc.OrderedList ;
|
||||
Table = (V"TableHeader" + Cc{})
|
||||
* Ct(V"TableRow"^1)
|
||||
/ function(headrow, bodyrows)
|
||||
local numcolumns = #(bodyrows[1])
|
||||
local aligns = {}
|
||||
local widths = {}
|
||||
for i = 1,numcolumns do
|
||||
aligns[i] = pandoc.AlignDefault
|
||||
widths[i] = 0
|
||||
end
|
||||
return pandoc.utils.from_simple_table(
|
||||
pandoc.SimpleTable({}, aligns, widths, headrow, bodyrows))
|
||||
end ;
|
||||
TableHeader = Ct(V"HeaderCell"^1)
|
||||
* cellsep^-1
|
||||
* spacechar^0
|
||||
* newline ;
|
||||
TableRow = Ct(V"BodyCell"^1)
|
||||
* cellsep^-1
|
||||
* spacechar^0
|
||||
* newline ;
|
||||
HeaderCell = cellsep
|
||||
* P"="
|
||||
* spacechar^0
|
||||
* Ct((V"Inline" - (newline + cellsep))^0)
|
||||
/ function(ils) return { pandoc.Plain(ils) } end ;
|
||||
BodyCell = cellsep
|
||||
* spacechar^0
|
||||
* Ct((V"Inline" - (newline + cellsep))^0)
|
||||
/ function(ils) return { pandoc.Plain(ils) } end ;
|
||||
Inline = V"Emph"
|
||||
+ V"Strong"
|
||||
+ V"LineBreak"
|
||||
+ V"Link"
|
||||
+ V"URL"
|
||||
+ V"Image"
|
||||
+ V"Str"
|
||||
+ V"Space"
|
||||
+ V"SoftBreak"
|
||||
+ V"Escaped"
|
||||
+ V"Placeholder"
|
||||
+ V"Code"
|
||||
+ V"Special" ;
|
||||
Str = wordchar^1
|
||||
/ pandoc.Str;
|
||||
Escaped = P"~"
|
||||
* C(P(1))
|
||||
/ pandoc.Str ;
|
||||
Special = specialchar
|
||||
/ pandoc.Str;
|
||||
Space = spacechar^1
|
||||
/ pandoc.Space ;
|
||||
SoftBreak = endline
|
||||
* # -(V"HorizontalRule" + V"CodeBlock")
|
||||
/ pandoc.SoftBreak ;
|
||||
LineBreak = P"\\\\"
|
||||
/ pandoc.LineBreak ;
|
||||
Code = P"{{{"
|
||||
* C((1 - P"}}}")^0)
|
||||
* P"}}}"
|
||||
/ trim / pandoc.Code ;
|
||||
Link = P"[["
|
||||
* C((1 - (P"]]" + P"|"))^0)
|
||||
* (P"|" * Ct((V"Inline" - P"]]")^1))^-1 * P"]]"
|
||||
/ function(url, desc)
|
||||
local txt = desc or {pandoc.Str(url)}
|
||||
return pandoc.Link(txt, url)
|
||||
end ;
|
||||
Image = P"{{"
|
||||
* #-P"{"
|
||||
* C((1 - (S"}"))^0)
|
||||
* (P"|" * Ct((V"Inline" - P"}}")^1))^-1
|
||||
* P"}}"
|
||||
/ function(url, desc)
|
||||
local txt = desc or ""
|
||||
return pandoc.Image(txt, url)
|
||||
end ;
|
||||
URL = P"http"
|
||||
* P"s"^-1
|
||||
* P":"
|
||||
* (1 - (whitespacechar + (S",.?!:;\"'" * #whitespacechar)))^1
|
||||
/ function(url)
|
||||
return pandoc.Link(pandoc.Str(url), url)
|
||||
end ;
|
||||
Emph = P"//"
|
||||
* Ct((V"Inline" - P"//")^1)
|
||||
* P"//"
|
||||
/ pandoc.Emph ;
|
||||
Strong = P"**"
|
||||
* Ct((V"Inline" -P"**")^1)
|
||||
* P"**"
|
||||
/ pandoc.Strong ;
|
||||
}
|
||||
|
||||
function Reader(input, reader_options)
|
||||
return lpeg.match(G, input)
|
||||
end
|
|
@ -1,87 +0,0 @@
|
|||
-- A sample custom reader for a very simple markup language.
|
||||
-- This parses a document into paragraphs separated by blank lines.
|
||||
-- This is /italic/ and this is *boldface* and this is `code`
|
||||
-- and `code``with backtick` (doubled `` = ` inside backticks).
|
||||
-- This is an escaped special character: \_, \*, \\
|
||||
-- == text makes a level-2 heading
|
||||
-- That's it!
|
||||
|
||||
-- For better performance we put these functions in local variables:
|
||||
local P, S, R, Cf, Cc, Ct, V, Cs, Cg, Cb, B, C, Cmt =
|
||||
lpeg.P, lpeg.S, lpeg.R, lpeg.Cf, lpeg.Cc, lpeg.Ct, lpeg.V,
|
||||
lpeg.Cs, lpeg.Cg, lpeg.Cb, lpeg.B, lpeg.C, lpeg.Cmt
|
||||
|
||||
--- if item is a table, concatenate it to acc;
|
||||
-- otherwise insert it at the end.
|
||||
local function add_item(acc, item)
|
||||
if acc == nil then
|
||||
acc = {}
|
||||
end
|
||||
if type(item) == "table" then
|
||||
for i = 1,#item do
|
||||
add_item(acc, item[i])
|
||||
end
|
||||
else
|
||||
acc[#acc + 1] = item
|
||||
end
|
||||
return acc
|
||||
end
|
||||
|
||||
local function Many1(parser)
|
||||
return Cf(Cc(nil) * parser^1 , add_item)
|
||||
end
|
||||
|
||||
local function Many(parser)
|
||||
return (Many1(parser) + Cc{})
|
||||
end
|
||||
|
||||
local whitespacechar = S(" \t\r\n")
|
||||
local specialchar = S("/*\\`")
|
||||
local wordchar = (1 - (whitespacechar + specialchar))
|
||||
local spacechar = S(" \t")
|
||||
local newline = P"\r"^-1 * P"\n"
|
||||
local blanklines = newline * (spacechar^0 * newline)^1
|
||||
local endline = newline - blanklines
|
||||
|
||||
local function BetweenDelims(c, parser, constructor)
|
||||
local starter = P(c) * #(- whitespacechar)
|
||||
local ender = B(1 - whitespacechar) * P(c)
|
||||
return starter * Many(parser - ender) * C(ender^-1) /
|
||||
function(contents, ender)
|
||||
if ender == "" then -- fallback
|
||||
return { pandoc.Str(c) , contents }
|
||||
else
|
||||
return constructor(contents)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
-- Grammar
|
||||
G = P{ "Pandoc",
|
||||
Pandoc = Many(V"Block") / pandoc.Pandoc;
|
||||
Block = blanklines^0 * (V"Header" + V"Para") ;
|
||||
Para = Many1(V"Inline") * blanklines^-1 / pandoc.Para;
|
||||
Header = (P("=")^1 / string.len)
|
||||
* spacechar^1
|
||||
* Many(V"Inline" - (spacechar^0 * P("=")^0 * blanklines))
|
||||
* spacechar^0
|
||||
* P("=")^0
|
||||
* blanklines^-1 /
|
||||
function(lev, contents) return pandoc.Header(lev, contents) end;
|
||||
Inline = V"Emph" + V"Strong" + V"Str" + V"Space" + V"SoftBreak" +
|
||||
V"Code" + V"Escaped" + V"Special";
|
||||
Str = wordchar^1 / pandoc.Str;
|
||||
Escaped = "\\" * C(specialchar) / function(s) return pandoc.Str(s) end;
|
||||
Space = spacechar^1 / pandoc.Space;
|
||||
SoftBreak = endline / pandoc.SoftBreak;
|
||||
Emph = BetweenDelims("/", V"Inline", pandoc.Emph);
|
||||
Strong = BetweenDelims("*", V"Inline", pandoc.Strong);
|
||||
Code = P"`" * Ct(( (P"``" / "`") + (C(1) - S"`"))^1) * P"`"
|
||||
/ table.concat / pandoc.Code;
|
||||
Special = S"`\\" / pandoc.Str;
|
||||
}
|
||||
|
||||
function Reader(input, opts)
|
||||
return lpeg.match(G, input)
|
||||
end
|
|
@ -174,6 +174,8 @@ data-files:
|
|||
data/abbreviations
|
||||
-- sample lua custom writer
|
||||
data/sample.lua
|
||||
-- sample lua custom reader
|
||||
data/creole.lua
|
||||
-- lua init script
|
||||
data/init.lua
|
||||
-- pandoc lua module
|
||||
|
|
Loading…
Add table
Reference in a new issue