Added Text.Pandoc.Writers.Custom, --print-custom-lua-writer.

pandoc -t data/sample.lua

will load the script sample.lua and use it as a custom writer.
data/sample.lua is provided as an example.

Added `--print-custom-lua-writer` option to print the sample
script.
This commit is contained in:
John MacFarlane 2013-04-02 21:08:38 -07:00 committed by John MacFarlane
parent dede39452f
commit 4fa2a94759
6 changed files with 595 additions and 11 deletions

41
README
View file

@ -174,14 +174,16 @@ General options
(Slidy HTML and javascript slide show), `slideous` (Slideous HTML and
javascript slide show), `dzslides` (DZSlides HTML5 + javascript slide
show), `revealjs` (reveal.js HTML5 + javascript slide show), `s5`
(S5 HTML and javascript slide show). Note that `odt`, `epub`, and
`epub3` output will not be directed to *stdout*; an output
filename must be specified using the `-o/--output` option. If `+lhs` is
appended to `markdown`, `rst`, `latex`, `beamer`, `html`, or `html5`, the
output will be rendered as literate Haskell source: see [Literate Haskell
support](#literate-haskell-support), below. Markdown syntax extensions can
be individually enabled or disabled by appending `+EXTENSION` or
`-EXTENSION` to the format name, as described above under `-f`.
(S5 HTML and javascript slide show), or the path of a custom
lua writer (see [Custom writers](#custom-writers), below). Note that
`odt`, `epub`, and `epub3` output will not be directed to *stdout*; an
output filename must be specified using the `-o/--output` option. If
`+lhs` is appended to `markdown`, `rst`, `latex`, `beamer`, `html`, or
`html5`, the output will be rendered as literate Haskell source: see
[Literate Haskell support](#literate-haskell-support), below. Markdown
syntax extensions can be individually enabled or disabled by appending
`+EXTENSION` or `-EXTENSION` to the format name, as described above
under `-f`.
`-o` *FILE*, `--output=`*FILE*
: Write output to *FILE* instead of *stdout*. If *FILE* is
@ -301,6 +303,10 @@ General writer options
: Print the default template for an output *FORMAT*. (See `-t`
for a list of possible *FORMAT*s.)
`--print-sample-lua-writer`
: Print a sample lua custom writer (see [Custom writers](#custom-writers),
below.
`--no-wrap`
: Disable text wrapping in output. By default, text is wrapped
appropriately for the output format.
@ -2622,6 +2628,23 @@ ordinary HTML (without bird tracks).
writes HTML with the Haskell code in bird tracks, so it can be copied
and pasted as literate Haskell source.
Custom writers
==============
Pandoc can be extended with custom writers written in [lua]. (Pandoc
includes a lua interpreter, so lua need not be installed separately.)
To use a custom writer, simply specify the path to the lua script
in place of the output format. For example:
pandoc -t data/sample.lua
Creating a custom writer requires writing a lua function for each
possible element in a pandoc document. To get a documented example
which you can modify according to your needs, do
pandoc --print-sample-lua-writer
Authors
=======
@ -2668,3 +2691,5 @@ Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey.
[PDF]: http://www.adobe.com/pdf/
[reveal.js]: http://lab.hakim.se/reveal-js/
[FictionBook2]: http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1
[lua]: TODO

312
data/sample.lua Normal file
View file

@ -0,0 +1,312 @@
-- This is a sample custom writer for pandoc. It produces output
-- that is very similar to that of pandoc's HTML writer.
-- There is one new feature: code blocks marked with class 'dot'
-- are piped through graphviz and images are included in the HTML
-- output using 'data:' URLs.
--
-- Invoke with: pandoc -t sample.lua
--
-- Note: you need not have lua installed on your system to use this
-- custom writer. However, if you do have lua installed, you can
-- use it to test changes to the script. 'lua sample.lua' will
-- produce informative error messages if your code contains
-- syntax errors.
-- Character escaping
local function escape(s, in_attribute)
return s:gsub("[<>&\"']",
function(x)
if x == '<' then
return '&lt;'
elseif x == '>' then
return '&gt;'
elseif x == '&' then
return '&amp;'
elseif x == '"' then
return '&quot;'
elseif x == "'" then
return '&#39;'
else
return x
end
end)
end
-- Helper function to convert an attributes table into
-- a string that can be put into HTML tags.
local function attributes(attr)
local attr_table = {}
for x,y in pairs(attr) do
if y and y ~= "" then
table.insert(attr_table, ' ' .. x .. '="' .. escape(y,true) .. '"')
end
end
return table.concat(attr_table)
end
-- Run cmd on a temporary file containing inp and return result.
local function pipe(cmd, inp)
local tmp = os.tmpname()
local tmph = io.open(tmp, "w")
tmph:write(inp)
tmph:close()
local outh = io.popen(cmd .. " " .. tmp,"r")
local result = outh:read("*all")
outh:close()
os.remove(tmp)
return result
end
-- Table to store footnotes, so they can be included at the end.
local notes = {}
-- Blocksep is used to separate block elements.
function Blocksep()
return "\n\n"
end
-- This function is called once for the whole document. Parameters:
-- body, title, date are strings; authors is an array of strings;
-- variables is a table. One could use some kind of templating
-- system here; this just gives you a simple standalone HTML file.
function Doc(body, title, authors, date, variables)
local buffer = {}
local function add(s)
table.insert(buffer, s)
end
add('<!DOCTYPE html>')
add('<html>')
add('<head>')
add('<title>' .. title .. '</title>')
add('</head>')
add('<body>')
if title ~= "" then
add('<h1 class="title">' .. title .. '</h1>')
end
for _, author in pairs(authors) do
add('<h2 class="author">' .. author .. '</h2>')
end
if date ~= "" then
add('<h3 class="date">' .. date .. '</h3>')
end
add(body)
if #notes > 0 then
add('<ol class="footnotes">')
for _,note in pairs(notes) do
add(note)
end
add('</ol>')
end
add('</body>')
add('</html>')
return table.concat(buffer,'\n')
end
-- The functions that follow render corresponding pandoc elements.
-- s is always a string, attr is always a table of attributes, and
-- items is always an array of strings (the items in a list).
-- Comments indicate the types of other variables.
function Str(s)
return escape(s)
end
function Space()
return " "
end
function LineBreak()
return "<br/>"
end
function Emph(s)
return "<em>" .. s .. "</em>"
end
function Strong(s)
return "<strong>" .. s .. "</strong>"
end
function Subscript(s)
return "<sub>" .. s .. "</sub>"
end
function Superscript(s)
return "<sup>" .. s .. "</sup>"
end
function SmallCaps(s)
return '<span style="font-variant: small-caps;">' .. s .. '</span>'
end
function Strikeout(s)
return '<del>' .. s .. '</del>'
end
function Link(s, src, tit)
return "<a href='" .. escape(src,true) .. "' title='" ..
escape(tit,true) .. "'>" .. s .. "</a>"
end
function Image(s, src, tit)
return "<img src='" .. escape(src,true) .. "' title='" ..
escape(tit,true) .. "'/>"
end
function Code(s, attr)
return "<code" .. attributes(attr) .. ">" .. escape(s) .. "</code>"
end
function InlineMath(s)
return "\\(" .. escape(s) .. "\\)"
end
function DisplayMath(s)
return "\\[" .. escape(s) .. "\\]"
end
function Note(s)
local num = #notes + 1
-- insert the back reference right before the final closing tag.
s = string.gsub(s,
'(.*)</', '%1 <a href="#fnref' .. num .. '">&#8617;</a></')
-- add a list item with the note to the note table.
table.insert(notes, '<li id="fn' .. num .. '">' .. s .. '</li>')
-- return the footnote reference, linked to the note.
return '<a id="fnref' .. num .. '" href="#fn' .. num ..
'"><sup>' .. num .. '</sup></a>'
end
function Plain(s)
return s
end
function Para(s)
return "<p>" .. s .. "</p>"
end
-- lev is an integer, the header level.
function Header(lev, s, attr)
return "<h" .. lev .. attributes(attr) .. ">" .. s .. "</h" .. lev .. ">"
end
function BlockQuote(s)
return "<blockquote>\n" .. s .. "\n</blockquote>"
end
function HorizontalRule()
return "<hr/>"
end
function CodeBlock(s, attr)
-- If code block has class 'dot', pipe the contents through dot
-- and base64, and include the base64-encoded png as a data: URL.
if attr.class and string.match(' ' .. attr.class .. ' ',' dot ') then
local png = pipe("base64", pipe("dot -Tpng", s))
return '<img src="data:image/png;base64,' .. png .. '"/>'
-- otherwise treat as code (one could pipe through a highlighter)
else
return "<pre><code" .. attributes(attr) .. ">" .. escape(s) ..
"</code></pre>"
end
end
function BulletList(items)
local buffer = {}
for _, item in pairs(items) do
table.insert(buffer, "<li>" .. item .. "</li>")
end
return "<ul>\n" .. table.concat(buffer, "\n") .. "\n</ul>"
end
function OrderedList(items)
local buffer = {}
for _, item in pairs(items) do
table.insert(buffer, "<li>" .. item .. "</li>")
end
return "<ol>\n" .. table.concat(buffer, "\n") .. "\n</ol>"
end
-- Revisit association list STackValue instance.
function DefinitionList(items)
local buffer = {}
for _,item in pairs(items) do
for k, v in pairs(item) do
table.insert(buffer,"<dt>" .. k .. "</dt>\n<dd>" ..
table.concat(v,"</dd>\n<dd>") .. "</dd>")
end
end
return "<dl>\n" .. table.concat(buffer, "\n") .. "\n</dl>"
end
-- Convert pandoc alignment to something HTML can use.
-- align is AlignLeft, AlignRight, AlignCenter, or AlignDefault.
function html_align(align)
if align == 'AlignLeft' then
return 'left'
elseif align == 'AlignRight' then
return 'right'
elseif align == 'AlignCenter' then
return 'center'
else
return 'left'
end
end
-- Caption is a string, aligns is an array of strings,
-- widths is an array of floats, headers is an array of
-- strings, rows is an array of arrays of strings.
function Table(caption, aligns, widths, headers, rows)
local buffer = {}
local function add(s)
table.insert(buffer, s)
end
add("<table>")
if caption ~= "" then
add("<caption>" .. caption .. "</caption>")
end
if widths and widths[1] ~= 0 then
for _, w in pairs(widths) do
add('<col width="' .. string.format("%d%%", w * 100) .. '" />')
end
end
local header_row = {}
local empty_header = true
for i, h in pairs(headers) do
local align = html_align(aligns[i])
table.insert(header_row,'<th align="' .. align .. '">' .. h .. '</th>')
empty_header = empty_header and h == ""
end
if empty_header then
head = ""
else
add('<tr class="header">')
for _,h in pairs(header_row) do
add(h)
end
add('</tr>')
end
local class = "even"
for _, row in pairs(rows) do
class = (class == "even" and "odd") or "even"
add('<tr class="' .. class .. '">')
for i,c in pairs(row) do
add('<td align="' .. html_align(aligns[i]) .. '">' .. c .. '</td>')
end
add('</tr>')
end
add('</table')
return table.concat(buffer,'\n')
end
-- The following code will produce runtime warnings when you haven't defined
-- all of the functions you need for the custom writer, so it's useful
-- to include when you're working on a writer.
local meta = {}
meta.__index =
function(_, key)
io.stderr:write(string.format("WARNING: Undefined function '%s'\n",key))
return function() return "" end
end
setmetatable(_G, meta)

View file

@ -101,6 +101,8 @@ Data-Files:
data/dzslides/template.html,
-- data for citeproc
data/default.csl,
-- sample lua custom writer
data/sample.lua
-- documentation
README, INSTALL, COPYRIGHT, BUGS, changelog
Extra-Source-Files:
@ -253,7 +255,8 @@ Library
data-default >= 0.4 && < 0.6,
temporary >= 1.1 && < 1.2,
blaze-html >= 0.5 && < 0.7,
blaze-markup >= 0.5.1 && < 0.6
blaze-markup >= 0.5.1 && < 0.6,
hslua >= 0.3 && < 0.4
if flag(embed_data_files)
cpp-options: -DEMBED_DATA_FILES
-- build-tools: hsb2hs
@ -305,6 +308,7 @@ Library
Text.Pandoc.Writers.RST,
Text.Pandoc.Writers.Org,
Text.Pandoc.Writers.AsciiDoc,
Text.Pandoc.Writers.Custom,
Text.Pandoc.Writers.Textile,
Text.Pandoc.Writers.MediaWiki,
Text.Pandoc.Writers.RTF,

View file

@ -327,6 +327,14 @@ options =
"FORMAT")
"" -- "Print default template for FORMAT"
, Option "" ["print-sample-lua-writer"]
(NoArg
(\_ -> do
sample <- readDataFileUTF8 Nothing "sample.lua"
UTF8.hPutStr stdout sample
exitWith ExitSuccess))
"" -- "Print sample lua custom writer"
, Option "" ["no-wrap"]
(NoArg
(\opt -> return opt { optWrapText = False }))

View file

@ -99,6 +99,7 @@ module Text.Pandoc
, writeFB2
, writeOrg
, writeAsciiDoc
, writeCustom
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
-- * Version
@ -142,11 +143,12 @@ import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
import Text.Pandoc.Shared (safeRead, warn)
import Data.ByteString.Lazy (ByteString)
import Data.List (intercalate)
import Data.List (intercalate, isSuffixOf)
import Data.Version (showVersion)
import Text.JSON.Generic
import Data.Set (Set)
@ -286,7 +288,10 @@ getWriter s =
Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
Right (writerName, setExts) ->
case lookup writerName writers of
Nothing -> Left $ "Unknown writer: " ++ writerName
Nothing
| ".lua" `isSuffixOf` s ->
Right $ IOStringWriter $ writeCustom s
| otherwise -> Left $ "Unknown writer: " ++ writerName
Just (PureStringWriter r) -> Right $ PureStringWriter $
\o -> r o{ writerExtensions = setExts $
getDefaultExtensions writerName }

View file

@ -0,0 +1,230 @@
{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{- Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Writers.Custom
Copyright : Copyright (C) 2012 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' documents to custom markup using
a lua writer.
-}
module Text.Pandoc.Writers.Custom ( writeCustom ) where
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Data.List ( intersperse )
import Scripting.Lua (LuaState, StackValue, callfunc)
import qualified Scripting.Lua as Lua
import Text.Pandoc.UTF8 (fromString, toString)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as C8
import Data.Monoid
import qualified Data.Map as M
attrToMap :: Attr -> M.Map ByteString ByteString
attrToMap (id',classes,keyvals) = M.fromList
$ ("id", fromString id')
: ("class", fromString $ unwords classes)
: map (\(x,y) -> (fromString x, fromString y)) keyvals
getList :: StackValue a => LuaState -> Int -> IO [a]
getList lua i' = do
continue <- Lua.next lua i'
if continue
then do
next <- Lua.peek lua (-1)
Lua.pop lua 1
x <- maybe (fail "peek returned Nothing") return next
rest <- getList lua i'
return (x : rest)
else return []
instance StackValue ByteString where
push l x = Lua.push l $ C8.unpack x
peek l n = (fmap . fmap) C8.pack (Lua.peek l n)
valuetype _ = Lua.TSTRING
instance StackValue a => StackValue [a] where
push lua xs = do
Lua.createtable lua (length xs + 1) 0
let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i
mapM_ addValue $ zip [1..] xs
peek lua i = do
top <- Lua.gettop lua
let i' = if i < 0 then top + i + 1 else i
Lua.pushnil lua
lst <- getList lua i'
Lua.pop lua 1
return (Just lst)
valuetype _ = Lua.TTABLE
instance (StackValue a, StackValue b) => StackValue (M.Map a b) where
push lua m = do
let xs = M.toList m
Lua.createtable lua (length xs + 1) 0
let addValue (k, v) = Lua.push lua k >> Lua.push lua v >>
Lua.rawset lua (-3)
mapM_ addValue xs
peek _ _ = undefined -- not needed for our purposes
valuetype _ = Lua.TTABLE
instance (StackValue a, StackValue b) => StackValue (a,b) where
push lua (k,v) = do
Lua.createtable lua 2 0
Lua.push lua k
Lua.push lua v
Lua.rawset lua (-3)
peek _ _ = undefined -- not needed for our purposes
valuetype _ = Lua.TTABLE
instance StackValue [Inline] where
push l ils = Lua.push l . C8.unpack =<< inlineListToCustom l ils
peek _ _ = undefined
valuetype _ = Lua.TSTRING
instance StackValue [Block] where
push l ils = Lua.push l . C8.unpack =<< blockListToCustom l ils
peek _ _ = undefined
valuetype _ = Lua.TSTRING
-- | Convert Pandoc to custom markup.
writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
writeCustom luaFile opts doc = do
luaScript <- readFile luaFile
lua <- Lua.newstate
Lua.openlibs lua
Lua.loadstring lua luaScript "custom"
Lua.call lua 0 0
-- TODO - call hierarchicalize, so we have that info
rendered <- docToCustom lua opts doc
Lua.close lua
return $ toString rendered
docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO ByteString
docToCustom lua opts (Pandoc (Meta title authors date) blocks) = do
title' <- inlineListToCustom lua title
authors' <- mapM (inlineListToCustom lua) authors
date' <- inlineListToCustom lua date
body <- blockListToCustom lua blocks
callfunc lua "Doc" body title' authors' date' (writerVariables opts)
-- | Convert Pandoc block element to Custom.
blockToCustom :: LuaState -- ^ Lua state
-> Block -- ^ Block element
-> IO ByteString
blockToCustom _ Null = return ""
blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines
blockToCustom lua (Para [Image txt (src,tit)]) =
callfunc lua "CaptionedImage" src tit txt
blockToCustom lua (Para inlines) = callfunc lua "Para" inlines
blockToCustom lua (RawBlock format str) =
callfunc lua "RawBlock" format (fromString str)
blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule"
blockToCustom lua (Header level attr inlines) =
callfunc lua "Header" level inlines (attrToMap attr)
blockToCustom lua (CodeBlock attr str) =
callfunc lua "CodeBlock" (fromString str) (attrToMap attr)
blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks
blockToCustom lua (Table capt aligns widths headers rows') =
callfunc lua "Table" capt (map show aligns) widths headers rows'
blockToCustom lua (BulletList items) = callfunc lua "BulletList" items
blockToCustom lua (OrderedList (num,sty,delim) items) =
callfunc lua "OrderedList" items num (show sty) (show delim)
blockToCustom lua (DefinitionList items) =
callfunc lua "DefinitionList" items
-- | Convert list of Pandoc block elements to Custom.
blockListToCustom :: LuaState -- ^ Options
-> [Block] -- ^ List of block elements
-> IO ByteString
blockListToCustom lua xs = do
blocksep <- callfunc lua "Blocksep"
bs <- mapM (blockToCustom lua) xs
return $ mconcat $ intersperse blocksep bs
-- | Convert list of Pandoc inline elements to Custom.
inlineListToCustom :: LuaState -> [Inline] -> IO ByteString
inlineListToCustom lua lst = do
xs <- mapM (inlineToCustom lua) lst
return $ C8.concat xs
-- | Convert Pandoc inline element to Custom.
inlineToCustom :: LuaState -> Inline -> IO ByteString
inlineToCustom lua (Str str) = callfunc lua "Str" $ fromString str
inlineToCustom lua Space = callfunc lua "Space"
inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst
inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst
inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst
inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst
inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst
inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst
inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst
inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst
inlineToCustom lua (Cite _ lst) = callfunc lua "Cite" lst
inlineToCustom lua (Code attr str) =
callfunc lua "Code" (fromString str) (attrToMap attr)
inlineToCustom lua (Math DisplayMath str) =
callfunc lua "DisplayMath" (fromString str)
inlineToCustom lua (Math InlineMath str) =
callfunc lua "InlineMath" (fromString str)
inlineToCustom lua (RawInline format str) =
callfunc lua "RawInline" format (fromString str)
inlineToCustom lua (LineBreak) = callfunc lua "LineBreak"
inlineToCustom lua (Link txt (src,tit)) =
callfunc lua "Link" txt (fromString src) (fromString tit)
inlineToCustom lua (Image alt (src,tit)) =
callfunc lua "Image" alt (fromString src) (fromString tit)
inlineToCustom lua (Note contents) = callfunc lua "Note" contents