Docbook writer: Updated to use Pretty.
This commit is contained in:
parent
ce533ffd90
commit
ebdbb06f94
2 changed files with 126 additions and 148 deletions
|
@ -35,13 +35,13 @@ import Text.Pandoc.Templates (renderTemplate)
|
|||
import Text.Pandoc.Readers.TeXMath
|
||||
import Data.List ( isPrefixOf, intercalate )
|
||||
import Data.Char ( toLower )
|
||||
import Text.PrettyPrint.HughesPJ hiding ( Str )
|
||||
import Text.Pandoc.Highlighting (languages, languagesByExtension)
|
||||
import Text.Pandoc.Pretty
|
||||
|
||||
-- | Convert list of authors to a docbook <author> section
|
||||
authorToDocbook :: WriterOptions -> [Inline] -> Doc
|
||||
authorToDocbook opts name' =
|
||||
let name = render $ inlinesToDocbook opts name'
|
||||
let name = render Nothing $ inlinesToDocbook opts name'
|
||||
in if ',' `elem` name
|
||||
then -- last name first
|
||||
let (lastname, rest) = break (==',') name
|
||||
|
@ -61,16 +61,20 @@ authorToDocbook opts name' =
|
|||
-- | Convert Pandoc document to string in Docbook format.
|
||||
writeDocbook :: WriterOptions -> Pandoc -> String
|
||||
writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
|
||||
let title = wrap opts tit
|
||||
let title = inlinesToDocbook opts tit
|
||||
authors = map (authorToDocbook opts) auths
|
||||
date = inlinesToDocbook opts dat
|
||||
elements = hierarchicalize blocks
|
||||
main = render $ vcat (map (elementToDocbook opts) elements)
|
||||
colwidth = if writerWrapText opts
|
||||
then Just $ writerColumns opts
|
||||
else Nothing
|
||||
render' = render colwidth
|
||||
main = render' $ vcat (map (elementToDocbook opts) elements)
|
||||
context = writerVariables opts ++
|
||||
[ ("body", main)
|
||||
, ("title", render title)
|
||||
, ("date", render date) ] ++
|
||||
[ ("author", render a) | a <- authors ]
|
||||
, ("title", render' title)
|
||||
, ("date", render' date) ] ++
|
||||
[ ("author", render' a) | a <- authors ]
|
||||
in if writerStandalone opts
|
||||
then renderTemplate context $ writerTemplate opts
|
||||
else main
|
||||
|
@ -84,7 +88,7 @@ elementToDocbook opts (Sec _ _num id' title elements) =
|
|||
then [Blk (Para [])]
|
||||
else elements
|
||||
in inTags True "section" [("id",id')] $
|
||||
inTagsSimple "title" (wrap opts title) $$
|
||||
inTagsSimple "title" (inlinesToDocbook opts title) $$
|
||||
vcat (map (elementToDocbook opts) elements')
|
||||
|
||||
-- | Convert a list of Pandoc blocks to Docbook.
|
||||
|
@ -123,7 +127,7 @@ listItemToDocbook opts item =
|
|||
blockToDocbook :: WriterOptions -> Block -> Doc
|
||||
blockToDocbook _ Null = empty
|
||||
blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize
|
||||
blockToDocbook opts (Plain lst) = wrap opts lst
|
||||
blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
|
||||
blockToDocbook opts (Para [Image txt (src,_)]) =
|
||||
let capt = inlinesToDocbook opts txt
|
||||
in inTagsIndented "figure" $
|
||||
|
@ -132,12 +136,13 @@ blockToDocbook opts (Para [Image txt (src,_)]) =
|
|||
(inTagsIndented "imageobject"
|
||||
(selfClosingTag "imagedata" [("fileref",src)])) $$
|
||||
inTagsSimple "textobject" (inTagsSimple "phrase" capt))
|
||||
blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
|
||||
blockToDocbook opts (Para lst) =
|
||||
inTagsIndented "para" $ inlinesToDocbook opts lst
|
||||
blockToDocbook opts (BlockQuote blocks) =
|
||||
inTagsIndented "blockquote" $ blocksToDocbook opts blocks
|
||||
blockToDocbook _ (CodeBlock (_,classes,_) str) =
|
||||
text ("<screen" ++ lang ++ ">\n") <>
|
||||
text (escapeStringForXML str) <> text "\n</screen>"
|
||||
text ("<screen" ++ lang ++ ">") <> cr <>
|
||||
flush (text (escapeStringForXML str) <> cr <> text "</screen>")
|
||||
where lang = if null langs
|
||||
then ""
|
||||
else " language=\"" ++ escapeStringForXML (head langs) ++
|
||||
|
@ -214,12 +219,6 @@ tableItemToDocbook opts tag align item =
|
|||
let attrib = [("align", align)]
|
||||
in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
|
||||
|
||||
-- | Take list of inline elements and return wrapped doc.
|
||||
wrap :: WriterOptions -> [Inline] -> Doc
|
||||
wrap opts lst = if writerWrapText opts
|
||||
then fsep $ map (inlinesToDocbook opts) (splitBy (== Space) lst)
|
||||
else inlinesToDocbook opts lst
|
||||
|
||||
-- | Convert a list of inline elements to Docbook.
|
||||
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
|
||||
inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
|
||||
|
@ -254,8 +253,8 @@ inlineToDocbook _ (Code str) =
|
|||
inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
|
||||
inlineToDocbook _ (TeX _) = empty
|
||||
inlineToDocbook _ (HtmlInline _) = empty
|
||||
inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>"
|
||||
inlineToDocbook _ Space = char ' '
|
||||
inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
|
||||
inlineToDocbook _ Space = space
|
||||
inlineToDocbook opts (Link txt (src, _)) =
|
||||
if isPrefixOf "mailto:" src
|
||||
then let src' = drop 7 src
|
||||
|
@ -275,6 +274,6 @@ inlineToDocbook _ (Image _ (src, tit)) =
|
|||
else inTagsIndented "objectinfo" $
|
||||
inTagsIndented "title" (text $ escapeStringForXML tit)
|
||||
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
|
||||
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
|
||||
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
|
||||
inlineToDocbook opts (Note contents) =
|
||||
inTagsIndented "footnote" $ blocksToDocbook opts contents
|
||||
|
|
|
@ -15,14 +15,13 @@
|
|||
<date>July 17, 2006</date>
|
||||
</articleinfo>
|
||||
<para>
|
||||
This is a set of tests for pandoc. Most of them are adapted from
|
||||
John Gruber's markdown test suite.
|
||||
This is a set of tests for pandoc. Most of them are adapted from John
|
||||
Gruber's markdown test suite.
|
||||
</para>
|
||||
<section id="headers">
|
||||
<title>Headers</title>
|
||||
<section id="level-2-with-an-embedded-link">
|
||||
<title>Level 2 with an
|
||||
<ulink url="/url">embedded link</ulink></title>
|
||||
<title>Level 2 with an <ulink url="/url">embedded link</ulink></title>
|
||||
<section id="level-3-with-emphasis">
|
||||
<title>Level 3 with <emphasis>emphasis</emphasis></title>
|
||||
<section id="level-4">
|
||||
|
@ -60,16 +59,15 @@
|
|||
Here's a regular paragraph.
|
||||
</para>
|
||||
<para>
|
||||
In Markdown 1.0.0 and earlier. Version 8. This line turns into a
|
||||
list item. Because a hard-wrapped line in the middle of a paragraph
|
||||
looked like a list item.
|
||||
In Markdown 1.0.0 and earlier. Version 8. This line turns into a list
|
||||
item. Because a hard-wrapped line in the middle of a paragraph looked like
|
||||
a list item.
|
||||
</para>
|
||||
<para>
|
||||
Here's one with a bullet. * criminey.
|
||||
</para>
|
||||
<para>
|
||||
There should be a hard line
|
||||
break<literallayout></literallayout>here.
|
||||
There should be a hard line break<literallayout></literallayout>here.
|
||||
</para>
|
||||
</section>
|
||||
<section id="block-quotes">
|
||||
|
@ -866,45 +864,41 @@ These should not be escaped: \$ \\ \> \[ \{
|
|||
<div>
|
||||
foo
|
||||
</div>
|
||||
|
||||
<para>
|
||||
And nested without indentation:
|
||||
</para>
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
foo
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
</div>
|
||||
<div>
|
||||
bar
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<para>
|
||||
Interpreted markdown in a table:
|
||||
</para>
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
<tr>
|
||||
<td>
|
||||
This is <emphasis>emphasized</emphasis>
|
||||
</td>
|
||||
<td>
|
||||
<td>
|
||||
And this is <emphasis role="strong">strong</emphasis>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>
|
||||
<para>
|
||||
Here's a simple block:
|
||||
</para>
|
||||
<div>
|
||||
|
||||
|
||||
foo
|
||||
</div>
|
||||
|
||||
<para>
|
||||
This should be a code block, though:
|
||||
</para>
|
||||
|
@ -923,31 +917,28 @@ These should not be escaped: \$ \\ \> \[ \{
|
|||
Now, nested:
|
||||
</para>
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
|
||||
<div>
|
||||
<div>
|
||||
|
||||
foo
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<para>
|
||||
This should just be an HTML comment:
|
||||
</para>
|
||||
<!-- Comment -->
|
||||
|
||||
<para>
|
||||
Multiline:
|
||||
</para>
|
||||
<!--
|
||||
Blah
|
||||
Blah
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is another comment.
|
||||
-->
|
||||
Blah
|
||||
Blah
|
||||
-->
|
||||
|
||||
<!--
|
||||
This is another comment.
|
||||
-->
|
||||
<para>
|
||||
Code block:
|
||||
</para>
|
||||
|
@ -958,7 +949,6 @@ Blah
|
|||
Just plain comment, with trailing spaces on the line:
|
||||
</para>
|
||||
<!-- foo -->
|
||||
|
||||
<para>
|
||||
Code:
|
||||
</para>
|
||||
|
@ -970,28 +960,27 @@ Blah
|
|||
</para>
|
||||
<hr>
|
||||
|
||||
<hr />
|
||||
<hr />
|
||||
|
||||
<hr />
|
||||
<hr />
|
||||
|
||||
<hr>
|
||||
<hr>
|
||||
|
||||
<hr />
|
||||
<hr />
|
||||
|
||||
<hr />
|
||||
<hr />
|
||||
|
||||
<hr class="foo" id="bar" />
|
||||
<hr class="foo" id="bar" />
|
||||
|
||||
<hr class="foo" id="bar" />
|
||||
|
||||
<hr class="foo" id="bar">
|
||||
<hr class="foo" id="bar" />
|
||||
|
||||
<hr class="foo" id="bar">
|
||||
</section>
|
||||
<section id="inline-markup">
|
||||
<title>Inline Markup</title>
|
||||
<para>
|
||||
This is <emphasis>emphasized</emphasis>, and so
|
||||
<emphasis>is this</emphasis>.
|
||||
This is <emphasis>emphasized</emphasis>, and so <emphasis>is
|
||||
this</emphasis>.
|
||||
</para>
|
||||
<para>
|
||||
This is <emphasis role="strong">strong</emphasis>, and so
|
||||
|
@ -1001,18 +990,18 @@ Blah
|
|||
An <emphasis><ulink url="/url">emphasized link</ulink></emphasis>.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis>
|
||||
<emphasis role="strong"><emphasis>This is strong and
|
||||
em.</emphasis></emphasis>
|
||||
</para>
|
||||
<para>
|
||||
So is <emphasis role="strong"><emphasis>this</emphasis></emphasis>
|
||||
word.
|
||||
So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis role="strong"><emphasis>This is strong and em.</emphasis></emphasis>
|
||||
<emphasis role="strong"><emphasis>This is strong and
|
||||
em.</emphasis></emphasis>
|
||||
</para>
|
||||
<para>
|
||||
So is <emphasis role="strong"><emphasis>this</emphasis></emphasis>
|
||||
word.
|
||||
So is <emphasis role="strong"><emphasis>this</emphasis></emphasis> word.
|
||||
</para>
|
||||
<para>
|
||||
This is code: <literal>></literal>, <literal>$</literal>,
|
||||
|
@ -1020,7 +1009,8 @@ Blah
|
|||
<literal><html></literal>.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis role="strikethrough">This is <emphasis>strikeout</emphasis>.</emphasis>
|
||||
<emphasis role="strikethrough">This is
|
||||
<emphasis>strikeout</emphasis>.</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
Superscripts: a<superscript>bc</superscript>d
|
||||
|
@ -1028,35 +1018,35 @@ Blah
|
|||
a<superscript>hello there</superscript>.
|
||||
</para>
|
||||
<para>
|
||||
Subscripts: H<subscript>2</subscript>O,
|
||||
H<subscript>23</subscript>O, H<subscript>many of them</subscript>O.
|
||||
Subscripts: H<subscript>2</subscript>O, H<subscript>23</subscript>O,
|
||||
H<subscript>many of them</subscript>O.
|
||||
</para>
|
||||
<para>
|
||||
These should not be superscripts or subscripts, because of the
|
||||
unescaped spaces: a^b c^d, a~b c~d.
|
||||
These should not be superscripts or subscripts, because of the unescaped
|
||||
spaces: a^b c^d, a~b c~d.
|
||||
</para>
|
||||
</section>
|
||||
<section id="smart-quotes-ellipses-dashes">
|
||||
<title>Smart quotes, ellipses, dashes</title>
|
||||
<para>
|
||||
<quote>Hello,</quote> said the spider.
|
||||
<quote><quote>Shelob</quote> is my name.</quote>
|
||||
<quote>Hello,</quote> said the spider. <quote><quote>Shelob</quote> is my
|
||||
name.</quote>
|
||||
</para>
|
||||
<para>
|
||||
<quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are
|
||||
letters.
|
||||
<quote>A</quote>, <quote>B</quote>, and <quote>C</quote> are letters.
|
||||
</para>
|
||||
<para>
|
||||
<quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote>
|
||||
are names of trees. So is <quote>pine.</quote>
|
||||
<quote>Oak,</quote> <quote>elm,</quote> and <quote>beech</quote> are names
|
||||
of trees. So is <quote>pine.</quote>
|
||||
</para>
|
||||
<para>
|
||||
<quote>He said, <quote>I want to go.</quote></quote> Were you alive
|
||||
in the 70's?
|
||||
<quote>He said, <quote>I want to go.</quote></quote> Were you alive in the
|
||||
70's?
|
||||
</para>
|
||||
<para>
|
||||
Here is some quoted <quote><literal>code</literal></quote> and a
|
||||
<quote><ulink url="http://example.com/?foo=1&bar=2">quoted link</ulink></quote>.
|
||||
<quote><ulink url="http://example.com/?foo=1&bar=2">quoted
|
||||
link</ulink></quote>.
|
||||
</para>
|
||||
<para>
|
||||
Some dashes: one—two — three—four — five.
|
||||
|
@ -1135,8 +1125,8 @@ Blah
|
|||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Escaped <literal>$</literal>: $73
|
||||
<emphasis>this should be emphasized</emphasis> 23$.
|
||||
Escaped <literal>$</literal>: $73 <emphasis>this should be
|
||||
emphasized</emphasis> 23$.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
@ -1316,8 +1306,8 @@ Blah
|
|||
<section id="with-ampersands">
|
||||
<title>With ampersands</title>
|
||||
<para>
|
||||
Here's a
|
||||
<ulink url="http://example.com/?foo=1&bar=2">link with an ampersand in the URL</ulink>.
|
||||
Here's a <ulink url="http://example.com/?foo=1&bar=2">link with an
|
||||
ampersand in the URL</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Here's a link with an amersand in the link text:
|
||||
|
@ -1327,8 +1317,8 @@ Blah
|
|||
Here's an <ulink url="/script?foo=1&bar=2">inline link</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
Here's an
|
||||
<ulink url="/script?foo=1&bar=2">inline link in pointy braces</ulink>.
|
||||
Here's an <ulink url="/script?foo=1&bar=2">inline link in pointy
|
||||
braces</ulink>.
|
||||
</para>
|
||||
</section>
|
||||
<section id="autolinks">
|
||||
|
@ -1387,78 +1377,67 @@ or here: <http://example.com/>
|
|||
</mediaobject>
|
||||
</figure>
|
||||
<para>
|
||||
Here is a movie
|
||||
<inlinemediaobject>
|
||||
Here is a movie <inlinemediaobject>
|
||||
<imageobject>
|
||||
<imagedata fileref="movie.jpg" />
|
||||
</imageobject>
|
||||
</inlinemediaobject>
|
||||
icon.
|
||||
</inlinemediaobject> icon.
|
||||
</para>
|
||||
</section>
|
||||
<section id="footnotes">
|
||||
<title>Footnotes</title>
|
||||
<para>
|
||||
Here is a footnote
|
||||
reference,<footnote>
|
||||
<para>
|
||||
Here is the footnote. It can go anywhere after the footnote
|
||||
reference. It need not be placed at the end of the document.
|
||||
</para>
|
||||
</footnote>
|
||||
and
|
||||
another.<footnote>
|
||||
<para>
|
||||
Here's the long note. This one contains multiple blocks.
|
||||
</para>
|
||||
<para>
|
||||
Subsequent blocks are indented to show that they belong to the
|
||||
footnote (as with list items).
|
||||
</para>
|
||||
<screen>
|
||||
Here is a footnote reference,<footnote>
|
||||
<para>
|
||||
Here is the footnote. It can go anywhere after the footnote reference.
|
||||
It need not be placed at the end of the document.
|
||||
</para>
|
||||
</footnote> and another.<footnote>
|
||||
<para>
|
||||
Here's the long note. This one contains multiple blocks.
|
||||
</para>
|
||||
<para>
|
||||
Subsequent blocks are indented to show that they belong to the
|
||||
footnote (as with list items).
|
||||
</para>
|
||||
<screen>
|
||||
{ <code> }
|
||||
</screen>
|
||||
<para>
|
||||
If you want, you can indent every line, but you can also be lazy
|
||||
and just indent the first line of each block.
|
||||
</para>
|
||||
</footnote>
|
||||
This should <emphasis>not</emphasis> be a footnote reference,
|
||||
because it contains a space.[^my note] Here is an inline
|
||||
note.<footnote>
|
||||
<para>
|
||||
This is <emphasis>easier</emphasis> to type. Inline notes may
|
||||
contain <ulink url="http://google.com">links</ulink> and
|
||||
<literal>]</literal> verbatim characters, as well as [bracketed
|
||||
text].
|
||||
</para>
|
||||
</footnote>
|
||||
<para>
|
||||
If you want, you can indent every line, but you can also be lazy and
|
||||
just indent the first line of each block.
|
||||
</para>
|
||||
</footnote> This should <emphasis>not</emphasis> be a footnote reference,
|
||||
because it contains a space.[^my note] Here is an inline note.<footnote>
|
||||
<para>
|
||||
This is <emphasis>easier</emphasis> to type. Inline notes may contain
|
||||
<ulink url="http://google.com">links</ulink> and <literal>]</literal>
|
||||
verbatim characters, as well as [bracketed text].
|
||||
</para>
|
||||
</footnote>
|
||||
</para>
|
||||
<blockquote>
|
||||
<para>
|
||||
Notes can go in
|
||||
quotes.<footnote>
|
||||
<para>
|
||||
In quote.
|
||||
</para>
|
||||
</footnote>
|
||||
Notes can go in quotes.<footnote>
|
||||
<para>
|
||||
In quote.
|
||||
</para>
|
||||
</footnote>
|
||||
</para>
|
||||
</blockquote>
|
||||
<orderedlist numeration="arabic">
|
||||
<listitem>
|
||||
<para>
|
||||
And in list
|
||||
items.<footnote>
|
||||
<para>
|
||||
In list.
|
||||
</para>
|
||||
</footnote>
|
||||
And in list items.<footnote>
|
||||
<para>
|
||||
In list.
|
||||
</para>
|
||||
</footnote>
|
||||
</para>
|
||||
</listitem>
|
||||
</orderedlist>
|
||||
<para>
|
||||
This paragraph should not be part of the note, as it is not
|
||||
indented.
|
||||
This paragraph should not be part of the note, as it is not indented.
|
||||
</para>
|
||||
</section>
|
||||
</article>
|
||||
|
|
Loading…
Add table
Reference in a new issue