From 271cb4d8457b2252cddc76a476f3681e8b2a1486 Mon Sep 17 00:00:00 2001 From: Ivo Clarysse Date: Fri, 29 Apr 2016 14:00:46 -0700 Subject: [PATCH 1/3] Add docbook5 writer support --- src/Text/Pandoc.hs | 2 + src/Text/Pandoc/Options.hs | 2 + src/Text/Pandoc/Writers/Docbook.hs | 10 +- tests/Tests/Old.hs | 3 + tests/tables.docbook5 | 432 +++++++++ tests/writer.docbook5 | 1394 ++++++++++++++++++++++++++++ 6 files changed, 1840 insertions(+), 3 deletions(-) create mode 100644 tests/tables.docbook5 create mode 100644 tests/writer.docbook5 diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b67a53f5b..58f666939 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -291,6 +291,8 @@ writers = [ writeHtmlString o{ writerSlideVariant = RevealJsSlides , writerHtml5 = True }) ,("docbook" , PureStringWriter writeDocbook) + ,("docbook5" , PureStringWriter $ \o -> + writeDocbook o{ writerDocBook5 = True }) ,("opml" , PureStringWriter writeOPML) ,("opendocument" , PureStringWriter writeOpenDocument) ,("latex" , PureStringWriter writeLaTeX) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 171210962..fcf6537c0 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -357,6 +357,7 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerDocBook5 :: Bool -- ^ Produce DocBook5 , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show @@ -403,6 +404,7 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc + , writerDocBook5 = False , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2aaebf99f..5528714a2 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -112,7 +112,9 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = else elements tag = case lvl of n | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> "sect" ++ show n + | n >= 1 && n <= 5 -> if writerDocBook5 opts + then "section" + else "sect" ++ show n | otherwise -> "simplesect" in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | not (null id')] $ @@ -227,9 +229,11 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = blockToDocbook opts (DefinitionList lst) = let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock f str) +blockToDocbook opts (RawBlock f str) | f == "docbook" = text str -- raw XML block - | f == "html" = text str -- allow html for backwards compatibility + | f == "html" = if writerDocBook5 opts + then empty -- No html in Docbook5 + else text str -- allow html for backwards compatibility | otherwise = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index b292b1f11..4e0eb46a4 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -108,6 +108,9 @@ tests = [ testGroup "markdown" , test "reader" ["-r", "docbook", "-w", "native", "-s"] "docbook-xref.docbook" "docbook-xref.native" ] + , testGroup "docbook5" + [ testGroup "writer" $ writerTests "docbook5" + ] , testGroup "native" [ testGroup "writer" $ writerTests "native" , test "reader" ["-r", "native", "-w", "native", "-s"] diff --git a/tests/tables.docbook5 b/tests/tables.docbook5 new file mode 100644 index 000000000..6224cf222 --- /dev/null +++ b/tests/tables.docbook5 @@ -0,0 +1,432 @@ + + Simple table with caption: + + + + Demonstration of simple table syntax. + + + + + + + + + + Right + + + Left + + + Center + + + Default + + + + + + + 12 + + + 12 + + + 12 + + + 12 + + + + + 123 + + + 123 + + + 123 + + + 123 + + + + + 1 + + + 1 + + + 1 + + + 1 + + + + +
+ + Simple table without caption: + + + + + + + + + + + Right + + + Left + + + Center + + + Default + + + + + + + 12 + + + 12 + + + 12 + + + 12 + + + + + 123 + + + 123 + + + 123 + + + 123 + + + + + 1 + + + 1 + + + 1 + + + 1 + + + + + + + Simple table indented two spaces: + + + + Demonstration of simple table syntax. + + + + + + + + + + Right + + + Left + + + Center + + + Default + + + + + + + 12 + + + 12 + + + 12 + + + 12 + + + + + 123 + + + 123 + + + 123 + + + 123 + + + + + 1 + + + 1 + + + 1 + + + 1 + + + + +
+ + Multiline table with caption: + + + + Here's the caption. It may span multiple lines. + + + + + + + + + + Centered Header + + + Left Aligned + + + Right Aligned + + + Default aligned + + + + + + + First + + + row + + + 12.0 + + + Example of a row that spans multiple lines. + + + + + Second + + + row + + + 5.0 + + + Here's another one. Note the blank line between rows. + + + + +
+ + Multiline table without caption: + + + + + + + + + + + Centered Header + + + Left Aligned + + + Right Aligned + + + Default aligned + + + + + + + First + + + row + + + 12.0 + + + Example of a row that spans multiple lines. + + + + + Second + + + row + + + 5.0 + + + Here's another one. Note the blank line between rows. + + + + + + + Table without column headers: + + + + + + + + + + + 12 + + + 12 + + + 12 + + + 12 + + + + + 123 + + + 123 + + + 123 + + + 123 + + + + + 1 + + + 1 + + + 1 + + + 1 + + + + + + + Multiline table without column headers: + + + + + + + + + + + First + + + row + + + 12.0 + + + Example of a row that spans multiple lines. + + + + + Second + + + row + + + 5.0 + + + Here's another one. Note the blank line between rows. + + + + + diff --git a/tests/writer.docbook5 b/tests/writer.docbook5 new file mode 100644 index 000000000..494489ab5 --- /dev/null +++ b/tests/writer.docbook5 @@ -0,0 +1,1394 @@ + + +
+ + Pandoc Test Suite + + + John + MacFarlane + + + + Anonymous + + + July 17, 2006 + + + This is a set of tests for pandoc. Most of them are adapted from John + Gruber’s markdown test suite. + +
+ Headers + +
+
+ Level 1 +
+ Level 2 with <emphasis>emphasis</emphasis> +
+ Level 3 + + with no blank line + +
+
+
+ Level 2 + + with no blank line + +
+
+
+ Paragraphs + + Here’s a regular paragraph. + + + In Markdown 1.0.0 and earlier. Version 8. This line turns into a list + item. Because a hard-wrapped line in the middle of a paragraph looked like + a list item. + + + Here’s one with a bullet. * criminey. + +There should be a hard line break +here. +
+
+ Block Quotes + + E-mail style: + +
+ + This is a block quote. It is pretty short. + +
+
+ + Code in a block quote: + + +sub status { + print "working"; +} + + + A list: + + + + + item one + + + + + item two + + + + + Nested block quotes: + +
+ + nested + +
+
+ + nested + +
+
+ + This should not be a block quote: 2 > 1. + + + And a following paragraph. + +
+
+ Code Blocks + + Code: + + +---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab + + + And: + + + this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ + +
+
+ Lists +
+ Unordered + + Asterisks tight: + + + + + asterisk 1 + + + + + asterisk 2 + + + + + asterisk 3 + + + + + Asterisks loose: + + + + + asterisk 1 + + + + + asterisk 2 + + + + + asterisk 3 + + + + + Pluses tight: + + + + + Plus 1 + + + + + Plus 2 + + + + + Plus 3 + + + + + Pluses loose: + + + + + Plus 1 + + + + + Plus 2 + + + + + Plus 3 + + + + + Minuses tight: + + + + + Minus 1 + + + + + Minus 2 + + + + + Minus 3 + + + + + Minuses loose: + + + + + Minus 1 + + + + + Minus 2 + + + + + Minus 3 + + + +
+
+ Ordered + + Tight: + + + + + First + + + + + Second + + + + + Third + + + + + and: + + + + + One + + + + + Two + + + + + Three + + + + + Loose using tabs: + + + + + First + + + + + Second + + + + + Third + + + + + and using spaces: + + + + + One + + + + + Two + + + + + Three + + + + + Multiple paragraphs: + + + + + Item 1, graf one. + + + Item 1. graf two. The quick brown fox jumped over the lazy dog’s + back. + + + + + Item 2. + + + + + Item 3. + + + +
+
+ Nested + + + + Tab + + + + + Tab + + + + + Tab + + + + + + + + + Here’s another: + + + + + First + + + + + Second: + + + + + Fee + + + + + Fie + + + + + Foe + + + + + + + Third + + + + + Same thing but with paragraphs: + + + + + First + + + + + Second: + + + + + Fee + + + + + Fie + + + + + Foe + + + + + + + Third + + + +
+
+ Tabs and spaces + + + + this is a list item indented with tabs + + + + + this is a list item indented with spaces + + + + + this is an example list item indented with tabs + + + + + this is an example list item indented with spaces + + + + + +
+
+ Fancy list markers + + + + begins with 2 + + + + + and now 3 + + + with a continuation + + + + + sublist with roman numerals, starting with 4 + + + + + more items + + + + + a subsublist + + + + + a subsublist + + + + + + + + + Nesting: + + + + + Upper Alpha + + + + + Upper Roman. + + + + + Decimal start with 6 + + + + + Lower alpha with paren + + + + + + + + + + + Autonumbering: + + + + + Autonumber. + + + + + More. + + + + + Nested. + + + + + + + Should not be a list item: + + + M.A. 2007 + + + B. Williams + +
+
+
+ Definition Lists + + Tight using spaces: + + + + + apple + + + + red fruit + + + + + + orange + + + + orange fruit + + + + + + banana + + + + yellow fruit + + + + + + Tight using tabs: + + + + + apple + + + + red fruit + + + + + + orange + + + + orange fruit + + + + + + banana + + + + yellow fruit + + + + + + Loose: + + + + + apple + + + + red fruit + + + + + + orange + + + + orange fruit + + + + + + banana + + + + yellow fruit + + + + + + Multiple blocks with italics: + + + + + apple + + + + red fruit + + + contains seeds, crisp, pleasant to taste + + + + + + orange + + + + orange fruit + + +{ orange code block } + +
+ + orange block quote + +
+
+
+
+ + Multiple definitions, tight: + + + + + apple + + + + red fruit + + + computer + + + + + + orange + + + + orange fruit + + + bank + + + + + + Multiple definitions, loose: + + + + + apple + + + + red fruit + + + computer + + + + + + orange + + + + orange fruit + + + bank + + + + + + Blank line after term, indented marker, alternate markers: + + + + + apple + + + + red fruit + + + computer + + + + + + orange + + + + orange fruit + + + + + sublist + + + + + sublist + + + + + + +
+
+ HTML Blocks + + Simple block on one line: + + + foo + + + And nested without indentation: + + + foo + + + bar + + + Interpreted markdown in a table: + + This is emphasized + And this is strong + + Here’s a simple block: + + + foo + + + This should be a code block, though: + + +<div> + foo +</div> + + + As should this: + + +<div>foo</div> + + + Now, nested: + + + foo + + + This should just be an HTML comment: + + + Multiline: + + + Code block: + + +<!-- Comment --> + + + Just plain comment, with trailing spaces on the line: + + + Code: + + +<hr /> + + + Hr’s: + +
+
+ Inline Markup + + This is emphasized, and so is + this. + + + This is strong, and so + is this. + + + An emphasized link. + + + This is strong and + em. + + + So is this word. + + + This is strong and + em. + + + So is this word. + + + This is code: >, $, + \, \$, + <html>. + + + This is + strikeout. + + + Superscripts: abcd + ahello + ahello there. + + + Subscripts: H2O, H23O, + Hmany of themO. + + + These should not be superscripts or subscripts, because of the unescaped + spaces: a^b c^d, a~b c~d. + +
+
+ Smart quotes, ellipses, dashes + + Hello, said the spider. Shelob is my + name. + + + A, B, and C are letters. + + + Oak, elm, and beech are names + of trees. So is pine. + + + He said, I want to go. Were you alive in the + 70’s? + + + Here is some quoted code and a + quoted + link. + + + Some dashes: one—two — three—four — five. + + + Dashes between numbers: 5–7, 255–66, 1987–1999. + + + Ellipses…and…and…. + +
+
+ LaTeX + + + + + + + + 2 + 2 = 4 + + + + + x ∈ y + + + + + α ∧ ω + + + + + 223 + + + + + p-Tree + + + + + Here’s some display math: + $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ + + + + + Here’s one that has a line break in it: + α + ω × x2. + + + + + These shouldn’t be math: + + + + + To get the famous equation, write $e = mc^2$. + + + + + $22,000 is a lot of money. So is $34,000. (It + worked if lot is emphasized.) + + + + + Shoes ($20) and socks ($5). + + + + + Escaped $: $73 this should be + emphasized 23$. + + + + + Here’s a LaTeX table: + +
+
+ Special Characters + + Here is some unicode: + + + + + I hat: Î + + + + + o umlaut: ö + + + + + section: § + + + + + set membership: ∈ + + + + + copyright: © + + + + + AT&T has an ampersand in their name. + + + AT&T is another way to write it. + + + This & that. + + + 4 < 5. + + + 6 > 5. + + + Backslash: \ + + + Backtick: ` + + + Asterisk: * + + + Underscore: _ + + + Left brace: { + + + Right brace: } + + + Left bracket: [ + + + Right bracket: ] + + + Left paren: ( + + + Right paren: ) + + + Greater-than: > + + + Hash: # + + + Period: . + + + Bang: ! + + + Plus: + + + + Minus: - + +
+ +
+ Images + + From Voyage dans la Lune by Georges Melies (1902): + +
+ lalune + + + + + lalune + +
+ + Here is a movie + + + + icon. + +
+
+ Footnotes + + Here is a footnote reference, + + Here is the footnote. It can go anywhere after the footnote reference. + It need not be placed at the end of the document. + + and another. + + Here’s the long note. This one contains multiple blocks. + + + Subsequent blocks are indented to show that they belong to the + footnote (as with list items). + + + { <code> } + + + If you want, you can indent every line, but you can also be lazy and + just indent the first line of each block. + + This should not be a footnote reference, + because it contains a space.[^my note] Here is an inline note. + + This is easier to type. Inline notes may contain + links and ] + verbatim characters, as well as [bracketed text]. + + + +
+ + Notes can go in quotes. + + In quote. + + + +
+ + + + And in list items. + + In list. + + + + + + + This paragraph should not be part of the note, as it is not indented. + +
+
From 987ec3a7523f4fe529575004d76d93680f127fa3 Mon Sep 17 00:00:00 2001 From: Ivo Clarysse Date: Fri, 29 Apr 2016 15:43:15 -0700 Subject: [PATCH 2/3] Write out Docbook 5 namespace --- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/Options.hs | 4 ++-- src/Text/Pandoc/Writers/Docbook.hs | 11 +++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 58f666939..0330c46e2 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -292,7 +292,7 @@ writers = [ , writerHtml5 = True }) ,("docbook" , PureStringWriter writeDocbook) ,("docbook5" , PureStringWriter $ \o -> - writeDocbook o{ writerDocBook5 = True }) + writeDocbook o{ writerDocbook5 = True }) ,("opml" , PureStringWriter writeOPML) ,("opendocument" , PureStringWriter writeOpenDocument) ,("latex" , PureStringWriter writeLaTeX) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index fcf6537c0..701cd8bd1 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -357,7 +357,7 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerDocBook5 :: Bool -- ^ Produce DocBook5 + , writerDocbook5 :: Bool -- ^ Produce DocBook5 , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show @@ -404,7 +404,7 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc - , writerDocBook5 = False + , writerDocbook5 = False , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 5528714a2..79ccde9af 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -112,12 +112,15 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = else elements tag = case lvl of n | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> if writerDocBook5 opts + | n >= 1 && n <= 5 -> if writerDocbook5 opts then "section" else "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | - not (null id')] $ + idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + nsAttr = if writerDocbook5 opts && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook")] + else [] + attribs = nsAttr ++ idAttr + in inTags True tag attribs $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') @@ -231,7 +234,7 @@ blockToDocbook opts (DefinitionList lst) = in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook opts (RawBlock f str) | f == "docbook" = text str -- raw XML block - | f == "html" = if writerDocBook5 opts + | f == "html" = if writerDocbook5 opts then empty -- No html in Docbook5 else text str -- allow html for backwards compatibility | otherwise = empty From fd36e6b64a516ffd281af0667afc6d9c00a70d64 Mon Sep 17 00:00:00 2001 From: Ivo Clarysse Date: Fri, 29 Apr 2016 16:06:55 -0700 Subject: [PATCH 3/3] Docbook5 writer: Properly handle ulink/link --- src/Text/Pandoc/Writers/Docbook.hs | 4 +- tests/writer.docbook5 | 67 +++++++++++++++--------------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 79ccde9af..9acfe289a 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -351,7 +351,9 @@ inlineToDocbook opts (Link attr txt (src, _)) | otherwise = (if isPrefixOf "#" src then inTags False "link" $ ("linkend", drop 1 src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ + else if writerDocbook5 opts + then inTags False "link" $ ("xlink:href", src) : idAndRole attr + else inTags False "ulink" $ ("url", src) : idAndRole attr ) $ inlinesToDocbook opts txt inlineToDocbook opts (Image attr _ (src, tit)) = let titleDoc = if null tit diff --git a/tests/writer.docbook5 b/tests/writer.docbook5 index 494489ab5..5261a35be 100644 --- a/tests/writer.docbook5 +++ b/tests/writer.docbook5 @@ -22,7 +22,8 @@
Headers