From 3564cd82caf81763f83897bdd144c833ffab58ee Mon Sep 17 00:00:00 2001 From: Frerich Raabe <raabe@froglogic.com> Date: Wed, 23 Sep 2015 19:25:58 +0200 Subject: [PATCH 1/4] Minor refactoring to readDocBook I plan to use the parsed and normalized XML tree read in readDocBook in other places - prepare that commit by factoring this code out into a separate, shared, definition. --- src/Text/Pandoc/Readers/DocBook.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 352b94496..a2aa62249 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -524,8 +524,8 @@ instance Default DBState where readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' - inp' = handleInstructions inp + where (bs , st') = flip runState def . runExceptT . mapM parseBlock $ tree + tree = normalizeTree . parseXML . handleInstructions $ inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. From f6538144f0763ae17b60c78810ed52ab96df308d Mon Sep 17 00:00:00 2001 From: Frerich Raabe <raabe@froglogic.com> Date: Wed, 23 Sep 2015 19:31:25 +0200 Subject: [PATCH 2/4] Pass the parsed DocBook content along the state of readDocBook Having access to the entire document will be needed when handling elements which refer to other elements. This is needed for e.g. <xref> or <link>, both of which reference other elements (by the 'id' attribute) for the label text. I suppose that in practice, the [Content] returned by parseXML always only contains one 'Elem' value -- the document element. However, I'm not totally sure about it, so let's just pass all the Content along. 
--- src/Text/Pandoc/Readers/DocBook.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index a2aa62249..83e2f3900 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -511,6 +511,7 @@ data DBState = DBState{ dbSectionLevel :: Int , dbAcceptsMeta :: Bool , dbBook :: Bool , dbFigureTitle :: Inlines + , dbContent :: [Content] } deriving Show instance Default DBState where @@ -519,12 +520,13 @@ instance Default DBState where , dbMeta = mempty , dbAcceptsMeta = False , dbBook = False - , dbFigureTitle = mempty } + , dbFigureTitle = mempty + , dbContent = [] } readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState def . runExceptT . mapM parseBlock $ tree + where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree tree = normalizeTree . parseXML . handleInstructions $ inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it From 35f12b5095799e41b563f47a7923a1d01015c71c Mon Sep 17 00:00:00 2001 From: Frerich Raabe <raabe@froglogic.com> Date: Wed, 23 Sep 2015 22:53:50 +0200 Subject: [PATCH 3/4] Added proper support for DocBook 'xref' elements 'xref' is used to create cross references to other parts of the document. It is an empty element - the cross reference text depends on various attributes. Quoting 'DocBook: The Definitive Guide': 1. If the endterm attribute is specified on xref, the content of the element pointed to by endterm will be used as the text of the cross-reference. 2. Otherwise, if the object pointed to has a specified XRefLabel, the content of that attribute will be used as the cross-reference text. 
--- pandoc.cabal | 1 + src/Text/Pandoc/Readers/DocBook.hs | 20 ++++++++- tests/Tests/Old.hs | 2 + tests/docbook-xref.docbook | 70 ++++++++++++++++++++++++++++++ tests/docbook-xref.native | 29 +++++++++++++ 5 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 tests/docbook-xref.docbook create mode 100644 tests/docbook-xref.native diff --git a/pandoc.cabal b/pandoc.cabal index 0e1e75897..934d51a58 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -125,6 +125,7 @@ Extra-Source-Files: tests/bodybg.gif tests/*.native tests/docbook-reader.docbook + tests/docbook-xref.docbook tests/html-reader.html tests/opml-reader.opml tests/haddock-reader.haddock diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 83e2f3900..164b44b62 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -18,6 +18,7 @@ import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Compat.Except import Data.Default +import Data.Foldable (asum) {- @@ -498,7 +499,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] warning - An admonition set off from the text [x] wordasword - A word meant specifically as a word and not representing anything else -[ ] xref - A cross reference to another part of the document +[x] xref - A cross reference to another part of the document [ ] year - The year of publication of a document [x] ?asciidoc-br? - line break from asciidoc docbook output -} @@ -952,7 +953,13 @@ parseInline (Elem e) = "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) "menuchoice" -> menuchoice <$> (mapM parseInline $ filter isGuiMenu $ elContent e) - "xref" -> return $ str "?" -- so at least you know something is there + "xref" -> do + content <- dbContent <$> get + let linkend = attrValue "linkend" e + let title = case attrValue "endterm" e of + "" -> maybe "???" xrefTitleByElem (findElementById linkend content) + endterm -> maybe "???" 
strContent (findElementById endterm content) + return $ link ('#' : linkend) "" (singleton (Str title)) "email" -> return $ link ("mailto:" ++ strContent e) "" $ str $ strContent e "uri" -> return $ link (strContent e) "" $ str $ strContent e @@ -1013,3 +1020,12 @@ parseInline (Elem e) = isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || named "guimenuitem" x isGuiMenu _ = False + + findElementById idString content + = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content] + + xrefTitleByElem el + | null xrefLabel = "???" + | otherwise = xrefLabel + where + xrefLabel = attrValue "xreflabel" el diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 047ad0481..5cfee9f76 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -105,6 +105,8 @@ tests = [ testGroup "markdown" [ testGroup "writer" $ writerTests "docbook" , test "reader" ["-r", "docbook", "-w", "native", "-s"] "docbook-reader.docbook" "docbook-reader.native" + , test "reader" ["-r", "docbook", "-w", "native", "-s"] + "docbook-xref.docbook" "docbook-xref.native" ] , testGroup "native" [ testGroup "writer" $ writerTests "native" diff --git a/tests/docbook-xref.docbook b/tests/docbook-xref.docbook new file mode 100644 index 000000000..ebcd94d00 --- /dev/null +++ b/tests/docbook-xref.docbook @@ -0,0 +1,70 @@ +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" + "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd"> +<book><title>An Example Book</title> +<chapter id="ch01"><title>XRef Samples</title> +<para> +This paragraph demonstrates several features of +<sgmltag>XRef</sgmltag>. +</para> +<itemizedlist> +<listitem><para>A straight link generates the +cross-reference text: <xref linkend="ch02"/>. +</para></listitem> +<listitem><para>A link to an element with an +<sgmltag class="attribute">XRefLabel</sgmltag>: +<xref linkend="ch03"/>. 
+</para></listitem> +<listitem><para>A link with an +<sgmltag class="attribute">EndTerm</sgmltag>: +<xref linkend="ch04" endterm="ch04short"/>. +</para></listitem> +<listitem><para>A link to an +<sgmltag>cmdsynopsis</sgmltag> element: <xref linkend="cmd01"/>. +</para></listitem> +<listitem><para>A link to an +<sgmltag>funcsynopsis</sgmltag> element: <xref linkend="func01"/>. +</para></listitem> +</itemizedlist> +</chapter> + +<chapter id="ch02"> + <title>The Second Chapter</title> + <para>Some content here</para> +</chapter> + +<chapter id="ch03" xreflabel="Chapter the Third"> + <title>The Third Chapter</title> + <para>Some content here</para> +</chapter> + +<chapter id="ch04"> + <title>The Fourth Chapter</title> + <titleabbrev id="ch04short">Chapter 4</titleabbrev> + <para>Some content here</para> + +<cmdsynopsis id="cmd01"> + <command>chgrp</command> + <arg>-R + <group> + <arg>-H</arg> + <arg>-L</arg> + <arg>-P</arg> + </group> + </arg> + <arg>-f</arg> + <arg choice='plain'><replaceable>group</replaceable></arg> + <arg rep='repeat' choice='plain'><replaceable>file</replaceable></arg> +</cmdsynopsis> + + +<funcsynopsis id="func01"> +<funcprototype> +<funcdef>int <function>max</function></funcdef> +<paramdef>int <parameter>int1</parameter></paramdef> +<paramdef>int <parameter>int2</parameter></paramdef> +</funcprototype> +</funcsynopsis> + +</chapter> +</book> + diff --git a/tests/docbook-xref.native b/tests/docbook-xref.native new file mode 100644 index 000000000..70027b2c5 --- /dev/null +++ b/tests/docbook-xref.native @@ -0,0 +1,29 @@ +Pandoc (Meta {unMeta = fromList []}) +[Header 1 ("ch01",[],[]) [Str "XRef",Space,Str "Samples"] +,Para [Str "This",Space,Str "paragraph",Space,Str "demonstrates",Space,Str "several",Space,Str "features",Space,Str "of",Space,Str "XRef."] +,BulletList + [[Para [Str "A",Space,Str "straight",Space,Str "link",Space,Str "generates",Space,Str "the",Space,Str "cross-reference",Space,Str "text:",Space,Link [Str "???"] ("#ch02",""),Str "."]] 
+ ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "element",Space,Str "with",Space,Str "an",Space,Str "XRefLabel:",Space,Link [Str "Chapter the Third"] ("#ch03",""),Str "."]] + ,[Para [Str "A",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "EndTerm:",Space,Link [Str "Chapter 4"] ("#ch04",""),Str "."]] + ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "cmdsynopsis",Space,Str "element:",Space,Link [Str "???"] ("#cmd01",""),Str "."]] + ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "funcsynopsis",Space,Str "element:",Space,Link [Str "???"] ("#func01",""),Str "."]]] +,Header 1 ("ch02",[],[]) [Str "The",Space,Str "Second",Space,Str "Chapter"] +,Para [Str "Some",Space,Str "content",Space,Str "here"] +,Header 1 ("ch03",[],[]) [Str "The",Space,Str "Third",Space,Str "Chapter"] +,Para [Str "Some",Space,Str "content",Space,Str "here"] +,Header 1 ("ch04",[],[]) [Str "The",Space,Str "Fourth",Space,Str "Chapter"] +,Para [Str "Some",Space,Str "content",Space,Str "here"] +,Plain [Str "chgrp"] +,Plain [Str "-R"] +,Plain [Str "-H"] +,Plain [Str "-L"] +,Plain [Str "-P"] +,Plain [Str "-f"] +,Plain [Str "group"] +,Plain [Str "file"] +,Plain [Str "int"] +,Plain [Str "max"] +,Plain [Str "int"] +,Plain [Str "int1"] +,Plain [Str "int"] +,Plain [Str "int2"]] From eee992520cfe3d0d37185f998339690aae39f2c8 Mon Sep 17 00:00:00 2001 From: Frerich Raabe <raabe@froglogic.com> Date: Wed, 23 Sep 2015 23:08:53 +0200 Subject: [PATCH 4/4] Improve text generated for <xref> by employing docbook-xsl heuristics docbook-xsl, a set of XSLT scripts to generate HTML out of DocBook, tries harder to generate a nice xref text. Depending on the element being linked to, it looks at the title or other descriptive child elements. Let's do that, too. 
--- src/Text/Pandoc/Readers/DocBook.hs | 18 ++++++++++++++++-- tests/docbook-xref.native | 6 +++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 164b44b62..9243221f0 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1024,8 +1024,22 @@ parseInline (Elem e) = findElementById idString content = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content] + -- Use the 'xreflabel' attribute for getting the title of a xref link; + -- if there's no such attribute, employ some heuristics based on what + -- docbook-xsl does. xrefTitleByElem el - | null xrefLabel = "???" - | otherwise = xrefLabel + | not (null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of + "chapter" -> descendantContent "title" el + "sect1" -> descendantContent "title" el + "sect2" -> descendantContent "title" el + "sect3" -> descendantContent "title" el + "sect4" -> descendantContent "title" el + "sect5" -> descendantContent "title" el + "cmdsynopsis" -> descendantContent "command" el + "funcsynopsis" -> descendantContent "function" el + _ -> qName (elName el) ++ "_title" where xrefLabel = attrValue "xreflabel" el + descendantContent name = maybe "???" strContent + . 
findElement (QName name Nothing Nothing) diff --git a/tests/docbook-xref.native b/tests/docbook-xref.native index 70027b2c5..ec870842b 100644 --- a/tests/docbook-xref.native +++ b/tests/docbook-xref.native @@ -2,11 +2,11 @@ Pandoc (Meta {unMeta = fromList []}) [Header 1 ("ch01",[],[]) [Str "XRef",Space,Str "Samples"] ,Para [Str "This",Space,Str "paragraph",Space,Str "demonstrates",Space,Str "several",Space,Str "features",Space,Str "of",Space,Str "XRef."] ,BulletList - [[Para [Str "A",Space,Str "straight",Space,Str "link",Space,Str "generates",Space,Str "the",Space,Str "cross-reference",Space,Str "text:",Space,Link [Str "???"] ("#ch02",""),Str "."]] + [[Para [Str "A",Space,Str "straight",Space,Str "link",Space,Str "generates",Space,Str "the",Space,Str "cross-reference",Space,Str "text:",Space,Link [Str "The Second Chapter"] ("#ch02",""),Str "."]] ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "element",Space,Str "with",Space,Str "an",Space,Str "XRefLabel:",Space,Link [Str "Chapter the Third"] ("#ch03",""),Str "."]] ,[Para [Str "A",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "EndTerm:",Space,Link [Str "Chapter 4"] ("#ch04",""),Str "."]] - ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "cmdsynopsis",Space,Str "element:",Space,Link [Str "???"] ("#cmd01",""),Str "."]] - ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "funcsynopsis",Space,Str "element:",Space,Link [Str "???"] ("#func01",""),Str "."]]] + ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "cmdsynopsis",Space,Str "element:",Space,Link [Str "chgrp"] ("#cmd01",""),Str "."]] + ,[Para [Str "A",Space,Str "link",Space,Str "to",Space,Str "an",Space,Str "funcsynopsis",Space,Str "element:",Space,Link [Str "max"] ("#func01",""),Str "."]]] ,Header 1 ("ch02",[],[]) [Str "The",Space,Str "Second",Space,Str "Chapter"] ,Para [Str "Some",Space,Str "content",Space,Str "here"] ,Header 1 ("ch03",[],[]) [Str 
"The",Space,Str "Third",Space,Str "Chapter"]