Support for <indexterm>s when reading DocBook (#7607)
* Support for <indexterm>s when reading DocBook * Update implementation status of `<n-ary>` tags * Remove non-idiomatic parentheses * More complete `<indexterm>` support, with tests Co-authored-by: Rowan Rodrik van der Molen <rowan@ytec.nl>
This commit is contained in:
parent
5750f60442
commit
7a70a46c03
3 changed files with 236 additions and 4 deletions
|
@ -19,7 +19,7 @@ import Data.Foldable (asum)
|
|||
import Data.Generics
|
||||
import Data.List (intersperse,elemIndex)
|
||||
import Data.List.NonEmpty (nonEmpty)
|
||||
import Data.Maybe (fromMaybe,mapMaybe)
|
||||
import Data.Maybe (catMaybes,fromMaybe,mapMaybe)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
|
@ -316,7 +316,7 @@ List of all DocBook tags, with [x] indicating implemented,
|
|||
[ ] postcode - A postal code in an address
|
||||
[x] preface - Introductory matter preceding the first chapter of a book
|
||||
[ ] prefaceinfo - Meta-information for a Preface
|
||||
[ ] primary - The primary word or phrase under which an index term should be
|
||||
[x] primary - The primary word or phrase under which an index term should be
|
||||
sorted
|
||||
[ ] primaryie - A primary term in an index entry, not in the text
|
||||
[ ] printhistory - The printing history of a document
|
||||
|
@ -385,7 +385,7 @@ List of all DocBook tags, with [x] indicating implemented,
|
|||
[o] screeninfo - Information about how a screen shot was produced
|
||||
[ ] screenshot - A representation of what the user sees or might see on a
|
||||
computer screen
|
||||
[ ] secondary - A secondary word or phrase in an index term
|
||||
[x] secondary - A secondary word or phrase in an index term
|
||||
[ ] secondaryie - A secondary term in an index entry, rather than in the text
|
||||
[x] sect1 - A top-level section of document
|
||||
[x] sect1info - Meta-information for a Sect1
|
||||
|
@ -461,7 +461,7 @@ List of all DocBook tags, with [x] indicating implemented,
|
|||
[x] td - A table entry in an HTML table
|
||||
[x] term - The word or phrase being defined or described in a variable list
|
||||
[ ] termdef - An inline term definition
|
||||
[ ] tertiary - A tertiary word or phrase in an index term
|
||||
[x] tertiary - A tertiary word or phrase in an index term
|
||||
[ ] tertiaryie - A tertiary term in an index entry, rather than in the text
|
||||
[ ] textdata - Pointer to external text data
|
||||
[ ] textobject - A wrapper for a text description of an object and its
|
||||
|
@ -1080,6 +1080,17 @@ elementToStr :: Content -> Content
|
|||
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
|
||||
elementToStr x = x
|
||||
|
||||
-- | Build an attribute pair from a child element of @e@.
--
-- The child is looked up with 'findChild' under the name @n@ qualified by
-- the DocBook namespace URI. When a child is found, the result pairs @n@
-- with the text produced by 'strContentRecursive' for that child;
-- otherwise the result is 'Nothing'.
--
-- NOTE(review): the lookup name always carries the DocBook namespace URI;
-- confirm how 'findChild' treats children of un-namespaced input.
childElTextAsAttr :: Text -> Element -> Maybe (Text, Text)
childElTextAsAttr n e = toAttr <$> findChild childName e
  where
    childName = QName n (Just "http://docbook.org/ns/docbook") Nothing
    toAttr el = (n, strContentRecursive el)
|
||||
|
||||
-- | Build an attribute pair from the attribute @n@ of @e@.
--
-- The value is obtained with 'attrValue'. An empty value yields 'Nothing';
-- any other value is returned paired with the attribute name @n@.
attrValueAsOptionalAttr :: Text -> Element -> Maybe (Text, Text)
attrValueAsOptionalAttr n e
  | val == "" = Nothing
  | otherwise = Just (n, val)
  where
    val = attrValue n e
|
||||
|
||||
parseInline :: PandocMonad m => Content -> DB m Inlines
|
||||
parseInline (Text (CData _ s _)) = return $ text s
|
||||
parseInline (CRef ref) =
|
||||
|
@ -1094,6 +1105,28 @@ parseInline (Elem e) =
|
|||
if ident /= "" || classes /= []
|
||||
then innerInlines (spanWith (ident,classes,[]))
|
||||
else innerInlines id
|
||||
"indexterm" -> do
|
||||
let ident = attrValue "id" e
|
||||
let classes = T.words $ attrValue "role" e
|
||||
let attrs =
|
||||
-- In DocBook, <primary>, <secondary>, <tertiary>, <see>, and <seealso>
|
||||
-- have mixed content models. However, because we're representing these
|
||||
-- elements in Pandoc's AST as attributes of a phrase, we flatten all
|
||||
-- the descendant content of these elements.
|
||||
[ childElTextAsAttr "primary" e
|
||||
, childElTextAsAttr "secondary" e
|
||||
, childElTextAsAttr "tertiary" e
|
||||
, childElTextAsAttr "see" e
|
||||
, childElTextAsAttr "seealso" e
|
||||
, attrValueAsOptionalAttr "significance" e
|
||||
, attrValueAsOptionalAttr "startref" e
|
||||
, attrValueAsOptionalAttr "scope" e
|
||||
, attrValueAsOptionalAttr "class" e
|
||||
-- We don't do anything with the "pagenum" attribute, because it only
|
||||
-- occurs within literal <index> sections, which are not supported by Pandoc,
|
||||
-- because Pandoc has no concept of pages.
|
||||
]
|
||||
return $ spanWith (ident, ("indexterm" : classes), (catMaybes attrs)) mempty
|
||||
"equation" -> equation e displayMath
|
||||
"informalequation" -> equation e displayMath
|
||||
"inlineequation" -> equation e math
|
||||
|
|
|
@ -1603,4 +1603,16 @@ or here: <http://example.com/>
|
|||
</step>
|
||||
</procedure>
|
||||
</sect1>
|
||||
<sect1 id="indexterms">
|
||||
<title>Index terms</title>
|
||||
<para>
|
||||
In the simplest case, index terms<indexterm><primary>index term</primary></indexterm> consists of just a <code><primary></code> element, but <indexterm><primary>index term</primary><secondary>multi-level</secondary></indexterm> they can also consist of a <code><primary></code> <emph>and</emph> <code><secondary></code> element, and <indexterm><primary>index term</primary><secondary>multi-level</secondary><tertiary>3-level</tertiary></indexterm> can even include a <code><tertiary></code> term.
|
||||
</para>
|
||||
<para>
|
||||
Index terms can also refer to other index terms: <indexterm><primary>index cross referencing</primary></indexterm><indexterm><primary>index term</primary><secondary>cross references</secondary><see>index cross referencing</see></indexterm>exclusively, using the <code><see></code> tag; or <indexterm><primary>index cross referencing</primary><seealso>cross referencing</seealso></indexterm> as a reference to related terms, using the <code><seealso></code> tag.
|
||||
</para>
|
||||
<para>
|
||||
<indexterm><primary>food</primary><secondary>big <foreignphrase>baguette</foreignphrase> <strong>supreme</strong></secondary></indexterm>Nested content in index term elements is flattened.
|
||||
</para>
|
||||
</sect1>
|
||||
</article>
|
||||
|
|
|
@ -2930,4 +2930,191 @@ Pandoc
|
|||
[ Str "A" , Space , Str "Final" , Space , Str "Step" ]
|
||||
]
|
||||
]
|
||||
, Header
|
||||
1
|
||||
( "indexterms" , [] , [] )
|
||||
[ Str "Index" , Space , Str "terms" ]
|
||||
, Para
|
||||
[ Str "In"
|
||||
, Space
|
||||
, Str "the"
|
||||
, Space
|
||||
, Str "simplest"
|
||||
, Space
|
||||
, Str "case,"
|
||||
, Space
|
||||
, Str "index"
|
||||
, Space
|
||||
, Str "terms"
|
||||
, Span
|
||||
( "" , [ "indexterm" ] , [ ( "primary" , "index term" ) ] )
|
||||
[]
|
||||
, Space
|
||||
, Str "consists"
|
||||
, Space
|
||||
, Str "of"
|
||||
, Space
|
||||
, Str "just"
|
||||
, Space
|
||||
, Str "a"
|
||||
, Space
|
||||
, Code ( "" , [] , [] ) "<primary>"
|
||||
, Space
|
||||
, Str "element,"
|
||||
, Space
|
||||
, Str "but"
|
||||
, Space
|
||||
, Span
|
||||
( ""
|
||||
, [ "indexterm" ]
|
||||
, [ ( "primary" , "index term" )
|
||||
, ( "secondary" , "multi-level" )
|
||||
]
|
||||
)
|
||||
[]
|
||||
, Space
|
||||
, Str "they"
|
||||
, Space
|
||||
, Str "can"
|
||||
, Space
|
||||
, Str "also"
|
||||
, Space
|
||||
, Str "consist"
|
||||
, Space
|
||||
, Str "of"
|
||||
, Space
|
||||
, Str "a"
|
||||
, Space
|
||||
, Code ( "" , [] , [] ) "<primary>"
|
||||
, Space
|
||||
, Str "and"
|
||||
, Space
|
||||
, Code ( "" , [] , [] ) "<secondary>"
|
||||
, Space
|
||||
, Str "element,"
|
||||
, Space
|
||||
, Str "and"
|
||||
, Space
|
||||
, Span
|
||||
( ""
|
||||
, [ "indexterm" ]
|
||||
, [ ( "primary" , "index term" )
|
||||
, ( "secondary" , "multi-level" )
|
||||
, ( "tertiary" , "3-level" )
|
||||
]
|
||||
)
|
||||
[]
|
||||
, Space
|
||||
, Str "can"
|
||||
, Space
|
||||
, Str "even"
|
||||
, Space
|
||||
, Str "include"
|
||||
, Space
|
||||
, Str "a"
|
||||
, Space
|
||||
, Code ( "" , [] , [] ) "<tertiary>"
|
||||
, Space
|
||||
, Str "term."
|
||||
]
|
||||
, Para
|
||||
[ Str "Index"
|
||||
, Space
|
||||
, Str "terms"
|
||||
, Space
|
||||
, Str "can"
|
||||
, Space
|
||||
, Str "also"
|
||||
, Space
|
||||
, Str "refer"
|
||||
, Space
|
||||
, Str "to"
|
||||
, Space
|
||||
, Str "other"
|
||||
, Space
|
||||
, Str "index"
|
||||
, Space
|
||||
, Str "terms:"
|
||||
, Space
|
||||
, Span
|
||||
( ""
|
||||
, [ "indexterm" ]
|
||||
, [ ( "primary" , "index cross referencing" ) ]
|
||||
)
|
||||
[]
|
||||
, Span
|
||||
( ""
|
||||
, [ "indexterm" ]
|
||||
, [ ( "primary" , "index term" )
|
||||
, ( "secondary" , "cross references" )
|
||||
, ( "see" , "index cross referencing" )
|
||||
]
|
||||
)
|
||||
[]
|
||||
, Str "exclusively,"
|
||||
, Space
|
||||
, Str "using"
|
||||
, Space
|
||||
, Str "the"
|
||||
, Space
|
||||
, Code ( "" , [] , [] ) "<see>"
|
||||
, Space
|
||||
, Str "tag;"
|
||||
, Space
|
||||
, Str "or"
|
||||
, Space
|
||||
, Span
|
||||
( ""
|
||||
, [ "indexterm" ]
|
||||
, [ ( "primary" , "index cross referencing" )
|
||||
, ( "seealso" , "cross referencing" )
|
||||
]
|
||||
)
|
||||
[]
|
||||
, Space
|
||||
, Str "as"
|
||||
, Space
|
||||
, Str "a"
|
||||
, Space
|
||||
, Str "reference"
|
||||
, Space
|
||||
, Str "to"
|
||||
, Space
|
||||
, Str "related"
|
||||
, Space
|
||||
, Str "terms,"
|
||||
, Space
|
||||
, Str "using"
|
||||
, Space
|
||||
, Str "the"
|
||||
, Space
|
||||
, Code ( "" , [] , [] ) "<seealso>"
|
||||
, Space
|
||||
, Str "tag."
|
||||
]
|
||||
, Para
|
||||
[ Span
|
||||
( ""
|
||||
, [ "indexterm" ]
|
||||
, [ ( "primary" , "food" )
|
||||
, ( "secondary" , "big baguette supreme" )
|
||||
]
|
||||
)
|
||||
[]
|
||||
, Str "Nested"
|
||||
, Space
|
||||
, Str "content"
|
||||
, Space
|
||||
, Str "in"
|
||||
, Space
|
||||
, Str "index"
|
||||
, Space
|
||||
, Str "term"
|
||||
, Space
|
||||
, Str "elements"
|
||||
, Space
|
||||
, Str "is"
|
||||
, Space
|
||||
, Str "flattened."
|
||||
]
|
||||
]
|
||||
|
|
Loading…
Add table
Reference in a new issue