HTML reader now recognizes DocBook block and inline tags.

It has always been possible to include raw DocBook tags in a markdown
document, but pandoc can now distinguish block tags from inline tags
and behave accordingly. Thus, for example,

    <sidebar>
    hello
    </sidebar>

will not be wrapped in `<para>` tags.
This commit is contained in:
John MacFarlane 2011-10-25 12:44:20 -07:00
parent c8bc0391a8
commit 1b81981c5f
3 changed files with 29 additions and 10 deletions

7
README
View file

@ -1514,7 +1514,7 @@ MediaWiki
Textile Textile
~ It will be rendered inside `<span class="math">` tags. ~ It will be rendered inside `<span class="math">` tags.
RTF, Docbook, OpenDocument, ODT RTF, DocBook, OpenDocument, ODT
~ It will be rendered, if possible, using unicode characters, ~ It will be rendered, if possible, using unicode characters,
and will otherwise appear verbatim. and will otherwise appear verbatim.
@ -1523,7 +1523,7 @@ HTML, Slidy, DZSlides, S5, EPUB
command-line options selected: command-line options selected:
1. The default is to render TeX math as far as possible using unicode 1. The default is to render TeX math as far as possible using unicode
characters, as with RTF, Docbook, and OpenDocument output. Formulas characters, as with RTF, DocBook, and OpenDocument output. Formulas
are put inside a `span` with `class="math"`, so that they may be are put inside a `span` with `class="math"`, so that they may be
styled differently from the surrounding text if needed. styled differently from the surrounding text if needed.
@ -1565,7 +1565,7 @@ HTML, Slidy, DZSlides, S5, EPUB
Raw HTML Raw HTML
-------- --------
Markdown allows you to insert raw HTML anywhere in a document Markdown allows you to insert raw HTML (or DocBook) anywhere in a document
(except verbatim contexts, where `<`, `>`, and `&` are interpreted (except verbatim contexts, where `<`, `>`, and `&` are interpreted
literally). literally).
@ -1610,7 +1610,6 @@ markdown with HTML block elements. For example, one can surround
a block of markdown text with `<div>` tags without preventing it a block of markdown text with `<div>` tags without preventing it
from being interpreted as markdown. from being interpreted as markdown.
Raw TeX Raw TeX
------- -------

View file

@ -503,16 +503,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
"dt", "frameset", "li", "tbody", "td", "tfoot", "dt", "frameset", "li", "tbody", "td", "tfoot",
"th", "thead", "tr", "script", "style"] "th", "thead", "tr", "script", "style"]
-- | Names of DocBook elements that are treated as block-level.
--
-- We want to allow raw DocBook in markdown documents, so these tags are
-- recognized as block tags in addition to the HTML block tags.
-- Each name is listed exactly once; a few names ("address", "blockquote",
-- "table") are also HTML block tags and are kept here so that this list
-- describes the DocBook set on its own.
blockDocBookTags :: [String]
blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
                    "orderedlist", "segmentedlist", "simplelist",
                    "variablelist", "caution", "important", "note", "tip",
                    "warning", "address", "literallayout", "programlisting",
                    "programlistingco", "screen", "screenco", "screenshot",
                    "synopsis", "example", "informalexample", "figure",
                    "informalfigure", "table", "informaltable", "para",
                    "simpara", "formalpara", "equation", "informalequation",
                    "mediaobject", "qandaset",
                    "procedure", "task", "cmdsynopsis", "funcsynopsis",
                    "classsynopsis", "blockquote", "epigraph", "msgset",
                    "sidebar"]
-- | Every tag name treated as block-level: the HTML block tags followed
-- by the DocBook block tags.
blockTags :: [String]
blockTags = concat [blockHtmlTags, blockDocBookTags]
isInlineTag :: Tag String -> Bool isInlineTag :: Tag String -> Bool
isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t || isInlineTag t = tagOpen (`notElem` blockTags) (const True) t ||
tagClose (`notElem` blockHtmlTags) t || tagClose (`notElem` blockTags) t ||
tagComment (const True) t tagComment (const True) t
isBlockTag :: Tag String -> Bool isBlockTag :: Tag String -> Bool
isBlockTag t = tagOpen (`elem` blocktags) (const True) t || isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
tagClose (`elem` blocktags) t || tagClose (`elem` blocktags) t ||
tagComment (const True) t tagComment (const True) t
where blocktags = blockHtmlTags ++ eitherBlockOrInline where blocktags = blockTags ++ eitherBlockOrInline
isTextTag :: Tag String -> Bool isTextTag :: Tag String -> Bool
isTextTag = tagText (const True) isTextTag = tagText (const True)
@ -547,8 +566,8 @@ t `closes` t2 |
t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
t1 `closes` t2 | t1 `closes` t2 |
t1 `elem` blockHtmlTags && t1 `elem` blockTags &&
t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
_ `closes` _ = False _ `closes` _ = False
--- parsers for use in markdown, textile readers --- parsers for use in markdown, textile readers

View file

@ -255,7 +255,8 @@ inlineToDocbook _ EnDash = text ""
inlineToDocbook _ (Code _ str) = inlineToDocbook _ (Code _ str) =
inTagsSimple "literal" $ text (escapeStringForXML str) inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
inlineToDocbook _ (RawInline _ _) = empty inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
| otherwise = empty
inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
inlineToDocbook _ Space = space inlineToDocbook _ Space = space
inlineToDocbook opts (Link txt (src, _)) = inlineToDocbook opts (Link txt (src, _)) =