Support --reference-location for HTML output (#7461)

The HTML writer now supports `EndOfBlock`, `EndOfSection`, and
`EndOfDocument` for reference locations.  EPUB and HTML slide
show formats are also affected by this change.

This works similarly to the markdown writer, but with special care
taken to skipping section divs with what regards to the block level.

The change also takes care to not modify the output if `EndOfDocument`
is used.
This commit is contained in:
Francesco Mazzoli 2021-09-10 18:30:05 +02:00 committed by GitHub
parent 1481dae629
commit 99a4d1d0b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 169 additions and 39 deletions

View file

@ -972,7 +972,7 @@ header when requesting a document from a URL:
: Specify whether footnotes (and references, if `reference-links` is
set) are placed at the end of the current (top-level) block, the
current section, or the document. The default is
`document`. Currently only affects the markdown writer.
`document`. Currently only affects the markdown and HTML writers.
`--markdown-headings=setext`|`atx`

View file

@ -995,7 +995,7 @@ Specify whether footnotes (and references, if \f[C]reference-links\f[R]
is set) are placed at the end of the current (top-level) block, the
current section, or the document.
The default is \f[C]document\f[R].
Currently only affects the markdown writer.
Currently only affects the markdown and HTML writers.
.TP
\f[B]\f[CB]--markdown-headings=setext\f[B]\f[R]|\f[B]\f[CB]atx\f[B]\f[R]
Specify whether to use ATX-style (\f[C]#\f[R]-prefixed) or Setext-style

View file

@ -74,9 +74,11 @@ import Text.TeXMath
import Text.XML.Light (elChildren, unode, unqual)
import qualified Text.XML.Light as XML
import Text.XML.Light.Output
import Data.String (fromString)
data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
, stEmittedNotes :: Int -- ^ How many notes we've already pushed out to the HTML
, stMath :: Bool -- ^ Math is used in document
, stQuotes :: Bool -- ^ <q> tag is used
, stHighlighting :: Bool -- ^ Syntax highlighting is used
@ -88,10 +90,11 @@ data WriterState = WriterState
, stCodeBlockNum :: Int -- ^ Number of code block
, stCsl :: Bool -- ^ Has CSL references
, stCslEntrySpacing :: Maybe Int -- ^ CSL entry spacing
, stBlockLevel :: Int -- ^ Current block depth, excluding section divs
}
defaultWriterState :: WriterState
defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
defaultWriterState = WriterState {stNotes= [], stEmittedNotes = 0, stMath = False, stQuotes = False,
stHighlighting = False,
stHtml5 = False,
stEPUBVersion = Nothing,
@ -100,7 +103,8 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
stInSection = False,
stCodeBlockNum = 0,
stCsl = False,
stCslEntrySpacing = Nothing}
stCslEntrySpacing = Nothing,
stBlockLevel = 0}
-- Helpers to render HTML with the appropriate function.
@ -266,8 +270,16 @@ pandocToHtml opts (Pandoc meta blocks) = do
then fmap renderHtml' <$> tableOfContents opts sects
else return Nothing
blocks' <- blockListToHtml opts sects
notes <- do
-- make the st private just to be safe, since we modify it right afterwards
st <- get
if null (stNotes st)
then return mempty
else do
notes <- footnoteSection opts EndOfDocument (stEmittedNotes st + 1) (reverse (stNotes st))
modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') })
return notes
st <- get
notes <- footnoteSection opts (reverse (stNotes st))
let thebody = blocks' >> notes
let math = case writerHTMLMathMethod opts of
MathJax url
@ -490,28 +502,43 @@ tableOfContents opts sects = do
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
footnoteSection :: PandocMonad m
=> WriterOptions -> [Html] -> StateT WriterState m Html
footnoteSection opts notes = do
footnoteSection ::
PandocMonad m => WriterOptions -> ReferenceLocation -> Int -> [Html] -> StateT WriterState m Html
footnoteSection opts refLocation startCounter notes = do
html5 <- gets stHtml5
slideVariant <- gets stSlideVariant
let hrtag = if html5 then H5.hr else H.hr
let hrtag = if refLocation /= EndOfBlock then (if html5 then H5.hr else H.hr) else mempty
let additionalClassName = case refLocation of
EndOfBlock -> "footnotes-end-of-block"
EndOfDocument -> "footnotes-end-of-document"
EndOfSection -> "footnotes-end-of-section"
let className = "footnotes " <> additionalClassName
epubVersion <- gets stEPUBVersion
let container x
| html5
, epubVersion == Just EPUB3
= H5.section ! A.class_ "footnotes"
= H5.section ! A.class_ className
! customAttribute "epub:type" "footnotes" $ x
| html5 = H5.section ! A.class_ "footnotes"
| html5 = H5.section ! A.class_ className
! customAttribute "role" "doc-endnotes"
$ x
| slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x
| otherwise = H.div ! A.class_ "footnotes" $ x
| otherwise = H.div ! A.class_ className $ x
return $
if null notes
then mempty
else nl opts >> container (nl opts >> hrtag >> nl opts >>
H.ol (mconcat notes >> nl opts) >> nl opts)
else do
nl opts
container $ do
nl opts
hrtag
nl opts
-- Keep the previous output exactly the same if we don't
-- have multiple notes sections
if startCounter == 1
then H.ol $ mconcat notes >> nl opts
else H.ol ! A.start (fromString (show startCounter)) $ mconcat notes >> nl opts
nl opts
-- | Parse a mailto link; return Just (name, domain) or Nothing.
parseMailto :: Text -> Maybe (Text, Text)
@ -702,11 +729,10 @@ adjustNumbers opts doc =
fixnum x = x
showSecNum = T.intercalate "." . map tshow
-- | Convert Pandoc block element to HTML.
blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
blockToHtml _ Null = return mempty
blockToHtml opts (Plain lst) = inlineListToHtml opts lst
blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)])
blockToHtmlInner :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
blockToHtmlInner _ Null = return mempty
blockToHtmlInner opts (Plain lst) = inlineListToHtml opts lst
blockToHtmlInner opts (Para [Image attr@(_,classes,_) txt (src,tit)])
| "stretch" `elem` classes = do
slideVariant <- gets stSlideVariant
case slideVariant of
@ -716,20 +742,20 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)])
inlineToHtml opts (Image attr txt (src, tit))
_ -> figure opts attr txt (src, tit)
-- title beginning with fig: indicates that the image is a figure
blockToHtml opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) =
blockToHtmlInner opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) =
figure opts attr txt (s,tit)
blockToHtml opts (Para lst) = do
blockToHtmlInner opts (Para lst) = do
contents <- inlineListToHtml opts lst
case contents of
Empty _ | not (isEnabled Ext_empty_paragraphs opts) -> return mempty
_ -> return $ H.p contents
blockToHtml opts (LineBlock lns) =
blockToHtmlInner opts (LineBlock lns) =
if writerWrapText opts == WrapNone
then blockToHtml opts $ linesToPara lns
else do
htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns
return $ H.div ! A.class_ "line-block" $ htmlLines
blockToHtml opts (Div (ident, "section":dclasses, dkvs)
blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs)
(Header level
hattr@(hident,hclasses,hkvs) ils : xs)) = do
slideVariant <- gets stSlideVariant
@ -810,7 +836,7 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs)
if null innerSecs
then mempty
else nl opts <> innerContents
blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
blockToHtmlInner opts (Div attr@(ident, classes, kvs') bs) = do
html5 <- gets stHtml5
slideVariant <- gets stSlideVariant
let isCslBibBody = ident == "refs" || "csl-bib-body" `elem` classes
@ -864,7 +890,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
_ -> return mempty
else addAttrs opts (ident, classes'', kvs) $
divtag contents'
blockToHtml opts (RawBlock f str) = do
blockToHtmlInner opts (RawBlock f str) = do
ishtml <- isRawHtml f
if ishtml
then return $ preEscapedText str
@ -875,10 +901,10 @@ blockToHtml opts (RawBlock f str) = do
else do
report $ BlockNotRendered (RawBlock f str)
return mempty
blockToHtml _ HorizontalRule = do
blockToHtmlInner _ HorizontalRule = do
html5 <- gets stHtml5
return $ if html5 then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
blockToHtmlInner opts (CodeBlock (id',classes,keyvals) rawCode) = do
id'' <- if T.null id'
then do
modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 }
@ -910,7 +936,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
-- we set writerIdentifierPrefix to "" since id'' already
-- includes it:
addAttrs opts{writerIdentifierPrefix = ""} (id'',[],keyvals) h
blockToHtml opts (BlockQuote blocks) = do
blockToHtmlInner opts (BlockQuote blocks) = do
-- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
@ -932,7 +958,7 @@ blockToHtml opts (BlockQuote blocks) = do
else do
contents <- blockListToHtml opts blocks
return $ H.blockquote $ nl opts >> contents >> nl opts
blockToHtml opts (Header level (ident,classes,kvs) lst) = do
blockToHtmlInner opts (Header level (ident,classes,kvs) lst) = do
contents <- inlineListToHtml opts lst
let secnum = fromMaybe mempty $ lookup "number" kvs
let contents' = if writerNumberSections opts && not (T.null secnum)
@ -955,12 +981,12 @@ blockToHtml opts (Header level (ident,classes,kvs) lst) = do
5 -> H.h5 contents'
6 -> H.h6 contents'
_ -> H.p ! A.class_ "heading" $ contents'
blockToHtml opts (BulletList lst) = do
blockToHtmlInner opts (BulletList lst) = do
contents <- mapM (listItemToHtml opts) lst
let isTaskList = not (null lst) && all isTaskListItem lst
(if isTaskList then (! A.class_ "task-list") else id) <$>
unordList opts contents
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
blockToHtmlInner opts (OrderedList (startnum, numstyle, _) lst) = do
contents <- mapM (listItemToHtml opts) lst
html5 <- gets stHtml5
let numstyle' = case numstyle of
@ -983,7 +1009,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
else [])
l <- ordList opts contents
return $ foldl' (!) l attribs
blockToHtml opts (DefinitionList lst) = do
blockToHtmlInner opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
do term' <- liftM H.dt $ inlineListToHtml opts term
defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) .
@ -991,9 +1017,39 @@ blockToHtml opts (DefinitionList lst) = do
return $ mconcat $ nl opts : term' : nl opts :
intersperse (nl opts) defs') lst
defList opts contents
blockToHtml opts (Table attr caption colspecs thead tbody tfoot) =
blockToHtmlInner opts (Table attr caption colspecs thead tbody tfoot) =
tableToHtml opts (Ann.toTable attr caption colspecs thead tbody tfoot)
-- | Convert Pandoc block element to HTML. All the legwork is done by
-- 'blockToHtmlInner', this just takes care of emitting the notes after
-- the block if necessary.
blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
blockToHtml opts block = do
-- Ignore inserted section divs -- they are not blocks as they came from
-- the document itself (at least not when coming from markdown)
let isSection = case block of
Div (_, classes, _) _ | "section" `elem` classes -> True
_ -> False
let increaseLevel = not isSection
when increaseLevel $
modify (\st -> st{ stBlockLevel = stBlockLevel st + 1 })
doc <- blockToHtmlInner opts block
st <- get
let emitNotes =
(writerReferenceLocation opts == EndOfBlock && stBlockLevel st == 1) ||
(writerReferenceLocation opts == EndOfSection && isSection)
res <- if emitNotes
then do
notes <- if null (stNotes st)
then return mempty
else footnoteSection opts (writerReferenceLocation opts) (stEmittedNotes st + 1) (reverse (stNotes st))
modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') })
return (doc <> notes)
else return doc
when increaseLevel $
modify (\st' -> st'{ stBlockLevel = stBlockLevel st' - 1 })
return res
tableToHtml :: PandocMonad m
=> WriterOptions
-> Ann.Table
@ -1468,7 +1524,8 @@ inlineToHtml opts inline = do
-- note: null title included, as in Markdown.pl
(Note contents) -> do
notes <- gets stNotes
let number = length notes + 1
emittedNotes <- gets stEmittedNotes
let number = emittedNotes + length notes + 1
let ref = tshow number
htmlContents <- blockListToNote opts ref contents
epubVersion <- gets stEPUBVersion

View file

@ -8,8 +8,11 @@ import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
htmlWithOpts :: (ToPandoc a) => WriterOptions -> a -> String
htmlWithOpts opts = unpack . purely (writeHtml4String opts{ writerWrapText = WrapNone }) . toPandoc
html :: (ToPandoc a) => a -> String
html = unpack . purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc
html = htmlWithOpts def
htmlQTags :: (ToPandoc a) => a -> String
htmlQTags = unpack
@ -33,6 +36,21 @@ infix 4 =:
=> String -> (a, String) -> TestTree
(=:) = test html
noteTestDoc :: Blocks
noteTestDoc =
header 1 "Page title" <>
header 2 "First section" <>
para ("This is a footnote." <>
note (para "Down here.") <>
" And this is a " <>
link "https://www.google.com" "" "link" <>
".") <>
blockQuote (para ("A note inside a block quote." <>
note (para "The second note.")) <>
para "A second paragraph.") <>
header 2 "Second section" <>
para "Some more text."
tests :: [TestTree]
tests =
[ testGroup "inline code"
@ -86,6 +104,61 @@ tests =
=?> ("<var><code class=\"sourceCode haskell\">" ++
"<span class=\"op\">&gt;&gt;=</span></code></var>")
]
, testGroup "footnotes"
[ test (htmlWithOpts def{writerReferenceLocation=EndOfDocument})
"at the end of a document" $
noteTestDoc =?>
concat
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
, "<div class=\"footnotes footnotes-end-of-document\"><hr /><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
]
, test (htmlWithOpts def{writerReferenceLocation=EndOfBlock})
"at the end of a block" $
noteTestDoc =?>
concat
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<div class=\"footnotes footnotes-end-of-block\"><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "<div class=\"footnotes footnotes-end-of-block\"><ol start=\"2\"><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
]
, test (htmlWithOpts def{writerReferenceLocation=EndOfSection})
"at the end of a section" $
noteTestDoc =?>
concat
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "<div class=\"footnotes footnotes-end-of-section\"><hr /><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
]
, test (htmlWithOpts def{writerReferenceLocation=EndOfSection, writerSectionDivs=True})
"at the end of a section, with section divs" $
noteTestDoc =?>
-- Footnotes are rendered _after_ their section (in this case after the level2 section
-- that contains it).
concat
[ "<div class=\"section level1\">"
, "<h1>Page title</h1>"
, "<div class=\"section level2\">"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p><blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "</div>"
, "<div class=\"footnotes footnotes-end-of-section\"><hr /><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<div class=\"section level2\"><h2>Second section</h2><p>Some more text.</p></div>"
, "</div>"
]
]
]
where
tQ :: (ToString a, ToPandoc a)

View file

@ -3,7 +3,7 @@
This.^[Has a footnote.]
^D
<p>This.<a href="#foofn1" class="footnote-ref" id="foofnref1" role="doc-noteref"><sup>1</sup></a></p>
<section class="footnotes" role="doc-endnotes">
<section class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr />
<ol>
<li id="foofn1" role="doc-endnote"><p>Has a footnote.<a href="#foofnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>

View file

@ -7,7 +7,7 @@ Test.[^fn]
![Caption.](/image.jpg)
^D
<p>Test.<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a></p>
<section class="footnotes" role="doc-endnotes">
<section class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr />
<ol>
<li id="fn1" role="doc-endnote"><p>Foo:</p>

View file

@ -665,7 +665,7 @@ Blah
<li>And in list items.<a href="#fn5" class="footnote-ref" id="fnref5"><sup>5</sup></a></li>
</ol>
<p>This paragraph should not be part of the note, as it is not indented.</p>
<div class="footnotes">
<div class="footnotes footnotes-end-of-document">
<hr />
<ol>
<li id="fn1"><p>Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.<a href="#fnref1" class="footnote-back">↩︎</a></p></li>

View file

@ -667,7 +667,7 @@ Blah
<li>And in list items.<a href="#fn5" class="footnote-ref" id="fnref5" role="doc-noteref"><sup>5</sup></a></li>
</ol>
<p>This paragraph should not be part of the note, as it is not indented.</p>
<section class="footnotes" role="doc-endnotes">
<section class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr />
<ol>
<li id="fn1" role="doc-endnote"><p>Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>