From 7a888e8603f70351094b0725fba76c4111e72dac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 8 Feb 2022 00:01:17 -0800 Subject: [PATCH] Fix parsing of epub footnotes. Closes #7884. --- src/Text/Pandoc/Readers/HTML.hs | 81 +++++++++++++++++++-------- src/Text/Pandoc/Readers/HTML/Types.hs | 1 + test/epub/wasteland.native | 2 +- 3 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d3b91c370..47b6af193 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -89,7 +89,7 @@ readHtml opts inp = do result <- flip runReaderT def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } - [] Nothing Set.empty [] M.empty opts) + [] Nothing Set.empty [] M.empty opts False) "source" tags case result of Right doc -> return doc @@ -106,8 +106,8 @@ stripPrefix x = x replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do - st <- getState - walkM (replaceNotes' (noteTable st)) bs + notes <- noteTable <$> getState + walkM (replaceNotes' notes) bs replaceNotes' :: PandocMonad m => [(Text, Blocks)] -> Inline -> TagParser m Inline @@ -178,6 +178,9 @@ block = ((do , epubExts , "chapter" `T.isInfixOf` type' -> eSection + _ | epubExts + , type' `elem` ["footnotes", "rearnotes"] + -> eFootnotes _ | epubExts , type' `elem` ["footnote", "rearnote"] -> mempty <$ eFootnote @@ -256,19 +259,39 @@ eCase = do Nothing -> Nothing <$ manyTill pAny (pSatisfy (matchTagClose "case")) eFootnote :: PandocMonad m => TagParser m () -eFootnote = try $ do - let notes = ["footnote", "rearnote"] +eFootnote = do + guardEnabled Ext_epub_html_exts + TagOpen tag attr' <- lookAhead $ pSatisfy + (\case + TagOpen _ attr' + -> case lookup "type" attr' <|> lookup "epub:type" attr' of + Just "footnote" -> True + Just "rearnote" -> True + _ -> False + _ -> False) + let attr = toStringAttr attr' + let ident = fromMaybe "" (lookup "id" attr) + content <- pInTags tag block + updateState $ \s -> + s {noteTable = (ident, content) : noteTable s} + +eFootnotes :: PandocMonad m => TagParser m Blocks +eFootnotes = try $ do + let notes = ["footnotes", "rearnotes"] guardEnabled Ext_epub_html_exts (TagOpen tag attr') <- lookAhead pAny let attr = toStringAttr attr' guard $ maybe False (`elem` notes) (lookup "type" attr <|> lookup "epub:type" attr) - let ident = fromMaybe "" (lookup "id" attr) - content <- pInTags tag block - addNote ident content - -addNote :: PandocMonad m => Text -> Blocks -> TagParser m () -addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) + updateState $ \s -> s{ inFootnotes = True } + result <- pInTags tag block + updateState $ \s -> s{ inFootnotes = False } + if null result + -- if it just contains notes, we don't need the container: + then return result + -- but there might be content other than notes, in which case + -- we want a div: + else return $ B.divWith (toAttr attr') result eNoteref :: PandocMonad m => TagParser m Inlines eNoteref = try $ do @@ -337,6 +360,7 @@ parseTypeAttr _ = DefaultStyle pOrderedList :: PandocMonad m => TagParser m Blocks pOrderedList = try $ do TagOpen _ attribs' <- pSatisfy (matchTagOpen "ol" []) + isNoteList <- inFootnotes <$> getState let attribs = toStringAttr attribs' let start = fromMaybe 1 $ lookup "start" attribs >>= safeRead let style = fromMaybe DefaultStyle @@ -352,8 +376,14 @@ pOrderedList = try $ do -- note: if they have an
    or