Ensure that valid XML identifiers are used in the Docbook, EPUB, FB2,
HTML4, S5, Slidy, Slideous, ICML, ODT, and TEI writers.

Thus, if you convert `[anchor]{#1} and [link to](#1)`,
`id_1` will be used instead of `1` for the identifier.
This commit is contained in:
John MacFarlane 2022-02-23 16:54:37 -08:00
parent 9dc5e31416
commit 2b05ce6a81
7 changed files with 32 additions and 16 deletions

View file

@ -94,7 +94,8 @@ writeDocbook5 opts d =
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text
writeDocbook opts (Pandoc meta blocks) = do
writeDocbook opts doc = do
let Pandoc meta blocks = ensureValidXmlIdentifiers doc
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing

View file

@ -35,7 +35,8 @@ import qualified Data.Text.Lazy as TL
import System.FilePath (takeExtension, takeFileName, makeRelative)
import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags)
import Text.Pandoc.Builder (fromList, setMeta)
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Writers.Shared (ensureValidXmlIdentifiers)
import Text.Pandoc.Class (PandocMonad, report)
import qualified Text.Pandoc.Class.PandocPure as P
import qualified Text.Pandoc.Class.PandocMonad as P
import Data.Time
@ -435,8 +436,9 @@ pandocToEPUB :: PandocMonad m
-> Pandoc
-> E m B.ByteString
pandocToEPUB version opts doc = do
let doc' = ensureValidXmlIdentifiers doc
-- handle pictures
Pandoc meta blocks <- walkM (transformInline opts) doc >>=
Pandoc meta blocks <- walkM (transformInline opts) doc' >>=
walkM transformBlock
picEntries <- mapMaybe (snd . snd) <$> gets stMediaPaths

View file

@ -40,7 +40,8 @@ import Text.Pandoc.Logging
import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
makeSections, tshow, stringify)
import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable)
import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable,
ensureValidXmlIdentifiers)
import Data.Generics (everywhere, mkT)
-- | Data to be written at the end of the document:
@ -76,7 +77,8 @@ pandocToFB2 :: PandocMonad m
=> WriterOptions
-> Pandoc
-> FBM m Text
pandocToFB2 opts (Pandoc meta blocks) = do
pandocToFB2 opts doc = do
let Pandoc meta blocks = ensureValidXmlIdentifiers doc
modify (\s -> s { writerOptions = opts })
desc <- description meta
title <- cMapM toXml . docTitle $ meta

View file

@ -149,12 +149,14 @@ writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True }
-- | Convert Pandoc document to Html 4 string.
writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeHtml4String = writeHtmlString'
defaultWriterState{ stHtml5 = False }
writeHtml4String opts = writeHtmlString'
defaultWriterState{ stHtml5 = False } opts .
ensureValidXmlIdentifiers
-- | Convert Pandoc document to Html 4 structure.
writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False }
writeHtml4 opts = writeHtml' defaultWriterState{ stHtml5 = False } opts .
ensureValidXmlIdentifiers
-- | Convert Pandoc document to Html appropriate for an epub version.
writeHtmlStringForEPUB :: PandocMonad m
@ -164,6 +166,8 @@ writeHtmlStringForEPUB version o = writeHtmlString'
defaultWriterState{ stHtml5 = version == EPUB3,
stEPUBVersion = Just version }
o{ writerWrapText = WrapNone }
-- we don't use ensureValidXmlIdentifiers here because we
-- do that in the EPUB writer
-- | Convert Pandoc document to Reveal JS HTML slide show.
writeRevealJs :: PandocMonad m
@ -173,22 +177,25 @@ writeRevealJs = writeHtmlSlideShow' RevealJsSlides
-- | Convert Pandoc document to S5 HTML slide show.
writeS5 :: PandocMonad m
=> WriterOptions -> Pandoc -> m Text
writeS5 = writeHtmlSlideShow' S5Slides
writeS5 opts = writeHtmlSlideShow' S5Slides opts .
ensureValidXmlIdentifiers
-- | Convert Pandoc document to Slidy HTML slide show.
writeSlidy :: PandocMonad m
=> WriterOptions -> Pandoc -> m Text
writeSlidy = writeHtmlSlideShow' SlidySlides
writeSlidy opts = writeHtmlSlideShow' SlidySlides opts .
ensureValidXmlIdentifiers
-- | Convert Pandoc document to Slideous HTML slide show.
writeSlideous :: PandocMonad m
=> WriterOptions -> Pandoc -> m Text
writeSlideous = writeHtmlSlideShow' SlideousSlides
writeSlideous opts = writeHtmlSlideShow' SlideousSlides opts .
ensureValidXmlIdentifiers
-- | Convert Pandoc document to DZSlides HTML slide show.
writeDZSlides :: PandocMonad m
=> WriterOptions -> Pandoc -> m Text
writeDZSlides = writeHtmlSlideShow' DZSlides
writeDZSlides opts = writeHtmlSlideShow' DZSlides opts
writeHtmlSlideShow' :: PandocMonad m
=> HTMLSlideVariant -> WriterOptions -> Pandoc -> m Text

View file

@ -131,7 +131,8 @@ citeName = "Cite"
-- | Convert Pandoc document to string in ICML format.
writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeICML opts (Pandoc meta blocks) = do
writeICML opts doc = do
let Pandoc meta blocks = ensureValidXmlIdentifiers doc
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing

View file

@ -36,7 +36,8 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
import Text.DocLayout
import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
fixDisplayMath, getLang)
fixDisplayMath, getLang,
ensureValidXmlIdentifiers)
import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
import Text.Pandoc.Walk
import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@ -58,8 +59,9 @@ writeODT :: PandocMonad m
writeODT opts doc =
let initState = ODTState{ stEntries = []
}
doc' = ensureValidXmlIdentifiers doc
in
evalStateT (pandocToODT opts doc) initState
evalStateT (pandocToODT opts doc') initState
-- | Produce an ODT file from a Pandoc document.
pandocToODT :: PandocMonad m

View file

@ -28,7 +28,8 @@ import Text.Pandoc.XML
-- | Convert Pandoc document to string in TEI format.
writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeTEI opts (Pandoc meta blocks) = do
writeTEI opts doc = do
let Pandoc meta blocks = ensureValidXmlIdentifiers doc
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing