From 7dea81f9928a754a5e620e01f36d484734442e45 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 24 Feb 2022 14:47:35 -0800 Subject: [PATCH] Text.Pandoc.XML.Light: add versions of the parsers... that allow specifying a custom entity map. Exports new functions `parseXMLElementWithEntities`, `parseXMLContentsWithEntities` [API change]. --- pandoc.cabal | 1 + src/Text/Pandoc/XML/Light.hs | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pandoc.cabal b/pandoc.cabal index 0a670c49b..a0190a36f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -521,6 +521,7 @@ library unicode-transforms >= 0.3 && < 0.5, xml >= 1.3.12 && < 1.4, xml-conduit >= 1.9.1.1 && < 1.10, + xml-types >= 0.3 && < 0.4, yaml >= 0.11 && < 0.12, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7 diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs index 8ba71b68f..85095338d 100644 --- a/src/Text/Pandoc/XML/Light.hs +++ b/src/Text/Pandoc/XML/Light.hs @@ -36,6 +36,9 @@ module Text.Pandoc.XML.Light -- * Replacement for xml-light's Text.XML.Input , parseXMLElement , parseXMLContents + -- * Versions that allow passing in a custom entity table + , parseXMLElementWithEntities + , parseXMLContentsWithEntities ) where import qualified Control.Exception as E @@ -48,23 +51,45 @@ import Data.Maybe (mapMaybe) import Text.Pandoc.XML.Light.Types import Text.Pandoc.XML.Light.Proc import Text.Pandoc.XML.Light.Output +import qualified Data.XML.Types as XML -- Drop in replacement for parseXMLDoc in xml-light. parseXMLElement :: TL.Text -> Either T.Text Element -parseXMLElement t = +parseXMLElement = parseXMLElementWithEntities mempty + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElementWithEntities :: M.Map T.Text T.Text + -> TL.Text -> Either T.Text Element +parseXMLElementWithEntities entityMap t = elementToElement . Conduit.documentRoot <$> either (Left . T.pack . E.displayException) Right - (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True + , Conduit.psDecodeEntities = decodeEnts } t) + where + decodeEnts ref = case M.lookup ref entityMap of + Nothing -> XML.ContentEntity ref + Just t' -> XML.ContentText t' parseXMLContents :: TL.Text -> Either T.Text [Content] -parseXMLContents t = - case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of +parseXMLContents = parseXMLContentsWithEntities mempty + +parseXMLContentsWithEntities :: M.Map T.Text T.Text + -> TL.Text -> Either T.Text [Content] +parseXMLContentsWithEntities entityMap t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True + , Conduit.psDecodeEntities = decodeEnts + } t of Left e -> case E.fromException e of Just (ContentAfterRoot _) -> - elContent <$> parseXMLElement ("" <> t <> "") + elContent <$> parseXMLElementWithEntities entityMap + ("" <> t <> "") _ -> Left . T.pack . E.displayException $ e Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x] + where + decodeEnts ref = case M.lookup ref entityMap of + Nothing -> XML.ContentEntity ref + Just t' -> XML.ContentText t' elementToElement :: Conduit.Element -> Element elementToElement (Conduit.Element name attribMap nodes) =