Text.Pandoc.XML.Light: add versions of the parsers...

that allow specifying a custom entity map.

Exports new functions `parseXMLElementWithEntities`,
`parseXMLContentsWithEntities` [API change].
This commit is contained in:
John MacFarlane 2022-02-24 14:47:35 -08:00
parent 0ae7b1e1f8
commit 7dea81f992
2 changed files with 31 additions and 5 deletions

View file

@ -521,6 +521,7 @@ library
unicode-transforms >= 0.3 && < 0.5, unicode-transforms >= 0.3 && < 0.5,
xml >= 1.3.12 && < 1.4, xml >= 1.3.12 && < 1.4,
xml-conduit >= 1.9.1.1 && < 1.10, xml-conduit >= 1.9.1.1 && < 1.10,
xml-types >= 0.3 && < 0.4,
yaml >= 0.11 && < 0.12, yaml >= 0.11 && < 0.12,
zip-archive >= 0.2.3.4 && < 0.5, zip-archive >= 0.2.3.4 && < 0.5,
zlib >= 0.5 && < 0.7 zlib >= 0.5 && < 0.7

View file

@ -36,6 +36,9 @@ module Text.Pandoc.XML.Light
-- * Replacement for xml-light's Text.XML.Input -- * Replacement for xml-light's Text.XML.Input
, parseXMLElement , parseXMLElement
, parseXMLContents , parseXMLContents
-- * Versions that allow passing in a custom entity table
, parseXMLElementWithEntities
, parseXMLContentsWithEntities
) where ) where
import qualified Control.Exception as E import qualified Control.Exception as E
@ -48,23 +51,45 @@ import Data.Maybe (mapMaybe)
import Text.Pandoc.XML.Light.Types import Text.Pandoc.XML.Light.Types
import Text.Pandoc.XML.Light.Proc import Text.Pandoc.XML.Light.Proc
import Text.Pandoc.XML.Light.Output import Text.Pandoc.XML.Light.Output
import qualified Data.XML.Types as XML
-- Drop in replacement for parseXMLDoc in xml-light. -- Drop in replacement for parseXMLDoc in xml-light.
parseXMLElement :: TL.Text -> Either T.Text Element parseXMLElement :: TL.Text -> Either T.Text Element
parseXMLElement t = parseXMLElement = parseXMLElementWithEntities mempty
-- Drop in replacement for parseXMLDoc in xml-light.
parseXMLElementWithEntities :: M.Map T.Text T.Text
-> TL.Text -> Either T.Text Element
parseXMLElementWithEntities entityMap t =
elementToElement . Conduit.documentRoot <$> elementToElement . Conduit.documentRoot <$>
either (Left . T.pack . E.displayException) Right either (Left . T.pack . E.displayException) Right
(Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True
, Conduit.psDecodeEntities = decodeEnts } t)
where
decodeEnts ref = case M.lookup ref entityMap of
Nothing -> XML.ContentEntity ref
Just t' -> XML.ContentText t'
parseXMLContents :: TL.Text -> Either T.Text [Content] parseXMLContents :: TL.Text -> Either T.Text [Content]
parseXMLContents t = parseXMLContents = parseXMLContentsWithEntities mempty
case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
parseXMLContentsWithEntities :: M.Map T.Text T.Text
-> TL.Text -> Either T.Text [Content]
parseXMLContentsWithEntities entityMap t =
case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True
, Conduit.psDecodeEntities = decodeEnts
} t of
Left e -> Left e ->
case E.fromException e of case E.fromException e of
Just (ContentAfterRoot _) -> Just (ContentAfterRoot _) ->
elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>") elContent <$> parseXMLElementWithEntities entityMap
("<wrapper>" <> t <> "</wrapper>")
_ -> Left . T.pack . E.displayException $ e _ -> Left . T.pack . E.displayException $ e
Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x] Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x]
where
decodeEnts ref = case M.lookup ref entityMap of
Nothing -> XML.ContentEntity ref
Just t' -> XML.ContentText t'
elementToElement :: Conduit.Element -> Element elementToElement :: Conduit.Element -> Element
elementToElement (Conduit.Element name attribMap nodes) = elementToElement (Conduit.Element name attribMap nodes) =