Add new unexported module T.P.XMLParser.

This exports functions that uses xml-conduit's parser to produce an xml-light Element or [Content]. This allows existing pandoc code to use a better parser without much modification. The new parser is used in all places where xml-light's parser was previously used. Benchmarks show a significant performance improvement in parsing XML-based formats (especially ODT and FB2). Note that the xml-light types use String, so the conversion from xml-conduit types involves a lot of extra allocation. It would be desirable to avoid that in the future by gradually switching to using xml-conduit directly. This can be done module by module. The new parser also reports errors, which we report when possible. A new constructor PandocXMLError has been added to PandocError in T.P.Error [API change]. Closes #7091, which was the main stimulus. These changes revealed the need for some changes in the tests. The docbook-reader.docbook test lacked definitions for the entities it used; these have been added. And the docx golden tests have been updated, because the new parser does not preserve the order of attributes. Add entity defs to docbook-reader.docbook. Update golden tests for docx.
2021-02-08 23:35:19 -08:00 · 2021-02-08 23:35:19 -08:00 · 8ca191604d
commit 8ca191604d
parent 9994ad977d
98 changed files with 238 additions and 91 deletions
--- a/MANUAL.txt
+++ b/MANUAL.txt
@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings:
   24 PandocCiteprocError
   31 PandocEpubSubdirectoryError
   43 PandocPDFError
+   44 PandocXMLError
   47 PandocPDFProgramNotFoundError
   61 PandocHttpError
   62 PandocShouldNeverHappenError
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -493,6 +493,7 @@ library
                 unicode-transforms    >= 0.3      && < 0.4,
                 unordered-containers  >= 0.2      && < 0.3,
                 xml                   >= 1.3.12   && < 1.4,
+                 xml-conduit           >= 1.7      && < 1.10,
                 zip-archive           >= 0.2.3.4  && < 0.5,
                 zlib                  >= 0.5      && < 0.7
  if os(windows) && arch(i386)
@ -686,6 +687,7 @@ library
                   Text.Pandoc.Lua.PandocLua,
                   Text.Pandoc.Lua.Util,
                   Text.Pandoc.Lua.Walk,
+                   Text.Pandoc.XMLParser,
                   Text.Pandoc.CSS,
                   Text.Pandoc.CSV,
                   Text.Pandoc.RoffChar,
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError
                 | PandocFailOnWarningError
                 | PandocPDFProgramNotFoundError Text
                 | PandocPDFError Text
+                 | PandocXMLError Text Text
                 | PandocFilterError Text Text
                 | PandocLuaError Text
                 | PandocCouldNotFindDataFileError Text
@ -103,6 +104,8 @@ handleError (Left e) =
    PandocPDFProgramNotFoundError pdfprog -> err 47 $
        pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog
    PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg
+    PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <>
+        (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg
    PandocFilterError filtername msg -> err 83 $ "Error running filter " <>
        filtername <> ":\n" <> msg
    PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@ -45,7 +45,9 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.XML.Light as Xml
+import Text.Pandoc.XMLParser (parseXMLElement)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Control.Applicative
 import qualified Data.Attoparsec.ByteString.Char8 as A
@ -327,7 +329,8 @@ getSize img =

 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
-  doc <- Xml.parseXMLDoc $ UTF8.toString img
+  doc <- either (const mzero) return $ parseXMLElement
+                                     $ TL.fromStrict $ UTF8.toText img
  let viewboxSize = do
        vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
        [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, toUpper, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
 import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)

 {-

@ -537,22 +541,25 @@ instance Default DBState where

 readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readDocBook _ inp = do
-  let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+  tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp)
  (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
  return $ Pandoc (dbMeta st') (toList . mconcat $ bs)

-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
-- Other xml instructions are simply removed from the input stream.
+-- We treat certain processing instructions by converting them to tags
+-- beginning "pi-".
 handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
-                             (ys, [])     -> ys
-                             ([], '<':zs) -> '<' : handleInstructions' zs
-                             (ys, zs)     -> ys ++ handleInstructions' zs
+handleInstructions t =
+  let (x,y) = T.breakOn "<?" t
+   in if T.null y
+         then x
+         else
+           let (w,z) = T.breakOn "?>" y
+            in (if T.takeWhile (\c -> isLetter c || c == '-')
+                    (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+                   then x <> "<pi-" <> T.drop 2 w <> "/>"
+                   else x <> w <> T.take 2 z) <>
+               handleInstructions (T.drop 2 z)

 getFigure :: PandocMonad m => Element -> DB m Blocks
 getFigure e = do
@ -892,7 +899,11 @@ parseBlock (Elem e) =
        "subtitle" -> return mempty  -- handled in parent element
        _       -> skip >> getBlocks e
   where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
           return mempty

         codeBlockWithLang = do
@ -964,7 +975,7 @@ parseBlock (Elem e) =
                                     cs -> map toAlignment cs
                      let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
                                                                   || x == '.') s)
-                      let textWidth = case filterChild (named "?dbfo") e of
+                      let textWidth = case filterChild (named "pi-dbfo") e of
                                        Just d  -> case attrValue "table-width" d of
                                                     "" -> 1.0
                                                     w  -> fromMaybe 100.0 (parseWidth w) / 100.0
@ -1165,12 +1176,15 @@ parseInline (Elem e) =
        "title" -> return mempty
        "affiliation" -> skip
        -- Note: this isn't a real docbook tag; it's what we convert
-        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
-        -- work around xml-light's inability to parse an instruction.
-        "br" -> return linebreak
+        -- <?asciidor-br?> to in handleInstructions, above.
+        "pi-asciidoc-br" -> return linebreak
        _          -> skip >> innerInlines id
   where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
           return mempty

         innerInlines f = extractSpaces f . mconcat <$>
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
 import Text.XML.Light
 import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XMLParser (parseXMLElement)

 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                           , envComments      :: Comments
@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do
    Right doc -> Right (Docx doc, stateWarnings st)
    Left e    -> Left e

+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+  case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+    Left _   -> Nothing
+    Right el -> Just el
+
 getDocumentXmlPath :: Archive -> Maybe FilePath
 getDocumentXmlPath zf = do
  entry <- findEntryByPath "_rels/.rels" zf
-  relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  relsElem <- parseXMLFromEntry entry
  let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
  rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
                       Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
  docPath <- asks envDocXmlPath
  entry <- maybeToD $ findEntryByPath docPath zf
-  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  docElem <- maybeToD $ parseXMLFromEntry entry
  let namespaces = elemToNameSpaces docElem
  bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
  let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle
 archiveToNotes :: Archive -> Notes
 archiveToNotes zf =
  let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
      enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
      fn_namespaces = maybe [] elemToNameSpaces fnElem
      en_namespaces = maybe [] elemToNameSpaces enElem
      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
@ -415,7 +422,7 @@ archiveToNotes zf =
 archiveToComments :: Archive -> Comments
 archiveToComments zf =
  let cmtsElem = findEntryByPath "word/comments.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
      cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
      cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
  in
@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
  | Just relType <- filePathToRelType fp docXmlPath
  , Just entry <- findEntryByPath fp ar
-  , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+  , Just relElems <- parseXMLFromEntry entry =
  mapMaybe (relElemToRelationship relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []

@ -527,7 +534,7 @@ archiveToNumbering' zf =
  case findEntryByPath "word/numbering.xml" zf of
    Nothing -> Just $ Numbering [] [] []
    Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      numberingElem <- parseXMLFromEntry entry
      let namespaces = elemToNameSpaces numberingElem
          numElems = findChildrenByName namespaces "w" "num" numberingElem
          absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@ -53,6 +53,7 @@ import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)

 newtype CharStyleId   = CharStyleId T.Text
  deriving (Show, Eq, Ord, IsString, FromStyleId)
@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
                           , rParentStyle = Nothing
                           }

-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
-                    (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+  :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+  => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
 archiveToStyles' conv1 conv2 zf =
-  let stylesElem = findEntryByPath "word/styles.xml" zf >>=
-                   (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-  in
-   case stylesElem of
-     Nothing -> (M.empty, M.empty)
-     Just styElem ->
-       let namespaces = elemToNameSpaces styElem
-       in
-        ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
-          M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+  case findEntryByPath "word/styles.xml" zf of
+    Nothing -> (M.empty, M.empty)
+    Just entry ->
+      case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+        Left _ -> (M.empty, M.empty)
+        Right styElem ->
+          let namespaces = elemToNameSpaces styElem
+          in
+           ( M.fromList $ map (\r -> (conv1 r, r)) $
+               buildBasedOnList namespaces styElem Nothing,
+             M.fromList $ map (\p -> (conv2 p, p)) $
+               buildBasedOnList namespaces styElem Nothing)

 isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
 isBasedOnStyle ns element parentStyle
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB
  (readEPUB)
  where

-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                          toArchiveOrFail)
 import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)

 type Items = M.Map String (FilePath, MimeType)

@ -181,7 +182,7 @@ renameMeta s         = T.pack s
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
  metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
-  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  docElem <- parseXMLDocE metaEntry
  let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
  ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
  as <- fmap (map attrToPair . elAttribs)
@ -190,7 +191,7 @@ getManifest archive = do
  let rootdir = dropFileName manifestFile
  --mime <- lookup "media-type" as
  manifest <- findEntryByPathE manifestFile archive
-  (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+  (rootdir,) <$> parseXMLDocE manifest

 -- Fixup

@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
  mkE ("No entry on path: " ++ path) $ findEntryByPath path a

-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+  either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+  doc = UTF8.toTextLazy . fromEntry $ entry
+  fp  = T.pack $ eRelativePath entry

 findElementE :: PandocMonad m => QName -> Element -> m Element
 findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@ -32,6 +32,7 @@ import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Data.Default
 import Data.Maybe
 import Text.HTML.TagSoup.Entity (lookupEntity)
@ -42,6 +43,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)

 type FB2 m = StateT FB2State m

@ -64,10 +66,10 @@ instance HasMeta FB2State where

 readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readFB2 _ inp =
-  case parseXMLDoc $ crFilter inp of
-    Nothing -> throwError $ PandocParseError "Not an XML document"
-    Just e ->  do
-      (bs, st) <- runStateT (parseRootElement e) def
+  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+    Left msg -> throwError $ PandocXMLError "" msg
+    Right el ->  do
+      (bs, st) <- runStateT (parseRootElement el) def
      let authors = if null $ fb2Authors st
                    then id
                    else setMeta "author" (map text $ reverse $ fb2Authors st)
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document.

 module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
 import Data.Char (isDigit, isSpace, toUpper)
 import Data.Default
 import Data.Generics
@ -22,6 +24,7 @@ import qualified Data.Map as Map
 import Data.Maybe (maybeToList, fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@ -29,6 +32,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))

@ -51,8 +55,9 @@ instance Default JATSState where

 readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readJATS _ inp = do
-  let tree = normalizeTree . parseXML
-               $ T.unpack $ crFilter inp
+  tree <- either (throwError . PandocXMLError "")
+                 (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict $ crFilter inp)
  (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
  return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)

--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@ -19,14 +19,18 @@ import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
+import Control.Monad.Except (throwError)

 type OPML m = StateT OPMLState m

@ -49,8 +53,10 @@ instance Default OPMLState where
 readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readOPML opts inp  = do
  (bs, st') <- runStateT
-                 (mapM parseBlock $ normalizeTree $
-                    parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+                     Left msg -> throwError $ PandocXMLError "" msg
+                     Right ns -> mapM parseBlock $ normalizeTree ns)
+                 def{ opmlOptions = opts }
  return $
    setTitle (opmlDocTitle st') $
    setAuthors (opmlDocAuthors st') $
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where

 import Codec.Archive.Zip
 import qualified Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)

 import qualified Data.ByteString.Lazy as B

@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of

 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive = either (Left. PandocParseError) Right $ do
-  let onFailure msg Nothing = Left msg
+archiveToOdt archive = do
+  let onFailure msg Nothing = Left $ PandocParseError msg
      onFailure _   (Just x) = Right x
  contentEntry <- onFailure "Could not find content.xml"
                   (findEntryByPath "content.xml" archive)
  stylesEntry <- onFailure "Could not find styles.xml"
                   (findEntryByPath "styles.xml" archive)
-  contentElem <- onFailure "Could not find content element"
-                   (entryToXmlElem contentEntry)
-  stylesElem <- onFailure "Could not find styles element"
-                   (entryToXmlElem stylesEntry)
-  styles <- either (\_ -> Left "Could not read styles") Right
+  contentElem <- entryToXmlElem contentEntry
+  stylesElem <- entryToXmlElem stylesEntry
+  styles <- either
+               (\_ -> Left $ PandocParseError "Could not read styles")
+               Right
               (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
  let filePathIsOdtMedia :: FilePath -> Bool
      filePathIsOdtMedia fp =
@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do
        in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
  let media = filteredFilesFromArchive archive filePathIsOdtMedia
  let startState = readerState styles media
-  either (\_ -> Left "Could not convert opendocument") Right
+  either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
    (runConverter' read_body startState contentElem)


 --
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem entry =
+  case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+    Right x  -> Right x
+    Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
 import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems, parseXML,
+                       add_attrs, lookupAttr, node, onlyElems,
                       ppElement, showElement, strContent, unode, unqual)
+import Text.Pandoc.XMLParser (parseXMLContents)
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                          ToContext(toVal), Val(..))
@ -160,7 +161,12 @@ mkEntry path content = do
 getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
 getEPUBMetadata opts meta = do
  let md = metadataFromMeta opts meta
-  let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
+  elts <- case writerEpubMetadata opts of
+            Nothing -> return []
+            Just t -> case parseXMLContents (TL.fromStrict t) of
+                          Left msg -> throwError $
+                            PandocXMLError "epub metadata" msg
+                          Right ns -> return (onlyElems ns)
  let md' = foldr addMetadataFromXML md elts
  let addIdentifier m =
       if null (epubIdentifier m)
@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do
                                            : case subs of
                                                 []    -> []
                                                 (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
-          where titElements = parseXML titRendered
+          where titElements = either (const []) id $
+                                parseXMLContents (TL.fromStrict titRendered)
                titRendered = case P.runPure
                               (writeHtmlStringForEPUB version
                                 opts{ writerTemplate = Nothing
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see:
 module Text.Pandoc.Writers.FB2 (writeFB2)  where

 import Control.Monad (zipWithM)
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
@ -27,16 +27,18 @@ import Data.Either (lefts, rights)
 import Data.List (intercalate)
 import Data.Text (Text, pack)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
 import Text.XML.Light
 import qualified Text.XML.Light as X
 import qualified Text.XML.Light.Cursor as XC
-import qualified Text.XML.Light.Input as XI
+import Text.Pandoc.XMLParser (parseXMLContents)

 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
                             map (el "p" . el "code" . T.unpack) . T.lines $ s
 blockToXml (RawBlock f str) =
  if f == Format "fb2"
-    then return $ XI.parseXML str
+    then
+      case parseXMLContents (TL.fromStrict str) of
+         Left msg  -> throwError $ PandocXMLError "" msg
+         Right nds -> return nds
    else return []
 blockToXml (Div _ bs) = cMapM blockToXml bs
 blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT.
 -}
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Codec.Archive.Zip
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
 import Data.Generics (everywhere', mkT)
@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
@ -35,10 +36,11 @@ import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
                                   fixDisplayMath)
-import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.TeXMath
 import Text.XML.Light

@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
 updateStyleWithLang Nothing arch = return arch
 updateStyleWithLang (Just lang) arch = do
  epochtime <- floor `fmap` lift P.getPOSIXTime
-  return arch{ zEntries = [if eRelativePath e == "styles.xml"
-                              then case parseXMLDoc
-                                      (toStringLazy (fromEntry e)) of
-                                      Nothing -> e
-                                      Just d ->
-                                        toEntry "styles.xml" epochtime
-                                        ( fromStringLazy
-                                        . ppTopElement
-                                        . addLang lang $ d )
-                              else e
-                            | e <- zEntries arch] }
+  entries <- mapM (\e -> if eRelativePath e == "styles.xml"
+                            then case parseXMLElement
+                                    (toTextLazy (fromEntry e)) of
+                                    Left msg -> throwError $
+                                        PandocXMLError "styles.xml" msg
+                                    Right d -> return $
+                                      toEntry "styles.xml" epochtime
+                                      ( fromStringLazy
+                                      . ppTopElement
+                                      . addLang lang $ d )
+                            else return e) (zEntries arch)
+  return arch{ zEntries = entries }

 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@ -35,6 +35,7 @@ import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)

 mknode :: Node t => String -> [(String,String)] -> t -> Element
 mknode s attrs =
@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath =
         findEntryByPath relpath distArchive of
            Nothing -> throwError $ PandocSomeError $
                        T.pack relpath <> " missing in reference file"
-            Just e  -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
-                       Nothing -> throwError $ PandocSomeError $
-                                   T.pack relpath <> " corrupt in reference file"
-                       Just d  -> return d
+            Just e  -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of
+                       Left msg ->
+                         throwError $ PandocXMLError (T.pack relpath) msg
+                       Right d  -> return d

 -- Copied from Util

--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer)
 getPresentationSize refArchive distArchive = do
  entry <- findEntryByPath "ppt/presentation.xml" refArchive  `mplus`
           findEntryByPath "ppt/presentation.xml" distArchive
-  presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry
+  presElement <- either (const Nothing) return $
+                   parseXMLElement $ UTF8.toTextLazy $ fromEntry entry
  let ns = elemToNameSpaces presElement
  sldSize <- findChild (elemName ns "p" "sldSz") presElement
  cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
--- a/src/Text/Pandoc/XMLParser.hs
+++ b/src/Text/Pandoc/XMLParser.hs
@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XMLParser
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Bridge to allow using xml-conduit's parser with xml-light's types.
+-}
+module Text.Pandoc.XMLParser
+  ( parseXMLElement
+  , parseXMLContents
+  , module Text.XML.Light.Types
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import qualified Text.XML.Light as Light
+import Text.XML.Light.Types
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe)
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Light.Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Light.Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing | "xmlns:" `T.isPrefixOf` localName ->
+           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
+                       (Just "xmlns")
+      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
+                       (T.unpack <$> mbpref)
+
+nodeToContent :: Conduit.Node -> Maybe Light.Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Light.Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
+nodeToContent _ = Nothing
+
--- a/test/Tests/Readers/JATS.hs
+++ b/test/Tests/Readers/JATS.hs
@ -88,6 +88,7 @@ tests = [ testGroup "inline code"
            "<p>\n\
            \  <inline-formula><alternatives>\n\
            \  <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
+            \  </alternatives></inline-formula>\n\
            \</p>"
            =?> para (math "\\sigma|_{\\{x\\}}")
          , test jats "math ml only" $
--- a/test/command/5321.md
+++ b/test/command/5321.md
@ -4,7 +4,7 @@
  <caption>
    <p>bar</p>
  </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]]
@ -17,7 +17,7 @@
    <title>foo</title>
    <p>bar</p>
  </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]]
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@ -1,6 +1,11 @@
 <?xml version="1.0" encoding="utf-8" ?>
 <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
-                  "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"
+[
+<!ENTITY GHC "GHC" >
+<!ENTITY let "LET" >
+<!ENTITY case "CASE" >
+]>
 <article>
  <articleinfo>
    <title>Pandoc Test Suite</title>
--- a/test/docx/golden/block_quotes.docx
+++ b/test/docx/golden/block_quotes.docx
--- a/test/docx/golden/codeblock.docx
+++ b/test/docx/golden/codeblock.docx
--- a/test/docx/golden/comments.docx
+++ b/test/docx/golden/comments.docx
--- a/test/docx/golden/custom_style_no_reference.docx
+++ b/test/docx/golden/custom_style_no_reference.docx
--- a/test/docx/golden/custom_style_preserve.docx
+++ b/test/docx/golden/custom_style_preserve.docx
--- a/test/docx/golden/custom_style_reference.docx
+++ b/test/docx/golden/custom_style_reference.docx
--- a/test/docx/golden/definition_list.docx
+++ b/test/docx/golden/definition_list.docx
--- a/test/docx/golden/document-properties-short-desc.docx
+++ b/test/docx/golden/document-properties-short-desc.docx
--- a/test/docx/golden/document-properties.docx
+++ b/test/docx/golden/document-properties.docx
--- a/test/docx/golden/headers.docx
+++ b/test/docx/golden/headers.docx
--- a/test/docx/golden/image.docx
+++ b/test/docx/golden/image.docx
--- a/test/docx/golden/inline_code.docx
+++ b/test/docx/golden/inline_code.docx
--- a/test/docx/golden/inline_formatting.docx
+++ b/test/docx/golden/inline_formatting.docx
--- a/test/docx/golden/inline_images.docx
+++ b/test/docx/golden/inline_images.docx
--- a/test/docx/golden/link_in_notes.docx
+++ b/test/docx/golden/link_in_notes.docx
--- a/test/docx/golden/links.docx
+++ b/test/docx/golden/links.docx
--- a/test/docx/golden/lists.docx
+++ b/test/docx/golden/lists.docx
--- a/test/docx/golden/lists_continuing.docx
+++ b/test/docx/golden/lists_continuing.docx
--- a/test/docx/golden/lists_multiple_initial.docx
+++ b/test/docx/golden/lists_multiple_initial.docx
--- a/test/docx/golden/lists_restarting.docx
+++ b/test/docx/golden/lists_restarting.docx
--- a/test/docx/golden/nested_anchors_in_header.docx
+++ b/test/docx/golden/nested_anchors_in_header.docx
--- a/test/docx/golden/notes.docx
+++ b/test/docx/golden/notes.docx
--- a/test/docx/golden/raw-blocks.docx
+++ b/test/docx/golden/raw-blocks.docx
--- a/test/docx/golden/raw-bookmarks.docx
+++ b/test/docx/golden/raw-bookmarks.docx
--- a/test/docx/golden/table_one_row.docx
+++ b/test/docx/golden/table_one_row.docx
--- a/test/docx/golden/table_with_list_cell.docx
+++ b/test/docx/golden/table_with_list_cell.docx
--- a/test/docx/golden/tables.docx
+++ b/test/docx/golden/tables.docx
--- a/test/docx/golden/track_changes_deletion.docx
+++ b/test/docx/golden/track_changes_deletion.docx
--- a/test/docx/golden/track_changes_insertion.docx
+++ b/test/docx/golden/track_changes_insertion.docx
--- a/test/docx/golden/track_changes_move.docx
+++ b/test/docx/golden/track_changes_move.docx
--- a/test/docx/golden/track_changes_scrubbed_metadata.docx
+++ b/test/docx/golden/track_changes_scrubbed_metadata.docx
--- a/test/docx/golden/unicode.docx
+++ b/test/docx/golden/unicode.docx
--- a/test/docx/golden/verbatim_subsuper.docx
+++ b/test/docx/golden/verbatim_subsuper.docx
--- a/test/jats-reader.native
+++ b/test/jats-reader.native
@ -1,4 +1,4 @@
-Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
+Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
 [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
 ,Header 1 ("headers",[],[]) [Str "Headers"]
 ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")]
--- a/test/jats-reader.xml
+++ b/test/jats-reader.xml
@ -20,6 +20,7 @@
      <surname>MacFarlane</surname>
      <given-names>John</given-names>
    </name>
+  </contrib>
  <contrib contrib-type="author">
    <name>
      <surname>Anonymous</surname>
--- a/test/pptx/code-custom.pptx
+++ b/test/pptx/code-custom.pptx
--- a/test/pptx/code-custom_templated.pptx
+++ b/test/pptx/code-custom_templated.pptx
--- a/test/pptx/code.pptx
+++ b/test/pptx/code.pptx
--- a/test/pptx/code_templated.pptx
+++ b/test/pptx/code_templated.pptx
--- a/test/pptx/document-properties-short-desc.pptx
+++ b/test/pptx/document-properties-short-desc.pptx
--- a/test/pptx/document-properties-short-desc_templated.pptx
+++ b/test/pptx/document-properties-short-desc_templated.pptx
--- a/test/pptx/document-properties.pptx
+++ b/test/pptx/document-properties.pptx
--- a/test/pptx/document-properties_templated.pptx
+++ b/test/pptx/document-properties_templated.pptx
--- a/test/pptx/endnotes.pptx
+++ b/test/pptx/endnotes.pptx
--- a/test/pptx/endnotes_templated.pptx
+++ b/test/pptx/endnotes_templated.pptx
--- a/test/pptx/endnotes_toc.pptx
+++ b/test/pptx/endnotes_toc.pptx
--- a/test/pptx/endnotes_toc_templated.pptx
+++ b/test/pptx/endnotes_toc_templated.pptx
--- a/test/pptx/images.pptx
+++ b/test/pptx/images.pptx
--- a/test/pptx/images_templated.pptx
+++ b/test/pptx/images_templated.pptx
--- a/test/pptx/inline_formatting.pptx
+++ b/test/pptx/inline_formatting.pptx
--- a/test/pptx/inline_formatting_templated.pptx
+++ b/test/pptx/inline_formatting_templated.pptx
--- a/test/pptx/lists.pptx
+++ b/test/pptx/lists.pptx
--- a/test/pptx/lists_templated.pptx
+++ b/test/pptx/lists_templated.pptx
--- a/test/pptx/raw_ooxml.pptx
+++ b/test/pptx/raw_ooxml.pptx
--- a/test/pptx/raw_ooxml_templated.pptx
+++ b/test/pptx/raw_ooxml_templated.pptx
--- a/test/pptx/remove_empty_slides.pptx
+++ b/test/pptx/remove_empty_slides.pptx
--- a/test/pptx/remove_empty_slides_templated.pptx
+++ b/test/pptx/remove_empty_slides_templated.pptx
--- a/test/pptx/slide_breaks.pptx
+++ b/test/pptx/slide_breaks.pptx
--- a/test/pptx/slide_breaks_slide_level_1.pptx
+++ b/test/pptx/slide_breaks_slide_level_1.pptx
--- a/test/pptx/slide_breaks_slide_level_1_templated.pptx
+++ b/test/pptx/slide_breaks_slide_level_1_templated.pptx
--- a/test/pptx/slide_breaks_templated.pptx
+++ b/test/pptx/slide_breaks_templated.pptx
--- a/test/pptx/slide_breaks_toc.pptx
+++ b/test/pptx/slide_breaks_toc.pptx
--- a/test/pptx/slide_breaks_toc_templated.pptx
+++ b/test/pptx/slide_breaks_toc_templated.pptx
--- a/test/pptx/speaker_notes.pptx
+++ b/test/pptx/speaker_notes.pptx
--- a/test/pptx/speaker_notes_after_metadata.pptx
+++ b/test/pptx/speaker_notes_after_metadata.pptx
--- a/test/pptx/speaker_notes_after_metadata_templated.pptx
+++ b/test/pptx/speaker_notes_after_metadata_templated.pptx
--- a/test/pptx/speaker_notes_afterheader.pptx
+++ b/test/pptx/speaker_notes_afterheader.pptx
--- a/test/pptx/speaker_notes_afterheader_templated.pptx
+++ b/test/pptx/speaker_notes_afterheader_templated.pptx
--- a/test/pptx/speaker_notes_afterseps.pptx
+++ b/test/pptx/speaker_notes_afterseps.pptx
--- a/test/pptx/speaker_notes_afterseps_templated.pptx
+++ b/test/pptx/speaker_notes_afterseps_templated.pptx
--- a/test/pptx/speaker_notes_templated.pptx
+++ b/test/pptx/speaker_notes_templated.pptx
--- a/test/pptx/start_numbering_at.pptx
+++ b/test/pptx/start_numbering_at.pptx
--- a/test/pptx/start_numbering_at_templated.pptx
+++ b/test/pptx/start_numbering_at_templated.pptx
--- a/test/pptx/tables.pptx
+++ b/test/pptx/tables.pptx
--- a/test/pptx/tables_templated.pptx
+++ b/test/pptx/tables_templated.pptx
--- a/test/pptx/two_column.pptx
+++ b/test/pptx/two_column.pptx
--- a/test/pptx/two_column_templated.pptx
+++ b/test/pptx/two_column_templated.pptx