From cfa26e3ca0346397f41af9aed5b4cd1d86be1220 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 12 Jun 2021 13:56:09 -0700 Subject: [PATCH] Docx reader: handle absolute URIs in Relationship Target. Closes #7374. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index aaa8f4ad0..dbb16a821 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -479,20 +479,26 @@ filePathToRelType path docXmlPath = then Just InDocument else Nothing -relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship -relElemToRelationship relType element | qName (elName element) == "Relationship" = +relElemToRelationship :: FilePath -> DocumentLocation -> Element + -> Maybe Relationship +relElemToRelationship fp relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship relType relId target -relElemToRelationship _ _ = Nothing + -- target may be relative (media/image1.jpeg) or absolute + -- (/word/media/image1.jpeg); we need to relativize it (see #7374) + let frontOfFp = T.pack $ takeWhile (/= '_') fp + let target' = fromMaybe target $ + T.stripPrefix frontOfFp $ T.dropWhile (== '/') target + return $ Relationship relType relId target' +relElemToRelationship _ _ _ = Nothing filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship] filePathToRelationships ar docXmlPath fp | Just relType <- filePathToRelType fp docXmlPath , Just entry <- findEntryByPath fp ar , Just relElems <- parseXMLFromEntry entry = - mapMaybe (relElemToRelationship relType) $ elChildren relElems + mapMaybe (relElemToRelationship fp relType) $ elChildren relElems filePathToRelationships _ _ _ = [] archiveToRelationships :: Archive -> FilePath -> [Relationship]