Docx reader: handle multiple pic elements inside a drawing.

Closes #7786.
This commit is contained in:
John MacFarlane 2021-12-30 20:54:12 -08:00
parent cc30d646ca
commit 7ff1b798c4

View file

@ -1,4 +1,5 @@
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{- | {- |
Module : Text.Pandoc.Readers.Docx.Parse Module : Text.Pandoc.Readers.Docx.Parse
@ -78,6 +79,19 @@ import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Readers.OMML (readOMML)
import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
import Text.Pandoc.XML.Light import Text.Pandoc.XML.Light
( filterChild,
findElement,
strContent,
showElement,
findAttr,
filterChildrenName,
filterElementName,
parseXMLElement,
elChildren,
QName(QName, qName),
Content(Elem),
Element(elContent, elName),
findElements )
data ReaderEnv = ReaderEnv { envNotes :: Notes data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments , envComments :: Comments
@ -886,16 +900,19 @@ elemToParPart' ns element
| isElem ns "w" "r" element | isElem ns "w" "r" element
, Just drawingElem <- findChildByName ns "w" "drawing" element , Just drawingElem <- findChildByName ns "w" "drawing" element
, pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
, Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem , picElems <- findElements (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
= let (title, alt) = getTitleAndAlt ns drawingElem = let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem drawings = map (\el ->
>>= findAttrByName ns "r" "embed" ((findElement (QName "blip" (Just a_ns) (Just "a")) el
in >>= findAttrByName ns "r" "embed"), el)) picElems
case drawing of in mapM (\case
Just s -> expandDrawingId s >>= \(fp, bs) -> (Just s, el) -> do
return [Drawing fp title alt bs (elemToExtent drawingElem)] (fp, bs) <- expandDrawingId s
Nothing -> throwError WrongElem let extent = elemToExtent el <|> elemToExtent element
return $ Drawing fp title alt bs extent
(Nothing, _) -> throwError WrongElem)
drawings
-- The two cases below are an attempt to deal with images in deprecated vml format. -- The two cases below are an attempt to deal with images in deprecated vml format.
-- Todo: check out title and attr for deprecated format. -- Todo: check out title and attr for deprecated format.
elemToParPart' ns element elemToParPart' ns element
@ -930,11 +947,11 @@ elemToParPart' ns element
= return [Chart] = return [Chart]
elemToParPart' ns element elemToParPart' ns element
| isElem ns "w" "r" element = do | isElem ns "w" "r" element = do
run <- elemToRun ns element runs <- elemToRun ns element
return [PlainRun run] return $ map PlainRun runs
elemToParPart' ns element elemToParPart' ns element
| Just change <- getTrackedChange ns element = do | Just change <- getTrackedChange ns element = do
runs <- mapD (elemToRun ns) (elChildren element) runs <- mconcat <$> mapD (elemToRun ns) (elChildren element)
return [ChangedRuns change runs] return [ChangedRuns change runs]
elemToParPart' ns element elemToParPart' ns element
| isElem ns "w" "bookmarkStart" element | isElem ns "w" "bookmarkStart" element
@ -992,59 +1009,61 @@ lookupEndnote :: T.Text -> Notes -> Maybe Element
lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent elemToExtent :: Element -> Extent
elemToExtent drawingElem = elemToExtent el =
case (getDim "cx", getDim "cy") of case (getDim "cx", getDim "cy") of
(Just w, Just h) -> Just (w, h) (Just w, Just h) -> Just (w, h)
_ -> Nothing _ -> Nothing
where where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = filterElementName (\n -> qName n `elem` ["extent", "ext"]) el
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem >>= findAttr (QName at Nothing Nothing) >>= safeRead
>>= findAttr (QName at Nothing Nothing) >>= safeRead
childElemToRun :: NameSpaces -> Element -> D Run childElemToRun :: NameSpaces -> Element -> D [Run]
childElemToRun ns element childElemToRun ns element
| isElem ns "w" "drawing" element | isElem ns "w" "drawing" element
, pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
, Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) element , picElems <- findElements (QName "pic" (Just pic_ns) (Just "pic")) element
= let (title, alt) = getTitleAndAlt ns element = let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem drawings = map (\el ->
>>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r")) ((findElement (QName "blip" (Just a_ns) (Just "a")) el
in >>= findAttrByName ns "r" "embed"), el)) picElems
case drawing of in mapM (\case
Just s -> expandDrawingId s >>= (Just s, el) -> do
(\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element) (fp, bs) <- expandDrawingId s
Nothing -> throwError WrongElem let extent = elemToExtent el <|> elemToExtent element
return $ InlineDrawing fp title alt bs extent
(Nothing, _) -> throwError WrongElem)
drawings
childElemToRun ns element childElemToRun ns element
| isElem ns "w" "drawing" element | isElem ns "w" "drawing" element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element
= return InlineChart = return [InlineChart]
childElemToRun ns element childElemToRun ns element
| isElem ns "w" "drawing" element | isElem ns "w" "drawing" element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram" , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
, Just _ <- findElement (QName "relIds" (Just c_ns) (Just "dgm")) element , Just _ <- findElement (QName "relIds" (Just c_ns) (Just "dgm")) element
= return InlineDiagram = return [InlineDiagram]
childElemToRun ns element childElemToRun ns element
| isElem ns "w" "footnoteReference" element | isElem ns "w" "footnoteReference" element
, Just fnId <- findAttrByName ns "w" "id" element = do , Just fnId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes notes <- asks envNotes
case lookupFootnote fnId notes of case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Footnote bps return [Footnote bps]
Nothing -> return $ Footnote [] Nothing -> return [Footnote []]
childElemToRun ns element childElemToRun ns element
| isElem ns "w" "endnoteReference" element | isElem ns "w" "endnoteReference" element
, Just enId <- findAttrByName ns "w" "id" element = do , Just enId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes notes <- asks envNotes
case lookupEndnote enId notes of case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Endnote bps return [Endnote bps]
Nothing -> return $ Endnote [] Nothing -> return [Endnote []]
childElemToRun _ _ = throwError WrongElem childElemToRun _ _ = throwError WrongElem
elemToRun :: NameSpaces -> Element -> D Run elemToRun :: NameSpaces -> Element -> D [Run]
elemToRun ns element elemToRun ns element
| isElem ns "w" "r" element | isElem ns "w" "r" element
, Just altCont <- findChildByName ns "mc" "AlternateContent" element = , Just altCont <- findChildByName ns "mc" "AlternateContent" element =
@ -1070,7 +1089,7 @@ elemToRun ns element
| isElem ns "w" "r" element = do | isElem ns "w" "r" element = do
runElems <- elemToRunElems ns element runElems <- elemToRunElems ns element
runStyle <- elemToRunStyleD ns element runStyle <- elemToRunStyleD ns element
return $ Run runStyle runElems return [Run runStyle runElems]
elemToRun _ _ = throwError WrongElem elemToRun _ _ = throwError WrongElem
getParentStyleValue :: (ParStyle -> Maybe a) -> ParStyle -> Maybe a getParentStyleValue :: (ParStyle -> Maybe a) -> ParStyle -> Maybe a