From effc348965f20f4131ac3b0357303c548d851308 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 2 Nov 2016 11:56:52 -0400 Subject: [PATCH] Docx reader: Handle Alt text and titles in images. We use the "description" field as alt text and the "title" field as title. These can be accessed through the "Format Picture" dialog in Word. --- src/Text/Pandoc/Readers/Docx.hs | 8 +++---- src/Text/Pandoc/Readers/Docx/Parse.hs | 31 +++++++++++++++++++++------ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index fa534f801..2c171fe0b 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -317,10 +317,10 @@ runToInlines (Footnote bps) = do runToInlines (Endnote bps) = do blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) return $ note blksList -runToInlines (InlineDrawing fp bs ext) = do +runToInlines (InlineDrawing fp title alt bs ext) = do mediaBag <- gets docxMediaBag modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } - return $ imageWith (extentToAttr ext) fp "" "" + return $ imageWith (extentToAttr ext) fp title $ text alt extentToAttr :: Extent -> Attr extentToAttr (Just (w, h)) = @@ -401,10 +401,10 @@ parPartToInlines (BookMark _ anchor) = unless inHdrBool (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) return $ spanWith (newAnchor, ["anchor"], []) mempty -parPartToInlines (Drawing fp bs ext) = do +parPartToInlines (Drawing fp title alt bs ext) = do mediaBag <- gets docxMediaBag modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } - return $ imageWith (extentToAttr ext) fp "" "" + return $ imageWith (extentToAttr ext) fp title $ text alt parPartToInlines (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link ('#' : anchor) "" ils diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 7b9779105..a821d0693 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -215,14 +215,14 @@ data ParPart = PlainRun Run | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath B.ByteString Extent + | Drawing FilePath String String B.ByteString Extent -- title, alt | PlainOMath [Exp] deriving Show data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath B.ByteString Extent + | InlineDrawing FilePath String String B.ByteString Extent -- title, alt deriving Show data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen @@ -649,12 +649,20 @@ elemToParPart :: NameSpaces -> Element -> D ParPart elemToParPart ns element | isElem ns "w" "r" element , Just drawingElem <- findChild (elemName ns "w" "drawing") element = - let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + let mbDocPr = findChild (elemName ns "wp" "inline") drawingElem >>= + findChild (elemName ns "wp" "docPr") + alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of + Just alt' -> alt' + Nothing -> "" + title = case mbDocPr >>= findAttr (elemName ns "" "title") of + Just title' -> title' + Nothing -> "" + a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (elemName ns "r" "embed") in case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs $ elemToExtent drawingElem) + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) Nothing -> throwError WrongElem -- The below is an attempt to deal with images in deprecated vml format. elemToParPart ns element @@ -664,7 +672,8 @@ elemToParPart ns element >>= findAttr (elemName ns "r" "id") in case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs Nothing) + -- Todo: check out title and attr for deprecated format. + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "r" element = @@ -751,13 +760,21 @@ elemToExtent drawingElem = childElemToRun :: NameSpaces -> Element -> D Run childElemToRun ns element | isElem ns "w" "drawing" element = - let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + let mbDocPr = findChild (elemName ns "wp" "inline") element >>= + findChild (elemName ns "wp" "docPr") + alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of + Just alt' -> alt' + Nothing -> "" + title = case mbDocPr >>= findAttr (elemName ns "" "title") of + Just title' -> title' + Nothing -> "" + a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element) + (\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element) Nothing -> throwError WrongElem childElemToRun ns element | isElem ns "w" "footnoteReference" element