Docx reader: Handle Alt text and titles in images.

We use the "description" field as alt text and the "title" field as
title. These can be accessed through the "Format Picture" dialog in
Word.
This commit is contained in:
Jesse Rosenthal 2016-11-02 11:56:52 -04:00
parent 1138ae6656
commit effc348965
2 changed files with 28 additions and 11 deletions

View file

@ -317,10 +317,10 @@ runToInlines (Footnote bps) = do
runToInlines (Endnote bps) = do
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
return $ note blksList
runToInlines (InlineDrawing fp bs ext) = do
runToInlines (InlineDrawing fp title alt bs ext) = do
mediaBag <- gets docxMediaBag
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
return $ imageWith (extentToAttr ext) fp "" ""
return $ imageWith (extentToAttr ext) fp title $ text alt
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
@ -401,10 +401,10 @@ parPartToInlines (BookMark _ anchor) =
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines (Drawing fp bs ext) = do
parPartToInlines (Drawing fp title alt bs ext) = do
mediaBag <- gets docxMediaBag
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
return $ imageWith (extentToAttr ext) fp "" ""
return $ imageWith (extentToAttr ext) fp title $ text alt
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link ('#' : anchor) "" ils

View file

@ -215,14 +215,14 @@ data ParPart = PlainRun Run
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
| Drawing FilePath B.ByteString Extent
| Drawing FilePath String String B.ByteString Extent -- title, alt
| PlainOMath [Exp]
deriving Show
data Run = Run RunStyle [RunElem]
| Footnote [BodyPart]
| Endnote [BodyPart]
| InlineDrawing FilePath B.ByteString Extent
| InlineDrawing FilePath String String B.ByteString Extent -- title, alt
deriving Show
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
@ -649,12 +649,20 @@ elemToParPart :: NameSpaces -> Element -> D ParPart
elemToParPart ns element
| isElem ns "w" "r" element
, Just drawingElem <- findChild (elemName ns "w" "drawing") element =
let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
let mbDocPr = findChild (elemName ns "wp" "inline") drawingElem >>=
findChild (elemName ns "wp" "docPr")
alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of
Just alt' -> alt'
Nothing -> ""
title = case mbDocPr >>= findAttr (elemName ns "" "title") of
Just title' -> title'
Nothing -> ""
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
>>= findAttr (elemName ns "r" "embed")
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs $ elemToExtent drawingElem)
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
Nothing -> throwError WrongElem
-- The below is an attempt to deal with images in deprecated vml format.
elemToParPart ns element
@ -664,7 +672,8 @@ elemToParPart ns element
>>= findAttr (elemName ns "r" "id")
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs Nothing)
-- Todo: check out title and attr for deprecated format.
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "r" element =
@ -751,13 +760,21 @@ elemToExtent drawingElem =
childElemToRun :: NameSpaces -> Element -> D Run
childElemToRun ns element
| isElem ns "w" "drawing" element =
let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
let mbDocPr = findChild (elemName ns "wp" "inline") element >>=
findChild (elemName ns "wp" "docPr")
alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of
Just alt' -> alt'
Nothing -> ""
title = case mbDocPr >>= findAttr (elemName ns "" "title") of
Just title' -> title'
Nothing -> ""
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
>>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
(\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element)
(\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element)
Nothing -> throwError WrongElem
childElemToRun ns element
| isElem ns "w" "footnoteReference" element