Docx reader: Handle Alt text and titles in images.
We use the "description" field as alt text and the "title" field as title. These can be accessed through the "Format Picture" dialog in Word.
This commit is contained in:
parent
1138ae6656
commit
effc348965
2 changed files with 28 additions and 11 deletions
|
@ -317,10 +317,10 @@ runToInlines (Footnote bps) = do
|
||||||
runToInlines (Endnote bps) = do
|
runToInlines (Endnote bps) = do
|
||||||
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
|
blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
|
||||||
return $ note blksList
|
return $ note blksList
|
||||||
runToInlines (InlineDrawing fp bs ext) = do
|
runToInlines (InlineDrawing fp title alt bs ext) = do
|
||||||
mediaBag <- gets docxMediaBag
|
mediaBag <- gets docxMediaBag
|
||||||
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
|
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
|
||||||
return $ imageWith (extentToAttr ext) fp "" ""
|
return $ imageWith (extentToAttr ext) fp title $ text alt
|
||||||
|
|
||||||
extentToAttr :: Extent -> Attr
|
extentToAttr :: Extent -> Attr
|
||||||
extentToAttr (Just (w, h)) =
|
extentToAttr (Just (w, h)) =
|
||||||
|
@ -401,10 +401,10 @@ parPartToInlines (BookMark _ anchor) =
|
||||||
unless inHdrBool
|
unless inHdrBool
|
||||||
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
|
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
|
||||||
return $ spanWith (newAnchor, ["anchor"], []) mempty
|
return $ spanWith (newAnchor, ["anchor"], []) mempty
|
||||||
parPartToInlines (Drawing fp bs ext) = do
|
parPartToInlines (Drawing fp title alt bs ext) = do
|
||||||
mediaBag <- gets docxMediaBag
|
mediaBag <- gets docxMediaBag
|
||||||
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
|
modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
|
||||||
return $ imageWith (extentToAttr ext) fp "" ""
|
return $ imageWith (extentToAttr ext) fp title $ text alt
|
||||||
parPartToInlines (InternalHyperLink anchor runs) = do
|
parPartToInlines (InternalHyperLink anchor runs) = do
|
||||||
ils <- smushInlines <$> mapM runToInlines runs
|
ils <- smushInlines <$> mapM runToInlines runs
|
||||||
return $ link ('#' : anchor) "" ils
|
return $ link ('#' : anchor) "" ils
|
||||||
|
|
|
@ -215,14 +215,14 @@ data ParPart = PlainRun Run
|
||||||
| BookMark BookMarkId Anchor
|
| BookMark BookMarkId Anchor
|
||||||
| InternalHyperLink Anchor [Run]
|
| InternalHyperLink Anchor [Run]
|
||||||
| ExternalHyperLink URL [Run]
|
| ExternalHyperLink URL [Run]
|
||||||
| Drawing FilePath B.ByteString Extent
|
| Drawing FilePath String String B.ByteString Extent -- title, alt
|
||||||
| PlainOMath [Exp]
|
| PlainOMath [Exp]
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
data Run = Run RunStyle [RunElem]
|
data Run = Run RunStyle [RunElem]
|
||||||
| Footnote [BodyPart]
|
| Footnote [BodyPart]
|
||||||
| Endnote [BodyPart]
|
| Endnote [BodyPart]
|
||||||
| InlineDrawing FilePath B.ByteString Extent
|
| InlineDrawing FilePath String String B.ByteString Extent -- title, alt
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
|
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
|
||||||
|
@ -649,12 +649,20 @@ elemToParPart :: NameSpaces -> Element -> D ParPart
|
||||||
elemToParPart ns element
|
elemToParPart ns element
|
||||||
| isElem ns "w" "r" element
|
| isElem ns "w" "r" element
|
||||||
, Just drawingElem <- findChild (elemName ns "w" "drawing") element =
|
, Just drawingElem <- findChild (elemName ns "w" "drawing") element =
|
||||||
let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
|
let mbDocPr = findChild (elemName ns "wp" "inline") drawingElem >>=
|
||||||
|
findChild (elemName ns "wp" "docPr")
|
||||||
|
alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of
|
||||||
|
Just alt' -> alt'
|
||||||
|
Nothing -> ""
|
||||||
|
title = case mbDocPr >>= findAttr (elemName ns "" "title") of
|
||||||
|
Just title' -> title'
|
||||||
|
Nothing -> ""
|
||||||
|
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
|
||||||
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
|
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
|
||||||
>>= findAttr (elemName ns "r" "embed")
|
>>= findAttr (elemName ns "r" "embed")
|
||||||
in
|
in
|
||||||
case drawing of
|
case drawing of
|
||||||
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs $ elemToExtent drawingElem)
|
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
|
||||||
Nothing -> throwError WrongElem
|
Nothing -> throwError WrongElem
|
||||||
-- The below is an attempt to deal with images in deprecated vml format.
|
-- The below is an attempt to deal with images in deprecated vml format.
|
||||||
elemToParPart ns element
|
elemToParPart ns element
|
||||||
|
@ -664,7 +672,8 @@ elemToParPart ns element
|
||||||
>>= findAttr (elemName ns "r" "id")
|
>>= findAttr (elemName ns "r" "id")
|
||||||
in
|
in
|
||||||
case drawing of
|
case drawing of
|
||||||
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs Nothing)
|
-- Todo: check out title and attr for deprecated format.
|
||||||
|
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
|
||||||
Nothing -> throwError WrongElem
|
Nothing -> throwError WrongElem
|
||||||
elemToParPart ns element
|
elemToParPart ns element
|
||||||
| isElem ns "w" "r" element =
|
| isElem ns "w" "r" element =
|
||||||
|
@ -751,13 +760,21 @@ elemToExtent drawingElem =
|
||||||
childElemToRun :: NameSpaces -> Element -> D Run
|
childElemToRun :: NameSpaces -> Element -> D Run
|
||||||
childElemToRun ns element
|
childElemToRun ns element
|
||||||
| isElem ns "w" "drawing" element =
|
| isElem ns "w" "drawing" element =
|
||||||
let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
|
let mbDocPr = findChild (elemName ns "wp" "inline") element >>=
|
||||||
|
findChild (elemName ns "wp" "docPr")
|
||||||
|
alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of
|
||||||
|
Just alt' -> alt'
|
||||||
|
Nothing -> ""
|
||||||
|
title = case mbDocPr >>= findAttr (elemName ns "" "title") of
|
||||||
|
Just title' -> title'
|
||||||
|
Nothing -> ""
|
||||||
|
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
|
||||||
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
|
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
|
||||||
>>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
|
>>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
|
||||||
in
|
in
|
||||||
case drawing of
|
case drawing of
|
||||||
Just s -> expandDrawingId s >>=
|
Just s -> expandDrawingId s >>=
|
||||||
(\(fp, bs) -> return $ InlineDrawing fp bs $ elemToExtent element)
|
(\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element)
|
||||||
Nothing -> throwError WrongElem
|
Nothing -> throwError WrongElem
|
||||||
childElemToRun ns element
|
childElemToRun ns element
|
||||||
| isElem ns "w" "footnoteReference" element
|
| isElem ns "w" "footnoteReference" element
|
||||||
|
|
Loading…
Reference in a new issue