Improve docx reader's robustness in extracting images.
The docx reader made a couple of assumptions about how docx containers were laid out that were not always true, with the result that some images in documents were not found or extracted. Closes #7511.
This commit is contained in:
parent
5159d6653b
commit
ef4efa5373
1 changed file with 6 additions and 5 deletions
|
@@ -507,9 +507,7 @@ archiveToRelationships archive docXmlPath =
|
|||
|
||||
filePathIsMedia :: FilePath -> Bool
|
||||
filePathIsMedia fp =
|
||||
let (dir, _) = splitFileName fp
|
||||
in
|
||||
(dir == "word/media/")
|
||||
"media" `elem` splitPath (takeDirectory fp)
|
||||
|
||||
lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
|
||||
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
|
||||
|
@@ -774,8 +772,11 @@ expandDrawingId s = do
|
|||
target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
|
||||
case target of
|
||||
Just filepath -> do
|
||||
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
|
||||
case bytes of
|
||||
media <- asks envMedia
|
||||
let filepath' = case filepath of
|
||||
('/':rest) -> rest
|
||||
_ -> "word/" ++ filepath
|
||||
case lookup filepath' media of
|
||||
Just bs -> return (filepath, bs)
|
||||
Nothing -> throwError DocxError
|
||||
Nothing -> throwError DocxError
|
||||
|
|
Loading…
Add table
Reference in a new issue