Handle Word files generated by Microsoft Word Online.
For some reason, Word in Office 365 Online stores the main content in `document2.xml` instead of `document.xml`, which prevents pandoc from parsing such docx files. As a quick fix, the parser now checks for both `document.xml` and `document2.xml`. This addresses #5277, but a more robust solution would be to determine the name of the main document part dynamically (who knows whether it might change again?).
This commit is contained in:
parent
59fa4eb17e
commit
2b003d4a6b
1 changed files with 2 additions and 0 deletions
|
@ -364,6 +364,7 @@ archiveToDocxWithWarnings archive = do
|
|||
archiveToDocument :: Archive -> D Document
|
||||
archiveToDocument zf = do
|
||||
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
|
||||
`mplus` findEntryByPath "word/document2.xml" zf -- see #5277
|
||||
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
|
||||
let namespaces = elemToNameSpaces docElem
|
||||
bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
|
||||
|
@ -478,6 +479,7 @@ archiveToComments zf =
|
|||
|
||||
-- | Classify the path of a relationships (@.rels@) file inside the docx
-- archive by the 'DocumentLocation' it is associated with.  Paths that
-- are not listed below yield 'Nothing'.
filePathToRelType :: FilePath -> Maybe DocumentLocation
filePathToRelType relPath = lookup relPath knownRelParts
  where
    -- Both @document.xml.rels@ and @document2.xml.rels@ are treated as
    -- belonging to the main document (see #5277).
    knownRelParts =
      [ ("word/_rels/document.xml.rels",  InDocument)
      , ("word/_rels/document2.xml.rels", InDocument)
      , ("word/_rels/footnotes.xml.rels", InFootnote)
      , ("word/_rels/endnotes.xml.rels",  InEndnote)
      ]
|
||||
|
|
Loading…
Add table
Reference in a new issue