From 2b003d4a6bc2b86f94859c43c9a16ccb6da9275c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 6 Feb 2019 09:01:26 -0800
Subject: [PATCH] Handle Word files generated by Microsoft Word Online.

For some reason, Word in Office 365 Online uses `document2.xml`
for the content, instead of `document.xml`.  As a result, pandoc
cannot parse docx files produced by Word Online.

This quick fix has the parser check for both `document.xml`
and `document2.xml`.

Addresses #5277, but a more robust solution would be to
get the name of the main document dynamically (who knows
whether it might change again?).
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 843f2cdcd..e551ca7aa 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -364,6 +364,7 @@ archiveToDocxWithWarnings archive = do
 archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
   entry <- maybeToD $ findEntryByPath "word/document.xml" zf
+             `mplus` findEntryByPath "word/document2.xml" zf -- see #5277
   docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
@@ -478,6 +479,7 @@ archiveToComments zf =
 
 filePathToRelType :: FilePath -> Maybe DocumentLocation
 filePathToRelType "word/_rels/document.xml.rels"  = Just InDocument
+filePathToRelType "word/_rels/document2.xml.rels" = Just InDocument
 filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
 filePathToRelType "word/_rels/endnotes.xml.rels"  = Just InEndnote
 filePathToRelType _                               = Nothing