From 10c471907693aac3e01e9550ce203834ff367de1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 3 Sep 2021 21:50:30 -0700
Subject: [PATCH] RTF reader: if doc begins with {\rtf1 ... }, only parse its
 contents.

Some documents seem to have non-RTF (e.g. XML) material after the
`{\rtf1 ... }` group.
---
 src/Text/Pandoc/Readers/RTF.hs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs
index 5e5799b49..e577ee70b 100644
--- a/src/Text/Pandoc/Readers/RTF.hs
+++ b/src/Text/Pandoc/Readers/RTF.hs
@@ -204,7 +204,13 @@ parseRTF = do
   skipMany nl
   toks <- many tok
   -- return $! traceShowId toks
-  bs <- (foldM processTok mempty toks >>= emitBlocks)
+  bs <- (case toks of
+          -- if we start with {\rtf1...}, parse that and ignore
+          -- what follows (which in certain cases can be non-RTF content)
+          tok@(Tok _ (Grouped (Tok _ (ControlWord "rtf" (Just 1)) : _))) : _
+            -> foldM processTok mempty [tok]
+          _ -> foldM processTok mempty toks)
+        >>= emitBlocks
   unclosed <- closeContainers
   let doc = B.doc $ bs <> unclosed
   kvs <- sMetadata <$> getState