From ef5fad2698f3d4c1fe528f138264cc8abb3b2943 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:25:46 -0400
Subject: [PATCH 1/5] Add new typeclass, Reducible

This defines a typeclass `Reducible` which allows us to "reduce" pandoc
Inlines and Blocks, like so

    Emph [Strong [Str "foo", Space]] <++>
      Strong [Emph [Str "bar"], Space, Str "baz"] =
        [Strong [Emph [Str "foo", Space, Str "bar"], Space, Str "baz"]]

So adjacent formattings and strings are appropriately grouped.

A corresponding operator (`<+++>`) and helper (`reduceListB`) for
`(Reducible a) => Many a` are also included.
---
 src/Text/Pandoc/Readers/Docx/Reducible.hs | 150 ++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 src/Text/Pandoc/Readers/Docx/Reducible.hs

diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
new file mode 100644
index 000000000..1ed31ebd0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -0,0 +1,150 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Text.Pandoc.Readers.Docx.Reducible ((<++>),
+                                           (<+++>),
+                                           Reducible,
+                                           Container(..),
+                                           container,
+                                           innards,
+                                           reduceList,
+                                           reduceListB,
+                                           rebuild)
+       where
+
+import Text.Pandoc.Builder
+import Data.List ((\\), intersect)
+
+data Container a = Container ([a] -> a) | NullContainer
+
+instance (Eq a) => Eq (Container a) where
+  (Container x) == (Container y) = ((x []) == (y []))
+  NullContainer == NullContainer = True
+  _ == _ = False
+
+instance (Show a) => Show (Container a) where
+  show (Container x) = "Container {" ++
+                       (reverse $ drop 3 $ reverse $ show $ x []) ++
+                       "}"
+  show (NullContainer) = "NullContainer"
+
+class Reducible a where
+  (<++>) :: a -> a -> [a]
+  container :: a -> Container a
+  innards :: a -> [a]
+  isSpace :: a -> Bool
+
+(<+++>) :: (Reducible a) => Many a -> Many a -> Many a
+mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms
+
+reduceListB :: (Reducible a) => Many a -> Many a
+reduceListB = fromList . reduceList . toList
+
+reduceList' :: (Reducible a) => [a] -> [a] -> [a]
+reduceList' acc [] = acc
+reduceList' [] (x:xs) = reduceList' [x] xs
+reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs
+
+reduceList :: (Reducible a) => [a] -> [a]
+reduceList = reduceList' []
+
+combineReducibles :: (Reducible a, Eq a) => a -> a -> [a]
+combineReducibles r s =
+  let (conts, rs) = topLevelContainers r
+      (conts', ss) = topLevelContainers s
+      shared = conts `intersect` conts'
+      remaining = conts \\ shared
+      remaining' = conts' \\ shared
+  in
+   case null shared of
+       True -> case (not . null) rs && isSpace (last rs) of
+         True -> rebuild conts (init rs) ++ [last rs, s]
+         False -> [r,s]
+       False -> rebuild 
+                shared $
+                reduceList $
+                (rebuild remaining rs) ++ (rebuild remaining' ss)
+
+instance Reducible Inline where
+  s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) =
+    let classes'  = classes1 `intersect` classes2
+        kvs'      = kvs1 `intersect` kvs2
+        classes1' = classes1 \\ classes'
+        kvs1'     = kvs1 \\ kvs'
+        classes2' = classes2 \\ classes'
+        kvs2'     = kvs2 \\ kvs'
+    in
+     case null classes' && null kvs' of
+       True -> [s1,s2]
+       False -> let attr'  = ("", classes', kvs')
+                    attr1' = (id1, classes1', kvs1')
+                    attr2' = (id2, classes2', kvs2')
+                    s1' = case null classes1' && null kvs1' of
+                      True -> ils1
+                      False -> [Span attr1' ils1]
+                    s2' = case null classes2' && null kvs2' of
+                      True -> ils2
+                      False -> [Span attr2' ils2]
+                in
+                 [Span attr' $ reduceList $ s1' ++ s2']
+
+  (Str x) <++> (Str y) = [Str (x++y)]
+  il <++> il' = combineReducibles il il'
+
+  container (Emph _) = Container Emph
+  container (Strong _) = Container Strong
+  container (Strikeout _) = Container Strikeout
+  container (Subscript _) = Container Subscript
+  container (Superscript _) = Container Superscript
+  container (Quoted qt _) = Container $ Quoted qt
+  container (Cite cs _) = Container $ Cite cs
+  container (Span attr _) = Container $ Span attr
+  container _ = NullContainer
+
+  innards (Emph ils) = ils
+  innards (Strong ils) = ils
+  innards (Strikeout ils) = ils
+  innards (Subscript ils) = ils
+  innards (Superscript ils) = ils
+  innards (Quoted _ ils) = ils
+  innards (Cite _ ils) = ils
+  innards (Span _ ils) = ils
+  innards _ = []
+
+  isSpace Space = True
+  isSpace _     = False
+
+instance Reducible Block where
+  (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = 
+    [Div (ident, classes, kvs) (reduceList blks), blk]
+
+  blk <++> blk' = combineReducibles blk blk'
+
+  container (BlockQuote _) = Container BlockQuote
+  container (Div attr _) = Container $ Div attr
+  container _            = NullContainer
+
+  innards (BlockQuote bs) = bs
+  innards (Div _ bs) = bs
+  innards _          = []
+
+  isSpace _          = False
+
+
+topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a])
+topLevelContainers' (r : []) = case container r of
+  NullContainer -> ([], [r])
+  _             ->
+    let (conts, inns) = topLevelContainers' (innards r)
+    in
+    ((container r) : conts, inns)
+topLevelContainers' rs = ([], rs)
+
+topLevelContainers :: (Reducible a) => a -> ([Container a], [a])
+topLevelContainers il = topLevelContainers' [il]
+
+rebuild :: [Container a] -> [a] -> [a]
+rebuild [] xs = xs
+rebuild ((Container f) : cs) xs = rebuild cs $ [f xs]
+rebuild (NullContainer : cs) xs = rebuild cs $ xs
+  
+  

From 94d0fb15382a4855938c540c9e521642bccc00e3 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:27:01 -0400
Subject: [PATCH 2/5] Move some of the clean-up logic into Lists module.

This will allow us to get rid of more general functions we no longer need in
the main reader.
---
 src/Text/Pandoc/Readers/Docx/Lists.hs | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 68559d98b..1e37d0076 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists.
 -}
 
 module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
-                                      , blocksToDefinitions) where
+                                      , blocksToDefinitions
+                                      , listParagraphDivs
+                                      ) where
 
 import Text.Pandoc.JSON
+import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.Shared (trim)
 import Control.Monad
 import Data.List
@@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems
 
 blocksToBullets :: [Block] -> [Block]
 blocksToBullets blks =
-  -- bottomUp removeListItemDivs $ 
+  bottomUp removeListDivs $ 
   flatToBullets $ (handleListParagraphs blks)
 
-
 plainParaInlines :: Block -> [Inline]
 plainParaInlines (Plain ils) = ils
 plainParaInlines (Para ils) = ils
@@ -199,6 +201,23 @@ blocksToDefinitions' [] acc (b:blks) =
 blocksToDefinitions' defAcc acc (b:blks) =
   blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
 
+removeListDivs' :: Block -> [Block]
+removeListDivs' (Div (ident, classes, kvs) blks)
+  | "list-item" `elem` classes =
+    case delete "list-item" classes of
+      [] -> blks
+      classes' -> [Div (ident, classes', kvs) $ blks]
+removeListDivs' (Div (ident, classes, kvs) blks)
+  | not $ null $ listParagraphDivs `intersect` classes =
+    case classes \\ listParagraphDivs of
+      [] -> blks
+      classes' -> [Div (ident, classes', kvs) blks]
+removeListDivs' blk = [blk]
+
+removeListDivs :: [Block] -> [Block]
+removeListDivs = concatMap removeListDivs'
+  
+
 
 blocksToDefinitions :: [Block] -> [Block]
 blocksToDefinitions = blocksToDefinitions' [] []

From 11b0778744d0eeb61e2502e452d010631fab979b Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:27:55 -0400
Subject: [PATCH 3/5] Use Reducible in docx reader.

This cleans up the implementation, and cuts down on tree-walking.
Anecdotally, I've seen about a 3-fold speedup.
---
 src/Text/Pandoc/Readers/Docx.hs | 378 +++++++++-----------------------
 1 file changed, 108 insertions(+), 270 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 09c2330fb..ffe7f5a92 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -79,8 +79,10 @@ import Text.Pandoc.Builder (text, toList)
 import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.MIME (getMimeType)
 import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Walk
 import Text.Pandoc.Readers.Docx.Parse
 import Text.Pandoc.Readers.Docx.Lists
+import Text.Pandoc.Readers.Docx.Reducible
 import Data.Maybe (mapMaybe, isJust, fromJust)
 import Data.List (delete, isPrefixOf, (\\), intersect)
 import qualified Data.ByteString as BS
@@ -96,28 +98,65 @@ readDocx opts bytes =
     Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
     Nothing   -> error $ "couldn't parse docx file"
 
-runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
-runStyleToSpanAttr rPr = ("",
-                          mapMaybe id [
-                            if isBold rPr then (Just "strong") else Nothing,
-                            if isItalic rPr then (Just "emph") else Nothing,
-                            if isSmallCaps rPr then (Just "smallcaps") else Nothing,
-                            if isStrike rPr then (Just "strike") else Nothing,
-                            if isSuperScript rPr then (Just "superscript") else Nothing,
-                            if isSubScript rPr then (Just "subscript") else Nothing,
-                            rStyle rPr],
-                          case underline rPr of
-                            Just fmt -> [("underline", fmt)]
-                            _        -> []
-                         )
+spansToKeep :: [String]
+spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans
 
-parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
-parStyleToDivAttr pPr = ("",
-                          pStyle pPr,
-                          case indent pPr of
-                            Just n  -> [("indent", (show n))]
-                            Nothing -> []
-                         )
+
+-- This is empty, but we put it in for future-proofing.
+divsToKeep :: [String]
+divsToKeep = []
+
+runStyleToContainers :: RunStyle -> [Container Inline]
+runStyleToContainers rPr =
+  let formatters = mapMaybe id
+                 [ if isBold rPr then (Just Strong) else Nothing
+                 , if isItalic rPr then (Just Emph) else Nothing
+                 , if isSmallCaps rPr then (Just SmallCaps) else Nothing
+                 , if isStrike rPr then (Just Strikeout) else Nothing
+                 , if isSuperScript rPr then (Just Superscript) else Nothing
+                 , if isSubScript rPr then (Just Subscript) else Nothing
+                 , rStyle rPr >>=
+                   (\s -> if s `elem` spansToKeep then Just s else Nothing) >>=
+                   (\s -> Just $ Span ("", [s], []))
+                 , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+                 ]
+  in
+   map Container formatters
+
+
+divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
+divAttrToContainers [] [] = []
+divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) =
+  let n = fromJust (isHeaderClass c)
+  in
+   [(Container $ \blks ->
+      Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))]
+divAttrToContainers (c:_) _ | c `elem` codeDivs =
+  [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)]
+divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
+  let kvs' = filter (\(k,_) -> k /= "indent") kvs
+  in
+   (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs')
+divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
+  (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
+divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
+  (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
+divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
+divAttrToContainers [] (kv:kvs) | fst kv == "indent" =
+  (Container BlockQuote) : divAttrToContainers [] kvs
+divAttrToContainers [] (_:kvs) =
+  divAttrToContainers [] kvs
+
+
+parStyleToContainers :: ParagraphStyle -> [Container Block]
+parStyleToContainers pPr =
+  let classes = pStyle pPr
+      kvs = case indent pPr of
+        Just n -> [("indent", show n)]
+        Nothing -> []
+  in
+   divAttrToContainers classes kvs
+  
 
 strToInlines :: String -> [Inline]
 strToInlines = toList . text
@@ -144,103 +183,42 @@ runElemToString (Tab) = ['\t']
 runElemsToString :: [RunElem] -> String
 runElemsToString = concatMap runElemToString
 
---- We use this instead of the more general
---- Text.Pandoc.Shared.normalize for reasons of efficiency. For
---- whatever reason, `normalize` makes a run take almost twice as
---- long. (It does more, but this does what we need)
-inlineNormalize :: [Inline] -> [Inline]
-inlineNormalize [] = []
-inlineNormalize (Str "" : ils) = inlineNormalize ils
-inlineNormalize ((Str s) : (Str s') : l) =
-  inlineNormalize (Str (s++s') : l)
-inlineNormalize ((Emph ils) : (Emph ils') : l) =
-  inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Emph ils) : l) =
-  Emph (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strong ils) : (Strong ils') : l) = 
-  inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strong ils) : l) =
-  Strong (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = 
-  inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strikeout ils) : l) =
-  Strikeout (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Superscript ils) : (Superscript ils') : l) = 
-  inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Superscript ils) : l) =
-  Superscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Subscript ils) : (Subscript ils') : l) = 
-  inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Subscript ils) : l) =
-  Subscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Space : Space : l)) =
-  inlineNormalize $ (Space : l)
-inlineNormalize ((Quoted qt ils) : l) =
-  Quoted qt (inlineNormalize ils) : inlineNormalize l
-inlineNormalize ((Cite cits ils) : l) =
-  let
-    f :: Citation -> Citation
-    f (Citation s pref suff mode num hash) =
-      Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash
-  in
-   Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Link ils s) : l) =
-  Link (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Image ils s) : l) =
-  Image (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Note blks) : l) =
-  Note (map blockNormalize blks) : (inlineNormalize l)
-inlineNormalize ((Span attr ils) : l) =
-  Span attr (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize (il : l) = il : (inlineNormalize l)
 
-stripSpaces :: [Inline] -> [Inline]
-stripSpaces ils =
-  reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
+inlineCodeContainer :: Container Inline -> Bool
+inlineCodeContainer (Container f) = case f [] of
+  Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans)
+  _         -> False
+inlineCodeContainer _ = False
 
-blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils
-blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils
-blockNormalize (Header n attr ils) =
-  Header n attr $ stripSpaces $ inlineNormalize ils
-blockNormalize (Table ils align width hdr cells) =
-  Table (stripSpaces $ inlineNormalize ils) align width hdr cells
-blockNormalize (DefinitionList pairs) =
-  DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs
-blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks)
-blockNormalize (OrderedList attr blkslst) =
-  OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (BulletList blkslst) =
-  BulletList $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (Div attr blks) = Div attr (map blockNormalize blks)
-blockNormalize blk = blk
+-- blockCodeContainer :: Container Block -> Bool
+-- blockCodeContainer (Container f) = case f [] of
+--   Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs)
+--   _         -> False
+-- blockCodeContainer _ = False
 
 runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
 runToInlines _ _ (Run rs runElems)
-  | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
-    case runStyleToSpanAttr rs == ("", [], []) of
-      True -> [Str (runElemsToString runElems)]
-      False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
-  | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
-      True -> concatMap runElemToInlines runElems
-      False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
+  | any inlineCodeContainer (runStyleToContainers rs) =
+      rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems]
+  | otherwise =
+      rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems)
 runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
   case (getFootNote fnId notes) of
     Just bodyParts ->
-      [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+      [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
     Nothing        ->
-      [Note [Div ("", ["footnote"], []) []]]
+      [Note []]
 runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
   case (getEndNote fnId notes) of
     Just bodyParts ->
-      [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
+      [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)]
     Nothing        ->
-      [Note [Div ("", ["endnote"], []) []]]
+      [Note []]
 
 parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
 parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines _ _ (BookMark _ anchor) =
-  [Span (anchor, ["anchor"], []) []]
+parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
+parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
 parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
   case lookupRelationship relid rels of
     Just target -> [Image [] (combine "word" target, "")]
@@ -276,7 +254,6 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
     _ -> h
 makeHeaderAnchors blk = blk
 
-
 parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
 parPartsToInlines opts docx parparts =
   --
@@ -284,23 +261,32 @@ parPartsToInlines opts docx parparts =
   -- not mandatory.
   --
   (if False -- TODO depend on option
-      then bottomUp (makeImagesSelfContained docx)
+      then walk (makeImagesSelfContained docx)
       else id) $
-  bottomUp spanTrim $
-  bottomUp spanCorrect $
-  bottomUp spanReduce $
-  concatMap (parPartToInlines opts docx) parparts
+  -- bottomUp spanTrim $
+  -- bottomUp spanCorrect $
+  -- bottomUp spanReduce $
+  reduceList $ concatMap (parPartToInlines opts docx) parparts
 
 cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
-cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
+cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps
 
 rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
 rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
 
-bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block
-bodyPartToBlock opts docx (Paragraph pPr parparts) =
-  Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
-bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block]
+bodyPartToBlocks opts docx (Paragraph pPr parparts) =
+  case parPartsToInlines opts docx parparts of
+    [] ->
+      []
+    _ ->
+      let parContents = parPartsToInlines opts docx parparts
+          trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents
+      in
+       rebuild
+       (parStyleToContainers pPr)
+       [Para trimmedContents]
+bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
   let
     kvs = case lookupLevel numId lvl numbering of
       Just (_, fmt, txt, Just start) -> [ ("level", lvl)
@@ -317,12 +303,12 @@ bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parpa
                                         ]
       Nothing                        -> []
   in
-   Div
-   ("", ["list-item"], kvs)
-   [bodyPartToBlock opts docx (Paragraph pPr parparts)]
-bodyPartToBlock _ _ (Tbl _ _ _ []) =
-  Para []
-bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+   [Div
+    ("", ["list-item"], kvs)
+    (bodyPartToBlocks opts docx (Paragraph pPr parparts))]
+bodyPartToBlocks _ _ (Tbl _ _ _ []) =
+  [Para []]
+bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) =
   let caption = strToInlines cap
       (hdr, rows) = case firstRowFormatting look of
         True -> (Just r, rs)
@@ -344,7 +330,8 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
       alignments = take size (repeat AlignDefault)
       widths = take size (repeat 0) :: [Double]
   in
-   Table caption alignments widths hdrCells cells
+   [Table caption alignments widths hdrCells cells]
+
 
 makeImagesSelfContained :: Docx -> Inline -> Inline
 makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
@@ -360,127 +347,19 @@ makeImagesSelfContained _ inline = inline
 
 bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
 bodyToBlocks opts docx (Body bps) =
-  bottomUp removeEmptyPars $
-  map blockNormalize $
-  bottomUp spanRemove $
-  bottomUp divRemove $
   map (makeHeaderAnchors) $
-  bottomUp divCorrect $
-  bottomUp divReduce $
-  bottomUp divCorrectPreReduce $
   bottomUp blocksToDefinitions $
   blocksToBullets $
-  map (bodyPartToBlock opts docx) bps
+  concatMap (bodyPartToBlocks opts docx) bps
 
 docxToBlocks :: ReaderOptions -> Docx -> [Block]
 docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
 
-spanReduce :: [Inline] -> [Inline]
-spanReduce [] = []
-spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
-  | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
-spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
-            s2@(Span (id2, classes2, kvs2) ils2) :
-            ils) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> s1 : (spanReduce (s2 : ils))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
-                           ils)
-spanReduce (il:ils) = il : (spanReduce ils)
 
 ilToCode :: Inline -> String
 ilToCode (Str s) = s
 ilToCode _ = ""
 
-spanRemove' :: Inline -> [Inline]
-spanRemove' s@(Span (ident, classes, _) [])
-  -- "_GoBack" is automatically inserted. We don't want to keep it.
-  | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
-spanRemove' (Span (_, _, kvs) ils) =
-  case lookup "underline" kvs of
-    Just val -> [Span ("", [], [("underline", val)]) ils]
-    Nothing  -> ils
-spanRemove' il = [il]
-
-spanRemove :: [Inline] -> [Inline]
-spanRemove = concatMap spanRemove'
-
-spanTrim' :: Inline -> [Inline]
-spanTrim' il@(Span _ []) = [il]
-spanTrim' il@(Span attr (il':[]))
-  | il' == Space = [Span attr [], Space]
-  | otherwise = [il]
-spanTrim' (Span attr ils)
-  | head ils == Space && last ils == Space =
-    [Space, Span attr (init $ tail ils), Space]
-  | head ils == Space = [Space, Span attr (tail ils)]
-  | last ils == Space = [Span attr (init ils), Space]
-spanTrim' il = [il]
-
-spanTrim :: [Inline] -> [Inline]
-spanTrim = concatMap spanTrim'
-
-spanCorrect' :: Inline -> [Inline]
-spanCorrect' (Span ("", [], []) ils) = ils
-spanCorrect' (Span (ident, classes, kvs) ils)
-  | "emph" `elem` classes =
-    [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
-  | "strong" `elem` classes =
-      [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
-  | "smallcaps" `elem` classes =
-      [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
-  | "strike" `elem` classes =
-      [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
-  | "superscript" `elem` classes =
-      [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
-  | "subscript" `elem` classes =
-      [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
-  | (not . null) (codeSpans `intersect` classes) =
-         [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
-  | otherwise =
-      [Span (ident, classes, kvs) ils]
-spanCorrect' il = [il]
-
-spanCorrect :: [Inline] -> [Inline]
-spanCorrect = concatMap spanCorrect'
-
-removeEmptyPars :: [Block] -> [Block]
-removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
-
-divReduce :: [Block] -> [Block]
-divReduce [] = []
-divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
-  | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
-divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
-           d2@(Div (id2, classes2, kvs2) blks2) :
-            blks) =
-  let classes'  = classes1 `intersect` classes2
-      kvs'      = kvs1 `intersect` kvs2
-      classes1' = classes1 \\ classes'
-      kvs1'     = kvs1 \\ kvs'
-      classes2' = classes2 \\ classes'
-      kvs2'     = kvs2 \\ kvs'
-  in
-   case null classes' && null kvs' of
-     True -> d1 : (divReduce (d2 : blks))
-     False -> let attr'  = ("", classes', kvs')
-                  attr1' = (id1, classes1', kvs1')
-                  attr2' = (id2, classes2', kvs2')
-              in
-               divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
-                           blks)
-divReduce (blk:blks) = blk : (divReduce blks)
 
 isHeaderClass :: String -> Maybe Int
 isHeaderClass s | "Heading" `isPrefixOf` s =
@@ -490,27 +369,12 @@ isHeaderClass s | "Heading" `isPrefixOf` s =
     _       -> Nothing
 isHeaderClass _ = Nothing
 
-findHeaderClass :: [String] -> Maybe Int
-findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
-  [] -> Nothing
-  n : _ -> Just n
 
 blksToInlines :: [Block] -> [Inline]
 blksToInlines (Para ils : _) = ils
 blksToInlines (Plain ils : _) = ils
 blksToInlines _ = []
 
-divCorrectPreReduce' :: Block -> [Block]
-divCorrectPreReduce' (Div (ident, classes, kvs) blks)
-  | isJust $ findHeaderClass classes =
-    let n = fromJust $ findHeaderClass classes
-    in
-    [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
-  | otherwise = [Div (ident, classes, kvs) blks]
-divCorrectPreReduce' blk = [blk]
-
-divCorrectPreReduce :: [Block] -> [Block]
-divCorrectPreReduce = concatMap divCorrectPreReduce'
 
 blkToCode :: Block -> String
 blkToCode (Para []) = ""
@@ -520,29 +384,3 @@ blkToCode (Para ((Span (_, classes, _) ils'): ils))
     (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
 blkToCode _ = ""
 
-divRemove' :: Block -> [Block]
-divRemove' (Div (_, _, kvs) blks) =
-  case lookup "indent" kvs of
-    Just val -> [Div ("", [], [("indent", val)]) blks]
-    Nothing  -> blks
-divRemove' blk = [blk]
-
-divRemove :: [Block] -> [Block]
-divRemove = concatMap divRemove'
-
-divCorrect' :: Block -> [Block]
-divCorrect' b@(Div (ident, classes, kvs) blks)
-  | (not . null) (blockQuoteDivs `intersect` classes) =
-    [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
-  | (not . null) (codeDivs `intersect` classes) =
-    [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
-  | otherwise =
-      case lookup "indent" kvs of
-        Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
-        Just _   ->
-          [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
-        Nothing  -> [b]
-divCorrect' blk = [blk]
-
-divCorrect :: [Block] -> [Block]
-divCorrect = concatMap divCorrect'

From 8517a4f2e59dc2ecf20a96eedfa5b2cc47f0aeba Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:29:04 -0400
Subject: [PATCH 4/5] Add Reducible to cabal file.

---
 pandoc.cabal | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandoc.cabal b/pandoc.cabal
index e35a76caf..634d249fe 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -327,6 +327,7 @@ Library
                    Text.Pandoc.SelfContained,
                    Text.Pandoc.Process
   Other-Modules:   Text.Pandoc.Readers.Docx.Lists,
+                   Text.Pandoc.Readers.Docx.Reducible,
                    Text.Pandoc.Readers.Docx.Parse,
                    Text.Pandoc.Writers.Shared,
                    Text.Pandoc.Asciify,

From 9b954fa855158d99b4ddba7c3ffe7f2fed7ce25f Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 23 Jun 2014 15:40:34 -0400
Subject: [PATCH 5/5] Add test for correctly trimming spaces in formatting.

This used to be handled by the tree-walking passes. We need to make sure the
new Reducible-based implementation still does it correctly.
---
 tests/Tests/Readers/Docx.hs                     |   4 ++++
 tests/docx.trailing_spaces_in_formatting.docx   | Bin 0 -> 12916 bytes
 tests/docx.trailing_spaces_in_formatting.native |   1 +
 3 files changed, 5 insertions(+)
 create mode 100644 tests/docx.trailing_spaces_in_formatting.docx
 create mode 100644 tests/docx.trailing_spaces_in_formatting.native

diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index ffb079eee..a42dc31e9 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -82,6 +82,10 @@ tests = [ testGroup "inlines"
             "normalizing inlines deep inside blocks"
             "docx.deep_normalize.docx"
             "docx.deep_normalize.native"
+          , testCompare
+            "move trailing spaces outside of formatting"
+            "docx.trailing_spaces_in_formatting.docx"
+            "docx.trailing_spaces_in_formatting.native"
           ]
         , testGroup "blocks"
           [ testCompare
diff --git a/tests/docx.trailing_spaces_in_formatting.docx b/tests/docx.trailing_spaces_in_formatting.docx
new file mode 100644
index 0000000000000000000000000000000000000000..ebe7404a93a6a0e2fb0695a04f857841b10b2a2e
GIT binary patch
literal 12916
zcmeHtWmH_*(sm<3gImx9cL{;u1P$&kK^u2(EV#Qn!JW{!LvRfa!JPoX-QnxZz4OjY
zX4YNb|9ej_`s}s()YHZ8lBaf+yc7&9HUJKQ0000;0f-bqzCKU@K;$z301JQsttD(@
z?Pz4}sH^O1Yvk~O31nsYDhC#tCJO)!dH#RbfAJ2~#LLL}37`iaQ9Z&}slhC`vGm4Y
zW?dA?l%2FN3D5O^1e)Ak91%-Om1O5@wnl$pJ-Qo4e&aVTnKTZ~OJ|`tBj?5dO>R{%
z%wSY8*>{~N`oe!e#0(|R5~crr*HBo8;{kxY<ZW^l8LUem*C2Gwmu_wr$5^cj(JZZZ
z8?yaquVFqe?Je1w$pEm{<;}ort5Ei@nT6SO#y3*Cg+mycDs;<Q6sgkb6R>^6Z#sc1
z+Uw=QLJ4UuV0qLE1ius2pdXvyj=IB?oD(`fQ!)<_W-80|#BI2V>InNqG*Ob-T8-;x
z3bu=c)H79RGNaE!d0gW{Da22LsL%H>kB?~0m#_|2lxcchQDga?T}TKvN_O+=W=jv8
z{m^Ohl%(4peeKkC8dMHN)I0Bn+7<RgTzog#2$W8AIuS#jSi;{oVUdlsz^rKBnK)Pi
zBQ+*~(k)KncU@082TQ(1C58lC@ccBUsv--)?2$F@BZp`u2Z@3+>t8t*9Ip`pfTt%I
zfc#%Pmn4Ru{tRM{vJl5ahIp>7y^*B@Gt<xa|9R|xv77$sUk{I6kc1I@9(eR@MZe#Z
zV*!b5Vs2SH+l6co#y@2r$^~^eZ@#TH_f?bpg3kBz?IF3DG{3Ydo<d%@3Vn^~tl2a@
zu@wuDRecz|yhguPs;>!(*a+Cn+Gd8CHzq!tH~e*J&U$sAAO$~}@f&cic0hlda#J*s
z1fo(@%rN0acxh8x-?<u0YD&Sp28x-<kw9&q`di%@)DIj4ACZiYW`{y>`r-9$=9moV
zEG2sFBg|<p7ziv`MkzC549bw+uA1dcUx+;EhD8Q2AZ8<mjGlQorNe0IpgGRvxb{`>
zyd>3N!1Z*$6ZWfSHsy!pu)q6=2Tu|g8RU~j7yy6>fB@xUV-IBhm!kk}44teXiSB2H
z``u}vAZZTr^ndqN7AGU;&+@M@Khcb>`XOSj3XgrR^z+FgqmrqAqGK=GV=GQI(qIle
zlZ<g{+vuc3vu69W$~iPiM73|eT_+iMaH|*Bm(S8hj!DCx!itIBL=cP{U0%JBJ58({
z+eykG<EJ2ANuw??rEM2k;;(72PXN{z<I$Bu%4Y_VPscHMGsgl4Jm2{l`3;-KA%Ew%
zp$)6E!s_=j;VF;swJ6NA0JHh7B8aRRzhKR0_J&<>SzjHvZs=V17P5+W*<tG$oW75H
z6l{H=6HZ=>vznWI`rbl*s_$tr^;8@U6dr!_GIyuHRnYMQ`y&^1W+$?bjhou3wM-$=
z$hPCy$b{-)NoJp#h2*-WY>3+I(vw?4>vtW0G=A+%)PJ%uvnw9oa7Ya1ga-gHAg1%1
zjs4lW@>FD^=2_5N)qZ$^J%V5dKhi*}TKP<q&}AQ9YUB4wFc?G`5h~v9@r>X(YLw3|
zNf9l)u~|+#nVoEYS3=>gp8?C4o2xG!GDf6We1`vUQA4n(xXwFCOD`CP&xas(^!?L2
znNo^pZAwD!zUcFm32SdOK{l~8_ATh4joGn_zJ9}KTtx^~()JiCdXuI^QR}y(>Z_ex
z;a3^Ed=51MO53J-{=Mq>>OC!lzF*0E_MJ`Of~}L22sI?V1op3+{Cx12G%{-Z;rOyw
zW7+)Yv(LaLbUMKcAg1}}QyAp*lAC~eoCzYM7CaP6T;lu(3$n|S{e8MOsp&m?l)8$3
z0Y*59>j)sYD<Rg2UWo)55+Tnh70*b{P+hD^RSJP~BBtzPui?lL1H~(rBdM=u_Lb!P
z_H9Ru`_giK{ZZL`J62IR1ZOD2>w8USviXf1Q0cXYh8^KqA}5=q-}L4RMBY!E3U!9$
zIkYgmjV9?d6;D_-ihE2S=hR`UpsJ^zXDqM0SO?X%;4|VqtP24x)o$Z50ET;`P57!=
zt-{el<~3NuHx;t11YZj=G~d3zU`*ib;$(fJ%Bg?lJM!t+{?!iw-+Y_>6@V_~Y3!@7
z0b7+72(Bi1Sfmd77@e%l3?~7vYAHB8sPtgKC@vehCNqZSz)-~JT+`C}&>vU2IC|?z
zMYGu=4ipPpjmU@mt*9GxwH6zTEIYb3qrAr3I<J7dnN#}6_w7wRPuw{JUSRK?j@8?|
z*OjcyV2>q7m}Nii5viDreO%vs=%}Hu4S}NomX{A~D>rWQfgAFH8{@d|8m1VIADZe}
zO_}m>Eq1x8?(IJu8MF~DU8I<zGJkfGTfTn@G0eY*N0udF+9EgrAde6L!1>*D9Zij_
zjF^9WfBiE!?rX@|EWE{TN1Jtd<!EC_?Mwjg6gpd(#wbJBZ5>2n7LqtCpCd3K;Ym2Z
z;SJT*q#uMcYZaug<%2t$f5$e*ZAm;<0Txq??NQf>eLEXk&QF1PvvK!C^0K(}+hN~x
zVoXw6xp|!j`>(<jC$0i<;DaTSOXhM{ak_Na<P-mzR@#{5+K8)_=Fo2{A_l&Co@C$b
z7Q*5sza=H+p>?V2e5aMrvmO==$7u<*rA5C+*y*)2YgE<x5c*n%H-7a+|1v>TvD}z^
z-1%Gkbw0R?kXW}bCI(p&SSA4-aPUqM(Moeh(8LBam)>$;q5?PDPSLv*J46;rQ$E$#
z*f}9qn1m&3g6nEN+kBjBvJ|uhTY^Gum08NK2+QfUo_WIPnUj3myJ%{46C4GAkNgT$
z6Is8$_bOi=FiRA;w99m^pDAP4_6T_#Odl?nEu6G8s#4E^1HL45xUQWyH*}cmAdK$e
z*glRJF|Kd^v5L~auC{&rKwmx`7{-wmJie)iT24HFWHN=^pIm()c}?);n>oHmpsy)S
zj#TxFGZDtn+7~MuIcNYBp!zs$v6*D=Y!J=iI*5X1=J_#qM?OB!=1?Q)S7Zs@G3DA;
zG^OYGuXC?{++0;;z7y?3%Orkxy?edZwE_Q5Q)r~bD&2oeg}MD<f3)vq?z_y7XQ$(M
z(;$uBr`s;-_WOHRcJv7os*y0}PxqJm17Q{X4-d^^IL9(RVn)}|y&^ejn+OMIFR<UJ
z+EG%I+qL0zAxxfzex-=#>`0Y*_K+D$bpDmrIK_pCi@Upal^AL(7YXEPKZ*fD4++V^
zNq5Pag*vn!;nr5x)E7IWOL66t=rsRspxAq0=%L=z9ER1`-YS9Q^af2z?8I7x==IeL
zS!`4ZA?yi<d+%tZgG0q1vrb93zEb?EJ4~X)()2Q{dF0fo#$k14&p)Q9?mmBXwybuC
zcj7Faa0?@967NSErt^P^`91lBJClY;Erw`4IRWJ(Olgs1e~0q5k+vM&v7`74u~u(!
zjoR;t08c5|l@9qzx}~<Eg32)LP!i?w79TEV^7?B*j!rwx0OBzUI>UF-@aQ=&4(wE(
zt!!Bb?zkMo$l^BLp;AUR%T>VDo9zU%A*1uX!N6&2k!`?g_oF2=G>}BWb}CfuW4meY
z{7R#2J+!ut#1TJ;r~B@6^qm#sTpHM2Wo!5{M5q<PrHa?35n*RRH>3WTVval-{Y-1B
zdHwz~r~A{*dJD}b<~EAa5*BRJrLfue3pGn8ouA#M@d=A)$vN`OS}6DV1Yg2ka67D1
z^w2UytSXqWve?>Ye=$bE)b<onVGFiJl5t;}6cI{^L8X*z^zX{S|D0ETGImYxK4tks
zIJle$v-AVW>>3VV`X_cI*7qf2M5Ye!=~KUGq?Bt91p8W7bd`M$;C@rzgz*(yG)ODE
z>TM-gyx#!wv>q5t#m0`HNeVutOTS^a;M~`H4{hbLm%GT=i;TBRhQH<w<ezXGy(_X#
zkMA2yC(}eDe15wM3Z0f*)J)wcwB-ql8{8*Ap50n@$70^X;61~X=ShiVprxjk8dF%X
zU{6XTIklU=?6s8*?Bje^A+;oCfZJ&lSK*IxrxZ5_yctd_*M7}`_`~)>|A4;`tOTz}
zI*pqcgM#uVApju7Mos@@k2UP3fTXOPTP;yp1=n$0Apzy=_(7`IaJ{suT8ez$`I4PJ
z-y<jZg-8bmL1oBn-|9!=k}Np^N^O_q&@A0mYgw%Y>Bb%|sixD2<c>MJ;RytjE!9HP
z%mG|$Bt=p`SNah6CY3?!c%F7Tsx4;m1p60HqsrrVh>QGXu=6nd+9=53js-P*Ov>Ha
zNck8SchCgzN<6PLS3lwvkQS}_&fnv0%0Ja}nV2Ql1PbTP7m{LPUy|b>3SDL~r0nnn
zsPio}Ge{(2uwtlSsuig2V{lPe7Fn#8EHCh33l^oB$0k7e@!)$Mx+e<e%5!jSc{tE%
zR9?*!T^I7aOdHOM*9wvmj$3F)uQWw7ZZ7#?leq0}qk+Nq+M=4kRTWS@p*l7=@yIU9
zxvgqJKa<`&^i`Ylc)g{UMV(EAc9E3y0sitGoriI|^A3tr|5bJ!*)=FTL~vF>S+^<z
ze&X2#YGjsOmEYTBHd>4MGN%dEA&x!dLr3H`>hN+(p0M^&v;7<{hdHiBP)zR#!~X1i
zBqJL;=n@#(ImY>}|3zQ&C~%8e!^07csP);(b1G>GEa~G6NxhwHcq+74H;iJ{jvLJX
znA*H9Q5vEktiK6z%=Le)%)!Xf(ahS!;b&T_QC+ZJ;6(S-&3byZ(-=wdO#+!za-8bY
z+7F~vHi4FvS2>Jg^NN=K@Y_)|b}lfEF_iHZXwGC}9VGmzy?vX3%loZzC>^8hDI3)I
zU}R{UWc#u;?$JcIIvICoo;Mwzuf&RO%k6;&1B!Z>F_j>KzT~p%?y{zeaf;d1mk_QP
zBv?8ZMF)cur7L{uTWk3+ReMneE1KNPWX7|5h73Jm0;*;_R=4k@sua1)Ok=!#eJ-Q)
zyK&(s@x@hF$^{=kFuy^8k3OnmGM@@n&dF0)Amt%>|C+lf5oegQVT{+PJ)KK@ll$>?
z%z3)sMAb7xTY>Zdd#>ztr~-997&p@k#5!)|$~W)<hoXma4&MX$&b;*J^+P#o#zlf&
zlS66awlKcC5Hg=&p_o=Ln?MFmanQQ5CL%e82@7%O@H!wdygxLTV#u1O(HlOiHWEO$
zWN??^WG$8S>%MaYlfzWk=M|SLn>xNqK3gy%d9}bdo>!Y?A*w0T9N%d^kn@51DgF{u
zqogt<E@-<<%7g{1u@aq2RECB#s~vRbjo@orMkh`kX=4P>G$}Y~5^*=$%LD((X}@d8
zhhkY*hUbeS{?ui^o?p+@IiUyo(uE#FkJd)YBNjgIVMjatvo$iW-i43u%3vr1c*_N5
zr0>+#`pQ<tGm2iA#P(Yzeny7)ZhO69L*O?j$4T*TBS}X&_Ue0X>CQV-zNTK>I$l_x
z#Ts9B&|tk~UTV6|VIJukKw((ZpH1@TNR0*V;Ub-eB}KH|iHlBgcq8MGSp3Ki8SEjW
zc}^W}6}_ggW;Qw$N#OL12W_ZM<m$~KFk3f%-Ky#17yEjw^CL}EIuSy41%33_+Lf9z
zAmv%Y!Q92?GQ_R0OaWpgf{vfi=An!o+<Pb1wbl_mg+*T8p%|RhI>W*k?86I7Bs|LC
ze$!3oh>YEx=Mw}|O0F%vZ!Taii68%HgS>5gQrcD6@5T0DKf&0F+MYh*6+MF!HZ$3Q
z?@z8N6QprTHMl&?s$J7z%|0ZXyYHBoA&So*VA$9fmS;D#v{n}^M!h1@yNR&$Qb9d`
zR3sbGSPpx&XbDr~?a%~rLAhzHFyd8%L3S>#E=ZI|_RwzQ`_P+hevLQn8=14*8E;hO
z@+8C<MzW44-Pgk>yAvMRk}CJJ^83eb`_o*vGyS{q+3Vl}_U0rbDDP0N=gCLCxS1aR
zv2Mr9!j*-Fd{PGa!~UI^x)>QK{mFsD$2ufoUZcaUHm~w9G}g~0kX1R>zfwFQ^SuR_
zIXe>$13RV7%j9h4OJ3=?4N;^-rFb>o3h1`NgyoJE8|#!J`Q~>76f0FQ@6AP&P^7TD
zkb~l`g;ywjJT*vJc%Eh=UVI;4LL;bIlQCa_tySJ*4HYp=D?}4T{yJ4zGLwzq<3djg
zWjIx1#2M4Ug~|urQk%e01yiDo0G=uy|LDkv)DFBDU&{~iFBw1O8m*JEYM|5JWxaDX
z)xI`L?RkfgVT4IA*}{-3hqVi%GM{z}5~`r9Iq1R;{@R-a>rgH484_h+zr(LD<1rNf
zO#S8Oh^zQdBu7%&Prya5f7~-CXsE|NNDZpb2LKTKzGn`Ou9ij)YG#h6qQ=HXhK~OV
zeQ8!okkHq+vNG%mAGIPZp0zp1b|Ak4!(kQmmgP#b5+|qB>gHZxmH(lfLhH4&<x%fp
zDQ4gN%4g>bUBP^gqs-1PuA35cz#7}aKI+e1<Z`!UZt4wJE-#}VZ8O*mV@9FzPXg|S
zm9CZ+R=nJ+Tg9uly$exa1W$xn?K3XQB=+FNN&K(^^-pZbgF>XYxOpytoH2W~$Jao5
zEH74`l+2w^>mqKvGCs~dIbw-T0gYfYnO<J#8BHqEp$WqjUTLAk`{508FiEv^naHau
z6R@YfPHGG?hg?orXEym>G=aT75#1xL7)1olrR~>#i`^~KC{~$t3SPiVV7#?R3-X4%
z1^Rk{<8CE=a32fJ%0lmCHQZ=m_&|qKXI*q?Mn_;HU))-npuXOB!7Z1h>874t2{jhc
z_wk5(Fuegu!xM4QA^oXbmgXgK$b0(Yo?enb5-k^l*TeHN=ach_L$RxK1C2Va(_@OU
zKD`qktks7<P@gkdlaQr=RRAPbg(i_D;#Z8~0nDuyZN{$A`9BQp)39HL$=!a6VjWa8
zaqm!+3+9PN2V)|!T;H%t1|YB4CK@Ld>rFGi!ycWwBD^rTkm8w;st)bA>cl$~5uAL|
zn<YoaJ+xHiZ&Z|R0pWsovORipbMK6%Qv&YLcy7;B`80i&0)}F9xAIxvj>jZEsggzN
zcB6f&p?`g*cjALzH^RAtj*!AifRL~{M<nvq<2t7F#>kZlIruFAT%vKUp5_=$jb56M
zVRfBY(Jp>OeU*goQEAJLOtQSPNF_v-m4|$FVbS9sBt;TB+IiLZ1EZ(<&XnBa@hVX4
ziuTH7wB2h8>4W+?ki=;9@kSd@DEjtgCNBwnKI|lhlEh;HoOie&HyzP|Jd^WsGCX_9
z$7`FA^i-n5d`nKH4`0V0U3?v<ZuG{aMU_zm7ASArxEH(UY_Tc=)5dyuESbJ{wR*P>
zkA)}C7b#Qn7pI&{pQvoFojZvQlwd^A{U9EvNnOT>AlQX0LLdvI9pZ6u&{6RFkM3e@
z6LVahhMEmOR2)l8U~0ifBDeyj`wC6F0>$*25e`F`m(gI*M$xjSOS2S1(hXf#W69RJ
zZKbwxh%(PeCce*MS>j<w>OE;V2LlfH^$`q8`YliGogF_Vv@@8mNUh+S;Pr0Az%s5g
zAxo4EaP_RaaYK$O*Z*WCubQnnTxT1u;35n(3q-Yit}=8GkWnw~b|?MfPFJL4JF7bF
zQ@#I$2~k4VSz?nV$78}B8QM^>;QsrOh)k>rZDCWq=E5E6{G!Gtw_JI|FaCP-&-h@i
zUaqy9F*)AR^}TqKq30gs@AR5%K&`3R%jIbY<~IhmmvI(tu~Szp(c||6GvK!fb`pZP
zT%T#dGQpmb)m<u(y{4{M#XH^Ab|bm}9$2ho&*aQ4nH4>$Pvcg(W=Kk%Pvx#8B(e!R
zt^NkuB6$W=066~8@iO3+hou(ZG@;J@>++VzJS)tYue|%tQRDFK)mSy|*DghCvTFy4
zL1LnV8lx?JGf}>jk7E5N;9fBR%APmu7+6&tP&*}dm)UnI!V3gBP=NcZESr;=cxqTj
z2L>$#Yj9Z<S48&PHC1ig8QJcre1&A?t8K_=HutC8QV!A(W=uV?I?Q(o4!BCp=_qOJ
zSIT_%WdRuDoa+&Q2=Zlwv7ZYRyrYl(p!Ep7nA#3=bCnj1R}V;AYM#g?p&Za6GjO@i
zJuBgZZH48IPxd^%Aaa^bSoR)5K~j@KnHCI&2c&#Va|U`Rgqgo!{s-_XUUm6@eTgXH
zWoW(5MZ0zj2DZI#>Uu<0bhQ$ea<haexF4~kvJ6D7ev2dd8k^z6VtiIFhK-2GMlc67
zTO^2F;J1Z#%N^-P2#2re3|wik%E^20{!=}{-|q8ZwU!Hm`>mglSX0<GumR~AsacvC
z!v6={qyrbm0iCDIg)I_dN_6sG<o_F%;O1A@llT9Eog!8LsaGDd0~n&>Z32vd8;-)n
zPhc_vu@;Q1we=-5d^yObRlQZ0DgU{<iSWPgveO9eNLWr(p{!OYz<nf(ciJ0nL^Ia^
z87|c0szv6P9-(C~Zs03$c(i~eEt+NUQU3kzDys9RI&1_2N>!ieTk)%E{(PmzU|a5^
z1_|4%ZwY5}+TLd2lav>yn<$GUB|3N$-{PivNRQvRN3IRZYG*R2&8+%HH_fYPLrr`W
zvRM_9w<{MzQN8k574<AG0hK+i)Ma&!3QWPI(b&K+pjnGJWre~VT|STHKh%M*PY?YT
zx&IlGGjIzxT-<5Smc!npW_&dHK>-RHz0Ct{6LH<*#7!6Y*>XuWPi>RtS>hzq8y|Pc
zYzq4?tUD)46g+i@(_TM@!gr_@Fy|SuT6?jt?hSkJ@Qo9Ljm;^mr2`K(usj8{-K$0c
z@xs&iX9~~8b*L3cK7$mAIBXatrn$~#YLY0;Mw;s4p`8m{+7jO7+dM&%{NK;=4su~W
zOG1c&C*&v^1Jde%oIuOl+t@lV8`{_#{VaGOO~L>5Hz2$yJXS%j>lJpuip)J4$`PZ{
z2V6?4q@uYvDd}{(lZY=L#2DfMpBLJb>a-4W(D-#m)|$6<`jf;|#NffQZ<_=tBots%
zIfQ<c2-u{lcm$Dcr{I=ZvtjSRqB0IGq@@!M5+XC#z1C}m#d{X(9G?b#(8_^r*msqc
zEafW<Z}&~T$AiWy0A=G-88old<TKUP6urzf)E8+n!6V`wVtF@JPH5;v!YD{JF3A+I
zPar<^t;d!zdaT0OM+lwWZcwUQ_2X=cl8x-Gvx<AWXXt+qYx8Zi;-RQ=YUwj)7As!5
zJ@PO|9qfl5=<v~#O|X{U*L2;W4@VTiL6hBAT6_Drs8#NTv6~>xK%ECuVPSLy1gW5L
zra|}1>l%%0`@~q+bf$HUI?^k)A=A{4j+LJeUDkqNoKUa4*6&k6Q1PDYduU1@A$9fN
z4N%UF=#?16G;O&70JOjI&)@3pG#Q75w~)3{#;(`u@nL_gcIk=80H^FkLtLILP;L|*
zQNmQ*moj_yu;R!-aGcF7wwcPI>e|N>JloS8{k+A^0Z(ihEoDoRTFUEoefA27xg$@x
zkniR2u(so}G87>$gt05a^x&RyXQT6UJ5;(*y|^gGdi6yh_ES&C#0q{~rVN!UIIgl^
zcSo7(*fzIX{YArYU7R{_<!V<UYeV@(t!fPGRovXkNwIG70FCOoaW3?YEc~5172JXx
zE546t#wJg~cX*3>Y>Rqs<Vk6G*3ph#PWwxP?AVFn^E`bTRlx#>*4NBmQbP8|yHkC?
zPa581i?!jwn<OoWm>3XutS-sQ-G5`T&qiGyh?o_ls!y6zQPsv(yWaMbl|Ed`4B?gk
zK@f=jyc@~VlUL4us|$<P-XW7-!SlMU!Q|7?e%N%7(a<Xn96uYCqWh45?@xl&FUd4-
ztLN560uH)!-DAY-g4|iTnlw~+s?VP=#JqIov@F2Q=c3Lv-1WUwJG*N(JsamSPqqOc
zBt!uxfHww)6r0cTLf8pt(*moX<%Kt2*VPNtU%tmfXZy+d8p=?(^N=Y<2e(*w6Vl|k
zubH|MeBh;CYS^BAKc!RR_T&9<%dD$OwWvLy?y0O#a}ZB^xJ1E&GuZCJ+BjfzZTqwb
zKm62bkaZQ}bNGpg1tW*o!wau}%9trx)*s~*ZvkKr->I;s7j*d}o8$0(L@37-->X8V
zKsiNc#aW3%{U;v5or{kwrpA0F+}_R_o*8HycC3rtHeE7$r>KNsJ;yrUHE0pRLlKgy
z6Mp87tFaZ_sAoyUv=&-yRYmop<YsPKZ1#DO1tny`ngv;`wlWf&99WGbn3#kKWZ6zv
zn?~jerp|sF6q%?y!p{0h`6MG;C|N7NiZq>BT9yB#d}?>K*-kiH=Qjq8tSPc}t>n<t
zbFxb&7N>WdOCj1FLd7OKCNM#d^GqHs?^tmzb3Qr@d^<+OqmoMmmx{yM7v{<vFW0s>
zU>Mar<+LVf8J#;@>#4shJ{SR&5+^R|%$f~l)kpbPKS^wH!ezwm*7PLsZ{)qmtlKnd
zJ{e_IzPn~&^Plc7o^*JhY=N+EaD|>Ri_UJMR)O~xfQ_f!xZ3P}iTL2s6JWE_n>^vA
z7Mf}HY0OKlA=B*TSuRhTWxgx?iWc9*xh8n~Fd)*gZcZ$SYm6QZ`SLjv1{>aNTtkf<
zX2>&lxOcH~N4yBDX@rz=bCsYm1-JlWRI!eM_g4YL@u~YV_#~Au%7$NGl<8{@0V+kW
z((beOM-3JYyy-rCW_K?<YcYb&f{mFO)4iHMugErGOcl%5raE?zVB#aNT4p^fsv-NN
z-D=nkQeQLl4pb3<GgYSrdTXBMdVW=!5#a`c?113eDs9!*(mYkH%(H{qz934yye0{C
zuiD||OKsB`8R!7wEaN2#`PuQ}>8dJgO!eGp8R*h{^P0Y*l47F5S*L0wIyGq6C<uLm
zhVUFJ@81>;pex~CEF0*sa+Lf6)t)p!ry8g3g%pIBA-<}6%UMhR@eTJBU(LkBmNV=L
zmHaG}c_@cZXQXTz2uuAMi5kniOz{UonO%1p077gKv;MkZT7HN==}!A%U?E%#p=DIw
zv#0XDCF?SPHF_c|kYbD7%#-7vL1pBJMCp8bP~{_3Am0*DVH=yRr*H3&{u7!6LL^}!
zELG+wU4<R9Jvj^tr(TA|)gqMq$)KRR6N507acrQ2%TeS)Ib?f3F)hO)@d(KL#2+t4
zG%t35#pMxTSEYr>d>_(-D07^!?aTQa>>qm)80PX0$a_Kso}HKf$$%l7z#E@c0>Too
z{7vD=`(G56`uQwWNu^{lIr!&>B49x%AC|!N;2QzXFi!nHH2+TuJAY!_$D_27%c!G(
za{APvuLgt9X@Q7|qt1CPV-1AcE(1OtM*Dp63wmxK<tG2Zqx8Zhs;<&~IrbGWUQ<kR
zBk|CEF<*%*m_^qyouc7!uk&sP^$Xj)z9iqWD@<WfNngM*D3*^C_JQq+mS|1_jc2Hw
zo%0ni${9z@`#64)k_dq=@?y64G99fYIH)9a;o57#7wMBvFiKjH9()-<_9x>*Iocwj
zSp3QO_Cxz3d5H!%9jj-FpTL3kM||_Sb8|L^F1{}3_%5L?kugCP4MlHx5Q3_~Ql5PN
z(dFbW-Ou^8sYpQx+ps6?km$CSVOy|z6A+#=3XPVBpHbINFc4uUSJ<ZxWl=CQ|AM%_
ze$>{F^Kz3GCK`2mq(N#=#66SZ4#fC|b8Y(dciozDA`s?*on%w%XU1Xy;@zM&V*ag)
ze<pp(Vl6dVKL;}*c^C8doNEl3;8fN(u>6hnEylN6FR)+-TtR!n`?xbmsy+nu`)nwS
z1_4bc;j_#J$>?ICCA3a{>}T-QM9Djv`Y^c?!f2c!wcb{GPpR%@WthYGdNR3s+}s>z
zW-bL!X}q>-h$TJce!(TJ$i8ilk>m1hd;q}Ji^)Y@9LpeLVLUc>eN#Q@flY4~0dtp+
zk4TWyJ#V?$6i6kFKB<5oCm)`G+uf<wYWr;bC}|E+g^WCL6f}4+fARV-d;a~)jhyvl
zCFkrNMHaK6htI~eoOEWGnJF1sAH%gipYu+JvKp=qla=4Yu2of@`0|l=6sx}Pse=i!
zD~(+2M5=RdxTjFuMBsL7jh2klK-#xw7T6$2kBg$fcqJ<_Xp))~5H4<%3}1^QA?)u-
zw1#)guncvGE4PV@%0@cRvcS-tK!nvXc_GWr#Zf>9T$}(FRs??+6;>HIeukZB94LG~
z(e>g^!Jn=NSpm=qoTuSuQ)vynNVU!tu+OTTDAZm@k$NfrrEpp0a#wYHj@<5Mrg1Zt
zsst#g=>E*xS?1lMr)$IA_eHOJ+oxRz&mule{=+rW=u@A00EoDNol{(@h->l`@Hu$}
zd;_K06GKw@L#E(ihm5;ofUS)839W1ujyGYN9dDRZjlpG^Mi40FhD7o~4<nw|e8UIr
zbS&Q{rOt3t36R33;p63rB<Y0?YR80-%ED7uMK>MVG~3&>b4!ku`m~n|3kwI|Ewlnh
zhMr&SJ+EDAcp?>6BGWsLPTa>%DXvli7DaTm507jtC`*$Nv6-1$W8E9DXFM98PLcPA
zdqE8D@AMBJ`krzIQh!WB$RYZl>koZf+y9jx5a0awm9EqQameSbXtVELv~k#1Wf4Ma
z^h6u%=xf1rTaBhmQ+EO-qLp*!zC~3Ls?9m4%0)%7U7U-bq$TO#!Q5t*6%TFY&>^!;
z)D05-IL*GXzbIx9f}QlOgWEgx&u+muS~<z6NA*;Gfn=^P^nt|Ck+9d7*8ojl&`daG
z<d%di;PH#)H`<VTRsJheGA)&9_O7Jhf}YF4uP8&s8^*y;3pKT@yU0;r2V(R(2=_Ar
zgV)_-bGgzGmuC^06!=0Z#~r@7*nWv_Ks=`2TX^-ED-Ezh(%}~l%(y;|sb<*xlrbqO
zL?PG1LD`B@7uNFtx+8lS(a`A}h~XPFsaTg#PTwMZDjB9Qgr*qk>hR-fGfyLLOUkm7
z`i4uq<DK4$agRzG;RqkPc}i*x7e9VWR;0@|rJ`}~?LAP|MD&H8B}Q7xNQuSZK<GS2
zK|#r8Q04^Ttcs}Ai8b0BQ5%=aenKEb%4q{}tt~gL*DLtLcLOn$xG8B_xa~KOJ59B%
zFb+9gCL(Z8&(S_t)|)Bl@U>7>XTK{3E%HROrC-p!^h#WHpU1E?%iMOhfo&(RZ<RHf
z_>l}#x1HLKWtL4+$U&F5Cn~zqAZQZl?D&y)odfc#{>vAjpcx^pihmxf|NXcB{r(R_
z_VQAH1^DZ9@9%;??@bT~`_ruNuY!LadHl2BGGx-{KMg?siuCJX+aFj$kb&gCjk*0Q
z{_AkjAL0+l{}%ss)aX};Uz@mpK+Hn2-ESKFxuyH7_^(~0Kg1Jp|1JJYPw7{LUwbNl
zAPnODuE0NaSboL%>*35FC;-3!WHRe-M>W5S|5{rA0U$~C7d3tc+Fyl#<p_TWx6%Gv
s_%Dp%SNXpN_CKTn011Xa>HhC9FE8~BVrBrqbI1!3VyP-W+w*|`2magm<^TWy

literal 0
HcmV?d00001

diff --git a/tests/docx.trailing_spaces_in_formatting.native b/tests/docx.trailing_spaces_in_formatting.native
new file mode 100644
index 000000000..46ea9bca8
--- /dev/null
+++ b/tests/docx.trailing_spaces_in_formatting.native
@@ -0,0 +1 @@
+[Para [Str "Turn",Space,Str "my",Space,Emph [Str "formatting"],Space,Str "off",Space,Str "after",Space,Str "the",Space,Str "spaces."]]