From ef5fad2698f3d4c1fe528f138264cc8abb3b2943 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Mon, 23 Jun 2014 15:25:46 -0400 Subject: [PATCH 1/5] Add new typeclass, Reducible This defines a typeclass `Reducible` which allows us to "reduce" pandoc Inlines and Blocks, like so Emph [Strong [Str "foo", Space]] <++> Strong [Emph [Str "bar"]], Str "baz"] = [Strong [Emph [Str "foo", Space, Str "bar"], Space, Str "baz"]] So adjacent formattings and strings are appropriately grouped. Another set of operators for `(Reducible a) => (Many a)` are also included. --- src/Text/Pandoc/Readers/Docx/Reducible.hs | 150 ++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Docx/Reducible.hs diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs new file mode 100644 index 000000000..1ed31ebd0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -0,0 +1,150 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Pandoc.Readers.Docx.Reducible ((<++>), + (<+++>), + Reducible, + Container(..), + container, + innards, + reduceList, + reduceListB, + rebuild) + where + +import Text.Pandoc.Builder +import Data.List ((\\), intersect) + +data Container a = Container ([a] -> a) | NullContainer + +instance (Eq a) => Eq (Container a) where + (Container x) == (Container y) = ((x []) == (y [])) + NullContainer == NullContainer = True + _ == _ = False + +instance (Show a) => Show (Container a) where + show (Container x) = "Container {" ++ + (reverse $ drop 3 $ reverse $ show $ x []) ++ + "}" + show (NullContainer) = "NullContainer" + +class Reducible a where + (<++>) :: a -> a -> [a] + container :: a -> Container a + innards :: a -> [a] + isSpace :: a -> Bool + +(<+++>) :: (Reducible a) => Many a -> Many a -> Many a +mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms + +reduceListB :: (Reducible a) => Many a -> Many a +reduceListB = fromList . reduceList . 
toList + +reduceList' :: (Reducible a) => [a] -> [a] -> [a] +reduceList' acc [] = acc +reduceList' [] (x:xs) = reduceList' [x] xs +reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs + +reduceList :: (Reducible a) => [a] -> [a] +reduceList = reduceList' [] + +combineReducibles :: (Reducible a, Eq a) => a -> a -> [a] +combineReducibles r s = + let (conts, rs) = topLevelContainers r + (conts', ss) = topLevelContainers s + shared = conts `intersect` conts' + remaining = conts \\ shared + remaining' = conts' \\ shared + in + case null shared of + True -> case (not . null) rs && isSpace (last rs) of + True -> rebuild conts (init rs) ++ [last rs, s] + False -> [r,s] + False -> rebuild + shared $ + reduceList $ + (rebuild remaining rs) ++ (rebuild remaining' ss) + +instance Reducible Inline where + s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> [s1,s2] + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + s1' = case null classes1' && null kvs1' of + True -> ils1 + False -> [Span attr1' ils1] + s2' = case null classes2' && null kvs2' of + True -> ils2 + False -> [Span attr2' ils2] + in + [Span attr' $ reduceList $ s1' ++ s2'] + + (Str x) <++> (Str y) = [Str (x++y)] + il <++> il' = combineReducibles il il' + + container (Emph _) = Container Emph + container (Strong _) = Container Strong + container (Strikeout _) = Container Strikeout + container (Subscript _) = Container Subscript + container (Superscript _) = Container Superscript + container (Quoted qt _) = Container $ Quoted qt + container (Cite cs _) = Container $ Cite cs + container (Span attr _) = Container $ Span attr + container _ = NullContainer + + innards 
(Emph ils) = ils + innards (Strong ils) = ils + innards (Strikeout ils) = ils + innards (Subscript ils) = ils + innards (Superscript ils) = ils + innards (Quoted _ ils) = ils + innards (Cite _ ils) = ils + innards (Span _ ils) = ils + innards _ = [] + + isSpace Space = True + isSpace _ = False + +instance Reducible Block where + (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = + [Div (ident, classes, kvs) (reduceList blks), blk] + + blk <++> blk' = combineReducibles blk blk' + + container (BlockQuote _) = Container BlockQuote + container (Div attr _) = Container $ Div attr + container _ = NullContainer + + innards (BlockQuote bs) = bs + innards (Div _ bs) = bs + innards _ = [] + + isSpace _ = False + + +topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a]) +topLevelContainers' (r : []) = case container r of + NullContainer -> ([], [r]) + _ -> + let (conts, inns) = topLevelContainers' (innards r) + in + ((container r) : conts, inns) +topLevelContainers' rs = ([], rs) + +topLevelContainers :: (Reducible a) => a -> ([Container a], [a]) +topLevelContainers il = topLevelContainers' [il] + +rebuild :: [Container a] -> [a] -> [a] +rebuild [] xs = xs +rebuild ((Container f) : cs) xs = rebuild cs $ [f xs] +rebuild (NullContainer : cs) xs = rebuild cs $ xs + + From 94d0fb15382a4855938c540c9e521642bccc00e3 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Mon, 23 Jun 2014 15:27:01 -0400 Subject: [PATCH 2/5] Move some of the clean-up logic into List module. This will allow us to get rid of more general functions we no longer need in the main reader. 
--- src/Text/Pandoc/Readers/Docx/Lists.hs | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 68559d98b..1e37d0076 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists. -} module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets - , blocksToDefinitions) where + , blocksToDefinitions + , listParagraphDivs + ) where import Text.Pandoc.JSON +import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Shared (trim) import Control.Monad import Data.List @@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems blocksToBullets :: [Block] -> [Block] blocksToBullets blks = - -- bottomUp removeListItemDivs $ + bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) - plainParaInlines :: Block -> [Inline] plainParaInlines (Plain ils) = ils plainParaInlines (Para ils) = ils @@ -199,6 +201,23 @@ blocksToDefinitions' [] acc (b:blks) = blocksToDefinitions' defAcc acc (b:blks) = blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks +removeListDivs' :: Block -> [Block] +removeListDivs' (Div (ident, classes, kvs) blks) + | "list-item" `elem` classes = + case delete "list-item" classes of + [] -> blks + classes' -> [Div (ident, classes', kvs) $ blks] +removeListDivs' (Div (ident, classes, kvs) blks) + | not $ null $ listParagraphDivs `intersect` classes = + case classes \\ listParagraphDivs of + [] -> blks + classes' -> [Div (ident, classes', kvs) blks] +removeListDivs' blk = [blk] + +removeListDivs :: [Block] -> [Block] +removeListDivs = concatMap removeListDivs' + + blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] From 11b0778744d0eeb61e2502e452d010631fab979b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Mon, 23 Jun 2014 
15:27:55 -0400 Subject: [PATCH 3/5] Use Reducible in docx reader. This cleans up the implementation, and cuts down on tree-walking. Anecdotally, I've seen about a 3-fold speedup. --- src/Text/Pandoc/Readers/Docx.hs | 378 +++++++++----------------------- 1 file changed, 108 insertions(+), 270 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 09c2330fb..ffe7f5a92 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -79,8 +79,10 @@ import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists +import Text.Pandoc.Readers.Docx.Reducible import Data.Maybe (mapMaybe, isJust, fromJust) import Data.List (delete, isPrefixOf, (\\), intersect) import qualified Data.ByteString as BS @@ -96,28 +98,65 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" -runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) -runStyleToSpanAttr rPr = ("", - mapMaybe id [ - if isBold rPr then (Just "strong") else Nothing, - if isItalic rPr then (Just "emph") else Nothing, - if isSmallCaps rPr then (Just "smallcaps") else Nothing, - if isStrike rPr then (Just "strike") else Nothing, - if isSuperScript rPr then (Just "superscript") else Nothing, - if isSubScript rPr then (Just "subscript") else Nothing, - rStyle rPr], - case underline rPr of - Just fmt -> [("underline", fmt)] - _ -> [] - ) +spansToKeep :: [String] +spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans -parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) -parStyleToDivAttr pPr = ("", - pStyle pPr, - case indent pPr of - Just n -> [("indent", (show n))] - Nothing -> [] - ) + +-- This is empty, but we put it in for future-proofing. 
+divsToKeep :: [String] +divsToKeep = [] + +runStyleToContainers :: RunStyle -> [Container Inline] +runStyleToContainers rPr = + let formatters = mapMaybe id + [ if isBold rPr then (Just Strong) else Nothing + , if isItalic rPr then (Just Emph) else Nothing + , if isSmallCaps rPr then (Just SmallCaps) else Nothing + , if isStrike rPr then (Just Strikeout) else Nothing + , if isSuperScript rPr then (Just Superscript) else Nothing + , if isSubScript rPr then (Just Subscript) else Nothing + , rStyle rPr >>= + (\s -> if s `elem` spansToKeep then Just s else Nothing) >>= + (\s -> Just $ Span ("", [s], [])) + , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + ] + in + map Container formatters + + +divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] +divAttrToContainers [] [] = [] +divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = + let n = fromJust (isHeaderClass c) + in + [(Container $ \blks -> + Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] +divAttrToContainers (c:_) _ | c `elem` codeDivs = + [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)] +divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = + let kvs' = filter (\(k,_) -> k /= "indent") kvs + in + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') +divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = + (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) +divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) +divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs +divAttrToContainers [] (kv:kvs) | fst kv == "indent" = + (Container BlockQuote) : divAttrToContainers [] kvs +divAttrToContainers [] (_:kvs) = + divAttrToContainers [] kvs + + +parStyleToContainers :: ParagraphStyle -> [Container Block] +parStyleToContainers pPr = + let classes = pStyle pPr + kvs = case indent pPr of + Just n -> 
[("indent", show n)] + Nothing -> [] + in + divAttrToContainers classes kvs + strToInlines :: String -> [Inline] strToInlines = toList . text @@ -144,103 +183,42 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString ---- We use this instead of the more general ---- Text.Pandoc.Shared.normalize for reasons of efficiency. For ---- whatever reason, `normalize` makes a run take almost twice as ---- long. (It does more, but this does what we need) -inlineNormalize :: [Inline] -> [Inline] -inlineNormalize [] = [] -inlineNormalize (Str "" : ils) = inlineNormalize ils -inlineNormalize ((Str s) : (Str s') : l) = - inlineNormalize (Str (s++s') : l) -inlineNormalize ((Emph ils) : (Emph ils') : l) = - inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Emph ils) : l) = - Emph (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Strong ils) : (Strong ils') : l) = - inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Strong ils) : l) = - Strong (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = - inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Strikeout ils) : l) = - Strikeout (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Superscript ils) : (Superscript ils') : l) = - inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Superscript ils) : l) = - Superscript (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Subscript ils) : (Subscript ils') : l) = - inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Subscript ils) : l) = - Subscript (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Space : Space : l)) = - inlineNormalize $ (Space : l) -inlineNormalize ((Quoted qt ils) : l) = - Quoted qt (inlineNormalize ils) : inlineNormalize l -inlineNormalize ((Cite 
cits ils) : l) = - let - f :: Citation -> Citation - f (Citation s pref suff mode num hash) = - Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash - in - Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Link ils s) : l) = - Link (inlineNormalize ils) s : (inlineNormalize l) -inlineNormalize ((Image ils s) : l) = - Image (inlineNormalize ils) s : (inlineNormalize l) -inlineNormalize ((Note blks) : l) = - Note (map blockNormalize blks) : (inlineNormalize l) -inlineNormalize ((Span attr ils) : l) = - Span attr (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize (il : l) = il : (inlineNormalize l) -stripSpaces :: [Inline] -> [Inline] -stripSpaces ils = - reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils +inlineCodeContainer :: Container Inline -> Bool +inlineCodeContainer (Container f) = case f [] of + Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans) + _ -> False +inlineCodeContainer _ = False -blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils -blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils -blockNormalize (Header n attr ils) = - Header n attr $ stripSpaces $ inlineNormalize ils -blockNormalize (Table ils align width hdr cells) = - Table (stripSpaces $ inlineNormalize ils) align width hdr cells -blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs -blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks) -blockNormalize (OrderedList attr blkslst) = - OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst -blockNormalize (BulletList blkslst) = - BulletList $ map (\blks -> map blockNormalize blks) blkslst -blockNormalize (Div attr blks) = Div attr (map blockNormalize blks) -blockNormalize blk = blk +-- blockCodeContainer :: Container Block -> Bool +-- 
blockCodeContainer (Container f) = case f [] of +-- Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs) +-- _ -> False +-- blockCodeContainer _ = False runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) - | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = - case runStyleToSpanAttr rs == ("", [], []) of - True -> [Str (runElemsToString runElems)] - False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] - | otherwise = case runStyleToSpanAttr rs == ("", [], []) of - True -> concatMap runElemToInlines runElems - False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] + | any inlineCodeContainer (runStyleToContainers rs) = + rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems] + | otherwise = + rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems) runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = case (getFootNote fnId notes) of Just bodyParts -> - [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] Nothing -> - [Note [Div ("", ["footnote"], []) []]] + [Note []] runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = case (getEndNote fnId notes) of Just bodyParts -> - [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] Nothing -> - [Note [Div ("", ["endnote"], []) []]] + [Note []] parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines _ _ (BookMark _ anchor) = - [Span (anchor, ["anchor"], []) []] +parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] +parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = case lookupRelationship relid 
rels of Just target -> [Image [] (combine "word" target, "")] @@ -276,7 +254,6 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = _ -> h makeHeaderAnchors blk = blk - parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] parPartsToInlines opts docx parparts = -- @@ -284,23 +261,32 @@ parPartsToInlines opts docx parparts = -- not mandatory. -- (if False -- TODO depend on option - then bottomUp (makeImagesSelfContained docx) + then walk (makeImagesSelfContained docx) else id) $ - bottomUp spanTrim $ - bottomUp spanCorrect $ - bottomUp spanReduce $ - concatMap (parPartToInlines opts docx) parparts + -- bottomUp spanTrim $ + -- bottomUp spanCorrect $ + -- bottomUp spanReduce $ + reduceList $ concatMap (parPartToInlines opts docx) parparts cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps +cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells -bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block -bodyPartToBlock opts docx (Paragraph pPr parparts) = - Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] -bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = +bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] +bodyPartToBlocks opts docx (Paragraph pPr parparts) = + case parPartsToInlines opts docx parparts of + [] -> + [] + _ -> + let parContents = parPartsToInlines opts docx parparts + trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents + in + rebuild + (parStyleToContainers pPr) + [Para trimmedContents] +bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = let kvs = case lookupLevel numId lvl numbering of Just (_, fmt, txt, Just start) -> [ ("level", lvl) @@ 
-317,12 +303,12 @@ bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parpa ] Nothing -> [] in - Div - ("", ["list-item"], kvs) - [bodyPartToBlock opts docx (Paragraph pPr parparts)] -bodyPartToBlock _ _ (Tbl _ _ _ []) = - Para [] -bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + [Div + ("", ["list-item"], kvs) + (bodyPartToBlocks opts docx (Paragraph pPr parparts))] +bodyPartToBlocks _ _ (Tbl _ _ _ []) = + [Para []] +bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = let caption = strToInlines cap (hdr, rows) = case firstRowFormatting look of True -> (Just r, rs) @@ -344,7 +330,8 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = alignments = take size (repeat AlignDefault) widths = take size (repeat 0) :: [Double] in - Table caption alignments widths hdrCells cells + [Table caption alignments widths hdrCells cells] + makeImagesSelfContained :: Docx -> Inline -> Inline makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = @@ -360,127 +347,19 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = - bottomUp removeEmptyPars $ - map blockNormalize $ - bottomUp spanRemove $ - bottomUp divRemove $ map (makeHeaderAnchors) $ - bottomUp divCorrect $ - bottomUp divReduce $ - bottomUp divCorrectPreReduce $ bottomUp blocksToDefinitions $ blocksToBullets $ - map (bodyPartToBlock opts docx) bps + concatMap (bodyPartToBlocks opts docx) bps docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body -spanReduce :: [Inline] -> [Inline] -spanReduce [] = [] -spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) - | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) -spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : - s2@(Span (id2, classes2, kvs2) ils2) : - ils) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 
\\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> s1 : (spanReduce (s2 : ils)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : - ils) -spanReduce (il:ils) = il : (spanReduce ils) ilToCode :: Inline -> String ilToCode (Str s) = s ilToCode _ = "" -spanRemove' :: Inline -> [Inline] -spanRemove' s@(Span (ident, classes, _) []) - -- "_GoBack" is automatically inserted. We don't want to keep it. - | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] -spanRemove' (Span (_, _, kvs) ils) = - case lookup "underline" kvs of - Just val -> [Span ("", [], [("underline", val)]) ils] - Nothing -> ils -spanRemove' il = [il] - -spanRemove :: [Inline] -> [Inline] -spanRemove = concatMap spanRemove' - -spanTrim' :: Inline -> [Inline] -spanTrim' il@(Span _ []) = [il] -spanTrim' il@(Span attr (il':[])) - | il' == Space = [Span attr [], Space] - | otherwise = [il] -spanTrim' (Span attr ils) - | head ils == Space && last ils == Space = - [Space, Span attr (init $ tail ils), Space] - | head ils == Space = [Space, Span attr (tail ils)] - | last ils == Space = [Span attr (init ils), Space] -spanTrim' il = [il] - -spanTrim :: [Inline] -> [Inline] -spanTrim = concatMap spanTrim' - -spanCorrect' :: Inline -> [Inline] -spanCorrect' (Span ("", [], []) ils) = ils -spanCorrect' (Span (ident, classes, kvs) ils) - | "emph" `elem` classes = - [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] - | "strong" `elem` classes = - [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] - | "smallcaps" `elem` classes = - [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] - | "strike" `elem` classes = - [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] - | "superscript" 
`elem` classes = - [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] - | "subscript" `elem` classes = - [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] - | (not . null) (codeSpans `intersect` classes) = - [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] - | otherwise = - [Span (ident, classes, kvs) ils] -spanCorrect' il = [il] - -spanCorrect :: [Inline] -> [Inline] -spanCorrect = concatMap spanCorrect' - -removeEmptyPars :: [Block] -> [Block] -removeEmptyPars blks = filter (\b -> b /= (Para [])) blks - -divReduce :: [Block] -> [Block] -divReduce [] = [] -divReduce ((Div (id1, classes1, kvs1) blks1) : blks) - | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) -divReduce (d1@(Div (id1, classes1, kvs1) blks1) : - d2@(Div (id2, classes2, kvs2) blks2) : - blks) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> d1 : (divReduce (d2 : blks)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : - blks) -divReduce (blk:blks) = blk : (divReduce blks) isHeaderClass :: String -> Maybe Int isHeaderClass s | "Heading" `isPrefixOf` s = @@ -490,27 +369,12 @@ isHeaderClass s | "Heading" `isPrefixOf` s = _ -> Nothing isHeaderClass _ = Nothing -findHeaderClass :: [String] -> Maybe Int -findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of - [] -> Nothing - n : _ -> Just n blksToInlines :: [Block] -> [Inline] blksToInlines (Para ils : _) = ils blksToInlines (Plain ils : _) = ils blksToInlines _ = [] -divCorrectPreReduce' :: Block -> [Block] -divCorrectPreReduce' (Div (ident, classes, kvs) blks) - | isJust $ findHeaderClass classes = - let 
n = fromJust $ findHeaderClass classes - in - [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] - | otherwise = [Div (ident, classes, kvs) blks] -divCorrectPreReduce' blk = [blk] - -divCorrectPreReduce :: [Block] -> [Block] -divCorrectPreReduce = concatMap divCorrectPreReduce' blkToCode :: Block -> String blkToCode (Para []) = "" @@ -520,29 +384,3 @@ blkToCode (Para ((Span (_, classes, _) ils'): ils)) (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) blkToCode _ = "" -divRemove' :: Block -> [Block] -divRemove' (Div (_, _, kvs) blks) = - case lookup "indent" kvs of - Just val -> [Div ("", [], [("indent", val)]) blks] - Nothing -> blks -divRemove' blk = [blk] - -divRemove :: [Block] -> [Block] -divRemove = concatMap divRemove' - -divCorrect' :: Block -> [Block] -divCorrect' b@(Div (ident, classes, kvs) blks) - | (not . null) (blockQuoteDivs `intersect` classes) = - [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] - | (not . null) (codeDivs `intersect` classes) = - [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] - | otherwise = - case lookup "indent" kvs of - Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] - Just _ -> - [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] - Nothing -> [b] -divCorrect' blk = [blk] - -divCorrect :: [Block] -> [Block] -divCorrect = concatMap divCorrect' From 8517a4f2e59dc2ecf20a96eedfa5b2cc47f0aeba Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Mon, 23 Jun 2014 15:29:04 -0400 Subject: [PATCH 4/5] Add Reducible to cabal file. 
--- pandoc.cabal | 1 + 1 file changed, 1 insertion(+) diff --git a/pandoc.cabal b/pandoc.cabal index e35a76caf..634d249fe 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -327,6 +327,7 @@ Library Text.Pandoc.SelfContained, Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Docx.Lists, + Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, From 9b954fa855158d99b4ddba7c3ffe7f2fed7ce25f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Mon, 23 Jun 2014 15:40:34 -0400 Subject: [PATCH 5/5] Add test for correctly trimming spaces in formatting. This used to be fixed in the tree-walking. We need to make sure we're doing it right now. --- tests/Tests/Readers/Docx.hs | 4 ++++ tests/docx.trailing_spaces_in_formatting.docx | Bin 0 -> 12916 bytes tests/docx.trailing_spaces_in_formatting.native | 1 + 3 files changed, 5 insertions(+) create mode 100644 tests/docx.trailing_spaces_in_formatting.docx create mode 100644 tests/docx.trailing_spaces_in_formatting.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index ffb079eee..a42dc31e9 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -82,6 +82,10 @@ tests = [ testGroup "inlines" "normalizing inlines deep inside blocks" "docx.deep_normalize.docx" "docx.deep_normalize.native" + , testCompare + "move trailing spaces outside of formatting" + "docx.trailing_spaces_in_formatting.docx" + "docx.trailing_spaces_in_formatting.native" ] , testGroup "blocks" [ testCompare diff --git a/tests/docx.trailing_spaces_in_formatting.docx b/tests/docx.trailing_spaces_in_formatting.docx new file mode 100644 index 0000000000000000000000000000000000000000..ebe7404a93a6a0e2fb0695a04f857841b10b2a2e GIT binary patch literal 12916 zcmeHtWmH_*(sm<3gImx9cL{;u1P$&kK^u2(EV#Qn!JW{!LvRfa!JPoX-QnxZz4OjY zX4YNb|9ej_`s}s()YHZ8lBaf+yc7&9HUJKQ0000;0f-bqzCKU@K;$z301JQsttD(@ 
z?Pz4}sH^O1Yvk~O31nsYDhC#tCJO)!dH#RbfAJ2~#LLL}37`iaQ9Z&}slhC`vGm4Y zW?dA?l%2FN3D5O^1e)Ak91%-Om1O5@wnl$pJ-Qo4e&aVTnKTZ~OJ|`tBj?5dO>R{% z%wSY8*>{~N`oe!e#0(|R5~crr*HBo8;{kxY<ZW^l8LUem*C2Gwmu_wr$5^cj(JZZZ z8?yaquVFqe?Je1w$pEm{<;}ort5Ei@nT6SO#y3*Cg+mycDs;<Q6sgkb6R>^6Z#sc1 z+Uw=QLJ4UuV0qLE1ius2pdXvyj=IB?oD(`fQ!)<_W-80|#BI2V>InNqG*Ob-T8-;x z3bu=c)H79RGNaE!d0gW{Da22LsL%H>kB?~0m#_|2lxcchQDga?T}TKvN_O+=W=jv8 z{m^Ohl%(4peeKkC8dMHN)I0Bn+7<RgTzog#2$W8AIuS#jSi;{oVUdlsz^rKBnK)Pi zBQ+*~(k)KncU@082TQ(1C58lC@ccBUsv--)?2$F@BZp`u2Z@3+>t8t*9Ip`pfTt%I zfc#%Pmn4Ru{tRM{vJl5ahIp>7y^*B@Gt<xa|9R|xv77$sUk{I6kc1I@9(eR@MZe#Z zV*!b5Vs2SH+l6co#y@2r$^~^eZ@#TH_f?bpg3kBz?IF3DG{3Ydo<d%@3Vn^~tl2a@ zu@wuDRecz|yhguPs;>!(*a+Cn+Gd8CHzq!tH~e*J&U$sAAO$~}@f&cic0hlda#J*s z1fo(@%rN0acxh8x-?<u0YD&Sp28x-<kw9&q`di%@)DIj4ACZiYW`{y>`r-9$=9moV zEG2sFBg|<p7ziv`MkzC549bw+uA1dcUx+;EhD8Q2AZ8<mjGlQorNe0IpgGRvxb{`> zyd>3N!1Z*$6ZWfSHsy!pu)q6=2Tu|g8RU~j7yy6>fB@xUV-IBhm!kk}44teXiSB2H z``u}vAZZTr^ndqN7AGU;&+@M@Khcb>`XOSj3XgrR^z+FgqmrqAqGK=GV=GQI(qIle zlZ<g{+vuc3vu69W$~iPiM73|eT_+iMaH|*Bm(S8hj!DCx!itIBL=cP{U0%JBJ58({ z+eykG<EJ2ANuw??rEM2k;;(72PXN{z<I$Bu%4Y_VPscHMGsgl4Jm2{l`3;-KA%Ew% zp$)6E!s_=j;VF;swJ6NA0JHh7B8aRRzhKR0_J&<>SzjHvZs=V17P5+W*<tG$oW75H z6l{H=6HZ=>vznWI`rbl*s_$tr^;8@U6dr!_GIyuHRnYMQ`y&^1W+$?bjhou3wM-$= z$hPCy$b{-)NoJp#h2*-WY>3+I(vw?4>vtW0G=A+%)PJ%uvnw9oa7Ya1ga-gHAg1%1 zjs4lW@>FD^=2_5N)qZ$^J%V5dKhi*}TKP<q&}AQ9YUB4wFc?G`5h~v9@r>X(YLw3| zNf9l)u~|+#nVoEYS3=>gp8?C4o2xG!GDf6We1`vUQA4n(xXwFCOD`CP&xas(^!?L2 znNo^pZAwD!zUcFm32SdOK{l~8_ATh4joGn_zJ9}KTtx^~()JiCdXuI^QR}y(>Z_ex z;a3^Ed=51MO53J-{=Mq>>OC!lzF*0E_MJ`Of~}L22sI?V1op3+{Cx12G%{-Z;rOyw zW7+)Yv(LaLbUMKcAg1}}QyAp*lAC~eoCzYM7CaP6T;lu(3$n|S{e8MOsp&m?l)8$3 z0Y*59>j)sYD<Rg2UWo)55+Tnh70*b{P+hD^RSJP~BBtzPui?lL1H~(rBdM=u_Lb!P z_H9Ru`_giK{ZZL`J62IR1ZOD2>w8USviXf1Q0cXYh8^KqA}5=q-}L4RMBY!E3U!9$ zIkYgmjV9?d6;D_-ihE2S=hR`UpsJ^zXDqM0SO?X%;4|VqtP24x)o$Z50ET;`P57!= zt-{el<~3NuHx;t11YZj=G~d3zU`*ib;$(fJ%Bg?lJM!t+{?!iw-+Y_>6@V_~Y3!@7 
z0b7+72(Bi1Sfmd77@e%l3?~7vYAHB8sPtgKC@vehCNqZSz)-~JT+`C}&>vU2IC|?z zMYGu=4ipPpjmU@mt*9GxwH6zTEIYb3qrAr3I<J7dnN#}6_w7wRPuw{JUSRK?j@8?| z*OjcyV2>q7m}Nii5viDreO%vs=%}Hu4S}NomX{A~D>rWQfgAFH8{@d|8m1VIADZe} zO_}m>Eq1x8?(IJu8MF~DU8I<zGJkfGTfTn@G0eY*N0udF+9EgrAde6L!1>*D9Zij_ zjF^9WfBiE!?rX@|EWE{TN1Jtd<!EC_?Mwjg6gpd(#wbJBZ5>2n7LqtCpCd3K;Ym2Z z;SJT*q#uMcYZaug<%2t$f5$e*ZAm;<0Txq??NQf>eLEXk&QF1PvvK!C^0K(}+hN~x zVoXw6xp|!j`>(<jC$0i<;DaTSOXhM{ak_Na<P-mzR@#{5+K8)_=Fo2{A_l&Co@C$b z7Q*5sza=H+p>?V2e5aMrvmO==$7u<*rA5C+*y*)2YgE<x5c*n%H-7a+|1v>TvD}z^ z-1%Gkbw0R?kXW}bCI(p&SSA4-aPUqM(Moeh(8LBam)>$;q5?PDPSLv*J46;rQ$E$# z*f}9qn1m&3g6nEN+kBjBvJ|uhTY^Gum08NK2+QfUo_WIPnUj3myJ%{46C4GAkNgT$ z6Is8$_bOi=FiRA;w99m^pDAP4_6T_#Odl?nEu6G8s#4E^1HL45xUQWyH*}cmAdK$e z*glRJF|Kd^v5L~auC{&rKwmx`7{-wmJie)iT24HFWHN=^pIm()c}?);n>oHmpsy)S zj#TxFGZDtn+7~MuIcNYBp!zs$v6*D=Y!J=iI*5X1=J_#qM?OB!=1?Q)S7Zs@G3DA; zG^OYGuXC?{++0;;z7y?3%Orkxy?edZwE_Q5Q)r~bD&2oeg}MD<f3)vq?z_y7XQ$(M z(;$uBr`s;-_WOHRcJv7os*y0}PxqJm17Q{X4-d^^IL9(RVn)}|y&^ejn+OMIFR<UJ z+EG%I+qL0zAxxfzex-=#>`0Y*_K+D$bpDmrIK_pCi@Upal^AL(7YXEPKZ*fD4++V^ zNq5Pag*vn!;nr5x)E7IWOL66t=rsRspxAq0=%L=z9ER1`-YS9Q^af2z?8I7x==IeL zS!`4ZA?yi<d+%tZgG0q1vrb93zEb?EJ4~X)()2Q{dF0fo#$k14&p)Q9?mmBXwybuC zcj7Faa0?@967NSErt^P^`91lBJClY;Erw`4IRWJ(Olgs1e~0q5k+vM&v7`74u~u(! 
zjoR;t08c5|l@9qzx}~<Eg32)LP!i?w79TEV^7?B*j!rwx0OBzUI>UF-@aQ=&4(wE( zt!!Bb?zkMo$l^BLp;AUR%T>VDo9zU%A*1uX!N6&2k!`?g_oF2=G>}BWb}CfuW4meY z{7R#2J+!ut#1TJ;r~B@6^qm#sTpHM2Wo!5{M5q<PrHa?35n*RRH>3WTVval-{Y-1B zdHwz~r~A{*dJD}b<~EAa5*BRJrLfue3pGn8ouA#M@d=A)$vN`OS}6DV1Yg2ka67D1 z^w2UytSXqWve?>Ye=$bE)b<onVGFiJl5t;}6cI{^L8X*z^zX{S|D0ETGImYxK4tks zIJle$v-AVW>>3VV`X_cI*7qf2M5Ye!=~KUGq?Bt91p8W7bd`M$;C@rzgz*(yG)ODE z>TM-gyx#!wv>q5t#m0`HNeVutOTS^a;M~`H4{hbLm%GT=i;TBRhQH<w<ezXGy(_X# zkMA2yC(}eDe15wM3Z0f*)J)wcwB-ql8{8*Ap50n@$70^X;61~X=ShiVprxjk8dF%X zU{6XTIklU=?6s8*?Bje^A+;oCfZJ&lSK*IxrxZ5_yctd_*M7}`_`~)>|A4;`tOTz} zI*pqcgM#uVApju7Mos@@k2UP3fTXOPTP;yp1=n$0Apzy=_(7`IaJ{suT8ez$`I4PJ z-y<jZg-8bmL1oBn-|9!=k}Np^N^O_q&@A0mYgw%Y>Bb%|sixD2<c>MJ;RytjE!9HP z%mG|$Bt=p`SNah6CY3?!c%F7Tsx4;m1p60HqsrrVh>QGXu=6nd+9=53js-P*Ov>Ha zNck8SchCgzN<6PLS3lwvkQS}_&fnv0%0Ja}nV2Ql1PbTP7m{LPUy|b>3SDL~r0nnn zsPio}Ge{(2uwtlSsuig2V{lPe7Fn#8EHCh33l^oB$0k7e@!)$Mx+e<e%5!jSc{tE% zR9?*!T^I7aOdHOM*9wvmj$3F)uQWw7ZZ7#?leq0}qk+Nq+M=4kRTWS@p*l7=@yIU9 zxvgqJKa<`&^i`Ylc)g{UMV(EAc9E3y0sitGoriI|^A3tr|5bJ!*)=FTL~vF>S+^<z ze&X2#YGjsOmEYTBHd>4MGN%dEA&x!dLr3H`>hN+(p0M^&v;7<{hdHiBP)zR#!~X1i zBqJL;=n@#(ImY>}|3zQ&C~%8e!^07csP);(b1G>GEa~G6NxhwHcq+74H;iJ{jvLJX znA*H9Q5vEktiK6z%=Le)%)!Xf(ahS!;b&T_QC+ZJ;6(S-&3byZ(-=wdO#+!za-8bY z+7F~vHi4FvS2>Jg^NN=K@Y_)|b}lfEF_iHZXwGC}9VGmzy?vX3%loZzC>^8hDI3)I zU}R{UWc#u;?$JcIIvICoo;Mwzuf&RO%k6;&1B!Z>F_j>KzT~p%?y{zeaf;d1mk_QP zBv?8ZMF)cur7L{uTWk3+ReMneE1KNPWX7|5h73Jm0;*;_R=4k@sua1)Ok=!#eJ-Q) zyK&(s@x@hF$^{=kFuy^8k3OnmGM@@n&dF0)Amt%>|C+lf5oegQVT{+PJ)KK@ll$>? 
z%z3)sMAb7xTY>Zdd#>ztr~-997&p@k#5!)|$~W)<hoXma4&MX$&b;*J^+P#o#zlf& zlS66awlKcC5Hg=&p_o=Ln?MFmanQQ5CL%e82@7%O@H!wdygxLTV#u1O(HlOiHWEO$ zWN??^WG$8S>%MaYlfzWk=M|SLn>xNqK3gy%d9}bdo>!Y?A*w0T9N%d^kn@51DgF{u zqogt<E@-<<%7g{1u@aq2RECB#s~vRbjo@orMkh`kX=4P>G$}Y~5^*=$%LD((X}@d8 zhhkY*hUbeS{?ui^o?p+@IiUyo(uE#FkJd)YBNjgIVMjatvo$iW-i43u%3vr1c*_N5 zr0>+#`pQ<tGm2iA#P(Yzeny7)ZhO69L*O?j$4T*TBS}X&_Ue0X>CQV-zNTK>I$l_x z#Ts9B&|tk~UTV6|VIJukKw((ZpH1@TNR0*V;Ub-eB}KH|iHlBgcq8MGSp3Ki8SEjW zc}^W}6}_ggW;Qw$N#OL12W_ZM<m$~KFk3f%-Ky#17yEjw^CL}EIuSy41%33_+Lf9z zAmv%Y!Q92?GQ_R0OaWpgf{vfi=An!o+<Pb1wbl_mg+*T8p%|RhI>W*k?86I7Bs|LC ze$!3oh>YEx=Mw}|O0F%vZ!Taii68%HgS>5gQrcD6@5T0DKf&0F+MYh*6+MF!HZ$3Q z?@z8N6QprTHMl&?s$J7z%|0ZXyYHBoA&So*VA$9fmS;D#v{n}^M!h1@yNR&$Qb9d` zR3sbGSPpx&XbDr~?a%~rLAhzHFyd8%L3S>#E=ZI|_RwzQ`_P+hevLQn8=14*8E;hO z@+8C<MzW44-Pgk>yAvMRk}CJJ^83eb`_o*vGyS{q+3Vl}_U0rbDDP0N=gCLCxS1aR zv2Mr9!j*-Fd{PGa!~UI^x)>QK{mFsD$2ufoUZcaUHm~w9G}g~0kX1R>zfwFQ^SuR_ zIXe>$13RV7%j9h4OJ3=?4N;^-rFb>o3h1`NgyoJE8|#!J`Q~>76f0FQ@6AP&P^7TD zkb~l`g;ywjJT*vJc%Eh=UVI;4LL;bIlQCa_tySJ*4HYp=D?}4T{yJ4zGLwzq<3djg zWjIx1#2M4Ug~|urQk%e01yiDo0G=uy|LDkv)DFBDU&{~iFBw1O8m*JEYM|5JWxaDX z)xI`L?RkfgVT4IA*}{-3hqVi%GM{z}5~`r9Iq1R;{@R-a>rgH484_h+zr(LD<1rNf zO#S8Oh^zQdBu7%&Prya5f7~-CXsE|NNDZpb2LKTKzGn`Ou9ij)YG#h6qQ=HXhK~OV zeQ8!okkHq+vNG%mAGIPZp0zp1b|Ak4!(kQmmgP#b5+|qB>gHZxmH(lfLhH4&<x%fp zDQ4gN%4g>bUBP^gqs-1PuA35cz#7}aKI+e1<Z`!UZt4wJE-#}VZ8O*mV@9FzPXg|S zm9CZ+R=nJ+Tg9uly$exa1W$xn?K3XQB=+FNN&K(^^-pZbgF>XYxOpytoH2W~$Jao5 zEH74`l+2w^>mqKvGCs~dIbw-T0gYfYnO<J#8BHqEp$WqjUTLAk`{508FiEv^naHau z6R@YfPHGG?hg?orXEym>G=aT75#1xL7)1olrR~>#i`^~KC{~$t3SPiVV7#?R3-X4% z1^Rk{<8CE=a32fJ%0lmCHQZ=m_&|qKXI*q?Mn_;HU))-npuXOB!7Z1h>874t2{jhc z_wk5(Fuegu!xM4QA^oXbmgXgK$b0(Yo?enb5-k^l*TeHN=ach_L$RxK1C2Va(_@OU zKD`qktks7<P@gkdlaQr=RRAPbg(i_D;#Z8~0nDuyZN{$A`9BQp)39HL$=!a6VjWa8 zaqm!+3+9PN2V)|!T;H%t1|YB4CK@Ld>rFGi!ycWwBD^rTkm8w;st)bA>cl$~5uAL| zn<YoaJ+xHiZ&Z|R0pWsovORipbMK6%Qv&YLcy7;B`80i&0)}F9xAIxvj>jZEsggzN 
zcB6f&p?`g*cjALzH^RAtj*!AifRL~{M<nvq<2t7F#>kZlIruFAT%vKUp5_=$jb56M zVRfBY(Jp>OeU*goQEAJLOtQSPNF_v-m4|$FVbS9sBt;TB+IiLZ1EZ(<&XnBa@hVX4 ziuTH7wB2h8>4W+?ki=;9@kSd@DEjtgCNBwnKI|lhlEh;HoOie&HyzP|Jd^WsGCX_9 z$7`FA^i-n5d`nKH4`0V0U3?v<ZuG{aMU_zm7ASArxEH(UY_Tc=)5dyuESbJ{wR*P> zkA)}C7b#Qn7pI&{pQvoFojZvQlwd^A{U9EvNnOT>AlQX0LLdvI9pZ6u&{6RFkM3e@ z6LVahhMEmOR2)l8U~0ifBDeyj`wC6F0>$*25e`F`m(gI*M$xjSOS2S1(hXf#W69RJ zZKbwxh%(PeCce*MS>j<w>OE;V2LlfH^$`q8`YliGogF_Vv@@8mNUh+S;Pr0Az%s5g zAxo4EaP_RaaYK$O*Z*WCubQnnTxT1u;35n(3q-Yit}=8GkWnw~b|?MfPFJL4JF7bF zQ@#I$2~k4VSz?nV$78}B8QM^>;QsrOh)k>rZDCWq=E5E6{G!Gtw_JI|FaCP-&-h@i zUaqy9F*)AR^}TqKq30gs@AR5%K&`3R%jIbY<~IhmmvI(tu~Szp(c||6GvK!fb`pZP zT%T#dGQpmb)m<u(y{4{M#XH^Ab|bm}9$2ho&*aQ4nH4>$Pvcg(W=Kk%Pvx#8B(e!R zt^NkuB6$W=066~8@iO3+hou(ZG@;J@>++VzJS)tYue|%tQRDFK)mSy|*DghCvTFy4 zL1LnV8lx?JGf}>jk7E5N;9fBR%APmu7+6&tP&*}dm)UnI!V3gBP=NcZESr;=cxqTj z2L>$#Yj9Z<S48&PHC1ig8QJcre1&A?t8K_=HutC8QV!A(W=uV?I?Q(o4!BCp=_qOJ zSIT_%WdRuDoa+&Q2=Zlwv7ZYRyrYl(p!Ep7nA#3=bCnj1R}V;AYM#g?p&Za6GjO@i zJuBgZZH48IPxd^%Aaa^bSoR)5K~j@KnHCI&2c&#Va|U`Rgqgo!{s-_XUUm6@eTgXH zWoW(5MZ0zj2DZI#>Uu<0bhQ$ea<haexF4~kvJ6D7ev2dd8k^z6VtiIFhK-2GMlc67 zTO^2F;J1Z#%N^-P2#2re3|wik%E^20{!=}{-|q8ZwU!Hm`>mglSX0<GumR~AsacvC z!v6={qyrbm0iCDIg)I_dN_6sG<o_F%;O1A@llT9Eog!8LsaGDd0~n&>Z32vd8;-)n zPhc_vu@;Q1we=-5d^yObRlQZ0DgU{<iSWPgveO9eNLWr(p{!OYz<nf(ciJ0nL^Ia^ z87|c0szv6P9-(C~Zs03$c(i~eEt+NUQU3kzDys9RI&1_2N>!ieTk)%E{(PmzU|a5^ z1_|4%ZwY5}+TLd2lav>yn<$GUB|3N$-{PivNRQvRN3IRZYG*R2&8+%HH_fYPLrr`W zvRM_9w<{MzQN8k574<AG0hK+i)Ma&!3QWPI(b&K+pjnGJWre~VT|STHKh%M*PY?YT zx&IlGGjIzxT-<5Smc!npW_&dHK>-RHz0Ct{6LH<*#7!6Y*>XuWPi>RtS>hzq8y|Pc zYzq4?tUD)46g+i@(_TM@!gr_@Fy|SuT6?jt?hSkJ@Qo9Ljm;^mr2`K(usj8{-K$0c z@xs&iX9~~8b*L3cK7$mAIBXatrn$~#YLY0;Mw;s4p`8m{+7jO7+dM&%{NK;=4su~W zOG1c&C*&v^1Jde%oIuOl+t@lV8`{_#{VaGOO~L>5Hz2$yJXS%j>lJpuip)J4$`PZ{ z2V6?4q@uYvDd}{(lZY=L#2DfMpBLJb>a-4W(D-#m)|$6<`jf;|#NffQZ<_=tBots% zIfQ<c2-u{lcm$Dcr{I=ZvtjSRqB0IGq@@!M5+XC#z1C}m#d{X(9G?b#(8_^r*msqc 
zEafW<Z}&~T$AiWy0A=G-88old<TKUP6urzf)E8+n!6V`wVtF@JPH5;v!YD{JF3A+I zPar<^t;d!zdaT0OM+lwWZcwUQ_2X=cl8x-Gvx<AWXXt+qYx8Zi;-RQ=YUwj)7As!5 zJ@PO|9qfl5=<v~#O|X{U*L2;W4@VTiL6hBAT6_Drs8#NTv6~>xK%ECuVPSLy1gW5L zra|}1>l%%0`@~q+bf$HUI?^k)A=A{4j+LJeUDkqNoKUa4*6&k6Q1PDYduU1@A$9fN z4N%UF=#?16G;O&70JOjI&)@3pG#Q75w~)3{#;(`u@nL_gcIk=80H^FkLtLILP;L|* zQNmQ*moj_yu;R!-aGcF7wwcPI>e|N>JloS8{k+A^0Z(ihEoDoRTFUEoefA27xg$@x zkniR2u(so}G87>$gt05a^x&RyXQT6UJ5;(*y|^gGdi6yh_ES&C#0q{~rVN!UIIgl^ zcSo7(*fzIX{YArYU7R{_<!V<UYeV@(t!fPGRovXkNwIG70FCOoaW3?YEc~5172JXx zE546t#wJg~cX*3>Y>Rqs<Vk6G*3ph#PWwxP?AVFn^E`bTRlx#>*4NBmQbP8|yHkC? zPa581i?!jwn<OoWm>3XutS-sQ-G5`T&qiGyh?o_ls!y6zQPsv(yWaMbl|Ed`4B?gk zK@f=jyc@~VlUL4us|$<P-XW7-!SlMU!Q|7?e%N%7(a<Xn96uYCqWh45?@xl&FUd4- ztLN560uH)!-DAY-g4|iTnlw~+s?VP=#JqIov@F2Q=c3Lv-1WUwJG*N(JsamSPqqOc zBt!uxfHww)6r0cTLf8pt(*moX<%Kt2*VPNtU%tmfXZy+d8p=?(^N=Y<2e(*w6Vl|k zubH|MeBh;CYS^BAKc!RR_T&9<%dD$OwWvLy?y0O#a}ZB^xJ1E&GuZCJ+BjfzZTqwb zKm62bkaZQ}bNGpg1tW*o!wau}%9trx)*s~*ZvkKr->I;s7j*d}o8$0(L@37-->X8V zKsiNc#aW3%{U;v5or{kwrpA0F+}_R_o*8HycC3rtHeE7$r>KNsJ;yrUHE0pRLlKgy z6Mp87tFaZ_sAoyUv=&-yRYmop<YsPKZ1#DO1tny`ngv;`wlWf&99WGbn3#kKWZ6zv zn?~jerp|sF6q%?y!p{0h`6MG;C|N7NiZq>BT9yB#d}?>K*-kiH=Qjq8tSPc}t>n<t zbFxb&7N>WdOCj1FLd7OKCNM#d^GqHs?^tmzb3Qr@d^<+OqmoMmmx{yM7v{<vFW0s> zU>Mar<+LVf8J#;@>#4shJ{SR&5+^R|%$f~l)kpbPKS^wH!ezwm*7PLsZ{)qmtlKnd zJ{e_IzPn~&^Plc7o^*JhY=N+EaD|>Ri_UJMR)O~xfQ_f!xZ3P}iTL2s6JWE_n>^vA z7Mf}HY0OKlA=B*TSuRhTWxgx?iWc9*xh8n~Fd)*gZcZ$SYm6QZ`SLjv1{>aNTtkf< zX2>&lxOcH~N4yBDX@rz=bCsYm1-JlWRI!eM_g4YL@u~YV_#~Au%7$NGl<8{@0V+kW z((beOM-3JYyy-rCW_K?<YcYb&f{mFO)4iHMugErGOcl%5raE?zVB#aNT4p^fsv-NN z-D=nkQeQLl4pb3<GgYSrdTXBMdVW=!5#a`c?113eDs9!*(mYkH%(H{qz934yye0{C zuiD||OKsB`8R!7wEaN2#`PuQ}>8dJgO!eGp8R*h{^P0Y*l47F5S*L0wIyGq6C<uLm zhVUFJ@81>;pex~CEF0*sa+Lf6)t)p!ry8g3g%pIBA-<}6%UMhR@eTJBU(LkBmNV=L zmHaG}c_@cZXQXTz2uuAMi5kniOz{UonO%1p077gKv;MkZT7HN==}!A%U?E%#p=DIw zv#0XDCF?SPHF_c|kYbD7%#-7vL1pBJMCp8bP~{_3Am0*DVH=yRr*H3&{u7!6LL^}! 
zELG+wU4<R9Jvj^tr(TA|)gqMq$)KRR6N507acrQ2%TeS)Ib?f3F)hO)@d(KL#2+t4 zG%t35#pMxTSEYr>d>_(-D07^!?aTQa>>qm)80PX0$a_Kso}HKf$$%l7z#E@c0>Too z{7vD=`(G56`uQwWNu^{lIr!&>B49x%AC|!N;2QzXFi!nHH2+TuJAY!_$D_27%c!G( za{APvuLgt9X@Q7|qt1CPV-1AcE(1OtM*Dp63wmxK<tG2Zqx8Zhs;<&~IrbGWUQ<kR zBk|CEF<*%*m_^qyouc7!uk&sP^$Xj)z9iqWD@<WfNngM*D3*^C_JQq+mS|1_jc2Hw zo%0ni${9z@`#64)k_dq=@?y64G99fYIH)9a;o57#7wMBvFiKjH9()-<_9x>*Iocwj zSp3QO_Cxz3d5H!%9jj-FpTL3kM||_Sb8|L^F1{}3_%5L?kugCP4MlHx5Q3_~Ql5PN z(dFbW-Ou^8sYpQx+ps6?km$CSVOy|z6A+#=3XPVBpHbINFc4uUSJ<ZxWl=CQ|AM%_ ze$>{F^Kz3GCK`2mq(N#=#66SZ4#fC|b8Y(dciozDA`s?*on%w%XU1Xy;@zM&V*ag) ze<pp(Vl6dVKL;}*c^C8doNEl3;8fN(u>6hnEylN6FR)+-TtR!n`?xbmsy+nu`)nwS z1_4bc;j_#J$>?ICCA3a{>}T-QM9Djv`Y^c?!f2c!wcb{GPpR%@WthYGdNR3s+}s>z zW-bL!X}q>-h$TJce!(TJ$i8ilk>m1hd;q}Ji^)Y@9LpeLVLUc>eN#Q@flY4~0dtp+ zk4TWyJ#V?$6i6kFKB<5oCm)`G+uf<wYWr;bC}|E+g^WCL6f}4+fARV-d;a~)jhyvl zCFkrNMHaK6htI~eoOEWGnJF1sAH%gipYu+JvKp=qla=4Yu2of@`0|l=6sx}Pse=i! zD~(+2M5=RdxTjFuMBsL7jh2klK-#xw7T6$2kBg$fcqJ<_Xp))~5H4<%3}1^QA?)u- zw1#)guncvGE4PV@%0@cRvcS-tK!nvXc_GWr#Zf>9T$}(FRs??+6;>HIeukZB94LG~ z(e>g^!Jn=NSpm=qoTuSuQ)vynNVU!tu+OTTDAZm@k$NfrrEpp0a#wYHj@<5Mrg1Zt zsst#g=>E*xS?1lMr)$IA_eHOJ+oxRz&mule{=+rW=u@A00EoDNol{(@h->l`@Hu$} zd;_K06GKw@L#E(ihm5;ofUS)839W1ujyGYN9dDRZjlpG^Mi40FhD7o~4<nw|e8UIr zbS&Q{rOt3t36R33;p63rB<Y0?YR80-%ED7uMK>MVG~3&>b4!ku`m~n|3kwI|Ewlnh zhMr&SJ+EDAcp?>6BGWsLPTa>%DXvli7DaTm507jtC`*$Nv6-1$W8E9DXFM98PLcPA zdqE8D@AMBJ`krzIQh!WB$RYZl>koZf+y9jx5a0awm9EqQameSbXtVELv~k#1Wf4Ma z^h6u%=xf1rTaBhmQ+EO-qLp*!zC~3Ls?9m4%0)%7U7U-bq$TO#!Q5t*6%TFY&>^!; z)D05-IL*GXzbIx9f}QlOgWEgx&u+muS~<z6NA*;Gfn=^P^nt|Ck+9d7*8ojl&`daG z<d%di;PH#)H`<VTRsJheGA)&9_O7Jhf}YF4uP8&s8^*y;3pKT@yU0;r2V(R(2=_Ar zgV)_-bGgzGmuC^06!=0Z#~r@7*nWv_Ks=`2TX^-ED-Ezh(%}~l%(y;|sb<*xlrbqO zL?PG1LD`B@7uNFtx+8lS(a`A}h~XPFsaTg#PTwMZDjB9Qgr*qk>hR-fGfyLLOUkm7 z`i4uq<DK4$agRzG;RqkPc}i*x7e9VWR;0@|rJ`}~?LAP|MD&H8B}Q7xNQuSZK<GS2 zK|#r8Q04^Ttcs}Ai8b0BQ5%=aenKEb%4q{}tt~gL*DLtLcLOn$xG8B_xa~KOJ59B% 
zFb+9gCL(Z8&(S_t)|)Bl@U>7>XTK{3E%HROrC-p!^h#WHpU1E?%iMOhfo&(RZ<RHf z_>l}#x1HLKWtL4+$U&F5Cn~zqAZQZl?D&y)odfc#{>vAjpcx^pihmxf|NXcB{r(R_ z_VQAH1^DZ9@9%;??@bT~`_ruNuY!LadHl2BGGx-{KMg?siuCJX+aFj$kb&gCjk*0Q z{_AkjAL0+l{}%ss)aX};Uz@mpK+Hn2-ESKFxuyH7_^(~0Kg1Jp|1JJYPw7{LUwbNl zAPnODuE0NaSboL%>*35FC;-3!WHRe-M>W5S|5{rA0U$~C7d3tc+Fyl#<p_TWx6%Gv s_%Dp%SNXpN_CKTn011Xa>HhC9FE8~BVrBrqbI1!3VyP-W+w*|`2magm<^TWy literal 0 HcmV?d00001 diff --git a/tests/docx.trailing_spaces_in_formatting.native b/tests/docx.trailing_spaces_in_formatting.native new file mode 100644 index 000000000..46ea9bca8 --- /dev/null +++ b/tests/docx.trailing_spaces_in_formatting.native @@ -0,0 +1 @@ +[Para [Str "Turn",Space,Str "my",Space,Emph [Str "formatting"],Space,Str "off",Space,Str "after",Space,Str "the",Space,Str "spaces."]]