Docx reader: don't strip out empty paragraphs.

We now have the `--strip-empty-paragraphs` option for that,
if you want it.  Closes #2252.

Updated docx reader tests.

We use stripEmptyParagraphs to avoid changing too
many tests.  We should add new tests for empty paragraphs.
This commit is contained in:
John MacFarlane 2017-12-02 15:31:53 -08:00
parent f4b86a1bc2
commit d6c58eb836
5 changed files with 39 additions and 19 deletions

View file

@ -534,9 +534,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
return $ case isNull ils' of
True -> mempty
_ -> parStyleToTransform pPr $ para ils'
return $ parStyleToTransform pPr $ para ils'
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
let
kvs = case levelInfo of

View file

@ -10,6 +10,7 @@ import Test.Tasty
import Test.Tasty.HUnit
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Shared (stripEmptyParagraphs)
import qualified Text.Pandoc.Class as P
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
import Text.Pandoc.UTF8 as UTF8
@ -37,20 +38,23 @@ instance ToString NoNormPandoc where
instance ToPandoc NoNormPandoc where
toPandoc = unNoNorm
compareOutput :: ReaderOptions
-> FilePath
-> FilePath
-> IO (NoNormPandoc, NoNormPandoc)
compareOutput opts docxFile nativeFile = do
compareOutput :: Bool
-> ReaderOptions
-> FilePath
-> FilePath
-> IO (NoNormPandoc, NoNormPandoc)
compareOutput strip opts docxFile nativeFile = do
df <- B.readFile docxFile
nf <- UTF8.toText <$> BS.readFile nativeFile
p <- runIOorExplode $ readDocx opts df
df' <- runIOorExplode $ readNative def nf
return $ (noNorm p, noNorm df')
return $ (noNorm (if strip
then stripEmptyParagraphs p
else p), noNorm df')
testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO TestTree
testCompareWithOptsIO opts name docxFile nativeFile = do
(dp, np) <- compareOutput opts docxFile nativeFile
(dp, np) <- compareOutput True opts docxFile nativeFile
return $ test id name (dp, np)
testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> TestTree
@ -71,6 +75,11 @@ testForWarningsWithOpts :: ReaderOptions -> String -> FilePath -> [String] -> Te
testForWarningsWithOpts opts name docxFile expected =
unsafePerformIO $ testForWarningsWithOptsIO opts name docxFile expected
testCompareNoStrip :: String -> FilePath -> FilePath -> TestTree
testCompareNoStrip name docxFile nativeFile = unsafePerformIO $ do
(dp, np) <- compareOutput False defopts docxFile nativeFile
return $ test id name (dp, np)
-- testForWarnings :: String -> FilePath -> [String] -> TestTree
-- testForWarnings = testForWarningsWithOpts defopts
@ -257,6 +266,10 @@ tests = [ testGroup "inlines"
"dropcap paragraphs"
"docx/drop_cap.docx"
"docx/drop_cap.native"
, testCompareNoStrip
"empty paragraphs without stripping"
"docx/drop_cap.docx"
"docx/drop_cap_nostrip.native"
]
, testGroup "track changes"
[ testCompare

View file

@ -1,15 +1,15 @@
[Table [] [AlignDefault] [0.0]
[[]]
[[[]]
[[[Plain []]]
,[[Plain [Str "User\8217s",Space,Str "Guide"]]]
,[[]]
,[[]]
,[[]]
,[[Plain []]]
,[[Plain []]]
,[[Plain []]]
,[[Plain [Str "11",Space,Str "August",Space,Str "2017"]]]
,[[]]
,[[]]
,[[]]
,[[]]]
,[[Plain []]]
,[[Plain []]]
,[[Plain []]]
,[[Plain []]]]
,Para [Str "CONTENTS"]
,Para [Strong [Str "Section",Space,Str "Page"]]
,Para [Str "FIGURES",Space,Str "iv"]

View file

@ -1,4 +1,4 @@
[Para [Str "I",Space,Str "want",Space,Span ("",["comment-start"],[("id","0"),("author","Jesse Rosenthal"),("date","2016-05-09T16:13:00Z")]) [Str "I",Space,Str "left",Space,Str "a",Space,Str "comment."],Str "some",Space,Str "text",Space,Str "to",Space,Str "have",Space,Str "a",Space,Str "comment",Space,Span ("",["comment-end"],[("id","0")]) [],Str "on",Space,Str "it."]
,Para [Str "This",Space,Str "is",Space,Span ("",["comment-start"],[("id","1"),("author","Jesse Rosenthal"),("date","2016-05-09T16:13:00Z")]) [Str "A",Space,Str "comment",Space,Str "across",Space,Str "paragraphs."],Str "a",Space,Str "new",Space,Str "paragraph."]
,Para [Str "And",Space,Str "so",Span ("",["comment-end"],[("id","1")]) [],Space,Str "is",Space,Str "this."]
,Para [Str "One",Space,Span ("",["comment-start"],[("id","2"),("author","Jesse Rosenthal"),("date","2016-05-09T16:14:00Z")]) [Str "This",Space,Str "one",Space,Str "has",Space,Str "multiple",Space,Str "paragraphs.",Space,Str "\182",Space,Str "See?"],Str "more",Span ("",["comment-end"],[("id","2")]) [],Str ".",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "one",Space,Str "with",Space,Str "a",Space,Span ("",["comment-start"],[("id","3"),("author","Jesse Rosenthal"),("date","2016-06-22T14:35:00Z")]) [Str "Do",Space,Str "something."],Span ("",["comment-start"],[("id","4"),("author","Jesse Rosenthal"),("date","2016-06-22T14:36:00Z")]) [Str "Do",Space,Str "something",Space,Str "else."],Str "comment",Space,Str "in",Space,Str "a",Space,Str "comment",Span ("",["comment-end"],[("id","3")]) [Span ("",["comment-end"],[("id","4")]) []],Str "."]]
,Para [Str "One",Space,Span ("",["comment-start"],[("id","2"),("author","Jesse Rosenthal"),("date","2016-05-09T16:14:00Z")]) [Str "This",Space,Str "one",Space,Str "has",Space,Str "multiple",Space,Str "paragraphs.",Space,Str "\182",Space,Str "\182",Space,Str "See?"],Str "more",Span ("",["comment-end"],[("id","2")]) [],Str ".",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "one",Space,Str "with",Space,Str "a",Space,Span ("",["comment-start"],[("id","3"),("author","Jesse Rosenthal"),("date","2016-06-22T14:35:00Z")]) [Str "Do",Space,Str "something."],Span ("",["comment-start"],[("id","4"),("author","Jesse Rosenthal"),("date","2016-06-22T14:36:00Z")]) [Str "Do",Space,Str "something",Space,Str "else."],Str "comment",Space,Str "in",Space,Str "a",Space,Str "comment",Span ("",["comment-end"],[("id","3")]) [Span ("",["comment-end"],[("id","4")]) []],Str "."]]

View file

@ -0,0 +1,9 @@
[Para [Str "Drop",Space,Str "cap."]
,Para []
,Para [Str "Next",Space,Str "paragraph."]
,Para []
,Para [Str "Drop",Space,Str "cap",Space,Str "in",Space,Str "margin."]
,Para []
,Para []
,Para []
,Para [Str "Drop",Space,Str "cap",Space,Str "(not",Space,Str "really)."]]