2014-06-17 07:44:40 +02:00
|
|
|
module Tests.Readers.Docx (tests) where
|
2014-06-15 20:55:17 +02:00
|
|
|
|
|
|
|
import Text.Pandoc.Options
|
|
|
|
import Text.Pandoc.Readers.Native
|
|
|
|
import Text.Pandoc.Definition
|
|
|
|
import Tests.Helpers
|
|
|
|
import Test.Framework
|
2014-07-31 18:10:33 +02:00
|
|
|
import Test.HUnit (assertBool)
|
|
|
|
import Test.Framework.Providers.HUnit
|
2014-06-15 20:55:17 +02:00
|
|
|
import qualified Data.ByteString.Lazy as B
|
2014-06-17 07:44:40 +02:00
|
|
|
import Text.Pandoc.Readers.Docx
|
2014-06-21 00:26:15 +02:00
|
|
|
import Text.Pandoc.Writers.Native (writeNative)
|
|
|
|
import qualified Data.Map as M
|
2014-07-31 18:10:33 +02:00
|
|
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
|
|
|
|
import Codec.Archive.Zip
|
2015-02-18 20:57:48 +01:00
|
|
|
import Text.Pandoc.Error
|
2014-06-15 20:55:17 +02:00
|
|
|
|
2014-06-21 00:26:15 +02:00
|
|
|
-- | Wrapper around 'Pandoc' that doesn't normalize in the tests.
-- Since the docx reader does its own normalization, we want to make
-- sure it is doing it right, so documents are compared through this
-- wrapper instead of through a normalizing instance.
--
-- A @newtype@ is used because this is a plain single-field wrapper; it
-- keeps the same constructor and accessor without adding a runtime box.
newtype NoNormPandoc = NoNormPandoc { unNoNorm :: Pandoc }
  deriving Show
|
|
|
|
|
|
|
|
-- | Wrap a 'Pandoc' document in 'NoNormPandoc'.
noNorm :: Pandoc -> NoNormPandoc
noNorm doc = NoNormPandoc doc
|
|
|
|
|
|
|
|
-- | Render the wrapped document with 'writeNative'. The writer's
-- standalone flag is switched on exactly when the document's metadata
-- map is non-empty.
instance ToString NoNormPandoc where
  toString d = writeNative def{ writerStandalone = hasMeta } (toPandoc d)
    where
      -- Lazily take the wrapper apart to reach the metadata map.
      NoNormPandoc (Pandoc (Meta metaMap) _) = d
      hasMeta = not (M.null metaMap)
|
|
|
|
|
|
|
|
-- | Converting to 'Pandoc' simply unwraps the document unchanged.
instance ToPandoc NoNormPandoc where
  toPandoc (NoNormPandoc doc) = doc
|
|
|
|
|
|
|
|
-- | Read a docx file with the given reader options and a native file
-- with 'readNative', returning both documents (docx result first)
-- wrapped in 'NoNormPandoc'. The media bag produced by 'readDocx' is
-- discarded; reader errors are passed through 'handleError'.
compareOutput :: ReaderOptions
              -> FilePath
              -> FilePath
              -> IO (NoNormPandoc, NoNormPandoc)
compareOutput opts docxFile nativeFile = do
  docxBytes  <- B.readFile docxFile
  nativeText <- Prelude.readFile nativeFile
  let (fromDocx, _) = handleError (readDocx opts docxBytes)
      fromNative    = handleError (readNative nativeText)
  return (noNorm fromDocx, noNorm fromNative)
|
2014-06-15 20:55:17 +02:00
|
|
|
|
2014-06-19 18:05:16 +02:00
|
|
|
-- | Load the docx/native pair via 'compareOutput' and hand the
-- resulting pair to @test id name@ to build the named test.
testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test
testCompareWithOptsIO opts name docxFile nativeFile =
  fmap (test id name) (compareOutput opts docxFile nativeFile)
|
|
|
|
|
2014-06-19 18:05:16 +02:00
|
|
|
-- | Version of 'testCompareWithOptsIO' lifted out of 'IO' with
-- 'buildTest', so it can be placed directly in a test list.
testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> Test
testCompareWithOpts opts name docxFile nativeFile =
  buildTest (testCompareWithOptsIO opts name docxFile nativeFile)
|
|
|
|
|
2014-06-15 20:55:17 +02:00
|
|
|
-- | 'testCompareWithOpts' using the default reader options ('def').
testCompare :: String -> FilePath -> FilePath -> Test
testCompare name docxFile nativeFile =
  testCompareWithOpts def name docxFile nativeFile
|
2014-06-15 20:55:17 +02:00
|
|
|
|
2014-07-31 18:10:33 +02:00
|
|
|
-- | Read the zip archive at @archivePath@ and look up the entry
-- @"word/" ++ mediaPath@ in it. Returns the entry's contents, or
-- 'Nothing' when the archive has no such entry.
getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString)
getMedia archivePath mediaPath = do
  archive <- fmap toArchive (B.readFile archivePath)
  return (fmap fromEntry (findEntryByPath ("word/" ++ mediaPath) archive))
|
2014-07-31 18:10:33 +02:00
|
|
|
|
|
|
|
-- | Check that the bytes stored under @mediaPath@ in the given
-- 'MediaBag' equal the bytes 'getMedia' finds for the same path in the
-- docx file at @docxPath@.
--
-- Calls 'error' when the path is missing on either side. The message
-- names the side that lacked the entry: the media bag, or the docx
-- archive (previously both cases reported "in media bag", which
-- misattributed a missing archive entry).
compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool
compareMediaPathIO mediaPath mediaBag docxPath = do
    docxMedia <- getMedia docxPath mediaPath
    let mbBS   = case lookupMedia mediaPath mediaBag of
                   Just (_, bs) -> bs
                   Nothing      -> missingFrom "media bag"
        docxBS = case docxMedia of
                   Just bs -> bs
                   Nothing -> missingFrom ("docx archive " ++ docxPath)
    return $ mbBS == docxBS
  where
    -- Abort with a message saying where the media path could not be found.
    missingFrom place =
      error ("couldn't find " ++ mediaPath ++ " in " ++ place)
|
|
|
|
|
|
|
|
-- | Read a docx file with default options and run 'compareMediaPathIO'
-- for every path listed by 'mediaDirectory' on the resulting media
-- bag. The result is 'True' only if every comparison is 'True'.
compareMediaBagIO :: FilePath -> IO Bool
compareMediaBagIO docxFile = do
  docxBytes <- B.readFile docxFile
  let (_, bag) = handleError (readDocx def docxBytes)
      -- Only the path component of each directory entry is needed.
      checkEntry (path, _, _) = compareMediaPathIO path bag docxFile
  fmap and (mapM checkEntry (mediaDirectory bag))
|
2014-07-31 04:31:38 +02:00
|
|
|
|
2014-07-31 18:10:33 +02:00
|
|
|
-- | Run 'compareMediaBagIO' on the docx file and wrap its outcome in a
-- named HUnit test case that asserts the outcome is 'True'.
testMediaBagIO :: String -> FilePath -> IO Test
testMediaBagIO name docxFile = do
  matched <- compareMediaBagIO docxFile
  let failureMsg = "Media didn't match media bag in file " ++ docxFile
  return (testCase name (assertBool failureMsg matched))
|
2014-07-31 04:31:38 +02:00
|
|
|
|
2014-07-31 18:10:33 +02:00
|
|
|
-- | Version of 'testMediaBagIO' lifted out of 'IO' with 'buildTest'.
testMediaBag :: String -> FilePath -> Test
testMediaBag name docxFile =
  buildTest (testMediaBagIO name docxFile)
|
2014-06-15 20:55:17 +02:00
|
|
|
|
|
|
|
-- | All docx reader tests, grouped by the kind of content they
-- exercise. Most cases compare the result of reading a @.docx@ fixture
-- against a @.native@ fixture; the "media" group instead checks the
-- extracted media bag against the files inside the docx archive.
tests :: [Test]
tests = [ testGroup "inlines"
          [ testCompare
            "font formatting"
            "docx/inline_formatting.docx"
            "docx/inline_formatting.native"
          , testCompare
            "font formatting with character styles"
            "docx/char_styles.docx"
            "docx/char_styles.native"
          , testCompare
            "hyperlinks"
            "docx/links.docx"
            "docx/links.native"
          , testCompare
            "inline image"
            "docx/image.docx"
            "docx/image_no_embed.native"
          , testCompare
            "VML image"
            "docx/image_vml.docx"
            "docx/image_vml.native"
          , testCompare
            "inline image in links"
            "docx/inline_images.docx"
            "docx/inline_images.native"
          , testCompare
            "handling unicode input"
            "docx/unicode.docx"
            "docx/unicode.native"
          , testCompare
            "literal tabs"
            "docx/tabs.docx"
            "docx/tabs.native"
          , testCompare
            "special punctuation"
            "docx/special_punctuation.docx"
            "docx/special_punctuation.native"
          , testCompare
            "normalizing inlines"
            "docx/normalize.docx"
            "docx/normalize.native"
          , testCompare
            "normalizing inlines deep inside blocks"
            "docx/deep_normalize.docx"
            "docx/deep_normalize.native"
          , testCompare
            "move trailing spaces outside of formatting"
            "docx/trailing_spaces_in_formatting.docx"
            "docx/trailing_spaces_in_formatting.native"
          , testCompare
            "inline code (with VerbatimChar style)"
            "docx/inline_code.docx"
            "docx/inline_code.native"
          , testCompare
            "inline code in subscript and superscript"
            "docx/verbatim_subsuper.docx"
            "docx/verbatim_subsuper.native"
          ]
        , testGroup "blocks"
          [ testCompare
            "headers"
            "docx/headers.docx"
            "docx/headers.native"
          , testCompare
            "headers already having auto identifiers"
            "docx/already_auto_ident.docx"
            "docx/already_auto_ident.native"
          , testCompare
            "numbered headers automatically made into list"
            "docx/numbered_header.docx"
            "docx/numbered_header.native"
          , testCompare
            "i18n blocks (headers and blockquotes)"
            "docx/i18n_blocks.docx"
            "docx/i18n_blocks.native"
          , testCompare
            "lists"
            "docx/lists.docx"
            "docx/lists.native"
          , testCompare
            "definition lists"
            "docx/definition_list.docx"
            "docx/definition_list.native"
          , testCompare
            "custom defined lists in styles"
            "docx/german_styled_lists.docx"
            "docx/german_styled_lists.native"
          , testCompare
            "footnotes and endnotes"
            "docx/notes.docx"
            "docx/notes.native"
          , testCompare
            "blockquotes (parsing indent as blockquote)"
            "docx/block_quotes.docx"
            "docx/block_quotes_parse_indent.native"
          , testCompare
            "hanging indents"
            "docx/hanging_indent.docx"
            "docx/hanging_indent.native"
          , testCompare
            "tables"
            "docx/tables.docx"
            "docx/tables.native"
          , testCompare
            "tables with lists in cells"
            "docx/table_with_list_cell.docx"
            "docx/table_with_list_cell.native"
          , testCompare
            "code block"
            "docx/codeblock.docx"
            "docx/codeblock.native"
          , testCompare
            "dropcap paragraphs"
            "docx/drop_cap.docx"
            "docx/drop_cap.native"
          ]
        , testGroup "track changes"
          [ testCompare
            "insertion (default)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
            "insert insertion (accept)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=RejectChanges}
            "remove insertion (reject)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_reject.native"
          , testCompare
            "deletion (default)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
            "remove deletion (accept)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_accept.native"
          , testCompareWithOpts def{readerTrackChanges=RejectChanges}
            "insert deletion (reject)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_reject.native"
          -- This case previously pointed at the deletion fixtures, which
          -- made it an exact duplicate of "keep deletion (all)" and left
          -- insertions under AllChanges untested.
          -- NOTE(review): assumes docx/track_changes_insertion_all.native
          -- exists next to the _accept/_reject fixtures -- confirm.
          , testCompareWithOpts def{readerTrackChanges=AllChanges}
            "keep insertion (all)"
            "docx/track_changes_insertion.docx"
            "docx/track_changes_insertion_all.native"
          , testCompareWithOpts def{readerTrackChanges=AllChanges}
            "keep deletion (all)"
            "docx/track_changes_deletion.docx"
            "docx/track_changes_deletion_all.native"
          ]
        , testGroup "media"
          [ testMediaBag
            "image extraction"
            "docx/image.docx"
          ]
        , testGroup "metadata"
          [ testCompareWithOpts def{readerStandalone=True}
            "metadata fields"
            "docx/metadata.docx"
            "docx/metadata.native"
          , testCompareWithOpts def{readerStandalone=True}
            "stop recording metadata with normal text"
            "docx/metadata_after_normal.docx"
            "docx/metadata_after_normal.native"
          ]

        ]
|