{-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Docx Copyright : © 2017-2020 Jesse Rosenthal, John MacFarlane License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal Stability : alpha Portability : portable Tests for the word docx reader. -} module Tests.Readers.Docx (tests) where import Codec.Archive.Zip import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import qualified Data.Text as T import Data.Maybe import System.IO.Unsafe import Test.Tasty import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import qualified Text.Pandoc.Class as P import qualified Text.Pandoc.MediaBag as MB import Text.Pandoc.UTF8 as UTF8 -- We define a wrapper around pandoc that doesn't normalize in the -- tests. Since we do our own normalization, we want to make sure -- we're doing it right. newtype NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc} deriving Show noNorm :: Pandoc -> NoNormPandoc noNorm = NoNormPandoc defopts :: ReaderOptions defopts = def{ readerExtensions = getDefaultExtensions "docx" } instance ToString NoNormPandoc where toString d = T.unpack $ purely (writeNative def{ writerTemplate = s }) $ toPandoc d where s = case d of NoNormPandoc (Pandoc (Meta m) _) | M.null m -> Nothing | otherwise -> Just mempty -- need this to get meta output instance ToPandoc NoNormPandoc where toPandoc = unNoNorm compareOutput :: ReaderOptions -> FilePath -> FilePath -> IO (NoNormPandoc, NoNormPandoc) compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- UTF8.toText <$> BS.readFile nativeFile p <- runIOorExplode $ readDocx opts df df' <- runIOorExplode $ readNative def nf return (noNorm p, noNorm df') testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO TestTree testCompareWithOptsIO opts name docxFile nativeFile = do (dp, np) <- compareOutput opts docxFile nativeFile return $ test id name (dp, np) testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> TestTree testCompareWithOpts opts name docxFile nativeFile = unsafePerformIO $ testCompareWithOptsIO opts name docxFile nativeFile testCompare :: String -> FilePath -> FilePath -> TestTree testCompare = testCompareWithOpts defopts testForWarningsWithOptsIO :: ReaderOptions -> String -> FilePath -> [String] -> IO TestTree testForWarningsWithOptsIO opts name docxFile expected = do df <- B.readFile docxFile logs <- runIOorExplode $ setVerbosity ERROR >> readDocx opts df >> P.getLog let warns = [m | DocxParserWarning m <- logs] return $ test id name (T.unlines warns, unlines expected) testForWarningsWithOpts :: ReaderOptions -> String -> FilePath -> [String] -> TestTree testForWarningsWithOpts opts name docxFile expected = unsafePerformIO $ testForWarningsWithOptsIO opts name docxFile expected -- testForWarnings :: String -> FilePath -> [String] -> TestTree -- testForWarnings = testForWarningsWithOpts defopts getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) getMedia archivePath mediaPath = fmap fromEntry . findEntryByPath ("word/" ++ mediaPath) . toArchive <$> B.readFile archivePath compareMediaPathIO :: FilePath -> MB.MediaBag -> FilePath -> IO Bool compareMediaPathIO mediaPath mediaBag docxPath = do docxMedia <- getMedia docxPath mediaPath let mbBS = case MB.lookupMedia mediaPath mediaBag of Just item -> MB.mediaContents item Nothing -> error ("couldn't find " ++ mediaPath ++ " in media bag") docxBS = fromMaybe (error ("couldn't find " ++ mediaPath ++ " in media bag")) docxMedia return $ mbBS == docxBS compareMediaBagIO :: FilePath -> IO Bool compareMediaBagIO docxFile = do df <- B.readFile docxFile mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) (MB.mediaDirectory mb) return $ and bools testMediaBagIO :: String -> FilePath -> IO TestTree testMediaBagIO name docxFile = do outcome <- compareMediaBagIO docxFile return $ testCase name (assertBool ("Media didn't match media bag in file " ++ docxFile) outcome) testMediaBag :: String -> FilePath -> TestTree testMediaBag name docxFile = unsafePerformIO $ testMediaBagIO name docxFile tests :: [TestTree] tests = [ testGroup "document" [ testCompare "allow different document.xml file as defined in _rels/.rels" "docx/alternate_document_path.docx" "docx/alternate_document_path.native" ] , testGroup "inlines" [ testCompare "font formatting" "docx/inline_formatting.docx" "docx/inline_formatting.native" , testCompare "font formatting with character styles" "docx/char_styles.docx" "docx/char_styles.native" , testCompare "hyperlinks" "docx/links.docx" "docx/links.native" , testCompare "hyperlinks in tag" "docx/instrText_hyperlink.docx" "docx/instrText_hyperlink.native" , testCompare "nested fields with tag" "docx/nested_instrText.docx" "docx/nested_instrText.native" , testCompare "empty fields with tag" "docx/empty_field.docx" "docx/empty_field.native" , testCompare "pageref hyperlinks in tag" "docx/pageref.docx" "docx/pageref.native" , testCompare "inline image" "docx/image.docx" "docx/image_no_embed.native" , testCompare "VML image" "docx/image_vml.docx" "docx/image_vml.native" , testCompare "VML image as object" "docx/image_vml_as_object.docx" "docx/image_vml_as_object.native" , testCompare "inline image in links" "docx/inline_images.docx" "docx/inline_images.native" , testCompare "handling unicode input" "docx/unicode.docx" "docx/unicode.native" , testCompare "literal tabs" "docx/tabs.docx" "docx/tabs.native" , testCompare "special punctuation" "docx/special_punctuation.docx" "docx/special_punctuation.native" , testCompare "normalizing inlines" "docx/normalize.docx" "docx/normalize.native" , testCompare "normalizing inlines deep inside blocks" "docx/deep_normalize.docx" "docx/deep_normalize.native" , testCompare "move trailing spaces outside of formatting" "docx/trailing_spaces_in_formatting.docx" "docx/trailing_spaces_in_formatting.native" , testCompare "remove trailing spaces from last inline" "docx/trim_last_inline.docx" "docx/trim_last_inline.native" , testCompare "inline code (with VerbatimChar style)" "docx/inline_code.docx" "docx/inline_code.native" , testCompare "inline code in subscript and superscript" "docx/verbatim_subsuper.docx" "docx/verbatim_subsuper.native" , testCompare "inlines inside of Structured Document Tags" "docx/sdt_elements.docx" "docx/sdt_elements.native" , testCompare "Structured Document Tags in footnotes" "docx/sdt_in_footnote.docx" "docx/sdt_in_footnote.native" , testCompare "nested Structured Document Tags" "docx/nested_sdt.docx" "docx/nested_sdt.native" , testCompare "nested Smart Tags" "docx/nested_smart_tags.docx" "docx/nested_smart_tags.native" , testCompare "remove anchor spans with nothing pointing to them" "docx/unused_anchors.docx" "docx/unused_anchors.native" , testCompare "collapse overlapping targets (anchor spans)" "docx/overlapping_targets.docx" "docx/overlapping_targets.native" ] , testGroup "blocks" [ testCompare "headers" "docx/headers.docx" "docx/headers.native" , testCompare "headers already having auto identifiers" "docx/already_auto_ident.docx" "docx/already_auto_ident.native" , testCompare "avoid zero-level headers" "docx/0_level_headers.docx" "docx/0_level_headers.native" , testCompare "nested anchor spans in header" "docx/nested_anchors_in_header.docx" "docx/nested_anchors_in_header.native" , testCompare "single numbered item not made into list" "docx/numbered_header.docx" "docx/numbered_header.native" , testCompare "enumerated headers not made into numbered list" "docx/enumerated_headings.docx" "docx/enumerated_headings.native" , testCompare "i18n blocks (headers and blockquotes)" "docx/i18n_blocks.docx" "docx/i18n_blocks.native" , testCompare "lists" "docx/lists.docx" "docx/lists.native" , testCompare "compact lists" "docx/lists-compact.docx" "docx/lists-compact.native" , testCompare "lists with level overrides" "docx/lists_level_override.docx" "docx/lists_level_override.native" , testCompare "lists continuing after interruption" "docx/lists_continuing.docx" "docx/lists_continuing.native" , testCompare "lists restarting after interruption" "docx/lists_restarting.docx" "docx/lists_restarting.native" , testCompare "sublists reset numbering to 1" "docx/lists_sublist_reset.docx" "docx/lists_sublist_reset.native" , testCompare "definition lists" "docx/definition_list.docx" "docx/definition_list.native" , testCompare "custom defined lists in styles" "docx/german_styled_lists.docx" "docx/german_styled_lists.native" , testCompare "user deletes bullet after list item (=> part of item par)" "docx/dummy_item_after_list_item.docx" "docx/dummy_item_after_list_item.native" , testCompare "user deletes bullet after par (=> new par)" "docx/dummy_item_after_paragraph.docx" "docx/dummy_item_after_paragraph.native" , testCompare "footnotes and endnotes" "docx/notes.docx" "docx/notes.native" , testCompare "links in footnotes and endnotes" "docx/link_in_notes.docx" "docx/link_in_notes.native" , testCompare "blockquotes (parsing indent as blockquote)" "docx/block_quotes.docx" "docx/block_quotes_parse_indent.native" , testCompare "blockquotes (parsing indent relative to the indent of the parent style as blockquote)" "docx/relative_indentation_blockquotes.docx" "docx/relative_indentation_blockquotes.native" , testCompare "hanging indents" "docx/hanging_indent.docx" "docx/hanging_indent.native" , testCompare "tables" "docx/tables.docx" "docx/tables.native" , testCompare "tables with lists in cells" "docx/table_with_list_cell.docx" "docx/table_with_list_cell.native" , testCompare "a table with a header which contains rowspans greater than 1" "docx/table_header_rowspan.docx" "docx/table_header_rowspan.native" , testCompare "tables with one row" "docx/table_one_row.docx" "docx/table_one_row.native" , testCompare "tables with just one row, which is a header" "docx/table_one_header_row.docx" "docx/table_one_header_row.native" , testCompare "tables with variable width" "docx/table_variable_width.docx" "docx/table_variable_width.native" , testCompare "tables with captions which contain a Table field" "docx/table_captions_with_field.docx" "docx/table_captions_with_field.native" , testCompare "tables with captions which don't contain a Table field" "docx/table_captions_no_field.docx" "docx/table_captions_no_field.native" , testCompare "code block" "docx/codeblock.docx" "docx/codeblock.native" , testCompare "combine adjacent code blocks" "docx/adjacent_codeblocks.docx" "docx/adjacent_codeblocks.native" , testCompare "dropcap paragraphs" "docx/drop_cap.docx" "docx/drop_cap.native" ] , testGroup "citations" [ testCompare "zotero with -citations" "docx/zotero_citations.docx" "docx/zotero_citations_minus.native" ] , testGroup "track changes" [ testCompare "insertion (default)" "docx/track_changes_insertion.docx" "docx/track_changes_insertion_accept.native" , testCompareWithOpts def{readerTrackChanges=AcceptChanges} "insert insertion (accept)" "docx/track_changes_insertion.docx" "docx/track_changes_insertion_accept.native" , testCompareWithOpts def{readerTrackChanges=RejectChanges} "remove insertion (reject)" "docx/track_changes_insertion.docx" "docx/track_changes_insertion_reject.native" , testCompare "deletion (default)" "docx/track_changes_deletion.docx" "docx/track_changes_deletion_accept.native" , testCompareWithOpts def{readerTrackChanges=AcceptChanges} "remove deletion (accept)" "docx/track_changes_deletion.docx" "docx/track_changes_deletion_accept.native" , testCompareWithOpts def{readerTrackChanges=RejectChanges} "insert deletion (reject)" "docx/track_changes_deletion.docx" "docx/track_changes_deletion_reject.native" , testCompareWithOpts def{readerTrackChanges=AllChanges} "keep insertion (all)" "docx/track_changes_deletion.docx" "docx/track_changes_deletion_all.native" , testCompareWithOpts def{readerTrackChanges=AllChanges} "keep deletion (all)" "docx/track_changes_deletion.docx" "docx/track_changes_deletion_all.native" , testCompareWithOpts def{readerTrackChanges=AcceptChanges} "move text (accept)" "docx/track_changes_move.docx" "docx/track_changes_move_accept.native" , testCompareWithOpts def{readerTrackChanges=RejectChanges} "move text (reject)" "docx/track_changes_move.docx" "docx/track_changes_move_reject.native" , testCompareWithOpts def{readerTrackChanges=AllChanges} "move text (all)" "docx/track_changes_move.docx" "docx/track_changes_move_all.native" , testCompareWithOpts def{readerTrackChanges=AcceptChanges} "comments (accept -- no comments)" "docx/comments.docx" "docx/comments_no_comments.native" , testCompareWithOpts def{readerTrackChanges=RejectChanges} "comments (reject -- comments)" "docx/comments.docx" "docx/comments_no_comments.native" , testCompareWithOpts def{readerTrackChanges=AllChanges} "comments (all comments)" "docx/comments.docx" "docx/comments.native" , testCompareWithOpts def{readerTrackChanges=AcceptChanges} "paragraph insertion/deletion (accept)" "docx/paragraph_insertion_deletion.docx" "docx/paragraph_insertion_deletion_accept.native" , testCompareWithOpts def{readerTrackChanges=RejectChanges} "paragraph insertion/deletion (reject)" "docx/paragraph_insertion_deletion.docx" "docx/paragraph_insertion_deletion_reject.native" , testCompareWithOpts def{readerTrackChanges=AllChanges} "paragraph insertion/deletion (all)" "docx/paragraph_insertion_deletion.docx" "docx/paragraph_insertion_deletion_all.native" , testCompareWithOpts def{readerTrackChanges=AllChanges} "paragraph insertion/deletion (all)" "docx/track_changes_scrubbed_metadata.docx" "docx/track_changes_scrubbed_metadata.native" , testForWarningsWithOpts def{readerTrackChanges=AcceptChanges} "comment warnings (accept -- no warnings)" "docx/comments_warning.docx" [] , testForWarningsWithOpts def{readerTrackChanges=RejectChanges} "comment warnings (reject -- no warnings)" "docx/comments_warning.docx" [] , testForWarningsWithOpts def{readerTrackChanges=AllChanges} "comment warnings (all)" "docx/comments_warning.docx" ["Docx comment 1 will not retain formatting"] ] , testGroup "media" [ testMediaBag "image extraction" "docx/image.docx" ] , testGroup "custom styles" [ testCompare "custom styles (`+styles`) not enabled (default)" "docx/custom-style-reference.docx" "docx/custom-style-no-styles.native" , testCompareWithOpts def{readerExtensions=extensionsFromList [Ext_styles]} "custom styles (`+styles`) enabled" "docx/custom-style-reference.docx" "docx/custom-style-with-styles.native" , testCompareWithOpts def{readerExtensions=extensionsFromList [Ext_styles]} "custom styles (`+styles`): Compact style is removed from output" "docx/compact-style-removal.docx" "docx/compact-style-removal.native" ] , testGroup "metadata" [ testCompareWithOpts def{readerStandalone=True} "metadata fields" "docx/metadata.docx" "docx/metadata.native" , testCompareWithOpts def{readerStandalone=True} "stop recording metadata with normal text" "docx/metadata_after_normal.docx" "docx/metadata_after_normal.native" ] ]