Powerpoint writer tests: New test framework for pptx.

Previously we had tested certain properties of the output PowerPoint
slides. Corruption, though, comes as the result of a numebr of
interrelated issues in the output pptx archive. This is a new
approach, which compares the output of the Powerpoint writer with
files that we know to (a) not be corrupt, and (b) to show the desired
output behavior (details below). This commit introduces three tests
using the new framework. More will follow.

The test procedure: given a native file and a pptx file, we generate a
pptx archive from the native file, and then test:

1. Whether the same files are in the two archives

2. Whether each of the contained xml files is the same. (We skip time
   entries in `docProps/core.xml`, since these are derived from IO. We
   just check to make sure that they're there in the same way in both
   files.)

3. Whether each of the media files is the same.

Note that steps 2 and 3, though they compare multiple files, are one
test each, since the number of files depends on the input file (if
there is a failure, it will only report the first failed file
comparison in the test failure).
This commit is contained in:
Jesse Rosenthal 2018-01-21 00:25:03 -05:00
parent 3193bf6be7
commit 54526525bf
7 changed files with 191 additions and 150 deletions

View file

@ -302,6 +302,8 @@ extra-source-files:
test/docx/*.native test/docx/*.native
test/epub/*.epub test/epub/*.epub
test/epub/*.native test/epub/*.native
test/pptx/*.pptx
test/pptx/*.native
test/txt2tags.t2t test/txt2tags.t2t
test/twiki-reader.twiki test/twiki-reader.twiki
test/tikiwiki-reader.tikiwiki test/tikiwiki-reader.tikiwiki

View file

@ -1,169 +1,196 @@
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiWayIf #-}
module Tests.Writers.Powerpoint (tests) where module Tests.Writers.Powerpoint (tests) where
import Control.Exception (throwIO) -- import Control.Exception (throwIO)
import Text.Pandoc import Text.Pandoc
import Text.Pandoc.Builder
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Walk
import Test.Tasty import Test.Tasty
import Test.Tasty.HUnit import Test.Tasty.HUnit
import Test.Tasty.QuickCheck
import Codec.Archive.Zip import Codec.Archive.Zip
import Text.XML.Light import Text.XML.Light
import Data.List (isPrefixOf, isSuffixOf, sort) import qualified Data.ByteString.Lazy as BL
import Data.Maybe (mapMaybe) import qualified Data.Text.IO as T
import Data.List (isPrefixOf, isSuffixOf, sort, (\\), intercalate)
getPptxArchive :: WriterOptions -> Pandoc -> IO Archive import Data.Maybe (fromJust, isNothing)
getPptxArchive opts pd = do import Tests.Helpers
mbs <- runIO $ import Data.Algorithm.Diff
do setUserDataDir $ Just "../data" import Control.Monad (when)
writePowerpoint opts pd
case mbs of
Left e -> throwIO e
Right bs -> return $ toArchive bs
----- Number of Slides -----------
numberOfSlides :: WriterOptions -> Pandoc -> IO Int
numberOfSlides opts pd = do
archive <- getPptxArchive opts pd
return $
length $
filter (isSuffixOf ".xml") $
filter (isPrefixOf "ppt/slides/slide") $
filesInArchive archive
testNumberOfSlides :: TestName -> Int -> WriterOptions -> Pandoc -> TestTree
testNumberOfSlides name n opts pd =
testCase name $ do
n' <- numberOfSlides opts pd
n' @=? n
numSlideTests :: TestTree
numSlideTests = testGroup "Number of slides in output"
[ testNumberOfSlides
"simple one-slide deck" 1
def
(doc $ para "foo")
, testNumberOfSlides
"with metadata (header slide)" 2
def
(setTitle "My Title" $ doc $ para "foo")
, testNumberOfSlides
"With h1 slide (using default slide-level)" 1
def
(doc $ header 1 "Header" <> para "foo")
, testNumberOfSlides
"With h2 slide (using default slide-level)" 2
def
(doc $ header 1 "Header" <> header 2 "subeader" <> para "foo")
, testNumberOfSlides
"With h1 slide (using slide-level 3)" 2
def {writerSlideLevel= Just 3}
(doc $ header 1 "Header" <> para "foo")
, testNumberOfSlides
"With h2 slide (using slide-level 3)" 3
def {writerSlideLevel= Just 3}
(doc $ header 1 "Header" <> header 2 "subeader" <> para "foo")
, testNumberOfSlides
"With image slide, no header" 3
def
(doc $
para "first slide" <>
para (image "lalune.jpg" "" "") <>
para "foo")
, testNumberOfSlides
"With image slide, header" 3
def
(doc $
para "first slide" <>
header 2 "image header" <>
para (image "lalune.jpg" "" "") <>
para "foo")
, testNumberOfSlides
"With table, no header" 3
def
(doc $
para "first slide" <>
simpleTable [para "foo" <> para "bar"] [[para "this" <> para "that"]] <>
para "foo")
, testNumberOfSlides
"With table, header" 3
def
(doc $
para "first slide" <>
header 2 "table header" <>
simpleTable [para "foo" <> para "bar"] [[para "this" <> para "that"]] <>
para "foo")
, testNumberOfSlides
"hrule" 2
def
(doc $
para "first slide" <> horizontalRule <> para "last slide")
, testNumberOfSlides
"with notes slide" 2
def
(doc $
para $ text "Foo" <> note (para "note text"))
]
----- Content Types -----------
contentTypesFileExists :: WriterOptions -> Pandoc -> TestTree getPptxBytes :: WriterOptions
contentTypesFileExists opts pd = -> FilePath
testCase "Existence of [Content_Types].xml file" $ -> FilePath
do archive <- getPptxArchive opts pd -> IO (BL.ByteString, BL.ByteString)
assertBool "Missing [Content_Types].xml file" $ getPptxBytes opts nativeFp pptxFp = do
"[Content_Types].xml" `elem` filesInArchive archive ntvTxt <- T.readFile nativeFp
ntv <- runIOorExplode $ readNative def ntvTxt
myPptxBs <- runIOorExplode $ writePowerpoint opts ntv
goodPptxBs <- BL.readFile pptxFp
return (myPptxBs, goodPptxBs)
assertSameFileList :: Archive -> Archive -> FilePath -> Assertion
assertSameFileList myArch goodArch pptxFp = do
let filesMy = filesInArchive myArch
filesGood = filesInArchive goodArch
diffMyGood = filesMy \\ filesGood
diffGoodMy = filesGood \\ filesMy
if | null diffMyGood && null diffGoodMy -> return ()
| null diffMyGood ->
assertFailure $
"Files in " ++ pptxFp ++ " but not in generated archive:\n" ++
intercalate ", " diffGoodMy
| null diffGoodMy ->
assertFailure $
"Files in generated archive but not in " ++ pptxFp ++ ":\n" ++
intercalate ", " diffMyGood
| otherwise ->
assertFailure $
"Files in " ++ pptxFp ++ " but not in generated archive:\n" ++
intercalate ", " diffGoodMy ++
"\n" ++
"Files in generated archive but not in " ++ pptxFp ++ ":\n" ++
intercalate ", " diffMyGood
-- We want an "Override" entry for each xml file under ppt/. compareXMLBool :: Content -> Content -> Bool
prop_ContentOverrides :: Pandoc -> IO Bool -- We make a special exception for times at the moment, and just pass
prop_ContentOverrides pd = do -- them because we can't control the utctime when running IO. Besides,
-- remove Math to avoid warnings -- so long as we have two times, we're okay.
let go :: Inline -> Inline compareXMLBool (Elem myElem) (Elem goodElem)
go (Math _ _) = Str "Math" | (QName "created" _ (Just "dcterms")) <- elName myElem
go i = i , (QName "created" _ (Just "dcterms")) <- elName goodElem =
pd' = walk go pd True
archive <- getPptxArchive def pd' compareXMLBool (Elem myElem) (Elem goodElem)
let xmlFiles = filter ("[Content_Types].xml" /=) $ | (QName "modified" _ (Just "dcterms")) <- elName myElem
filter (isSuffixOf ".xml") $ , (QName "modified" _ (Just "dcterms")) <- elName goodElem =
filesInArchive archive True
contentTypes <- case findEntryByPath "[Content_Types].xml" archive of compareXMLBool (Elem myElem) (Elem goodElem) =
Just ent -> return $ fromEntry ent and [ elName myElem == elName goodElem
Nothing -> throwIO $ , elAttribs myElem == elAttribs goodElem
PandocSomeError "Missing [Content_Types].xml file" , and $
typesElem <- case parseXMLDoc contentTypes of map (uncurry compareXMLBool) $
Just element -> return element zip (elContent myElem) (elContent goodElem)
Nothing -> throwIO $ ]
PandocSomeError "[Content_Types].xml cannot be parsed" compareXMLBool (Text myCData) (Text goodCData) =
let ns = findAttr (QName "xmlns" Nothing Nothing) typesElem and [ cdVerbatim myCData == cdVerbatim goodCData
overrides = findChildren (QName "Override" ns Nothing) typesElem , cdData myCData == cdData goodCData
partNames = mapMaybe (findAttr (QName "PartName" Nothing Nothing)) overrides , cdLine myCData == cdLine goodCData
-- files in content_types are absolute ]
absXmlFiles = map (\fp -> case fp of compareXMLBool (CRef myStr) (CRef goodStr) =
('/':_) -> fp myStr == goodStr
_ -> '/': fp compareXMLBool _ _ = False
)
xmlFiles
return $ sort absXmlFiles == sort partNames
contentOverridesTests :: TestTree displayDiff :: Content -> Content -> String
contentOverridesTests = localOption (QuickCheckTests 20) $ displayDiff elemA elemB =
testProperty "Content Overrides for each XML file" $ showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB)
\x -> ioProperty $ prop_ContentOverrides (x :: Pandoc)
contentTypeTests :: TestTree compareXMLFile :: FilePath -> Archive -> Archive -> Assertion
contentTypeTests = testGroup "[Content_Types].xml file" compareXMLFile fp myArch goodArch = do
[ contentTypesFileExists def (doc $ para "foo") let mbMyEntry = findEntryByPath fp myArch
, contentOverridesTests when (isNothing mbMyEntry)
] (assertFailure $
"Can't extract " ++ fp ++ " from generated archive")
let mbMyXMLDoc = parseXMLDoc $ fromEntry $ fromJust mbMyEntry
when (isNothing mbMyXMLDoc)
(assertFailure $
"Can't parse xml in " ++ fp ++ " from generated archive")
let myContent = Elem $ fromJust mbMyXMLDoc
let mbGoodEntry = findEntryByPath fp goodArch
when (isNothing mbGoodEntry)
(assertFailure $
"Can't extract " ++ fp ++ " from archive in stored pptx file")
let mbGoodXMLDoc = parseXMLDoc $ fromEntry $ fromJust mbGoodEntry
when (isNothing mbGoodXMLDoc)
(assertFailure $
"Can't parse xml in " ++ fp ++ " from archive in stored pptx file")
let goodContent = Elem $ fromJust mbGoodXMLDoc
assertBool
("Non-matching xml in " ++ fp ++ ":\n" ++ displayDiff myContent goodContent)
(compareXMLBool myContent goodContent)
compareBinaryFile :: FilePath -> Archive -> Archive -> Assertion
compareBinaryFile fp myArch goodArch = do
let mbMyEntry = findEntryByPath fp myArch
when (isNothing mbMyEntry)
(assertFailure $
"Can't extract " ++ fp ++ " from generated archive")
let myBytes = fromEntry $ fromJust mbMyEntry
let mbGoodEntry = findEntryByPath fp goodArch
when (isNothing mbGoodEntry)
(assertFailure $
"Can't extract " ++ fp ++ " from archive in stored pptx file")
let goodBytes = fromEntry $ fromJust mbGoodEntry
assertBool (fp ++ " doesn't match") (myBytes == goodBytes)
testSameFileList :: WriterOptions -> FilePath -> FilePath -> TestTree
testSameFileList opts myFp goodFp =
testCase ("Identical file list in archives") $ do
(myBS, goodBS) <- getPptxBytes opts myFp goodFp
let myArch = toArchive myBS
goodArch = toArchive goodBS
(assertSameFileList myArch goodArch goodFp)
testSameXML :: WriterOptions -> FilePath -> FilePath -> TestTree
testSameXML opts myFp goodFp = testCaseSteps "Comparing extracted xml files" $
\step -> do
(myBS, goodBS) <- getPptxBytes opts myFp goodFp
let myArch = toArchive myBS
goodArch = toArchive goodBS
let xmlFileList = sort $
filter (\fp -> ".xml" `isSuffixOf` fp || ".rels" `isSuffixOf` fp)
(filesInArchive myArch)
mapM_
(\fp -> step ("- " ++ fp) >> compareXMLFile fp myArch goodArch)
xmlFileList
testSameMedia :: WriterOptions -> FilePath -> FilePath -> TestTree
testSameMedia opts myFp goodFp = testCaseSteps "Comparing media files" $
\step -> do
(myBS, goodBS) <- getPptxBytes opts myFp goodFp
let myArch = toArchive myBS
goodArch = toArchive goodBS
let mediaFileList = sort $
filter (\fp -> "ppt/media/" `isPrefixOf` fp)
(filesInArchive myArch)
mapM_
(\fp -> step ("- " ++ fp) >> compareBinaryFile fp myArch goodArch)
mediaFileList
testCompareWithOpts :: String -> WriterOptions ->FilePath -> FilePath -> TestTree
testCompareWithOpts testName opts nativeFp pptxFp =
testGroup testName [ testSameFileList opts nativeFp pptxFp
, testSameXML opts nativeFp pptxFp
, testSameMedia opts nativeFp pptxFp
]
testCompare :: String -> FilePath -> FilePath -> TestTree
testCompare testName nativeFp pptxFp =
testCompareWithOpts testName def nativeFp pptxFp
--------------------------------------------------------------
tests :: [TestTree] tests :: [TestTree]
tests = [ numSlideTests tests = [ testCompare
, contentTypeTests "Inline formatting"
"pptx/inline_formatting.native"
"pptx/inline_formatting.pptx"
, testCompare
"slide breaks (default slide-level)"
"pptx/slide_breaks.native"
"pptx/slide_breaks.pptx"
, testCompareWithOpts
"slide breaks (slide-level set to 1)"
def{writerSlideLevel=Just 1}
"pptx/slide_breaks.native"
"pptx/slide_breaks_slide_level_1.pptx"
] ]

View file

@ -0,0 +1,5 @@
Pandoc (Meta {unMeta = fromList []})
[Para [Str "Here",Space,Str "are",Space,Str "examples",Space,Str "of",Space,Emph [Str "italics"],Str ",",Space,Strong [Str "bold"],Str ",",Space,Str "and",Space,Strong [Emph [Str "bold",Space,Str "italics"]],Str "."]
,Para [Str "Here",Space,Str "is",Space,Strikeout [Str "strook-three"],Space,Str "strike-through",Space,Str "and",Space,SmallCaps [Str "small",Space,Str "caps"],Str "."]
,Para [Str "We",Space,Str "can",Space,Str "also",Space,Str "do",Space,Str "subscripts",Space,Str "(H",Subscript [Str "2"],Str "0)",Space,Str "and",Space,Str "super",Superscript [Str "script"],Str "."]
,RawBlock (Format "html") "<!-- Comments don't show up. -->"]

Binary file not shown.

View file

@ -0,0 +1,7 @@
Pandoc (Meta {unMeta = fromList []})
[Para [Str "Break",Space,Str "with",Space,Str "a",Space,Str "new",Space,Str "section-level",Space,Str "header"]
,Header 1 ("below-section-level",[],[]) [Str "Below",Space,Str "section-level"]
,Header 2 ("section-level",[],[]) [Str "Section-level"]
,Para [Str "Third",Space,Str "slide",Space,Str "(with",Space,Str "a",Space,Str "section-level",Space,Str "of",Space,Str "2)"]
,HorizontalRule
,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "slide."]]

BIN
test/pptx/slide_breaks.pptx Normal file

Binary file not shown.

Binary file not shown.