pptx: Select layouts from reference doc by name

Until now, users had to make sure that their reference doc contains
layouts in a specific order: the first four layouts in the file had to
have a specific structure, or else pandoc would error (or sometimes
successfully produce a pptx file, which PowerPoint would then fail to
open).

This commit changes the layout selection to use the layout names rather
than order: users must make sure their reference doc contains four
layouts with specific names, and if a layout with the right name isn’t
found pandoc will output a warning and use the corresponding layout from
the default reference doc as a fallback.

I believe the use of names rather than order will be clearer to users,
and the clearer errors will help them troubleshoot when things go wrong.

- Add tests for moved layouts
- Add tests for deleted layouts
- Add newly included layouts to slideMaster1.xml to fix tests
This commit is contained in:
Emily Bourke 2021-08-10 17:20:53 +01:00 committed by John MacFarlane
parent 9204e5c9b1
commit 72823ad947
47 changed files with 241 additions and 32 deletions

View file

@ -1165,13 +1165,18 @@ header when requesting a document from a URL:
`.pptx` or `.potx` extension) are known to work, as are most `.pptx` or `.potx` extension) are known to work, as are most
templates derived from these. templates derived from these.
The specific requirement is that the template should begin with The specific requirement is that the template should contain layouts
the following first four layouts: with the following names (as seen within PowerPoint):
1. Title Slide - Title Slide
2. Title and Content - Title and Content
3. Section Header - Section Header
4. Two Content - Two Content
For each name, the first layout found with that name will be used.
If no layout is found with one of the names, pandoc will output a
warning and use the layout with that name from the default reference
doc instead.
All templates included with a recent version of MS PowerPoint All templates included with a recent version of MS PowerPoint
will fit these criteria. (You can click on `Layout` under the will fit these criteria. (You can click on `Layout` under the

View file

@ -1,3 +1,4 @@
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-} {-# LANGUAGE PatternGuards #-}
{- | {- |
@ -21,14 +22,19 @@ import Control.Monad.Reader
import Control.Monad.State import Control.Monad.State
import Codec.Archive.Zip import Codec.Archive.Zip
import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
import Data.Default import Data.Default
import Data.Foldable (toList)
import Data.List.NonEmpty (nonEmpty, NonEmpty ((:|)))
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Read import Data.Text.Read (decimal)
import Data.Time (formatTime, defaultTimeLocale) import Data.Time (formatTime, defaultTimeLocale)
import Data.Time.Clock (UTCTime) import Data.Time.Clock (UTCTime)
import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) import Data.Traversable (for)
import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension, takeFileName)
import Text.Pandoc.XML.Light as XML import Text.Pandoc.XML.Light as XML
import Text.Pandoc.Definition import Text.Pandoc.Definition
import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.Pandoc.UTF8 as UTF8
@ -48,11 +54,11 @@ import System.FilePath.Glob
import Text.DocTemplates (FromContext(lookupContext), Context) import Text.DocTemplates (FromContext(lookupContext), Context)
import Text.DocLayout (literal) import Text.DocLayout (literal)
import Text.TeXMath import Text.TeXMath
import Text.Pandoc.Logging (LogMessage(TemplateWarning))
import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Math (convertMath)
import Text.Pandoc.Writers.Powerpoint.Presentation import Text.Pandoc.Writers.Powerpoint.Presentation
import Text.Pandoc.Shared (tshow, stringify) import Text.Pandoc.Shared (tshow, stringify)
import Skylighting (fromColor) import Skylighting (fromColor)
import Data.List.NonEmpty (nonEmpty)
-- |The 'EMU' type is used to specify sizes in English Metric Units. -- |The 'EMU' type is used to specify sizes in English Metric Units.
type EMU = Integer type EMU = Integer
@ -117,6 +123,7 @@ data WriterEnv = WriterEnv { envRefArchive :: Archive
-- no entry in the map for it. -- no entry in the map for it.
, envSpeakerNotesIdMap :: M.Map Int Int , envSpeakerNotesIdMap :: M.Map Int Int
, envInSpeakerNotes :: Bool , envInSpeakerNotes :: Bool
, envSlideLayouts :: Maybe SlideLayouts
} }
deriving (Show) deriving (Show)
@ -136,8 +143,33 @@ instance Default WriterEnv where
, envSlideIdMap = mempty , envSlideIdMap = mempty
, envSpeakerNotesIdMap = mempty , envSpeakerNotesIdMap = mempty
, envInSpeakerNotes = False , envInSpeakerNotes = False
, envSlideLayouts = Nothing
} }
type SlideLayouts = SlideLayoutsOf SlideLayout
data SlideLayoutsOf a = SlideLayouts
{ metadata :: a
, title :: a
, content :: a
, twoColumn :: a
} deriving (Show, Functor, Foldable, Traversable)
data SlideLayout = SlideLayout
{ slElement :: Element
, slInReferenceDoc :: Bool
-- ^ True if the layout is in the provided reference doc, False if it's in
-- the default reference doc.
, slPath :: FilePath
, slEntry :: Entry
} deriving (Show)
getSlideLayouts :: PandocMonad m => P m SlideLayouts
getSlideLayouts = asks envSlideLayouts >>= maybe (throwError e) pure
where
e = PandocSomeError ("Slide layouts aren't defined, even though they should "
<> "always be. This is a bug in pandoc.")
data ContentType = NormalContent data ContentType = NormalContent
| TwoColumnLeftContent | TwoColumnLeftContent
| TwoColumnRightContent | TwoColumnRightContent
@ -264,7 +296,24 @@ presentationToArchiveP p@(Presentation docProps slides) = do
T.unlines (map (T.pack . (" " <>)) missingFiles) T.unlines (map (T.pack . (" " <>)) missingFiles)
) )
newArch' <- foldM copyFileToArchive emptyArchive filePaths newArch <- foldM copyFileToArchive emptyArchive filePaths
-- Add any layouts taken from the default archive,
-- overwriting any already added.
slideLayouts <- getSlideLayouts
let f layout =
if not (slInReferenceDoc layout)
then addEntryToArchive (slEntry layout)
else id
let newArch' = foldr f newArch slideLayouts
-- Update the master to make sure it includes any layouts we've just added
master <- getMaster
masterRels <- getMasterRels
let (updatedMasterElem, updatedMasterRelElem) = updateMasterElems slideLayouts master masterRels
updatedMasterEntry <- elemToEntry "ppt/slideMasters/slideMaster1.xml" updatedMasterElem
updatedMasterRelEntry <- elemToEntry "ppt/slideMasters/_rels/slideMaster1.xml.rels" updatedMasterRelElem
-- we make a modified ppt/viewProps.xml out of the presentation viewProps -- we make a modified ppt/viewProps.xml out of the presentation viewProps
viewPropsEntry <- makeViewPropsEntry viewPropsEntry <- makeViewPropsEntry
-- we make a docProps/core.xml entry out of the presentation docprops -- we make a docProps/core.xml entry out of the presentation docprops
@ -293,9 +342,82 @@ presentationToArchiveP p@(Presentation docProps slides) = do
spkNotesEntries <> spkNotesEntries <>
spkNotesRelEntries <> spkNotesRelEntries <>
mediaEntries <> mediaEntries <>
[updatedMasterEntry, updatedMasterRelEntry] <>
[contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry, [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry,
presEntry, presRelsEntry, viewPropsEntry] presEntry, presRelsEntry, viewPropsEntry]
updateMasterElems :: SlideLayouts -> Element -> Element -> (Element, Element)
updateMasterElems layouts master masterRels = (updatedMaster, updatedMasterRels)
where
updatedMaster = master { elContent = updateSldLayoutIdLst <$> elContent master }
(updatedRelationshipIds, updatedMasterRels) = addLayoutRels masterRels
updateSldLayoutIdLst :: Content -> Content
updateSldLayoutIdLst (Elem e) = case elName e of
(QName "sldLayoutIdLst" _ _) -> let
mkChild relationshipId (lastId, children) = let
thisId = lastId + 1
newChild = Element
{ elName = QName "sldLayoutId" Nothing (Just "p")
, elAttribs =
[ Attr (QName "id" Nothing Nothing) (T.pack (show thisId))
, Attr (QName "id" Nothing (Just "r")) relationshipId
]
, elContent = []
, elLine = Nothing
}
in (thisId, Elem newChild : children)
newChildren = snd (foldr mkChild (maxIdNumber' e, []) updatedRelationshipIds)
in Elem e { elContent = elContent e <> newChildren }
_ -> Elem e
updateSldLayoutIdLst c = c
addLayoutRels ::
Element ->
([Text], Element)
addLayoutRels e = let
layoutsToAdd = filter (not . slInReferenceDoc) (toList layouts)
newRelationships = snd (foldr mkRelationship (maxIdNumber e, []) layoutsToAdd)
newRelationshipIds = mapMaybe getRelationshipId newRelationships
mkRelationship layout (lastId, relationships) = let
thisId = lastId + 1
slideLayoutPath = "../slideLayouts/" <> T.pack (takeFileName (slPath layout))
newRelationship = Element
{ elName = QName "Relationship" Nothing Nothing
, elAttribs =
[ Attr (QName "Id" Nothing Nothing) ("rId" <> T.pack (show thisId))
, Attr (QName "Type" Nothing Nothing) "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"
, Attr (QName "Target" Nothing Nothing) slideLayoutPath
]
, elContent = []
, elLine = Nothing
}
in (thisId, Elem newRelationship : relationships)
in (newRelationshipIds, e {elContent = elContent e <> newRelationships})
getRelationshipId :: Content -> Maybe Text
getRelationshipId (Elem e) = findAttr (QName "Id" Nothing Nothing) e
getRelationshipId _ = Nothing
maxIdNumber :: Element -> Integer
maxIdNumber relationships = maximum (0 : idNumbers)
where
idNumbers = fst <$> mapMaybe (hush . decimal . T.drop 3) idAttributes
idAttributes = mapMaybe getIdAttribute (elContent relationships)
getIdAttribute (Elem e) = findAttr (QName "Id" Nothing Nothing) e
getIdAttribute _ = Nothing
maxIdNumber' :: Element -> Integer
maxIdNumber' sldLayouts = maximum (0 : idNumbers)
where
idNumbers = fst <$> mapMaybe (hush . decimal) idAttributes
idAttributes = mapMaybe getIdAttribute (elContent sldLayouts)
getIdAttribute (Elem e) = findAttr (QName "id" Nothing Nothing) e
getIdAttribute _ = Nothing
hush :: Either a b -> Maybe b
hush = either (const Nothing) Just
makeSlideIdMap :: Presentation -> M.Map SlideId Int makeSlideIdMap :: Presentation -> M.Map SlideId Int
makeSlideIdMap (Presentation _ slides) = makeSlideIdMap (Presentation _ slides) =
M.fromList $ map slideId slides `zip` [1..] M.fromList $ map slideId slides `zip` [1..]
@ -318,6 +440,40 @@ presentationToArchive opts meta pres = do
Nothing -> toArchive . BL.fromStrict <$> Nothing -> toArchive . BL.fromStrict <$>
P.readDataFile "reference.pptx" P.readDataFile "reference.pptx"
let (referenceLayouts, defaultReferenceLayouts) =
(getLayoutsFromArchive refArchive, getLayoutsFromArchive distArchive)
let layoutTitles = SlideLayouts { metadata = "Title Slide" :: Text
, title = "Section Header"
, content = "Title and Content"
, twoColumn = "Two Content"
}
layouts <- for layoutTitles $ \layoutTitle -> do
let layout = M.lookup (CI.mk layoutTitle) referenceLayouts
let defaultLayout = M.lookup (CI.mk layoutTitle) defaultReferenceLayouts
case (layout, defaultLayout) of
(Nothing, Nothing) ->
throwError (PandocSomeError ("Couldn't find layout named \""
<> layoutTitle <> "\" in the provided "
<> "reference doc or in the default "
<> "reference doc included with pandoc."))
(Nothing, Just ((element, path, entry) :| _)) -> do
P.logOutput (TemplateWarning ("Couldn't find layout named \""
<> layoutTitle <> "\" in provided "
<> "reference doc. Falling back to "
<> "the default included with pandoc."))
pure SlideLayout { slElement = element
, slPath = path
, slEntry = entry
, slInReferenceDoc = False
}
(Just ((element, path, entry) :| _), _ ) ->
pure SlideLayout { slElement = element
, slPath = path
, slEntry = entry
, slInReferenceDoc = True
}
utctime <- P.getTimestamp utctime <- P.getTimestamp
presSize <- case getPresentationSize refArchive distArchive of presSize <- case getPresentationSize refArchive distArchive of
@ -341,6 +497,7 @@ presentationToArchive opts meta pres = do
, envPresentationSize = presSize , envPresentationSize = presSize
, envSlideIdMap = makeSlideIdMap pres , envSlideIdMap = makeSlideIdMap pres
, envSpeakerNotesIdMap = makeSpeakerNotesMap pres , envSpeakerNotesIdMap = makeSpeakerNotesMap pres
, envSlideLayouts = Just layouts
} }
let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive
@ -348,7 +505,30 @@ presentationToArchive opts meta pres = do
runP env st $ presentationToArchiveP pres runP env st $ presentationToArchiveP pres
-- | Get all slide layouts from an archive, as a map where the layout's name
-- gives the map key.
--
-- For each layout, the map contains its XML representation, its path within
-- the archive, and the archive entry.
getLayoutsFromArchive :: Archive -> M.Map (CI Text) (NonEmpty (Element, FilePath, Entry))
getLayoutsFromArchive archive =
M.fromListWith (<>) ((\t@(e, _, _) -> (CI.mk (name e), pure t)) <$> layouts)
where
layouts :: [(Element, FilePath, Entry)]
layouts = mapMaybe findElementByPath paths
parseXml' entry = case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
Left _ -> Nothing
Right element -> Just element
findElementByPath :: FilePath -> Maybe (Element, FilePath, Entry)
findElementByPath path = do
entry <- findEntryByPath path archive
element <- parseXml' entry
pure (element, path, entry)
paths = filter (match (compile "ppt/slideLayouts/slideLayout*.xml")) (filesInArchive archive)
name element = fromMaybe "Untitled layout" $ do
let ns = elemToNameSpaces element
cSld <- findChild (elemName ns "p" "cSld") element
findAttr (QName "name" Nothing Nothing) cSld
-------------------------------------------------- --------------------------------------------------
@ -365,15 +545,14 @@ curSlideHasSpeakerNotes =
-------------------------------------------------- --------------------------------------------------
getLayout :: PandocMonad m => Layout -> P m Element getLayout :: PandocMonad m => Layout -> P m Element
getLayout layout = do getLayout layout = getElement <$> getSlideLayouts
let layoutpath = case layout of where
MetadataSlide{} -> "ppt/slideLayouts/slideLayout1.xml" getElement =
TitleSlide{} -> "ppt/slideLayouts/slideLayout3.xml" slElement . case layout of
ContentSlide{} -> "ppt/slideLayouts/slideLayout2.xml" MetadataSlide{} -> metadata
TwoColumnSlide{} -> "ppt/slideLayouts/slideLayout4.xml" TitleSlide{} -> title
refArchive <- asks envRefArchive ContentSlide{} -> content
distArchive <- asks envDistArchive TwoColumnSlide{} -> twoColumn
parseXml refArchive distArchive layoutpath
shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId :: NameSpaces -> T.Text -> Element -> Bool
shapeHasId ns ident element shapeHasId ns ident element
@ -604,6 +783,12 @@ getMaster = do
distArchive <- asks envDistArchive distArchive <- asks envDistArchive
parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml" parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml"
getMasterRels :: PandocMonad m => P m Element
getMasterRels = do
refArchive <- asks envRefArchive
distArchive <- asks envDistArchive
parseXml refArchive distArchive "ppt/slideMasters/_rels/slideMaster1.xml.rels"
-- We want to get the header dimensions, so we can make sure that the -- We want to get the header dimensions, so we can make sure that the
-- image goes underneath it. We only use this in a content slide if it -- image goes underneath it. We only use this in a content slide if it
-- has a header. -- has a header.
@ -1606,11 +1791,13 @@ speakerNotesSlideRelElement slide = do
slideToSlideRelElement :: PandocMonad m => Slide -> P m Element slideToSlideRelElement :: PandocMonad m => Slide -> P m Element
slideToSlideRelElement slide = do slideToSlideRelElement slide = do
idNum <- slideNum slide idNum <- slideNum slide
let target = case slide of target <- flip fmap getSlideLayouts $
(Slide _ MetadataSlide{} _) -> "../slideLayouts/slideLayout1.xml" T.pack . ("../slideLayouts/" <>) . takeFileName .
(Slide _ TitleSlide{} _) -> "../slideLayouts/slideLayout3.xml" slPath . case slide of
(Slide _ ContentSlide{} _) -> "../slideLayouts/slideLayout2.xml" (Slide _ MetadataSlide{} _) -> metadata
(Slide _ TwoColumnSlide{} _) -> "../slideLayouts/slideLayout4.xml" (Slide _ TitleSlide{} _) -> title
(Slide _ ContentSlide{} _) -> content
(Slide _ TwoColumnSlide{} _) -> twoColumn
speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide

View file

@ -7,18 +7,21 @@ import System.FilePath
import Text.DocTemplates (ToContext(toVal), Context(..)) import Text.DocTemplates (ToContext(toVal), Context(..))
import qualified Data.Map as M import qualified Data.Map as M
import Data.Text (pack) import Data.Text (pack)
import Data.List (unzip4)
-- templating is important enough, and can break enough things, that -- templating is important enough, and can break enough things, that
-- we want to run all our tests with both default formatting and a -- we want to run all our tests with both default formatting and a
-- template. -- template.
modifyPptxName :: FilePath -> FilePath modifyPptxName :: FilePath -> String -> FilePath
modifyPptxName fp = modifyPptxName fp suffix =
addExtension (dropExtension fp ++ "_templated") "pptx" addExtension (dropExtension fp ++ suffix) "pptx"
pptxTests :: String -> WriterOptions -> FilePath -> FilePath -> (TestTree, TestTree) pptxTests :: String -> WriterOptions -> FilePath -> FilePath -> (TestTree, TestTree, TestTree, TestTree)
pptxTests name opts native pptx = pptxTests name opts native pptx =
let referenceDoc = "pptx/reference_depth.pptx" let referenceDoc = "pptx/reference_depth.pptx"
movedLayoutsReferenceDoc = "pptx/reference_moved_layouts.pptx"
deletedLayoutsReferenceDoc = "pptx/reference_deleted_layouts.pptx"
in in
( ooxmlTest ( ooxmlTest
writePowerpoint writePowerpoint
@ -31,15 +34,29 @@ pptxTests name opts native pptx =
name name
opts{writerReferenceDoc=Just referenceDoc} opts{writerReferenceDoc=Just referenceDoc}
native native
(modifyPptxName pptx) (modifyPptxName pptx "_templated")
, ooxmlTest
writePowerpoint
name
opts{writerReferenceDoc=Just movedLayoutsReferenceDoc}
native
(modifyPptxName pptx "_moved_layouts")
, ooxmlTest
writePowerpoint
name
opts{writerReferenceDoc=Just deletedLayoutsReferenceDoc}
native
(modifyPptxName pptx "_deleted_layouts")
) )
groupPptxTests :: [(TestTree, TestTree)] -> [TestTree] groupPptxTests :: [(TestTree, TestTree, TestTree, TestTree)] -> [TestTree]
groupPptxTests pairs = groupPptxTests pairs =
let (noRefs, refs) = unzip pairs let (noRefs, refs, movedLayouts, deletedLayouts) = unzip4 pairs
in in
[ testGroup "Default slide formatting" noRefs [ testGroup "Default slide formatting" noRefs
, testGroup "With `--reference-doc` pptx file" refs , testGroup "With `--reference-doc` pptx file" refs
, testGroup "With layouts in reference doc moved" movedLayouts
, testGroup "With layouts in reference doc deleted" deletedLayouts
] ]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.