pptx: Select layouts from reference doc by name

Until now, users had to make sure that their reference doc contains
layouts in a specific order: the first four layouts in the file had to
have a specific structure, or else pandoc would error (or sometimes
successfully produce a pptx file, which PowerPoint would then fail to
open).

This commit changes the layout selection to use the layout names rather
than order: users must make sure their reference doc contains four
layouts with specific names, and if a layout with the right name isn’t
found pandoc will output a warning and use the corresponding layout from
the default reference doc as a fallback.

I believe the use of names rather than order will be clearer to users,
and the clearer errors will help them troubleshoot when things go wrong.

- Add tests for moved layouts
- Add tests for deleted layouts
- Add newly included layouts to slideMaster1.xml to fix tests
This commit is contained in:
Emily Bourke 2021-08-10 17:20:53 +01:00 committed by John MacFarlane
parent 9204e5c9b1
commit 72823ad947
47 changed files with 241 additions and 32 deletions

View file

@ -1165,13 +1165,18 @@ header when requesting a document from a URL:
`.pptx` or `.potx` extension) are known to work, as are most
templates derived from these.
The specific requirement is that the template should begin with
the following first four layouts:
The specific requirement is that the template should contain layouts
with the following names (as seen within PowerPoint):
1. Title Slide
2. Title and Content
3. Section Header
4. Two Content
- Title Slide
- Title and Content
- Section Header
- Two Content
For each name, the first layout found with that name will be used.
If no layout is found with one of the names, pandoc will output a
warning and use the layout with that name from the default reference
doc instead.
All templates included with a recent version of MS PowerPoint
will fit these criteria. (You can click on `Layout` under the

View file

@ -1,3 +1,4 @@
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{- |
@ -21,14 +22,19 @@ import Control.Monad.Reader
import Control.Monad.State
import Codec.Archive.Zip
import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
import Data.Default
import Data.Foldable (toList)
import Data.List.NonEmpty (nonEmpty, NonEmpty ((:|)))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Read
import Data.Text.Read (decimal)
import Data.Time (formatTime, defaultTimeLocale)
import Data.Time.Clock (UTCTime)
import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
import Data.Traversable (for)
import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension, takeFileName)
import Text.Pandoc.XML.Light as XML
import Text.Pandoc.Definition
import qualified Text.Pandoc.UTF8 as UTF8
@ -48,11 +54,11 @@ import System.FilePath.Glob
import Text.DocTemplates (FromContext(lookupContext), Context)
import Text.DocLayout (literal)
import Text.TeXMath
import Text.Pandoc.Logging (LogMessage(TemplateWarning))
import Text.Pandoc.Writers.Math (convertMath)
import Text.Pandoc.Writers.Powerpoint.Presentation
import Text.Pandoc.Shared (tshow, stringify)
import Skylighting (fromColor)
import Data.List.NonEmpty (nonEmpty)
-- |The 'EMU' type is used to specify sizes in English Metric Units.
type EMU = Integer
@ -117,6 +123,7 @@ data WriterEnv = WriterEnv { envRefArchive :: Archive
-- no entry in the map for it.
, envSpeakerNotesIdMap :: M.Map Int Int
, envInSpeakerNotes :: Bool
, envSlideLayouts :: Maybe SlideLayouts
}
deriving (Show)
@ -136,8 +143,33 @@ instance Default WriterEnv where
, envSlideIdMap = mempty
, envSpeakerNotesIdMap = mempty
, envInSpeakerNotes = False
, envSlideLayouts = Nothing
}
type SlideLayouts = SlideLayoutsOf SlideLayout
data SlideLayoutsOf a = SlideLayouts
{ metadata :: a
, title :: a
, content :: a
, twoColumn :: a
} deriving (Show, Functor, Foldable, Traversable)
data SlideLayout = SlideLayout
{ slElement :: Element
, slInReferenceDoc :: Bool
-- ^ True if the layout is in the provided reference doc, False if it's in
-- the default reference doc.
, slPath :: FilePath
, slEntry :: Entry
} deriving (Show)
getSlideLayouts :: PandocMonad m => P m SlideLayouts
getSlideLayouts = asks envSlideLayouts >>= maybe (throwError e) pure
where
e = PandocSomeError ("Slide layouts aren't defined, even though they should "
<> "always be. This is a bug in pandoc.")
data ContentType = NormalContent
| TwoColumnLeftContent
| TwoColumnRightContent
@ -264,7 +296,24 @@ presentationToArchiveP p@(Presentation docProps slides) = do
T.unlines (map (T.pack . (" " <>)) missingFiles)
)
newArch' <- foldM copyFileToArchive emptyArchive filePaths
newArch <- foldM copyFileToArchive emptyArchive filePaths
-- Add any layouts taken from the default archive,
-- overwriting any already added.
slideLayouts <- getSlideLayouts
let f layout =
if not (slInReferenceDoc layout)
then addEntryToArchive (slEntry layout)
else id
let newArch' = foldr f newArch slideLayouts
-- Update the master to make sure it includes any layouts we've just added
master <- getMaster
masterRels <- getMasterRels
let (updatedMasterElem, updatedMasterRelElem) = updateMasterElems slideLayouts master masterRels
updatedMasterEntry <- elemToEntry "ppt/slideMasters/slideMaster1.xml" updatedMasterElem
updatedMasterRelEntry <- elemToEntry "ppt/slideMasters/_rels/slideMaster1.xml.rels" updatedMasterRelElem
-- we make a modified ppt/viewProps.xml out of the presentation viewProps
viewPropsEntry <- makeViewPropsEntry
-- we make a docProps/core.xml entry out of the presentation docprops
@ -293,9 +342,82 @@ presentationToArchiveP p@(Presentation docProps slides) = do
spkNotesEntries <>
spkNotesRelEntries <>
mediaEntries <>
[updatedMasterEntry, updatedMasterRelEntry] <>
[contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry,
presEntry, presRelsEntry, viewPropsEntry]
updateMasterElems :: SlideLayouts -> Element -> Element -> (Element, Element)
updateMasterElems layouts master masterRels = (updatedMaster, updatedMasterRels)
where
updatedMaster = master { elContent = updateSldLayoutIdLst <$> elContent master }
(updatedRelationshipIds, updatedMasterRels) = addLayoutRels masterRels
updateSldLayoutIdLst :: Content -> Content
updateSldLayoutIdLst (Elem e) = case elName e of
(QName "sldLayoutIdLst" _ _) -> let
mkChild relationshipId (lastId, children) = let
thisId = lastId + 1
newChild = Element
{ elName = QName "sldLayoutId" Nothing (Just "p")
, elAttribs =
[ Attr (QName "id" Nothing Nothing) (T.pack (show thisId))
, Attr (QName "id" Nothing (Just "r")) relationshipId
]
, elContent = []
, elLine = Nothing
}
in (thisId, Elem newChild : children)
newChildren = snd (foldr mkChild (maxIdNumber' e, []) updatedRelationshipIds)
in Elem e { elContent = elContent e <> newChildren }
_ -> Elem e
updateSldLayoutIdLst c = c
addLayoutRels ::
Element ->
([Text], Element)
addLayoutRels e = let
layoutsToAdd = filter (not . slInReferenceDoc) (toList layouts)
newRelationships = snd (foldr mkRelationship (maxIdNumber e, []) layoutsToAdd)
newRelationshipIds = mapMaybe getRelationshipId newRelationships
mkRelationship layout (lastId, relationships) = let
thisId = lastId + 1
slideLayoutPath = "../slideLayouts/" <> T.pack (takeFileName (slPath layout))
newRelationship = Element
{ elName = QName "Relationship" Nothing Nothing
, elAttribs =
[ Attr (QName "Id" Nothing Nothing) ("rId" <> T.pack (show thisId))
, Attr (QName "Type" Nothing Nothing) "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"
, Attr (QName "Target" Nothing Nothing) slideLayoutPath
]
, elContent = []
, elLine = Nothing
}
in (thisId, Elem newRelationship : relationships)
in (newRelationshipIds, e {elContent = elContent e <> newRelationships})
getRelationshipId :: Content -> Maybe Text
getRelationshipId (Elem e) = findAttr (QName "Id" Nothing Nothing) e
getRelationshipId _ = Nothing
maxIdNumber :: Element -> Integer
maxIdNumber relationships = maximum (0 : idNumbers)
where
idNumbers = fst <$> mapMaybe (hush . decimal . T.drop 3) idAttributes
idAttributes = mapMaybe getIdAttribute (elContent relationships)
getIdAttribute (Elem e) = findAttr (QName "Id" Nothing Nothing) e
getIdAttribute _ = Nothing
maxIdNumber' :: Element -> Integer
maxIdNumber' sldLayouts = maximum (0 : idNumbers)
where
idNumbers = fst <$> mapMaybe (hush . decimal) idAttributes
idAttributes = mapMaybe getIdAttribute (elContent sldLayouts)
getIdAttribute (Elem e) = findAttr (QName "id" Nothing Nothing) e
getIdAttribute _ = Nothing
hush :: Either a b -> Maybe b
hush = either (const Nothing) Just
makeSlideIdMap :: Presentation -> M.Map SlideId Int
makeSlideIdMap (Presentation _ slides) =
M.fromList $ map slideId slides `zip` [1..]
@ -318,6 +440,40 @@ presentationToArchive opts meta pres = do
Nothing -> toArchive . BL.fromStrict <$>
P.readDataFile "reference.pptx"
let (referenceLayouts, defaultReferenceLayouts) =
(getLayoutsFromArchive refArchive, getLayoutsFromArchive distArchive)
let layoutTitles = SlideLayouts { metadata = "Title Slide" :: Text
, title = "Section Header"
, content = "Title and Content"
, twoColumn = "Two Content"
}
layouts <- for layoutTitles $ \layoutTitle -> do
let layout = M.lookup (CI.mk layoutTitle) referenceLayouts
let defaultLayout = M.lookup (CI.mk layoutTitle) defaultReferenceLayouts
case (layout, defaultLayout) of
(Nothing, Nothing) ->
throwError (PandocSomeError ("Couldn't find layout named \""
<> layoutTitle <> "\" in the provided "
<> "reference doc or in the default "
<> "reference doc included with pandoc."))
(Nothing, Just ((element, path, entry) :| _)) -> do
P.logOutput (TemplateWarning ("Couldn't find layout named \""
<> layoutTitle <> "\" in provided "
<> "reference doc. Falling back to "
<> "the default included with pandoc."))
pure SlideLayout { slElement = element
, slPath = path
, slEntry = entry
, slInReferenceDoc = False
}
(Just ((element, path, entry) :| _), _ ) ->
pure SlideLayout { slElement = element
, slPath = path
, slEntry = entry
, slInReferenceDoc = True
}
utctime <- P.getTimestamp
presSize <- case getPresentationSize refArchive distArchive of
@ -341,6 +497,7 @@ presentationToArchive opts meta pres = do
, envPresentationSize = presSize
, envSlideIdMap = makeSlideIdMap pres
, envSpeakerNotesIdMap = makeSpeakerNotesMap pres
, envSlideLayouts = Just layouts
}
let st = def { stMediaGlobalIds = initialGlobalIds refArchive distArchive
@ -348,7 +505,30 @@ presentationToArchive opts meta pres = do
runP env st $ presentationToArchiveP pres
-- | Get all slide layouts from an archive, as a map where the layout's name
-- gives the map key.
--
-- For each layout, the map contains its XML representation, its path within
-- the archive, and the archive entry.
getLayoutsFromArchive :: Archive -> M.Map (CI Text) (NonEmpty (Element, FilePath, Entry))
getLayoutsFromArchive archive =
M.fromListWith (<>) ((\t@(e, _, _) -> (CI.mk (name e), pure t)) <$> layouts)
where
layouts :: [(Element, FilePath, Entry)]
layouts = mapMaybe findElementByPath paths
parseXml' entry = case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
Left _ -> Nothing
Right element -> Just element
findElementByPath :: FilePath -> Maybe (Element, FilePath, Entry)
findElementByPath path = do
entry <- findEntryByPath path archive
element <- parseXml' entry
pure (element, path, entry)
paths = filter (match (compile "ppt/slideLayouts/slideLayout*.xml")) (filesInArchive archive)
name element = fromMaybe "Untitled layout" $ do
let ns = elemToNameSpaces element
cSld <- findChild (elemName ns "p" "cSld") element
findAttr (QName "name" Nothing Nothing) cSld
--------------------------------------------------
@ -365,15 +545,14 @@ curSlideHasSpeakerNotes =
--------------------------------------------------
getLayout :: PandocMonad m => Layout -> P m Element
getLayout layout = do
let layoutpath = case layout of
MetadataSlide{} -> "ppt/slideLayouts/slideLayout1.xml"
TitleSlide{} -> "ppt/slideLayouts/slideLayout3.xml"
ContentSlide{} -> "ppt/slideLayouts/slideLayout2.xml"
TwoColumnSlide{} -> "ppt/slideLayouts/slideLayout4.xml"
refArchive <- asks envRefArchive
distArchive <- asks envDistArchive
parseXml refArchive distArchive layoutpath
getLayout layout = getElement <$> getSlideLayouts
where
getElement =
slElement . case layout of
MetadataSlide{} -> metadata
TitleSlide{} -> title
ContentSlide{} -> content
TwoColumnSlide{} -> twoColumn
shapeHasId :: NameSpaces -> T.Text -> Element -> Bool
shapeHasId ns ident element
@ -604,6 +783,12 @@ getMaster = do
distArchive <- asks envDistArchive
parseXml refArchive distArchive "ppt/slideMasters/slideMaster1.xml"
getMasterRels :: PandocMonad m => P m Element
getMasterRels = do
refArchive <- asks envRefArchive
distArchive <- asks envDistArchive
parseXml refArchive distArchive "ppt/slideMasters/_rels/slideMaster1.xml.rels"
-- We want to get the header dimensions, so we can make sure that the
-- image goes underneath it. We only use this in a content slide if it
-- has a header.
@ -1606,11 +1791,13 @@ speakerNotesSlideRelElement slide = do
slideToSlideRelElement :: PandocMonad m => Slide -> P m Element
slideToSlideRelElement slide = do
idNum <- slideNum slide
let target = case slide of
(Slide _ MetadataSlide{} _) -> "../slideLayouts/slideLayout1.xml"
(Slide _ TitleSlide{} _) -> "../slideLayouts/slideLayout3.xml"
(Slide _ ContentSlide{} _) -> "../slideLayouts/slideLayout2.xml"
(Slide _ TwoColumnSlide{} _) -> "../slideLayouts/slideLayout4.xml"
target <- flip fmap getSlideLayouts $
T.pack . ("../slideLayouts/" <>) . takeFileName .
slPath . case slide of
(Slide _ MetadataSlide{} _) -> metadata
(Slide _ TitleSlide{} _) -> title
(Slide _ ContentSlide{} _) -> content
(Slide _ TwoColumnSlide{} _) -> twoColumn
speakerNotesRels <- maybeToList <$> speakerNotesSlideRelElement slide

View file

@ -7,18 +7,21 @@ import System.FilePath
import Text.DocTemplates (ToContext(toVal), Context(..))
import qualified Data.Map as M
import Data.Text (pack)
import Data.List (unzip4)
-- templating is important enough, and can break enough things, that
-- we want to run all our tests with both default formatting and a
-- template.
modifyPptxName :: FilePath -> FilePath
modifyPptxName fp =
addExtension (dropExtension fp ++ "_templated") "pptx"
modifyPptxName :: FilePath -> String -> FilePath
modifyPptxName fp suffix =
addExtension (dropExtension fp ++ suffix) "pptx"
pptxTests :: String -> WriterOptions -> FilePath -> FilePath -> (TestTree, TestTree)
pptxTests :: String -> WriterOptions -> FilePath -> FilePath -> (TestTree, TestTree, TestTree, TestTree)
pptxTests name opts native pptx =
let referenceDoc = "pptx/reference_depth.pptx"
movedLayoutsReferenceDoc = "pptx/reference_moved_layouts.pptx"
deletedLayoutsReferenceDoc = "pptx/reference_deleted_layouts.pptx"
in
( ooxmlTest
writePowerpoint
@ -31,15 +34,29 @@ pptxTests name opts native pptx =
name
opts{writerReferenceDoc=Just referenceDoc}
native
(modifyPptxName pptx)
(modifyPptxName pptx "_templated")
, ooxmlTest
writePowerpoint
name
opts{writerReferenceDoc=Just movedLayoutsReferenceDoc}
native
(modifyPptxName pptx "_moved_layouts")
, ooxmlTest
writePowerpoint
name
opts{writerReferenceDoc=Just deletedLayoutsReferenceDoc}
native
(modifyPptxName pptx "_deleted_layouts")
)
groupPptxTests :: [(TestTree, TestTree)] -> [TestTree]
groupPptxTests :: [(TestTree, TestTree, TestTree, TestTree)] -> [TestTree]
groupPptxTests pairs =
let (noRefs, refs) = unzip pairs
let (noRefs, refs, movedLayouts, deletedLayouts) = unzip4 pairs
in
[ testGroup "Default slide formatting" noRefs
, testGroup "With `--reference-doc` pptx file" refs
, testGroup "With layouts in reference doc moved" movedLayouts
, testGroup "With layouts in reference doc deleted" deletedLayouts
]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.