From 9b6ee81c1916bb23d2cb24534adb88d65b4642df Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 22 Sep 2019 23:00:35 +0400 Subject: [PATCH] [Docx Writer] Re-use Readers.Docx.Parse for StyleMap (#5766) * [Docx Parser] Move style-parsing-specific code to a new module * [Docx Writer] Re-use Readers.Docx.Parse.Styles for StyleMap * [Docx Writer] Move Readers.Docx.StyleMap to Writers.Docx.StyleMap It's never used outside of writer code, so it makes more sense to scope it under writers really. --- pandoc.cabal | 3 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 260 +--------------- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 304 +++++++++++++++++++ src/Text/Pandoc/Readers/Docx/StyleMap.hs | 123 -------- src/Text/Pandoc/Writers/Docx.hs | 61 ++-- src/Text/Pandoc/Writers/Docx/StyleMap.hs | 48 +++ 6 files changed, 387 insertions(+), 412 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs delete mode 100644 src/Text/Pandoc/Readers/Docx/StyleMap.hs create mode 100644 src/Text/Pandoc/Writers/Docx/StyleMap.hs diff --git a/pandoc.cabal b/pandoc.cabal index 7734f7233..44c8acf40 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -566,8 +566,8 @@ library Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Combine, Text.Pandoc.Readers.Docx.Parse, + Text.Pandoc.Readers.Docx.Parse.Styles, Text.Pandoc.Readers.Docx.Util, - Text.Pandoc.Readers.Docx.StyleMap, Text.Pandoc.Readers.Docx.Fields, Text.Pandoc.Readers.LaTeX.Parsing, Text.Pandoc.Readers.LaTeX.Lang, @@ -592,6 +592,7 @@ library Text.Pandoc.Readers.Org.Parsing, Text.Pandoc.Readers.Org.Shared, Text.Pandoc.Readers.Roff, + Text.Pandoc.Writers.Docx.StyleMap, Text.Pandoc.Writers.Roff, Text.Pandoc.Writers.Powerpoint.Presentation, Text.Pandoc.Writers.Powerpoint.Output, diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 00c5fb0be..8c5c94bb9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,14 +1,9 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal + 2019 Nikolay Yakimov License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal @@ -57,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , leftBiasedMergeRunStyle ) where import Prelude +import Text.Pandoc.Readers.Docx.Parse.Styles import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except @@ -64,13 +60,10 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B -import Data.Char (chr, ord, readLitChar, toLower) +import Data.Char (chr, ord, readLitChar) import Data.List -import Data.Function (on) -import Data.String (IsString(..)) import qualified Data.Map as M import Data.Maybe -import Data.Coerce import System.FilePath import Text.Pandoc.Readers.Docx.Util import Text.Pandoc.Readers.Docx.Fields @@ -262,37 +255,6 @@ newtype Row = Row [Cell] newtype Cell = Cell [BodyPart] deriving Show -newtype CharStyleId = CharStyleId { fromCharStyleId :: String } - deriving (Show, Eq, Ord, FromStyleId) -newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String } - deriving (Show, Eq, Ord, FromStyleId) - -newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString } - deriving (Show, Eq, Ord, IsString, FromStyleName) -newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString } - deriving (Show, Eq, Ord, IsString, FromStyleName) - --- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) - -class FromStyleName a where - fromStyleName :: a -> String - -instance FromStyleName String where - fromStyleName = id - -class FromStyleId a where - fromStyleId :: a -> String - -instance FromStyleId String where - fromStyleId = id - -instance Eq CIString where - (==) = (==) `on` map toLower . coerce - -instance Ord CIString where - compare = compare `on` map toLower . coerce - leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle leftBiasedMergeRunStyle a b = RunStyle { isBold = isBold a <|> isBold b @@ -333,44 +295,6 @@ data Run = Run RunStyle [RunElem] data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -data VertAlign = BaseLn | SupScrpt | SubScrpt - deriving Show - -data CharStyle = CharStyle { cStyleId :: CharStyleId - , cStyleName :: CharStyleName - , cStyleData :: RunStyle - } deriving (Show) - -data RunStyle = RunStyle { isBold :: Maybe Bool - , isItalic :: Maybe Bool - , isSmallCaps :: Maybe Bool - , isStrike :: Maybe Bool - , isRTL :: Maybe Bool - , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String - , rParentStyle :: Maybe CharStyle - } - deriving Show - -data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) - , psParentStyle :: Maybe ParStyle - , pStyleName :: ParaStyleName - , pStyleId :: ParaStyleId - } - deriving Show - -defaultRunStyle :: RunStyle -defaultRunStyle = RunStyle { isBold = Nothing - , isItalic = Nothing - , isSmallCaps = Nothing - , isStrike = Nothing - , isRTL = Nothing - , rVertAlign = Nothing - , rUnderline = Nothing - , rParentStyle = Nothing - } - type Target = String type Anchor = String type URL = String @@ -449,46 +373,6 @@ elemToBody _ _ = throwError WrongElem archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) archiveToStyles = archiveToStyles' getStyleId getStyleId -archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => - (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) -archiveToStyles' conv1 conv2 zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, - M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) - -isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool -isBasedOnStyle ns element parentStyle - | isElem ns "w" "style" element - , Just styleType <- findAttrByName ns "w" "type" element - , styleType == cStyleType parentStyle - , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" - , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) - | isElem ns "w" "style" element - , Just styleType <- findAttrByName ns "w" "type" element - , styleType == cStyleType parentStyle - , Nothing <- findChildByName ns "w" "basedOn" element - , Nothing <- parentStyle = True - | otherwise = False - -class HasStyleId a => ElemToStyle a where - cStyleType :: Maybe a -> String - elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a - -class FromStyleId (StyleId a) => HasStyleId a where - type StyleId a - getStyleId :: a -> StyleId a - -class FromStyleName (StyleName a) => HasStyleName a where - type StyleName a - getStyleName :: a -> StyleName a class HasParentStyle a where getParentStyle :: a -> Maybe a @@ -511,52 +395,6 @@ constructBogusParStyleData stName = ParStyle , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName } -instance ElemToStyle CharStyle where - cStyleType _ = "character" - elemToStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "character" <- findAttrByName ns "w" "type" element = - elemToCharStyle ns element parentStyle - | otherwise = Nothing - -instance HasStyleId CharStyle where - type StyleId CharStyle = CharStyleId - getStyleId = cStyleId - -instance HasStyleName CharStyle where - type StyleName CharStyle = CharStyleName - getStyleName = cStyleName - -instance ElemToStyle ParStyle where - cStyleType _ = "paragraph" - elemToStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "paragraph" <- findAttrByName ns "w" "type" element - = elemToParStyleData ns element parentStyle - | otherwise = Nothing - -instance HasStyleId ParStyle where - type StyleId ParStyle = ParaStyleId - getStyleId = pStyleId - -instance HasStyleName ParStyle where - type StyleName ParStyle = ParaStyleName - getStyleName = pStyleName - -getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] -getStyleChildren ns element parentStyle - | isElem ns "w" "styles" element = - mapMaybe (\e -> elemToStyle ns e parentStyle) $ - filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element - | otherwise = [] - -buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] -buildBasedOnList ns element rootStyle = - case getStyleChildren ns element rootStyle of - [] -> [] - stys -> stys ++ - concatMap (buildBasedOnList ns element . Just) stys - archiveToNotes :: Archive -> Notes archiveToNotes zf = let fnElem = findEntryByPath "word/footnotes.xml" zf @@ -789,9 +627,6 @@ testBitMask bitMaskS n = [] -> False ((n', _) : _) -> (n' .|. n) /= 0 -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle @@ -1166,21 +1001,6 @@ elemToParagraphStyle ns element sty } elemToParagraphStyle _ _ _ = defaultParagraphStyle -checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool -checkOnOff ns rPr tag - | Just t <- findChild tag rPr - , Just val <- findAttrByName ns "w" "val" t = - Just $ case val of - "true" -> True - "false" -> False - "on" -> True - "off" -> False - "1" -> True - "0" -> False - _ -> False - | Just _ <- findChild tag rPr = Just True -checkOnOff _ _ _ = Nothing - elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element | Just rPr <- findChildByName ns "w" "rPr" element = do @@ -1192,80 +1012,6 @@ elemToRunStyleD ns element return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle -elemToCharStyle :: NameSpaces - -> Element -> Maybe CharStyle -> Maybe CharStyle -elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) - <*> getElementStyleName ns element - <*> (Just $ elemToRunStyle ns element parentStyle) - -elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle -elemToRunStyle ns element parentStyle - | Just rPr <- findChildByName ns "w" "rPr" element = - RunStyle - { - isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus` - checkOnOff ns rPr (elemName ns "w" "bCs") - , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus` - checkOnOff ns rPr (elemName ns "w" "iCs") - , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") - , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") - , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl") - , rVertAlign = - findChildByName ns "w" "vertAlign" rPr >>= - findAttrByName ns "w" "val" >>= - \v -> Just $ case v of - "superscript" -> SupScrpt - "subscript" -> SubScrpt - _ -> BaseLn - , rUnderline = - findChildByName ns "w" "u" rPr >>= - findAttrByName ns "w" "val" - , rParentStyle = parentStyle - } -elemToRunStyle _ _ _ = defaultRunStyle - -getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) -getHeaderLevel ns element - | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ - fromStyleName styleName) - , n > 0 = Just (styleName, fromInteger n) -getHeaderLevel _ _ = Nothing - -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a -getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) - -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) -getNumInfo ns element = do - let numPr = findChildByName ns "w" "pPr" element >>= - findChildByName ns "w" "numPr" - lvl = fromMaybe "0" (numPr >>= - findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") - numId <- numPr >>= - findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" - return (numId, lvl) - - -elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle -elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just styleName <- getElementStyleName ns element - = Just $ ParStyle - { - headingLev = getHeaderLevel ns element - , numInfo = getNumInfo ns element - , psParentStyle = parentStyle - , pStyleName = styleName - , pStyleId = ParaStyleId styleId - } -elemToParStyleData _ _ _ = Nothing - elemToRunElem :: NameSpaces -> Element -> D RunElem elemToRunElem ns element | isElem ns "w" "t" element diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs new file mode 100644 index 000000000..c2d27e7fb --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -0,0 +1,304 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- | + Module : Text.Pandoc.Readers.Docx.Parse.Styles + Copyright : Copyright (C) 2014-2019 Jesse Rosenthal + 2019 Nikolay Yakimov + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Type machinery and code for extraction and manipulation of docx styles +-} + +module Text.Pandoc.Readers.Docx.Parse.Styles ( + CharStyleId(..) + , CharStyle + , ParaStyleId(..) + , ParStyle(..) + , RunStyle(..) + , HasStyleName + , StyleName + , ParaStyleName + , CharStyleName + , FromStyleName + , VertAlign(..) + , StyleId + , HasStyleId + , archiveToStyles' + , getStyleId + , getStyleName + , cStyleData + , fromStyleName + , fromStyleId + , stringToInteger + , getNumInfo + , elemToRunStyle + , defaultRunStyle + ) where +import Prelude +import Codec.Archive.Zip +import Control.Applicative ((<|>)) +import Control.Monad.Except +import Data.Char (toLower) +import Data.List +import Data.Function (on) +import Data.String (IsString(..)) +import qualified Data.Map as M +import Data.Maybe +import Data.Coerce +import Text.Pandoc.Readers.Docx.Util +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.XML.Light + +newtype CharStyleId = CharStyleId String + deriving (Show, Eq, Ord, IsString, FromStyleId) +newtype ParaStyleId = ParaStyleId String + deriving (Show, Eq, Ord, IsString, FromStyleId) + +newtype CharStyleName = CharStyleName CIString + deriving (Show, Eq, Ord, IsString, FromStyleName) +newtype ParaStyleName = ParaStyleName CIString + deriving (Show, Eq, Ord, IsString, FromStyleName) + +-- Case-insensitive comparisons +newtype CIString = CIString String deriving (Show, IsString, FromStyleName) + +class FromStyleName a where + fromStyleName :: a -> String + +instance FromStyleName String where + fromStyleName = id + +class FromStyleId a where + fromStyleId :: a -> String + +instance FromStyleId String where + fromStyleId = id + +instance Eq CIString where + (==) = (==) `on` map toLower . coerce + +instance Ord CIString where + compare = compare `on` map toLower . coerce + +data VertAlign = BaseLn | SupScrpt | SubScrpt + deriving Show + +data CharStyle = CharStyle { cStyleId :: CharStyleId + , cStyleName :: CharStyleName + , cStyleData :: RunStyle + } deriving (Show) + +data RunStyle = RunStyle { isBold :: Maybe Bool + , isItalic :: Maybe Bool + , isSmallCaps :: Maybe Bool + , isStrike :: Maybe Bool + , isRTL :: Maybe Bool + , rVertAlign :: Maybe VertAlign + , rUnderline :: Maybe String + , rParentStyle :: Maybe CharStyle + } + deriving Show + +data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) + , numInfo :: Maybe (String, String) + , psParentStyle :: Maybe ParStyle + , pStyleName :: ParaStyleName + , pStyleId :: ParaStyleId + } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = Nothing + , isItalic = Nothing + , isSmallCaps = Nothing + , isStrike = Nothing + , isRTL = Nothing + , rVertAlign = Nothing + , rUnderline = Nothing + , rParentStyle = Nothing + } + +archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => + (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' conv1 conv2 zf = + let stylesElem = findEntryByPath "word/styles.xml" zf >>= + (parseXMLDoc . UTF8.toStringLazy . fromEntry) + in + case stylesElem of + Nothing -> (M.empty, M.empty) + Just styElem -> + let namespaces = elemToNameSpaces styElem + in + ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) + +isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool +isBasedOnStyle ns element parentStyle + | isElem ns "w" "style" element + , Just styleType <- findAttrByName ns "w" "type" element + , styleType == cStyleType parentStyle + , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= + findAttrByName ns "w" "val" + , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) + | isElem ns "w" "style" element + , Just styleType <- findAttrByName ns "w" "type" element + , styleType == cStyleType parentStyle + , Nothing <- findChildByName ns "w" "basedOn" element + , Nothing <- parentStyle = True + | otherwise = False + +class HasStyleId a => ElemToStyle a where + cStyleType :: Maybe a -> String + elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a + +class FromStyleId (StyleId a) => HasStyleId a where + type StyleId a + getStyleId :: a -> StyleId a + +class FromStyleName (StyleName a) => HasStyleName a where + type StyleName a + getStyleName :: a -> StyleName a + +instance ElemToStyle CharStyle where + cStyleType _ = "character" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "character" <- findAttrByName ns "w" "type" element = + elemToCharStyle ns element parentStyle + | otherwise = Nothing + +instance HasStyleId CharStyle where + type StyleId CharStyle = CharStyleId + getStyleId = cStyleId + +instance HasStyleName CharStyle where + type StyleName CharStyle = CharStyleName + getStyleName = cStyleName + +instance ElemToStyle ParStyle where + cStyleType _ = "paragraph" + elemToStyle ns element parentStyle + | isElem ns "w" "style" element + , Just "paragraph" <- findAttrByName ns "w" "type" element + = elemToParStyleData ns element parentStyle + | otherwise = Nothing + +instance HasStyleId ParStyle where + type StyleId ParStyle = ParaStyleId + getStyleId = pStyleId + +instance HasStyleName ParStyle where + type StyleName ParStyle = ParaStyleName + getStyleName = pStyleName + +getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] +getStyleChildren ns element parentStyle + | isElem ns "w" "styles" element = + mapMaybe (\e -> elemToStyle ns e parentStyle) $ + filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element + | otherwise = [] + +buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] +buildBasedOnList ns element rootStyle = + case getStyleChildren ns element rootStyle of + [] -> [] + stys -> stys ++ + concatMap (buildBasedOnList ns element . Just) stys + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool +checkOnOff ns rPr tag + | Just t <- findChild tag rPr + , Just val <- findAttrByName ns "w" "val" t = + Just $ case val of + "true" -> True + "false" -> False + "on" -> True + "off" -> False + "1" -> True + "0" -> False + _ -> False + | Just _ <- findChild tag rPr = Just True +checkOnOff _ _ _ = Nothing + +elemToCharStyle :: NameSpaces + -> Element -> Maybe CharStyle -> Maybe CharStyle +elemToCharStyle ns element parentStyle + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + <*> getElementStyleName ns element + <*> (Just $ elemToRunStyle ns element parentStyle) + +elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle +elemToRunStyle ns element parentStyle + | Just rPr <- findChildByName ns "w" "rPr" element = + RunStyle + { + isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus` + checkOnOff ns rPr (elemName ns "w" "bCs") + , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus` + checkOnOff ns rPr (elemName ns "w" "iCs") + , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") + , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") + , isRTL = checkOnOff ns rPr (elemName ns "w" "rtl") + , rVertAlign = + findChildByName ns "w" "vertAlign" rPr >>= + findAttrByName ns "w" "val" >>= + \v -> Just $ case v of + "superscript" -> SupScrpt + "subscript" -> SubScrpt + _ -> BaseLn + , rUnderline = + findChildByName ns "w" "u" rPr >>= + findAttrByName ns "w" "val" + , rParentStyle = parentStyle + } +elemToRunStyle _ _ _ = defaultRunStyle + +getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) +getHeaderLevel ns element + | Just styleName <- getElementStyleName ns element + , Just n <- stringToInteger =<< + (stripPrefix "heading " . map toLower $ + fromStyleName styleName) + , n > 0 = Just (styleName, fromInteger n) +getHeaderLevel _ _ = Nothing + +getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName ns el = coerce <$> + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) + +getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo ns element = do + let numPr = findChildByName ns "w" "pPr" element >>= + findChildByName ns "w" "numPr" + lvl = fromMaybe "0" (numPr >>= + findChildByName ns "w" "ilvl" >>= + findAttrByName ns "w" "val") + numId <- numPr >>= + findChildByName ns "w" "numId" >>= + findAttrByName ns "w" "val" + return (numId, lvl) + +elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle +elemToParStyleData ns element parentStyle + | Just styleId <- findAttrByName ns "w" "styleId" element + , Just styleName <- getElementStyleName ns element + = Just $ ParStyle + { + headingLev = getHeaderLevel ns element + , numInfo = getNumInfo ns element + , psParentStyle = parentStyle + , pStyleName = styleName + , pStyleId = ParaStyleId styleId + } +elemToParStyleData _ _ _ = Nothing diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs deleted file mode 100644 index bdf7b4df2..000000000 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ /dev/null @@ -1,123 +0,0 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{- | - Module : Text.Pandoc.Readers.Docx.StyleMaps - Copyright : © 2014-2019 Jesse Rosenthal , - 2014-2019 John MacFarlane , - 2015 Nikolay Yakimov - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Mappings of element styles (word to pandoc-internal). --} -module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) - , alterMap - , getMap - , defaultStyleMaps - , getStyleMaps - , getStyleId - , hasStyleName - ) where - -import Prelude -import Control.Monad.State.Strict -import Data.Char (toLower) -import qualified Data.Map as M -import Text.Pandoc.Readers.Docx.Util -import Text.XML.Light - -newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) -newtype CharStyleMap = CharStyleMap ( M.Map String String ) - -class StyleMap a where - alterMap :: (M.Map String String -> M.Map String String) -> a -> a - getMap :: a -> M.Map String String - -instance StyleMap ParaStyleMap where - alterMap f (ParaStyleMap m) = ParaStyleMap $ f m - getMap (ParaStyleMap m) = m - -instance StyleMap CharStyleMap where - alterMap f (CharStyleMap m) = CharStyleMap $ f m - getMap (CharStyleMap m) = m - -insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a -insert (Just k) (Just v) m = alterMap (M.insert k v) m -insert _ _ m = m - -getStyleId :: (StyleMap a) => String -> a -> String -getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap - -hasStyleName :: (StyleMap a) => String -> a -> Bool -hasStyleName styleName = M.member (map toLower styleName) . getMap - -data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces - , sParaStyleMap :: ParaStyleMap - , sCharStyleMap :: CharStyleMap - } - -data StyleType = ParaStyle | CharStyle - -defaultStyleMaps :: StyleMaps -defaultStyleMaps = StyleMaps { sNameSpaces = [] - , sParaStyleMap = ParaStyleMap M.empty - , sCharStyleMap = CharStyleMap M.empty - } - -type StateM a = State StyleMaps a - -getStyleMaps :: Element -> StyleMaps -getStyleMaps docElem = execState genStyleMap state' - where - state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} - genStyleItem e = do - styleType <- getStyleType e - styleId <- getAttrStyleId e - nameValLowercase <- fmap (map toLower) `fmap` getNameVal e - case styleType of - Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId - Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId - _ -> return () - genStyleMap = do - style <- elemName' "style" - let styles = findChildren style docElem - forM_ styles genStyleItem - -modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () -modParaStyleMap f = modify $ \s -> - s {sParaStyleMap = f $ sParaStyleMap s} - -modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () -modCharStyleMap f = modify $ \s -> - s {sCharStyleMap = f $ sCharStyleMap s} - -getStyleType :: Element -> StateM (Maybe StyleType) -getStyleType e = do - styleTypeStr <- getAttrType e - case styleTypeStr of - Just "paragraph" -> return $ Just ParaStyle - Just "character" -> return $ Just CharStyle - _ -> return Nothing - -getAttrType :: Element -> StateM (Maybe String) -getAttrType el = do - name <- elemName' "type" - return $ findAttr name el - -getAttrStyleId :: Element -> StateM (Maybe String) -getAttrStyleId el = do - name <- elemName' "styleId" - return $ findAttr name el - -getNameVal :: Element -> StateM (Maybe String) -getNameVal el = do - name <- elemName' "name" - val <- elemName' "val" - return $ findChild name el >>= findAttr val - -elemName' :: String -> StateM QName -elemName' name = do - namespaces <- gets sNameSpaces - return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d62dbeedb..4b709358f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -3,6 +3,7 @@ {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Docx Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -23,8 +24,9 @@ import Control.Monad.Reader import Control.Monad.State.Strict import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import Data.Char (isSpace, ord, toLower, isLetter) +import Data.Char (isSpace, ord, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) +import Data.String (fromString) import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set @@ -46,7 +48,7 @@ import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, getMimeTypeDef) import Text.Pandoc.Options -import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Writers.Docx.StyleMap import Text.Pandoc.Shared import Text.Pandoc.Walk import Text.Pandoc.Writers.Math @@ -132,8 +134,8 @@ data WriterState = WriterState{ , stFirstPara :: Bool , stInTable :: Bool , stTocTitle :: [Inline] - , stDynamicParaProps :: Set.Set String - , stDynamicTextProps :: Set.Set String + , stDynamicParaProps :: Set.Set ParaStyleName + , stDynamicTextProps :: Set.Set CharStyleName , stCurId :: Int } @@ -147,7 +149,7 @@ defaultWriterState = WriterState{ , stLists = [NoMarker] , stInsId = 1 , stDelId = 1 - , stStyleMaps = defaultStyleMaps + , stStyleMaps = StyleMaps M.empty M.empty , stFirstPara = False , stInTable = False , stTocTitle = [Str "Table of Contents"] @@ -265,7 +267,7 @@ writeDocx opts doc@(Pandoc meta _) = do styledoc <- addLang <$> parseXml refArchive distArchive stylepath -- parse styledoc for heading styles - let styleMaps = getStyleMaps styledoc + let styleMaps = getStyleMaps refArchive let tocTitle = case lookupMetaInlines "toc-title" meta of [] -> stTocTitle defaultWriterState @@ -462,11 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do -- are not already in the style map. Note that keys in the stylemap -- are normalized as lowercase. let newDynamicParaProps = filter - (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps) + (\sty -> not $ hasStyleName sty $ smParaStyle styleMaps) (Set.toList $ stDynamicParaProps st) newDynamicTextProps = filter - (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps) + (\sty -> not $ hasStyleName sty $ smCharStyle styleMaps) (Set.toList $ stDynamicTextProps st) let newstyles = map newParaPropToOpenXml newDynamicParaProps ++ @@ -609,8 +611,8 @@ writeDocx opts doc@(Pandoc meta _) = do return $ fromArchive archive -newParaPropToOpenXml :: String -> Element -newParaPropToOpenXml s = +newParaPropToOpenXml :: ParaStyleName -> Element +newParaPropToOpenXml (fromStyleName -> s) = let styleId = filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") @@ -620,8 +622,8 @@ newParaPropToOpenXml s = , mknode "w:qFormat" [] () ] -newTextPropToOpenXml :: String -> Element -newTextPropToOpenXml s = +newTextPropToOpenXml :: CharStyleName -> Element +newTextPropToOpenXml (fromStyleName -> s) = let styleId = filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") @@ -634,7 +636,7 @@ styleToOpenXml :: StyleMaps -> Style -> [Element] styleToOpenXml sm style = maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing + toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] @@ -657,7 +659,7 @@ styleToOpenXml sm style = tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style - parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing + parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] @@ -848,17 +850,17 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyleM :: (PandocMonad m) => String -> WS m XML.Element +pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element pStyleM styleName = do - styleMaps <- gets stStyleMaps - let sty' = getStyleId styleName $ sParaStyleMap styleMaps - return $ mknode "w:pStyle" [("w:val",sty')] () + pStyleMap <- gets (smParaStyle . stStyleMaps) + let sty' = getStyleIdFromName styleName pStyleMap + return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () -rStyleM :: (PandocMonad m) => String -> WS m XML.Element +rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do - styleMaps <- gets stStyleMaps - let sty' = getStyleId styleName $ sCharStyleMap styleMaps - return $ mknode "w:rStyle" [("w:val",sty')] () + cStyleMap <- gets (smCharStyle . stStyleMaps) + let sty' = getStyleIdFromName styleName cStyleMap + return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () getUniqueId :: (PandocMonad m) => WS m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -880,7 +882,7 @@ blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML' _ Null = return [] blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do stylemod <- case lookup dynamicStyleKey kvs of - Just sty -> do + Just (fromString -> sty) -> do modify $ \s -> s{stDynamicParaProps = Set.insert sty (stDynamicParaProps s)} @@ -901,7 +903,7 @@ blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do wrapBookmark ident $ header ++ contents blockToOpenXML' opts (Header lev (ident,_,_) lst) = do setFirstPara - paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ + paraProps <- withParaPropM (pStyleM (fromString $ "Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst if null ident @@ -916,8 +918,7 @@ blockToOpenXML' opts (Plain lst) = do isInTable <- gets stInTable let block = blockToOpenXML opts (Para lst) prop <- pStyleM "Compact" - para <- if isInTable then withParaProp prop block else block - return $ para + if isInTable then withParaProp prop block else block -- title beginning with fig: indicates that the image is a figure blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do setFirstPara @@ -1087,9 +1088,7 @@ getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties let squashed = squashProps props - return $ if null squashed - then [] - else [mknode "w:rPr" [] squashed] + return [mknode "w:rPr" [] squashed | (not . null) squashed] withTextProp :: PandocMonad m => Element -> WS m a -> WS m a withTextProp d p = @@ -1174,7 +1173,7 @@ inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of - Just sty -> do + Just (fromString -> sty) -> do modify $ \s -> s{stDynamicTextProps = Set.insert sty (stDynamicTextProps s)} @@ -1259,7 +1258,7 @@ inlineToOpenXML' opts (Math mathType str) = do inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do let alltoktypes = [KeywordTok ..] - tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (show tt)) alltoktypes + tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes let unhighlighted = intercalate [br] `fmap` mapM formattedString (lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs new file mode 100644 index 000000000..4f0b0c3f9 --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE FlexibleContexts #-} +{- | + Module : Text.Pandoc.Writers.Docx.StyleMap + Copyright : © 2014-2019 Jesse Rosenthal , + 2014-2019 John MacFarlane , + 2015-2019 Nikolay Yakimov + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Mappings of element styles (word to pandoc-internal). +-} + +module Text.Pandoc.Writers.Docx.StyleMap ( StyleMaps(..) + , ParaStyleName + , CharStyleName + , getStyleMaps + , getStyleIdFromName + , hasStyleName + , fromStyleId + , fromStyleName + ) where + +import Text.Pandoc.Readers.Docx.Parse.Styles +import Codec.Archive.Zip +import qualified Data.Map as M +import Data.String +import Data.Char (isSpace) +import Prelude + +data StyleMaps = StyleMaps { smCharStyle :: CharStyleNameMap, smParaStyle :: ParaStyleNameMap } +type ParaStyleNameMap = M.Map ParaStyleName ParStyle +type CharStyleNameMap = M.Map CharStyleName CharStyle + +getStyleIdFromName :: (Ord sn, FromStyleName sn, IsString (StyleId sty), HasStyleId sty) + => sn -> M.Map sn sty -> StyleId sty +getStyleIdFromName s = maybe (fallback s) getStyleId . M.lookup s + where fallback = fromString . filter (not . isSpace) . fromStyleName + +hasStyleName :: (Ord sn, HasStyleId sty) + => sn -> M.Map sn sty -> Bool +hasStyleName styleName = M.member styleName + +getStyleMaps :: Archive -> StyleMaps +getStyleMaps = uncurry StyleMaps . archiveToStyles' getStyleName getStyleName