[Docx Writer] Re-use Readers.Docx.Parse for StyleMap (#5766)

* [Docx Parser] Move style-parsing-specific code to a new module

* [Docx Writer] Re-use Readers.Docx.Parse.Styles for StyleMap

* [Docx Writer] Move Readers.Docx.StyleMap to Writers.Docx.StyleMap

It's never used outside of writer code, so it makes more sense to scope it under writers really.
This commit is contained in:
Nikolay Yakimov 2019-09-22 23:00:35 +04:00 committed by John MacFarlane
parent d247e9f72e
commit 9b6ee81c19
6 changed files with 387 additions and 412 deletions

View file

@ -566,8 +566,8 @@ library
Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Combine,
Text.Pandoc.Readers.Docx.Parse,
Text.Pandoc.Readers.Docx.Parse.Styles,
Text.Pandoc.Readers.Docx.Util,
Text.Pandoc.Readers.Docx.StyleMap,
Text.Pandoc.Readers.Docx.Fields,
Text.Pandoc.Readers.LaTeX.Parsing,
Text.Pandoc.Readers.LaTeX.Lang,
@ -592,6 +592,7 @@ library
Text.Pandoc.Readers.Org.Parsing,
Text.Pandoc.Readers.Org.Shared,
Text.Pandoc.Readers.Roff,
Text.Pandoc.Writers.Docx.StyleMap,
Text.Pandoc.Writers.Roff,
Text.Pandoc.Writers.Powerpoint.Presentation,
Text.Pandoc.Writers.Powerpoint.Output,

View file

@ -1,14 +1,9 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
2019 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@ -57,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, leftBiasedMergeRunStyle
) where
import Prelude
import Text.Pandoc.Readers.Docx.Parse.Styles
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
@ -64,13 +60,10 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import Data.Char (chr, ord, readLitChar, toLower)
import Data.Char (chr, ord, readLitChar)
import Data.List
import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
import Data.Maybe
import Data.Coerce
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
import Text.Pandoc.Readers.Docx.Fields
@ -262,37 +255,6 @@ newtype Row = Row [Cell]
newtype Cell = Cell [BodyPart]
deriving Show
newtype CharStyleId = CharStyleId { fromCharStyleId :: String }
deriving (Show, Eq, Ord, FromStyleId)
newtype ParaStyleId = ParaStyleId { fromParaStyleId :: String }
deriving (Show, Eq, Ord, FromStyleId)
newtype CharStyleName = CharStyleName { fromCharStyleName :: CIString }
deriving (Show, Eq, Ord, IsString, FromStyleName)
newtype ParaStyleName = ParaStyleName { fromParaStyleName :: CIString }
deriving (Show, Eq, Ord, IsString, FromStyleName)
-- Case-insensitive comparisons
newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
class FromStyleName a where
fromStyleName :: a -> String
instance FromStyleName String where
fromStyleName = id
class FromStyleId a where
fromStyleId :: a -> String
instance FromStyleId String where
fromStyleId = id
instance Eq CIString where
(==) = (==) `on` map toLower . coerce
instance Ord CIString where
compare = compare `on` map toLower . coerce
leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
leftBiasedMergeRunStyle a b = RunStyle
{ isBold = isBold a <|> isBold b
@ -333,44 +295,6 @@ data Run = Run RunStyle [RunElem]
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
data CharStyle = CharStyle { cStyleId :: CharStyleId
, cStyleName :: CharStyleName
, cStyleData :: RunStyle
} deriving (Show)
data RunStyle = RunStyle { isBold :: Maybe Bool
, isItalic :: Maybe Bool
, isSmallCaps :: Maybe Bool
, isStrike :: Maybe Bool
, isRTL :: Maybe Bool
, rVertAlign :: Maybe VertAlign
, rUnderline :: Maybe String
, rParentStyle :: Maybe CharStyle
}
deriving Show
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
, numInfo :: Maybe (String, String)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
, pStyleId :: ParaStyleId
}
deriving Show
defaultRunStyle :: RunStyle
defaultRunStyle = RunStyle { isBold = Nothing
, isItalic = Nothing
, isSmallCaps = Nothing
, isStrike = Nothing
, isRTL = Nothing
, rVertAlign = Nothing
, rUnderline = Nothing
, rParentStyle = Nothing
}
type Target = String
type Anchor = String
type URL = String
@ -449,46 +373,6 @@ elemToBody _ _ = throwError WrongElem
archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
archiveToStyles = archiveToStyles' getStyleId getStyleId
archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
(a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
archiveToStyles' conv1 conv2 zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
case stylesElem of
Nothing -> (M.empty, M.empty)
Just styElem ->
let namespaces = elemToNameSpaces styElem
in
( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
findAttrByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Nothing <- findChildByName ns "w" "basedOn" element
, Nothing <- parentStyle = True
| otherwise = False
class HasStyleId a => ElemToStyle a where
cStyleType :: Maybe a -> String
elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
class FromStyleId (StyleId a) => HasStyleId a where
type StyleId a
getStyleId :: a -> StyleId a
class FromStyleName (StyleName a) => HasStyleName a where
type StyleName a
getStyleName :: a -> StyleName a
class HasParentStyle a where
getParentStyle :: a -> Maybe a
@ -511,52 +395,6 @@ constructBogusParStyleData stName = ParStyle
, pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
}
instance ElemToStyle CharStyle where
cStyleType _ = "character"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
, Just "character" <- findAttrByName ns "w" "type" element =
elemToCharStyle ns element parentStyle
| otherwise = Nothing
instance HasStyleId CharStyle where
type StyleId CharStyle = CharStyleId
getStyleId = cStyleId
instance HasStyleName CharStyle where
type StyleName CharStyle = CharStyleName
getStyleName = cStyleName
instance ElemToStyle ParStyle where
cStyleType _ = "paragraph"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
, Just "paragraph" <- findAttrByName ns "w" "type" element
= elemToParStyleData ns element parentStyle
| otherwise = Nothing
instance HasStyleId ParStyle where
type StyleId ParStyle = ParaStyleId
getStyleId = pStyleId
instance HasStyleName ParStyle where
type StyleName ParStyle = ParaStyleName
getStyleName = pStyleName
getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
| isElem ns "w" "styles" element =
mapMaybe (\e -> elemToStyle ns e parentStyle) $
filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
| otherwise = []
buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
case getStyleChildren ns element rootStyle of
[] -> []
stys -> stys ++
concatMap (buildBasedOnList ns element . Just) stys
archiveToNotes :: Archive -> Notes
archiveToNotes zf =
let fnElem = findEntryByPath "word/footnotes.xml" zf
@ -789,9 +627,6 @@ testBitMask bitMaskS n =
[] -> False
((n', _) : _) -> (n' .|. n) /= 0
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle
@ -1166,21 +1001,6 @@ elemToParagraphStyle ns element sty
}
elemToParagraphStyle _ _ _ = defaultParagraphStyle
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
| Just t <- findChild tag rPr
, Just val <- findAttrByName ns "w" "val" t =
Just $ case val of
"true" -> True
"false" -> False
"on" -> True
"off" -> False
"1" -> True
"0" -> False
_ -> False
| Just _ <- findChild tag rPr = Just True
checkOnOff _ _ _ = Nothing
elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element
| Just rPr <- findChildByName ns "w" "rPr" element = do
@ -1192,80 +1012,6 @@ elemToRunStyleD ns element
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
= CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> (Just $ elemToRunStyle ns element parentStyle)
elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
elemToRunStyle ns element parentStyle
| Just rPr <- findChildByName ns "w" "rPr" element =
RunStyle
{
isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
checkOnOff ns rPr (elemName ns "w" "bCs")
, isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
checkOnOff ns rPr (elemName ns "w" "iCs")
, isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
, isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
, isRTL = checkOnOff ns rPr (elemName ns "w" "rtl")
, rVertAlign =
findChildByName ns "w" "vertAlign" rPr >>=
findAttrByName ns "w" "val" >>=
\v -> Just $ case v of
"superscript" -> SupScrpt
"subscript" -> SubScrpt
_ -> BaseLn
, rUnderline =
findChildByName ns "w" "u" rPr >>=
findAttrByName ns "w" "val"
, rParentStyle = parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
, Just n <- stringToInteger =<<
(stripPrefix "heading " . map toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
<|> findAttrByName ns "w" "styleId" el)
getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
getNumInfo ns element = do
let numPr = findChildByName ns "w" "pPr" element >>=
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
findAttrByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
findAttrByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
| Just styleId <- findAttrByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
headingLev = getHeaderLevel ns element
, numInfo = getNumInfo ns element
, psParentStyle = parentStyle
, pStyleName = styleName
, pStyleId = ParaStyleId styleId
}
elemToParStyleData _ _ _ = Nothing
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
| isElem ns "w" "t" element

View file

@ -0,0 +1,304 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse.Styles
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
2019 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
Stability : alpha
Portability : portable
Type machinery and code for extraction and manipulation of docx styles
-}
module Text.Pandoc.Readers.Docx.Parse.Styles (
CharStyleId(..)
, CharStyle
, ParaStyleId(..)
, ParStyle(..)
, RunStyle(..)
, HasStyleName
, StyleName
, ParaStyleName
, CharStyleName
, FromStyleName
, VertAlign(..)
, StyleId
, HasStyleId
, archiveToStyles'
, getStyleId
, getStyleName
, cStyleData
, fromStyleName
, fromStyleId
, stringToInteger
, getNumInfo
, elemToRunStyle
, defaultRunStyle
) where
import Prelude
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
import Data.Char (toLower)
import Data.List
import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
import Text.XML.Light
newtype CharStyleId = CharStyleId String
deriving (Show, Eq, Ord, IsString, FromStyleId)
newtype ParaStyleId = ParaStyleId String
deriving (Show, Eq, Ord, IsString, FromStyleId)
newtype CharStyleName = CharStyleName CIString
deriving (Show, Eq, Ord, IsString, FromStyleName)
newtype ParaStyleName = ParaStyleName CIString
deriving (Show, Eq, Ord, IsString, FromStyleName)
-- Case-insensitive comparisons
newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
class FromStyleName a where
fromStyleName :: a -> String
instance FromStyleName String where
fromStyleName = id
class FromStyleId a where
fromStyleId :: a -> String
instance FromStyleId String where
fromStyleId = id
instance Eq CIString where
(==) = (==) `on` map toLower . coerce
instance Ord CIString where
compare = compare `on` map toLower . coerce
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
data CharStyle = CharStyle { cStyleId :: CharStyleId
, cStyleName :: CharStyleName
, cStyleData :: RunStyle
} deriving (Show)
data RunStyle = RunStyle { isBold :: Maybe Bool
, isItalic :: Maybe Bool
, isSmallCaps :: Maybe Bool
, isStrike :: Maybe Bool
, isRTL :: Maybe Bool
, rVertAlign :: Maybe VertAlign
, rUnderline :: Maybe String
, rParentStyle :: Maybe CharStyle
}
deriving Show
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
, numInfo :: Maybe (String, String)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
, pStyleId :: ParaStyleId
}
deriving Show
defaultRunStyle :: RunStyle
defaultRunStyle = RunStyle { isBold = Nothing
, isItalic = Nothing
, isSmallCaps = Nothing
, isStrike = Nothing
, isRTL = Nothing
, rVertAlign = Nothing
, rUnderline = Nothing
, rParentStyle = Nothing
}
archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
(a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
archiveToStyles' conv1 conv2 zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
case stylesElem of
Nothing -> (M.empty, M.empty)
Just styElem ->
let namespaces = elemToNameSpaces styElem
in
( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
findAttrByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Nothing <- findChildByName ns "w" "basedOn" element
, Nothing <- parentStyle = True
| otherwise = False
class HasStyleId a => ElemToStyle a where
cStyleType :: Maybe a -> String
elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
class FromStyleId (StyleId a) => HasStyleId a where
type StyleId a
getStyleId :: a -> StyleId a
class FromStyleName (StyleName a) => HasStyleName a where
type StyleName a
getStyleName :: a -> StyleName a
instance ElemToStyle CharStyle where
cStyleType _ = "character"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
, Just "character" <- findAttrByName ns "w" "type" element =
elemToCharStyle ns element parentStyle
| otherwise = Nothing
instance HasStyleId CharStyle where
type StyleId CharStyle = CharStyleId
getStyleId = cStyleId
instance HasStyleName CharStyle where
type StyleName CharStyle = CharStyleName
getStyleName = cStyleName
instance ElemToStyle ParStyle where
cStyleType _ = "paragraph"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
, Just "paragraph" <- findAttrByName ns "w" "type" element
= elemToParStyleData ns element parentStyle
| otherwise = Nothing
instance HasStyleId ParStyle where
type StyleId ParStyle = ParaStyleId
getStyleId = pStyleId
instance HasStyleName ParStyle where
type StyleName ParStyle = ParaStyleName
getStyleName = pStyleName
getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
| isElem ns "w" "styles" element =
mapMaybe (\e -> elemToStyle ns e parentStyle) $
filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
| otherwise = []
buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
case getStyleChildren ns element rootStyle of
[] -> []
stys -> stys ++
concatMap (buildBasedOnList ns element . Just) stys
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
| Just t <- findChild tag rPr
, Just val <- findAttrByName ns "w" "val" t =
Just $ case val of
"true" -> True
"false" -> False
"on" -> True
"off" -> False
"1" -> True
"0" -> False
_ -> False
| Just _ <- findChild tag rPr = Just True
checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
= CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> (Just $ elemToRunStyle ns element parentStyle)
elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
elemToRunStyle ns element parentStyle
| Just rPr <- findChildByName ns "w" "rPr" element =
RunStyle
{
isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
checkOnOff ns rPr (elemName ns "w" "bCs")
, isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
checkOnOff ns rPr (elemName ns "w" "iCs")
, isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
, isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
, isRTL = checkOnOff ns rPr (elemName ns "w" "rtl")
, rVertAlign =
findChildByName ns "w" "vertAlign" rPr >>=
findAttrByName ns "w" "val" >>=
\v -> Just $ case v of
"superscript" -> SupScrpt
"subscript" -> SubScrpt
_ -> BaseLn
, rUnderline =
findChildByName ns "w" "u" rPr >>=
findAttrByName ns "w" "val"
, rParentStyle = parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
, Just n <- stringToInteger =<<
(stripPrefix "heading " . map toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
<|> findAttrByName ns "w" "styleId" el)
getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
getNumInfo ns element = do
let numPr = findChildByName ns "w" "pPr" element >>=
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
findAttrByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
findAttrByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
| Just styleId <- findAttrByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
headingLev = getHeaderLevel ns element
, numInfo = getNumInfo ns element
, psParentStyle = parentStyle
, pStyleName = styleName
, pStyleId = ParaStyleId styleId
}
elemToParStyleData _ _ _ = Nothing

View file

@ -1,123 +0,0 @@
{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Readers.Docx.StyleMaps
Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
2014-2019 John MacFarlane <jgm@berkeley.edu>,
2015 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
Stability : alpha
Portability : portable
Mappings of element styles (word to pandoc-internal).
-}
module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
, alterMap
, getMap
, defaultStyleMaps
, getStyleMaps
, getStyleId
, hasStyleName
) where
import Prelude
import Control.Monad.State.Strict
import Data.Char (toLower)
import qualified Data.Map as M
import Text.Pandoc.Readers.Docx.Util
import Text.XML.Light
newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
newtype CharStyleMap = CharStyleMap ( M.Map String String )
class StyleMap a where
alterMap :: (M.Map String String -> M.Map String String) -> a -> a
getMap :: a -> M.Map String String
instance StyleMap ParaStyleMap where
alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
getMap (ParaStyleMap m) = m
instance StyleMap CharStyleMap where
alterMap f (CharStyleMap m) = CharStyleMap $ f m
getMap (CharStyleMap m) = m
insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
insert (Just k) (Just v) m = alterMap (M.insert k v) m
insert _ _ m = m
getStyleId :: (StyleMap a) => String -> a -> String
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
hasStyleName :: (StyleMap a) => String -> a -> Bool
hasStyleName styleName = M.member (map toLower styleName) . getMap
data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
, sParaStyleMap :: ParaStyleMap
, sCharStyleMap :: CharStyleMap
}
data StyleType = ParaStyle | CharStyle
defaultStyleMaps :: StyleMaps
defaultStyleMaps = StyleMaps { sNameSpaces = []
, sParaStyleMap = ParaStyleMap M.empty
, sCharStyleMap = CharStyleMap M.empty
}
type StateM a = State StyleMaps a
getStyleMaps :: Element -> StyleMaps
getStyleMaps docElem = execState genStyleMap state'
where
state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
genStyleItem e = do
styleType <- getStyleType e
styleId <- getAttrStyleId e
nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
case styleType of
Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
_ -> return ()
genStyleMap = do
style <- elemName' "style"
let styles = findChildren style docElem
forM_ styles genStyleItem
modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
modParaStyleMap f = modify $ \s ->
s {sParaStyleMap = f $ sParaStyleMap s}
modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
modCharStyleMap f = modify $ \s ->
s {sCharStyleMap = f $ sCharStyleMap s}
getStyleType :: Element -> StateM (Maybe StyleType)
getStyleType e = do
styleTypeStr <- getAttrType e
case styleTypeStr of
Just "paragraph" -> return $ Just ParaStyle
Just "character" -> return $ Just CharStyle
_ -> return Nothing
getAttrType :: Element -> StateM (Maybe String)
getAttrType el = do
name <- elemName' "type"
return $ findAttr name el
getAttrStyleId :: Element -> StateM (Maybe String)
getAttrStyleId el = do
name <- elemName' "styleId"
return $ findAttr name el
getNameVal :: Element -> StateM (Maybe String)
getNameVal el = do
name <- elemName' "name"
val <- elemName' "val"
return $ findChild name el >>= findAttr val
elemName' :: String -> StateM QName
elemName' name = do
namespaces <- gets sNameSpaces
return $ elemName namespaces "w" name

View file

@ -3,6 +3,7 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Writers.Docx
Copyright : Copyright (C) 2012-2019 John MacFarlane
@ -23,8 +24,9 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Char (isSpace, ord, toLower, isLetter)
import Data.Char (isSpace, ord, isLetter)
import Data.List (intercalate, isPrefixOf, isSuffixOf)
import Data.String (fromString)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
import qualified Data.Set as Set
@ -46,7 +48,7 @@ import Text.Pandoc.Logging
import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType,
getMimeTypeDef)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.StyleMap
import Text.Pandoc.Writers.Docx.StyleMap
import Text.Pandoc.Shared
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math
@ -132,8 +134,8 @@ data WriterState = WriterState{
, stFirstPara :: Bool
, stInTable :: Bool
, stTocTitle :: [Inline]
, stDynamicParaProps :: Set.Set String
, stDynamicTextProps :: Set.Set String
, stDynamicParaProps :: Set.Set ParaStyleName
, stDynamicTextProps :: Set.Set CharStyleName
, stCurId :: Int
}
@ -147,7 +149,7 @@ defaultWriterState = WriterState{
, stLists = [NoMarker]
, stInsId = 1
, stDelId = 1
, stStyleMaps = defaultStyleMaps
, stStyleMaps = StyleMaps M.empty M.empty
, stFirstPara = False
, stInTable = False
, stTocTitle = [Str "Table of Contents"]
@ -265,7 +267,7 @@ writeDocx opts doc@(Pandoc meta _) = do
styledoc <- addLang <$> parseXml refArchive distArchive stylepath
-- parse styledoc for heading styles
let styleMaps = getStyleMaps styledoc
let styleMaps = getStyleMaps refArchive
let tocTitle = case lookupMetaInlines "toc-title" meta of
[] -> stTocTitle defaultWriterState
@ -462,11 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do
-- are not already in the style map. Note that keys in the stylemap
-- are normalized as lowercase.
let newDynamicParaProps = filter
(\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps)
(\sty -> not $ hasStyleName sty $ smParaStyle styleMaps)
(Set.toList $ stDynamicParaProps st)
newDynamicTextProps = filter
(\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps)
(\sty -> not $ hasStyleName sty $ smCharStyle styleMaps)
(Set.toList $ stDynamicTextProps st)
let newstyles = map newParaPropToOpenXml newDynamicParaProps ++
@ -609,8 +611,8 @@ writeDocx opts doc@(Pandoc meta _) = do
return $ fromArchive archive
newParaPropToOpenXml :: String -> Element
newParaPropToOpenXml s =
newParaPropToOpenXml :: ParaStyleName -> Element
newParaPropToOpenXml (fromStyleName -> s) =
let styleId = filter (not . isSpace) s
in mknode "w:style" [ ("w:type", "paragraph")
, ("w:customStyle", "1")
@ -620,8 +622,8 @@ newParaPropToOpenXml s =
, mknode "w:qFormat" [] ()
]
newTextPropToOpenXml :: String -> Element
newTextPropToOpenXml s =
newTextPropToOpenXml :: CharStyleName -> Element
newTextPropToOpenXml (fromStyleName -> s) =
let styleId = filter (not . isSpace) s
in mknode "w:style" [ ("w:type", "character")
, ("w:customStyle", "1")
@ -634,7 +636,7 @@ styleToOpenXml :: StyleMaps -> Style -> [Element]
styleToOpenXml sm style =
maybeToList parStyle ++ mapMaybe toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok
toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing
| otherwise = Just $
mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)]
@ -657,7 +659,7 @@ styleToOpenXml sm style =
tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< M.lookup toktype tokStyles)
`mplus` backgroundColor style
parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing
| otherwise = Just $
mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")]
@ -848,17 +850,17 @@ writeOpenXML opts (Pandoc meta blocks) = do
blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
pStyleM :: (PandocMonad m) => String -> WS m XML.Element
pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
pStyleM styleName = do
styleMaps <- gets stStyleMaps
let sty' = getStyleId styleName $ sParaStyleMap styleMaps
return $ mknode "w:pStyle" [("w:val",sty')] ()
pStyleMap <- gets (smParaStyle . stStyleMaps)
let sty' = getStyleIdFromName styleName pStyleMap
return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
rStyleM :: (PandocMonad m) => String -> WS m XML.Element
rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
rStyleM styleName = do
styleMaps <- gets stStyleMaps
let sty' = getStyleId styleName $ sCharStyleMap styleMaps
return $ mknode "w:rStyle" [("w:val",sty')] ()
cStyleMap <- gets (smCharStyle . stStyleMaps)
let sty' = getStyleIdFromName styleName cStyleMap
return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] ()
getUniqueId :: (PandocMonad m) => WS m String
-- the + 20 is to ensure that there are no clashes with the rIds
@ -880,7 +882,7 @@ blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
blockToOpenXML' _ Null = return []
blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do
stylemod <- case lookup dynamicStyleKey kvs of
Just sty -> do
Just (fromString -> sty) -> do
modify $ \s ->
s{stDynamicParaProps = Set.insert sty
(stDynamicParaProps s)}
@ -901,7 +903,7 @@ blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do
wrapBookmark ident $ header ++ contents
blockToOpenXML' opts (Header lev (ident,_,_) lst) = do
setFirstPara
paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
paraProps <- withParaPropM (pStyleM (fromString $ "Heading "++show lev)) $
getParaProps False
contents <- inlinesToOpenXML opts lst
if null ident
@ -916,8 +918,7 @@ blockToOpenXML' opts (Plain lst) = do
isInTable <- gets stInTable
let block = blockToOpenXML opts (Para lst)
prop <- pStyleM "Compact"
para <- if isInTable then withParaProp prop block else block
return $ para
if isInTable then withParaProp prop block else block
-- title beginning with fig: indicates that the image is a figure
blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
setFirstPara
@ -1087,9 +1088,7 @@ getTextProps :: (PandocMonad m) => WS m [Element]
getTextProps = do
props <- asks envTextProperties
let squashed = squashProps props
return $ if null squashed
then []
else [mknode "w:rPr" [] squashed]
return [mknode "w:rPr" [] squashed | (not . null) squashed]
withTextProp :: PandocMonad m => Element -> WS m a -> WS m a
withTextProp d p =
@ -1174,7 +1173,7 @@ inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
]
inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
stylemod <- case lookup dynamicStyleKey kvs of
Just sty -> do
Just (fromString -> sty) -> do
modify $ \s ->
s{stDynamicTextProps = Set.insert sty
(stDynamicTextProps s)}
@ -1259,7 +1258,7 @@ inlineToOpenXML' opts (Math mathType str) = do
inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML' opts (Code attrs str) = do
let alltoktypes = [KeywordTok ..]
tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (show tt)) alltoktypes
tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes
let unhighlighted = intercalate [br] `fmap`
mapM formattedString (lines str)
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)

View file

@ -0,0 +1,48 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{- |
Module : Text.Pandoc.Writers.Docx.StyleMap
Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
2014-2019 John MacFarlane <jgm@berkeley.edu>,
2015-2019 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
Stability : alpha
Portability : portable
Mappings of element styles (word to pandoc-internal).
-}
module Text.Pandoc.Writers.Docx.StyleMap ( StyleMaps(..)
, ParaStyleName
, CharStyleName
, getStyleMaps
, getStyleIdFromName
, hasStyleName
, fromStyleId
, fromStyleName
) where
import Text.Pandoc.Readers.Docx.Parse.Styles
import Codec.Archive.Zip
import qualified Data.Map as M
import Data.String
import Data.Char (isSpace)
import Prelude
data StyleMaps = StyleMaps { smCharStyle :: CharStyleNameMap, smParaStyle :: ParaStyleNameMap }
type ParaStyleNameMap = M.Map ParaStyleName ParStyle
type CharStyleNameMap = M.Map CharStyleName CharStyle
getStyleIdFromName :: (Ord sn, FromStyleName sn, IsString (StyleId sty), HasStyleId sty)
=> sn -> M.Map sn sty -> StyleId sty
getStyleIdFromName s = maybe (fallback s) getStyleId . M.lookup s
where fallback = fromString . filter (not . isSpace) . fromStyleName
hasStyleName :: (Ord sn, HasStyleId sty)
=> sn -> M.Map sn sty -> Bool
hasStyleName styleName = M.member styleName
getStyleMaps :: Archive -> StyleMaps
getStyleMaps = uncurry StyleMaps . archiveToStyles' getStyleName getStyleName