Started moving StyleMap out of writer code

This commit is contained in:
Nikolay Yakimov 2015-03-01 22:57:35 +03:00
parent 13daf3ed6a
commit 409111f647
5 changed files with 165 additions and 67 deletions

View file

@ -338,7 +338,9 @@ Library
Other-Modules: Text.Pandoc.Readers.Docx.Lists, Other-Modules: Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Reducible,
Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Readers.Docx.Parse,
Text.Pandoc.Readers.Docx.Fonts Text.Pandoc.Readers.Docx.Fonts,
Text.Pandoc.Readers.Docx.Util,
Text.Pandoc.Readers.Docx.StyleMap
Text.Pandoc.Writers.Shared, Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify, Text.Pandoc.Asciify,
Text.Pandoc.MIME, Text.Pandoc.MIME,

View file

@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp) import Text.TeXMath (Exp)
import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit) import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes data ReaderEnv = ReaderEnv { envNotes :: Notes
@ -108,8 +109,6 @@ mapD f xs =
in in
concatMapM handler xs concatMapM handler xs
type NameSpaces = [(String, String)]
data Docx = Docx Document data Docx = Docx Document
deriving Show deriving Show
@ -249,10 +248,6 @@ type ChangeId = String
type Author = String type Author = String
type ChangeDate = String type ChangeDate = String
attrToNSPair :: Attr -> Maybe (String, String)
attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
attrToNSPair _ = Nothing
archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do archiveToDocx archive = do
let notes = archiveToNotes archive let notes = archiveToNotes archive
@ -269,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = mapMaybe attrToNSPair (elAttribs docElem) let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem body <- elemToBody namespaces bodyElem
return $ Document namespaces body return $ Document namespaces body
@ -288,7 +283,7 @@ archiveToStyles zf =
case stylesElem of case stylesElem of
Nothing -> (M.empty, M.empty) Nothing -> (M.empty, M.empty)
Just styElem -> Just styElem ->
let namespaces = mapMaybe attrToNSPair (elAttribs styElem) let namespaces = elemToNameSpaces styElem
in in
( M.fromList $ buildBasedOnList namespaces styElem ( M.fromList $ buildBasedOnList namespaces styElem
(Nothing :: Maybe CharStyle), (Nothing :: Maybe CharStyle),
@ -356,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of fn_namespaces = case fnElem of
Just e -> mapMaybe attrToNSPair (elAttribs e) Just e -> elemToNameSpaces e
Nothing -> [] Nothing -> []
en_namespaces = case enElem of en_namespaces = case enElem of
Just e -> mapMaybe attrToNSPair (elAttribs e) Just e -> elemToNameSpaces e
Nothing -> [] Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote") fn = fnElem >>= (elemToNotes ns "footnote")
@ -459,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] [] Nothing -> Just $ Numbering [] [] []
Just entry -> do Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) let namespaces = elemToNameSpaces numberingElem
numElems = findChildren numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w")) (QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem numberingElem
@ -488,15 +483,6 @@ elemToNotes _ _ _ = Nothing
--------------------------------------------- ---------------------------------------------
--------------------------------------------- ---------------------------------------------
elemName :: NameSpaces -> String -> String -> QName
elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
isElem :: NameSpaces -> String -> String -> Element -> Bool
isElem ns prefix name element =
qName (elName element) == name &&
qURI (elName element) == (lookup prefix ns)
elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element = elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element let cols = findChildren (elemName ns "w" "gridCol") element

View file

@ -0,0 +1,105 @@
module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap
, ParaStyleMap
, CharStyleMap
, StyleMaps(..)
, defaultStyleMaps
, getStyleMaps
, getStyleId
, hasStyleName
) where
import Text.XML.Light
import Text.Pandoc.Readers.Docx.Util
import Control.Monad.State
import Data.Char (toLower)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
newtype CharStyleMap = CharStyleMap ( M.Map String String )
class StyleMap a where
alterMap :: (M.Map String String -> M.Map String String) -> a -> a
getMap :: a -> M.Map String String
instance StyleMap ParaStyleMap where
alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
getMap (ParaStyleMap m) = m
instance StyleMap CharStyleMap where
alterMap f (CharStyleMap m) = CharStyleMap $ f m
getMap (CharStyleMap m) = m
insert :: (StyleMap a) => String -> String -> a -> a
insert k v = alterMap $ M.insert k v
getStyleId :: (StyleMap a) => String -> a -> String
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
hasStyleName :: (StyleMap a) => String -> a -> Bool
hasStyleName styleName = M.member (map toLower styleName) . getMap
data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
, sParaStyleMap :: ParaStyleMap
, sCharStyleMap :: CharStyleMap
}
data StyleType = ParaStyle | CharStyle
defaultStyleMaps :: StyleMaps
defaultStyleMaps = StyleMaps { sNameSpaces = []
, sParaStyleMap = ParaStyleMap M.empty
, sCharStyleMap = CharStyleMap M.empty
}
type StateM a = StateT StyleMaps Maybe a
getStyleMaps :: Element -> StyleMaps
getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state'
where
state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
insertPara key val = modify $ \s ->
s { sParaStyleMap = insert key val $ sParaStyleMap s }
insertChar key val = modify $ \s ->
s { sCharStyleMap = insert key val $ sCharStyleMap s }
genStyleItem e = do
styleType <- getStyleType e
nameVal <- getNameVal e
styleId <- getAttrStyleId e
let nameValLC = map toLower nameVal
case styleType of
ParaStyle -> insertPara nameValLC styleId
CharStyle -> insertChar nameValLC styleId
genStyleMap = do
style <- elemName' "style"
let styles = findChildren style docElem
forM_ styles genStyleItem
getStyleType :: Element -> StateM StyleType
getStyleType e = do
styleTypeStr <- getAttrType e
case styleTypeStr of
"paragraph" -> return ParaStyle
"character" -> return CharStyle
_ -> lift Nothing
getAttrType :: Element -> StateM String
getAttrType el = do
name <- elemName' "type"
lift $ findAttr name el
getAttrStyleId :: Element -> StateM String
getAttrStyleId el = do
name <- elemName' "styleId"
lift $ findAttr name el
getNameVal :: Element -> StateM String
getNameVal el = do
name <- elemName' "name"
val <- elemName' "val"
lift $ findChild name el >>= findAttr val
elemName' :: String -> StateM QName
elemName' name = do
namespaces <- gets sNameSpaces
return $ elemName namespaces "w" name

View file

@ -0,0 +1,26 @@
module Text.Pandoc.Readers.Docx.Util (
NameSpaces
, elemName
, isElem
, elemToNameSpaces
) where
import Text.XML.Light
import Data.Maybe (mapMaybe)
type NameSpaces = [(String, String)]
elemToNameSpaces :: Element -> NameSpaces
elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
attrToNSPair :: Attr -> Maybe (String, String)
attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
attrToNSPair _ = Nothing
elemName :: NameSpaces -> String -> String -> QName
elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
isElem :: NameSpaces -> String -> String -> Element -> Bool
isElem ns prefix name element =
qName (elName element) == name &&
qURI (elName element) == lookup prefix ns

View file

@ -54,6 +54,8 @@ import Text.Pandoc.Walk
import Text.Highlighting.Kate.Types () import Text.Highlighting.Kate.Types ()
import Text.XML.Light as XML import Text.XML.Light as XML
import Text.TeXMath import Text.TeXMath
import Text.Pandoc.Readers.Docx.StyleMap
import Text.Pandoc.Readers.Docx.Util (elemName)
import Control.Monad.State import Control.Monad.State
import Text.Highlighting.Kate import Text.Highlighting.Kate
import Data.Unique (hashUnique, newUnique) import Data.Unique (hashUnique, newUnique)
@ -64,7 +66,6 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType) extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>)) import Control.Applicative ((<$>), (<|>), (<*>))
import Data.Maybe (fromMaybe, mapMaybe, maybeToList) import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
import Data.Char (toLower)
data ListMarker = NoMarker data ListMarker = NoMarker
| BulletMarker | BulletMarker
@ -90,9 +91,6 @@ listMarkerToId (NumberMarker sty delim n) =
OneParen -> '2' OneParen -> '2'
TwoParens -> '3' TwoParens -> '3'
newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show
newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show
data WriterState = WriterState{ data WriterState = WriterState{
stTextProperties :: [Element] stTextProperties :: [Element]
, stParaProperties :: [Element] , stParaProperties :: [Element]
@ -109,8 +107,7 @@ data WriterState = WriterState{
, stChangesAuthor :: String , stChangesAuthor :: String
, stChangesDate :: String , stChangesDate :: String
, stPrintWidth :: Integer , stPrintWidth :: Integer
, stParaStyles :: ParaStyleMap , stStyleMaps :: StyleMaps
, stCharStyles :: CharStyleMap
, stFirstPara :: Bool , stFirstPara :: Bool
} }
@ -131,8 +128,7 @@ defaultWriterState = WriterState{
, stChangesAuthor = "unknown" , stChangesAuthor = "unknown"
, stChangesDate = "1969-12-31T19:00:00Z" , stChangesDate = "1969-12-31T19:00:00Z"
, stPrintWidth = 1 , stPrintWidth = 1
, stParaStyles = ParaStyleMap M.empty , stStyleMaps = defaultStyleMaps
, stCharStyles = CharStyleMap M.empty
, stFirstPara = False , stFirstPara = False
} }
@ -220,28 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do
styledoc <- parseXml refArchive distArchive stylepath styledoc <- parseXml refArchive distArchive stylepath
-- parse styledoc for heading styles -- parse styledoc for heading styles
let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . let styleMaps = getStyleMaps styledoc
filter ((==Just "xmlns") . qPrefix . attrKey) .
elAttribs $ styledoc
mywURI = lookup "w" styleNamespaces
myName name = QName name mywURI (Just "w")
getAttrStyleId = findAttr (myName "styleId")
getAttrType = findAttr (myName "type")
isParaStyle = (Just "paragraph" ==) . getAttrType
isCharStyle = (Just "character" ==) . getAttrType
getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower
genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e
| otherwise = Nothing
genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc
paraStyles = ParaStyleMap $ genStyleMap isParaStyle
charStyles = CharStyleMap $ genStyleMap isCharStyle
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
, stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
, stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
, stParaStyles = paraStyles , stStyleMaps = styleMaps
, stCharStyles = charStyles} }
let epochtime = floor $ utcTimeToPOSIXSeconds utctime let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st let imgs = M.elems $ stImages st
@ -394,7 +376,7 @@ writeDocx opts doc@(Pandoc meta _) = do
linkrels linkrels
-- styles -- styles
let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts
let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) }
where where
modifyContent modifyContent
@ -402,9 +384,10 @@ writeDocx opts doc@(Pandoc meta _) = do
| otherwise = filter notTokStyle | otherwise = filter notTokStyle
notTokStyle (Elem el) = notStyle el || notTokId el notTokStyle (Elem el) = notStyle el || notTokId el
notTokStyle _ = True notTokStyle _ = True
notStyle = (/= myName "style") . elName notStyle = (/= elemName' "style") . elName
notTokId = maybe True (`notElem` tokStys) . getAttrStyleId notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId")
tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok)
elemName' = elemName (sNameSpaces styleMaps) "w"
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml -- construct word/numbering.xml
@ -481,12 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do
miscRelEntries ++ otherMediaEntries miscRelEntries ++ otherMediaEntries
return $ fromArchive archive return $ fromArchive archive
styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element] styleToOpenXml :: StyleMaps -> Style -> [Element]
styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = styleToOpenXml sm style =
maybeToList parStyle ++ mapMaybe toStyle alltoktypes maybeToList parStyle ++ mapMaybe toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok where alltoktypes = enumFromTo KeywordTok NormalTok
styleExists m styleName = M.member (map toLower styleName) m toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
toStyle toktype | styleExists csm $ show toktype = Nothing
| otherwise = Just $ | otherwise = Just $
mknode "w:style" [("w:type","character"), mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)] ("w:customStyle","1"),("w:styleId",show toktype)]
@ -509,7 +491,7 @@ styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style =
tokBg toktype = maybe "auto" (drop 1 . fromColor) tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< lookup toktype tokStyles) $ (tokenBackground =<< lookup toktype tokStyles)
`mplus` backgroundColor style `mplus` backgroundColor style
parStyle | styleExists psm "Source Code" = Nothing parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
| otherwise = Just $ | otherwise = Just $
mknode "w:style" [("w:type","paragraph"), mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")] ("w:customStyle","1"),("w:styleId","SourceCode")]
@ -638,30 +620,27 @@ writeOpenXML opts (Pandoc meta blocks) = do
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
getStyleId :: String -> M.Map String String -> String pStyle :: String -> StyleMaps -> Element
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) pStyle sty m = mknode "w:pStyle" [("w:val",sty')] ()
pStyle :: String -> ParaStyleMap -> Element
pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] ()
where where
sty' = getStyleId sty m sty' = getStyleId sty $ sParaStyleMap m
pCustomStyle :: String -> Element pCustomStyle :: String -> Element
pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
pStyleM :: String -> WS XML.Element pStyleM :: String -> WS XML.Element
pStyleM = flip fmap (gets stParaStyles) . pStyle pStyleM = (`fmap` gets stStyleMaps) . pStyle
rStyle :: String -> CharStyleMap -> Element rStyle :: String -> StyleMaps -> Element
rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () rStyle sty m = mknode "w:rStyle" [("w:val",sty')] ()
where where
sty' = getStyleId sty m sty' = getStyleId sty $ sCharStyleMap m
rCustomStyle :: String -> Element rCustomStyle :: String -> Element
rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
rStyleM :: String -> WS XML.Element rStyleM :: String -> WS XML.Element
rStyleM = flip fmap (gets stCharStyles) . rStyle rStyleM = (`fmap` gets stStyleMaps) . rStyle
getUniqueId :: MonadIO m => m String getUniqueId :: MonadIO m => m String
-- the + 20 is to ensure that there are no clashes with the rIds -- the + 20 is to ensure that there are no clashes with the rIds
@ -710,10 +689,10 @@ blockToOpenXML opts (Para lst) = do
paraProps <- getParaProps $ case lst of paraProps <- getParaProps $ case lst of
[Math DisplayMath _] -> True [Math DisplayMath _] -> True
_ -> False _ -> False
pSM <- gets stParaStyles sm <- gets stStyleMaps
let paraProps' = case paraProps of let paraProps' = case paraProps of
[] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
[] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]] [] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]]
ps -> ps ps -> ps
modify $ \s -> s { stFirstPara = False } modify $ \s -> s { stFirstPara = False }
contents <- inlinesToOpenXML opts lst contents <- inlinesToOpenXML opts lst