Started moving StyleMap out of writer code

This commit is contained in:
Nikolay Yakimov 2015-03-01 22:57:35 +03:00
parent 13daf3ed6a
commit 409111f647
5 changed files with 165 additions and 67 deletions

View file

@ -338,7 +338,9 @@ Library
Other-Modules: Text.Pandoc.Readers.Docx.Lists,
Text.Pandoc.Readers.Docx.Reducible,
Text.Pandoc.Readers.Docx.Parse,
Text.Pandoc.Readers.Docx.Fonts
Text.Pandoc.Readers.Docx.Fonts,
Text.Pandoc.Readers.Docx.Util,
Text.Pandoc.Readers.Docx.StyleMap
Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify,
Text.Pandoc.MIME,

View file

@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
@ -108,8 +109,6 @@ mapD f xs =
in
concatMapM handler xs
type NameSpaces = [(String, String)]
data Docx = Docx Document
deriving Show
@ -249,10 +248,6 @@ type ChangeId = String
type Author = String
type ChangeDate = String
attrToNSPair :: Attr -> Maybe (String, String)
attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
attrToNSPair _ = Nothing
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
let notes = archiveToNotes archive
@ -269,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
@ -288,7 +283,7 @@ archiveToStyles zf =
case stylesElem of
Nothing -> (M.empty, M.empty)
Just styElem ->
let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
let namespaces = elemToNameSpaces styElem
in
( M.fromList $ buildBasedOnList namespaces styElem
(Nothing :: Maybe CharStyle),
@ -356,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
Just e -> mapMaybe attrToNSPair (elAttribs e)
Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
Just e -> mapMaybe attrToNSPair (elAttribs e)
Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote")
@ -459,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
let namespaces = elemToNameSpaces numberingElem
numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem
@ -488,15 +483,6 @@ elemToNotes _ _ _ = Nothing
---------------------------------------------
---------------------------------------------
elemName :: NameSpaces -> String -> String -> QName
elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
isElem :: NameSpaces -> String -> String -> Element -> Bool
isElem ns prefix name element =
qName (elName element) == name &&
qURI (elName element) == (lookup prefix ns)
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element

View file

@ -0,0 +1,105 @@
module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap
, ParaStyleMap
, CharStyleMap
, StyleMaps(..)
, defaultStyleMaps
, getStyleMaps
, getStyleId
, hasStyleName
) where
import Text.XML.Light
import Text.Pandoc.Readers.Docx.Util
import Control.Monad.State
import Data.Char (toLower)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
newtype CharStyleMap = CharStyleMap ( M.Map String String )
class StyleMap a where
alterMap :: (M.Map String String -> M.Map String String) -> a -> a
getMap :: a -> M.Map String String
instance StyleMap ParaStyleMap where
alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
getMap (ParaStyleMap m) = m
instance StyleMap CharStyleMap where
alterMap f (CharStyleMap m) = CharStyleMap $ f m
getMap (CharStyleMap m) = m
insert :: (StyleMap a) => String -> String -> a -> a
insert k v = alterMap $ M.insert k v
getStyleId :: (StyleMap a) => String -> a -> String
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
hasStyleName :: (StyleMap a) => String -> a -> Bool
hasStyleName styleName = M.member (map toLower styleName) . getMap
data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
, sParaStyleMap :: ParaStyleMap
, sCharStyleMap :: CharStyleMap
}
data StyleType = ParaStyle | CharStyle
defaultStyleMaps :: StyleMaps
defaultStyleMaps = StyleMaps { sNameSpaces = []
, sParaStyleMap = ParaStyleMap M.empty
, sCharStyleMap = CharStyleMap M.empty
}
type StateM a = StateT StyleMaps Maybe a
getStyleMaps :: Element -> StyleMaps
getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state'
where
state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
insertPara key val = modify $ \s ->
s { sParaStyleMap = insert key val $ sParaStyleMap s }
insertChar key val = modify $ \s ->
s { sCharStyleMap = insert key val $ sCharStyleMap s }
genStyleItem e = do
styleType <- getStyleType e
nameVal <- getNameVal e
styleId <- getAttrStyleId e
let nameValLC = map toLower nameVal
case styleType of
ParaStyle -> insertPara nameValLC styleId
CharStyle -> insertChar nameValLC styleId
genStyleMap = do
style <- elemName' "style"
let styles = findChildren style docElem
forM_ styles genStyleItem
getStyleType :: Element -> StateM StyleType
getStyleType e = do
styleTypeStr <- getAttrType e
case styleTypeStr of
"paragraph" -> return ParaStyle
"character" -> return CharStyle
_ -> lift Nothing
getAttrType :: Element -> StateM String
getAttrType el = do
name <- elemName' "type"
lift $ findAttr name el
getAttrStyleId :: Element -> StateM String
getAttrStyleId el = do
name <- elemName' "styleId"
lift $ findAttr name el
getNameVal :: Element -> StateM String
getNameVal el = do
name <- elemName' "name"
val <- elemName' "val"
lift $ findChild name el >>= findAttr val
elemName' :: String -> StateM QName
elemName' name = do
namespaces <- gets sNameSpaces
return $ elemName namespaces "w" name

View file

@ -0,0 +1,26 @@
module Text.Pandoc.Readers.Docx.Util (
NameSpaces
, elemName
, isElem
, elemToNameSpaces
) where
import Text.XML.Light
import Data.Maybe (mapMaybe)
type NameSpaces = [(String, String)]
elemToNameSpaces :: Element -> NameSpaces
elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
attrToNSPair :: Attr -> Maybe (String, String)
attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
attrToNSPair _ = Nothing
elemName :: NameSpaces -> String -> String -> QName
elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
isElem :: NameSpaces -> String -> String -> Element -> Bool
isElem ns prefix name element =
qName (elName element) == name &&
qURI (elName element) == lookup prefix ns

View file

@ -54,6 +54,8 @@ import Text.Pandoc.Walk
import Text.Highlighting.Kate.Types ()
import Text.XML.Light as XML
import Text.TeXMath
import Text.Pandoc.Readers.Docx.StyleMap
import Text.Pandoc.Readers.Docx.Util (elemName)
import Control.Monad.State
import Text.Highlighting.Kate
import Data.Unique (hashUnique, newUnique)
@ -64,7 +66,6 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>))
import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
import Data.Char (toLower)
data ListMarker = NoMarker
| BulletMarker
@ -90,9 +91,6 @@ listMarkerToId (NumberMarker sty delim n) =
OneParen -> '2'
TwoParens -> '3'
newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show
newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show
data WriterState = WriterState{
stTextProperties :: [Element]
, stParaProperties :: [Element]
@ -109,8 +107,7 @@ data WriterState = WriterState{
, stChangesAuthor :: String
, stChangesDate :: String
, stPrintWidth :: Integer
, stParaStyles :: ParaStyleMap
, stCharStyles :: CharStyleMap
, stStyleMaps :: StyleMaps
, stFirstPara :: Bool
}
@ -131,8 +128,7 @@ defaultWriterState = WriterState{
, stChangesAuthor = "unknown"
, stChangesDate = "1969-12-31T19:00:00Z"
, stPrintWidth = 1
, stParaStyles = ParaStyleMap M.empty
, stCharStyles = CharStyleMap M.empty
, stStyleMaps = defaultStyleMaps
, stFirstPara = False
}
@ -220,28 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do
styledoc <- parseXml refArchive distArchive stylepath
-- parse styledoc for heading styles
let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) .
filter ((==Just "xmlns") . qPrefix . attrKey) .
elAttribs $ styledoc
mywURI = lookup "w" styleNamespaces
myName name = QName name mywURI (Just "w")
getAttrStyleId = findAttr (myName "styleId")
getAttrType = findAttr (myName "type")
isParaStyle = (Just "paragraph" ==) . getAttrType
isCharStyle = (Just "character" ==) . getAttrType
getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower
genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e
| otherwise = Nothing
genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc
paraStyles = ParaStyleMap $ genStyleMap isParaStyle
charStyles = CharStyleMap $ genStyleMap isCharStyle
let styleMaps = getStyleMaps styledoc
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
, stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
, stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
, stParaStyles = paraStyles
, stCharStyles = charStyles}
, stStyleMaps = styleMaps
}
let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st
@ -394,7 +376,7 @@ writeDocx opts doc@(Pandoc meta _) = do
linkrels
-- styles
let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts
let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts
let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) }
where
modifyContent
@ -402,9 +384,10 @@ writeDocx opts doc@(Pandoc meta _) = do
| otherwise = filter notTokStyle
notTokStyle (Elem el) = notStyle el || notTokId el
notTokStyle _ = True
notStyle = (/= myName "style") . elName
notTokId = maybe True (`notElem` tokStys) . getAttrStyleId
notStyle = (/= elemName' "style") . elName
notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId")
tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok)
elemName' = elemName (sNameSpaces styleMaps) "w"
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
@ -481,12 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do
miscRelEntries ++ otherMediaEntries
return $ fromArchive archive
styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element]
styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style =
styleToOpenXml :: StyleMaps -> Style -> [Element]
styleToOpenXml sm style =
maybeToList parStyle ++ mapMaybe toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok
styleExists m styleName = M.member (map toLower styleName) m
toStyle toktype | styleExists csm $ show toktype = Nothing
toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
| otherwise = Just $
mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)]
@ -509,7 +491,7 @@ styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style =
tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< lookup toktype tokStyles)
`mplus` backgroundColor style
parStyle | styleExists psm "Source Code" = Nothing
parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
| otherwise = Just $
mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")]
@ -638,30 +620,27 @@ writeOpenXML opts (Pandoc meta blocks) = do
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
getStyleId :: String -> M.Map String String -> String
getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s)
pStyle :: String -> ParaStyleMap -> Element
pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] ()
pStyle :: String -> StyleMaps -> Element
pStyle sty m = mknode "w:pStyle" [("w:val",sty')] ()
where
sty' = getStyleId sty m
sty' = getStyleId sty $ sParaStyleMap m
pCustomStyle :: String -> Element
pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
pStyleM :: String -> WS XML.Element
pStyleM = flip fmap (gets stParaStyles) . pStyle
pStyleM = (`fmap` gets stStyleMaps) . pStyle
rStyle :: String -> CharStyleMap -> Element
rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] ()
rStyle :: String -> StyleMaps -> Element
rStyle sty m = mknode "w:rStyle" [("w:val",sty')] ()
where
sty' = getStyleId sty m
sty' = getStyleId sty $ sCharStyleMap m
rCustomStyle :: String -> Element
rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
rStyleM :: String -> WS XML.Element
rStyleM = flip fmap (gets stCharStyles) . rStyle
rStyleM = (`fmap` gets stStyleMaps) . rStyle
getUniqueId :: MonadIO m => m String
-- the + 20 is to ensure that there are no clashes with the rIds
@ -710,10 +689,10 @@ blockToOpenXML opts (Para lst) = do
paraProps <- getParaProps $ case lst of
[Math DisplayMath _] -> True
_ -> False
pSM <- gets stParaStyles
sm <- gets stStyleMaps
let paraProps' = case paraProps of
[] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
[] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]]
[] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]]
ps -> ps
modify $ \s -> s { stFirstPara = False }
contents <- inlinesToOpenXML opts lst