From 5cdd11725c2db417f7f93d09fdb7ead90d1700a6 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sat, 21 Feb 2015 22:20:18 +0300 Subject: [PATCH 01/16] Initial stab at more involved fix for #1607 This patch attempts to build a style name -> style id mapping based on styles.xml from reference doc, and changes pStyle and rStyle to accept style name as a parameter instead of styleId. There is a fallback mechanic that removes spaces from style name and returns it as style id, but it likely won't help much. Style names are matched lower-case, since headings and `footnote text` have lowercase names. --- src/Text/Pandoc/Writers/Docx.hs | 150 ++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 64 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 441392918..437422451 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -64,7 +64,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe) -import Data.Char (isDigit) +import Data.Char (toLower) data ListMarker = NoMarker | BulletMarker @@ -90,6 +90,9 @@ listMarkerToId (NumberMarker sty delim n) = OneParen -> '2' TwoParens -> '3' +newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show +newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show + data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -106,7 +109,8 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stHeadingStyles :: [(Int,String)] + , stParaStyles :: ParaStyleMap + , stCharStyles :: CharStyleMap , stFirstPara :: Bool } @@ -127,7 +131,8 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stHeadingStyles = [] + , stParaStyles = ParaStyleMap M.empty + , stCharStyles = CharStyleMap M.empty , stFirstPara = False } @@ -218,29 +223,25 @@ writeDocx opts doc@(Pandoc meta _) = do let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . filter ((==Just "xmlns") . qPrefix . attrKey) . elAttribs $ styledoc - let headingStyles = - let - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getNameVal = findChild (myName "name") >=> findAttr (myName "val") - getNum s | not $ null s, all isDigit s = Just (read s :: Int) - | otherwise = Nothing - getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum - getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum - toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId - toMap getF = mapMaybe (toTuple getF) $ - findChildren (myName "style") styledoc - select a b | not $ null a = a - | otherwise = b - in - select (toMap getEngHeader) (toMap getIntHeader) + mywURI = lookup "w" styleNamespaces + myName name = QName name mywURI (Just "w") + getAttrStyleId = findAttr (myName "styleId") + getAttrType = findAttr (myName "type") + isParaStyle = (Just "paragraph" ==) . getAttrType + isCharStyle = (Just "character" ==) . getAttrType + getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower + genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e + | otherwise = Nothing + genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc + paraStyles = ParaStyleMap $ genStyleMap isParaStyle + charStyles = CharStyleMap $ genStyleMap isCharStyle ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stHeadingStyles = headingStyles} + , stParaStyles = paraStyles + , stCharStyles = charStyles} let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -602,14 +603,14 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs @@ -623,11 +624,24 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +getStyleId :: String -> M.Map String String -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +pStyle :: String -> ParaStyleMap -> Element +pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () + where + sty' = getStyleId sty m + +pStyleM :: String -> WS XML.Element +pStyleM = flip fmap (gets stParaStyles) . pStyle + +rStyle :: String -> CharStyleMap -> Element +rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () + where + sty' = getStyleId sty m + +rStyleM :: String -> WS XML.Element +rStyleM = flip fmap (gets stCharStyles) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -641,13 +655,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do setFirstPara - headingStyles <- gets stHeadingStyles - paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds @@ -660,26 +673,27 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaPropM (pStyleM "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do setFirstPara paraProps <- getParaProps False contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaPropM (pStyleM "Image Caption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - isFirstPara <- gets stFirstPara + isFirstPara <- gets stFirstPara paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False + pSM <- gets stParaStyles let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]] - [] -> [mknode "w:pPr" [] [(pStyle "BodyText")]] + [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "First Paragraph" pSM)]] + [] -> [mknode "w:pPr" [] [(pStyle "Body Text" pSM)]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst @@ -688,11 +702,11 @@ blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = do - p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks + p <- withParaPropM (pStyleM "Block Quote") $ blocksToOpenXML opts blocks setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do - p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara return p blockToOpenXML _ HorizontalRule = do @@ -707,7 +721,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaPropM (pStyleM "Table Caption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -767,9 +781,9 @@ blockToOpenXML opts (DefinitionList items) = do definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaPropM (pStyleM "Definition Term") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaPropM (pStyleM "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -833,6 +847,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -861,6 +878,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -943,25 +963,27 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + rSM <- gets stCharStyles + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rStyle (show toktype) rSM ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rStyle "Verbatim Char" rSM) + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + rSM <- gets stCharStyles let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -971,22 +993,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -1088,7 +1110,7 @@ defaultFootnotes = [ mknode "w:footnote" [ mknode "w:p" [] $ [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] - + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = case ((findEntryByPath relpath refArchive `mplus` From 80715ecd7a39288aef501b3550b45cb2f121df10 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 22 Feb 2015 00:19:58 +0300 Subject: [PATCH 02/16] Prototype fix for #1872 --- src/Text/Pandoc/Writers/Docx.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 437422451..a240997ab 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -394,7 +394,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts + let newstyles = styleToOpenXml charStyles $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -473,10 +473,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: CharStyleMap -> Style -> [Element] +styleToOpenXml (CharStyleMap m) style = parStyle : mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype = + if M.member (map toLower $ show toktype) m then Nothing + else Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () From 8b3acde9deaeb30ba75299001ea1b15345983f3c Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 22 Feb 2015 23:25:12 +0300 Subject: [PATCH 03/16] If --no-highlight is set, remove *Tok styles. --- src/Text/Pandoc/Writers/Docx.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a240997ab..64da9a497 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -395,8 +395,16 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml charStyles $ writerHighlightStyle opts - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= myName "style") . elName + notTokId = maybe True (`notElem` tokStys) . getAttrStyleId + tokStys = map show $ enumFromTo KeywordTok NormalTok let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml From 7ae7f0c051b83e41c8bb4c0f15a2b57f76cd6298 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 23 Feb 2015 01:53:47 +0300 Subject: [PATCH 04/16] Also skip SourceCode style if exists --- src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 64da9a497..ba8a28de0 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -63,7 +63,7 @@ import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) -import Data.Maybe (fromMaybe, mapMaybe) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) import Data.Char (toLower) data ListMarker = NoMarker @@ -394,7 +394,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml charStyles $ writerHighlightStyle opts + let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } where modifyContent @@ -404,7 +404,7 @@ writeDocx opts doc@(Pandoc meta _) = do notTokStyle _ = True notStyle = (/= myName "style") . elName notTokId = maybe True (`notElem` tokStys) . getAttrStyleId - tokStys = map show $ enumFromTo KeywordTok NormalTok + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -481,12 +481,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: CharStyleMap -> Style -> [Element] -styleToOpenXml (CharStyleMap m) style = parStyle : mapMaybe toStyle alltoktypes +styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element] +styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = - if M.member (map toLower $ show toktype) m then Nothing - else Just $ + styleExists m styleName = M.member (map toLower styleName) m + toStyle toktype | styleExists csm $ show toktype = Nothing + | otherwise = Just $ mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () @@ -508,7 +509,9 @@ styleToOpenXml (CharStyleMap m) style = parStyle : mapMaybe toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | styleExists psm "Source Code" = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () From 47c70b91313dd5e907efd34d5a26d908b625c476 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 23 Feb 2015 02:05:32 +0300 Subject: [PATCH 05/16] Do not lookup custom styles --- src/Text/Pandoc/Writers/Docx.hs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ba8a28de0..f217dd9bc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -646,6 +646,9 @@ pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () where sty' = getStyleId sty m +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () + pStyleM :: String -> WS XML.Element pStyleM = flip fmap (gets stParaStyles) . pStyle @@ -654,6 +657,9 @@ rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () where sty' = getStyleId sty m +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () + rStyleM :: String -> WS XML.Element rStyleM = flip fmap (gets stCharStyles) . rStyle @@ -720,7 +726,7 @@ blockToOpenXML opts (BlockQuote blocks) = do setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do - p <- withParaPropM (pStyleM "Source Code") (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara return p blockToOpenXML _ HorizontalRule = do @@ -978,15 +984,14 @@ inlineToOpenXML opts (Math mathType str) = do Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML opts (Code attrs str) = do - rSM <- gets stCharStyles let unhighlighted = intercalate [br] `fmap` (mapM formattedString $ lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] - [ rStyle (show toktype) rSM ] + [ rCustomStyle (show toktype) ] , mknode "w:t" [("xml:space","preserve")] tok ] - withTextProp (rStyle "Verbatim Char" rSM) + withTextProp (rCustomStyle "VerbatimChar") $ if writerHighlight opts then case highlight formatOpenXML attrs str of Nothing -> unhighlighted @@ -995,9 +1000,8 @@ inlineToOpenXML opts (Code attrs str) = do inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId - rSM <- gets stCharStyles let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) + [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -1014,15 +1018,15 @@ inlineToOpenXML opts (Note bs) = do let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "Footnote Ref" rSM) + [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt + contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextPropM (rStyleM "Link") $ inlinesToOpenXML opts txt + contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i From 908a47e4b9c9ad5cc1fd2f5e551ef0fd98d93178 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 24 Feb 2015 02:31:14 +0300 Subject: [PATCH 06/16] Treat some ambiguous styles as custom for now * Author * Abstract * Compact * ImageCaption * TableCaption * DefinitionTerm * Definition * FirstParagraph --- docxstyles.txt | 44 +++++++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++-------- 2 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 docxstyles.txt diff --git a/docxstyles.txt b/docxstyles.txt new file mode 100644 index 000000000..6bc405d15 --- /dev/null +++ b/docxstyles.txt @@ -0,0 +1,44 @@ +| Name | Id | custom | word | type | alt | +|:-----------------------|:---------------------|:------:|:----:|:----:|:---------------------------------| +| Title | Title | | + | p | | +| Subtitle | Subtitle | | + | p | | +| Author | Author | ? | | p | ? | +| Date | Date | | + | p | | +| Abstract | Abstract | ? | | p | ? | +| Bibliography | Bibliography | | + | p | | +| Heading 1 | Heading1 | | + | p | | +| Heading 2 | Heading2 | | + | p | | +| Heading 3 | Heading3 | | + | p | | +| Heading 4 | Heading4 | | + | p | | +| Heading 5 | Heading5 | | + | p | | +| Compact | Compact | ? | | p | ? | +| Image Caption | ImageCaption | | | p | caption | +| First Paragraph | FirstParagraph | | ??? | p | | +| Body Text | BodyText | | + | p | | +| Block Quote | BlockQuote | | | p | Intense Quote, Block Text, Quote | +| Source Code | SourceCode | + | | p | | +| Table Caption | TableCaption | | | p | caption | +| Definition Term | DefinitionTerm | ? | | p | ? | +| Definition | Definition | ? | | p | ? | +| Verbatim Char | VerbatimChar | + | | c | | +| Footnote Ref | FootnoteRef | + | | c | footnote reference | +| Footnote Text | FootnoteText | | + | p | | +| Link | Link | + | | c | Hyperlink | +| Normal | Normal | | d | p | | +| Default Paragraph Font | DefaultParagraphFont | | d | c | | +| Normal Table | TableNormal | | d | t | | +| Body Text Char | BodyTextChar | + | | c | | + + +| Name | ambiguous | type | can be replaced by | +|:----------------|:---------:|:----:|:---------------------------------| +| Author | yes | p | ? | +| Abstract | yes | p | ? | +| Compact | yes | p | ? | +| Image Caption | yes | p | caption | +| Block Quote | yes | p | Intense Quote, Block Text, Quote | +| Table Caption | yes | p | caption | +| Definition Term | yes | p | ? | +| Definition | yes | p | ? | +| Link | no | c | Hyperlink | +| Footnote Ref | no | c | footnote reference | diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index f217dd9bc..9984c243f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -619,12 +619,12 @@ writeOpenXML opts (Pandoc meta blocks) = do _ -> [] title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaPropM (pStyleM "Author") $ blocksToOpenXML opts $ + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs @@ -693,14 +693,14 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaPropM (pStyleM "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do setFirstPara paraProps <- getParaProps False contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaPropM (pStyleM "Image Caption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact @@ -712,8 +712,8 @@ blockToOpenXML opts (Para lst) = do _ -> False pSM <- gets stParaStyles let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "First Paragraph" pSM)]] - [] -> [mknode "w:pPr" [] [(pStyle "Body Text" pSM)]] + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst @@ -741,7 +741,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaPropM (pStyleM "Table Caption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -752,8 +752,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents @@ -801,9 +800,9 @@ blockToOpenXML opts (DefinitionList items) = do definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaPropM (pStyleM "Definition Term") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaPropM (pStyleM "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' From ba153585db4b124ed86245c04e6275c6ed0c4049 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 24 Feb 2015 02:34:46 +0300 Subject: [PATCH 07/16] Comment out unused functions to make CI happy --- src/Text/Pandoc/Writers/Docx.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 9984c243f..ebd060d38 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -652,16 +652,16 @@ pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element pStyleM = flip fmap (gets stParaStyles) . pStyle -rStyle :: String -> CharStyleMap -> Element -rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () - where - sty' = getStyleId sty m +-- rStyle :: String -> CharStyleMap -> Element +-- rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () +-- where +-- sty' = getStyleId sty m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyleM :: String -> WS XML.Element -rStyleM = flip fmap (gets stCharStyles) . rStyle +-- rStyleM :: String -> WS XML.Element +-- rStyleM = flip fmap (gets stCharStyles) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -866,8 +866,8 @@ withTextProp d p = do popTextProp return res -withTextPropM :: WS Element -> WS a -> WS a -withTextPropM = (. flip withTextProp) . (>>=) +-- withTextPropM :: WS Element -> WS a -> WS a +-- withTextPropM = (. flip withTextProp) . (>>=) getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do From 542e19f6ce022b0df4fdd9a5ed7e6538db887949 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 24 Feb 2015 02:35:00 +0300 Subject: [PATCH 08/16] Remove accidentally added file --- docxstyles.txt | 44 -------------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 docxstyles.txt diff --git a/docxstyles.txt b/docxstyles.txt deleted file mode 100644 index 6bc405d15..000000000 --- a/docxstyles.txt +++ /dev/null @@ -1,44 +0,0 @@ -| Name | Id | custom | word | type | alt | -|:-----------------------|:---------------------|:------:|:----:|:----:|:---------------------------------| -| Title | Title | | + | p | | -| Subtitle | Subtitle | | + | p | | -| Author | Author | ? | | p | ? | -| Date | Date | | + | p | | -| Abstract | Abstract | ? | | p | ? | -| Bibliography | Bibliography | | + | p | | -| Heading 1 | Heading1 | | + | p | | -| Heading 2 | Heading2 | | + | p | | -| Heading 3 | Heading3 | | + | p | | -| Heading 4 | Heading4 | | + | p | | -| Heading 5 | Heading5 | | + | p | | -| Compact | Compact | ? | | p | ? | -| Image Caption | ImageCaption | | | p | caption | -| First Paragraph | FirstParagraph | | ??? | p | | -| Body Text | BodyText | | + | p | | -| Block Quote | BlockQuote | | | p | Intense Quote, Block Text, Quote | -| Source Code | SourceCode | + | | p | | -| Table Caption | TableCaption | | | p | caption | -| Definition Term | DefinitionTerm | ? | | p | ? | -| Definition | Definition | ? | | p | ? | -| Verbatim Char | VerbatimChar | + | | c | | -| Footnote Ref | FootnoteRef | + | | c | footnote reference | -| Footnote Text | FootnoteText | | + | p | | -| Link | Link | + | | c | Hyperlink | -| Normal | Normal | | d | p | | -| Default Paragraph Font | DefaultParagraphFont | | d | c | | -| Normal Table | TableNormal | | d | t | | -| Body Text Char | BodyTextChar | + | | c | | - - -| Name | ambiguous | type | can be replaced by | -|:----------------|:---------:|:----:|:---------------------------------| -| Author | yes | p | ? | -| Abstract | yes | p | ? | -| Compact | yes | p | ? | -| Image Caption | yes | p | caption | -| Block Quote | yes | p | Intense Quote, Block Text, Quote | -| Table Caption | yes | p | caption | -| Definition Term | yes | p | ? | -| Definition | yes | p | ? | -| Link | no | c | Hyperlink | -| Footnote Ref | no | c | footnote reference | From 1cb601d2885df09fe07533006c06d8a603c3020d Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 1 Mar 2015 18:15:39 +0300 Subject: [PATCH 09/16] Update reference.docx Following styles are set as custom: * Author * Abstract * Compact * Image Caption * Table Caption * Definition Term * Definition * First Paragraph Following styles are renamed to correspond with Word Normal.dotm * Block Quote -> Block Text * Link -> Hyperlink * Footnote Ref -> Footnote Reference Following styles added: * Caption Following styles' inheritance changed: * Image Caption <- Caption * Table Caption <- Caption --- data/reference.docx | Bin 8474 -> 29139 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/reference.docx b/data/reference.docx index c2e5075b11253700e3bcdf65e739f7dfd4c3f00b..d0025ebe90c72b03300c624be30c1c0584797f4a 100644 GIT binary patch literal 29139 zcmeG_33yXg)?rhmsK9^=!Vm(oC~cZHU1^J5DbPY8X<0^KUXs`3(U-jN-b)w7QHmgo z!`K4quONybGe06Ci-Mve2#kOtA}XSSASmzy4h|rQ{O8`cCokz@k#PjRK;FIQo_o)7 z&pCHFc|}87wCrdw7+M<+EZdl~?#-K%$2Bt;?!3-m_=BN~;ZNC&k0*Wnczczf^S10_RHuq^Vv4}rKSU~$1dLbYD~vTQL~2Kvv`zaZ{hu$ zKbp6swkUaBqP4cTIwSc8OYJise%9^#zjSy#F{<^R`yZHHvVNiI%YKhmvK{g-UcZy$ zEd!%JIyJg>)4lzy_g+4*obTOg=LgkiYUX_YZrh?S7B`-0#UU= zk}@po`A*L46CG|J(A;1^pC}O3c$TC&a}4?yizsXBEF1a=BKo!kh{y$UP8$F+G?tSr zM=`#1lO-nBWF&n~#zpzu>84_PUUaf)Kt`Li5hP9U0K$1FKWBtCJ}%wl;dy_m*~~dT zq?h1g7(eNQno@@K5?A zVtI<>lrh0@ilCS?6MlchA;s5!Na49}_L4l|B6z~=WLPrtfDnW{BC|#jGLQ}m=HeWf zi*Fydbclk<{~_pLd(goUK4VRc14Ds6LB?W1ZQ;<(v2CZG<7VDw$f0%s;b2G*08mQF8qt&6Pr~{T}vEYvPQ3U?r7ik?1ozZ7kA`87TdG+7g61JnKo~{^z8g$R{GQYwflyz8(zE1 z*^Qm?!oEKD=gthkuWG$}?C>+A_Qniew!S#2 zk1&;HO#;p(@*)m^J@?D-Q+RZB|)@KofgFum0}tW5Pk0CUkNh{0<<8a8Z`@wV$u zU(9G#b?Wi*W7h5a#y9` z-ZFH|ydqahtbO^(UI}wgmkjtS?VUHr{NFhLEr(zDbmEwPT-n@cqUV^k^FJ@o=}W(G zt3BqsH8s`yceS0={{LRT?XeYSCT%L*-|XY!-phtX{k(Mexl4P#p5yylhfbG<@A!Cl zqGx`$PoKMcW0&>KM@8>=$Xgq%Vw2Jvx}3+l=VI^9HCl1dX(90a!76{+3;%~J(rIY}eWApzl18kLG* zbU6)m6#OUS40!Xrywt!EplOms{x1OmyG-!i z&w#rG*9vaNtiVw|L=(c3Zn7prbw#R&@VT+;#m@pb#tzM3=m@)-5R1T^Uzw1kxQL`! zz}q*1mbj}zi^Xb6PJjHZ7?y`t3ymOniAn>Onf{dw!y?C zToqzqtPzQ^#x`pHCtej|tnrZz0NAejNd}CX%Jw7U2KEv}3jZS0FIYDf#30!R$jk&# zY*XQX!PQBA5q$gJS|YE@k^HGtUxNXCZkGHa$5+vUi^ooJbIck0{8v7=H*#%Mkt*O5 zJtA8lkEmUm=qz4LP32$P7>&SJh!k_kQUY8hmjNZXj4fitGNDK+l%9m=JXrE`AQOUf zQj{H5dayY2Qa*;w&jhE8CLl?0d?rT`x|JRTrCa0VconuR$_2>Gn!dUF06TX1aj*~z zuFFQF3R6pi8pdyxHScU%BNf|5?134~0jksFXtAhYG4D-RTv1)}5i>7>K zQmuqP+e5Ij{s_t;=>cMttOX7dHb+267QxtyN`aq5rO>KGBql_bfHT1HjMs*=Bv=Z0 z6wC2Nvbbc1s-=Npj0#YA21zVXlkZc7&U*FXCEIACvKcRMIgFsC%7#&)8D;SF))i|! z%no^~qlZwTYV9B+Q+*pG8y%|4NYXzP$yc#r6{)DOfbx-? zrZiOV#YK)}428&BG?>7Q3G&*?2_}wIO(~6`R2k<8E{d2C!9}6UT+wYInQ6R~rlsPg zP;ro#f-yTZBs;(YE0!P9>pVQ3Cau$qitEYzlKA#?dDi z*Zfxy8%f^cMwvbfGNoD-8-iwKXgKpLDn+P2h!@t0;LdElwogn4-q7ORp-RZo(KONPU<95Ai)&fcJ{=0zd&k z%vDxvvU8Od)4J+C0k+#ShXGIk?yn-s4MZ+;mgu?_>UDs)X-e!1R z3-CJDN{zOpp{sF=W;KqV)RC2NTcs489-zHmd@>{jHUGg;W{|om@u6yaAXIIX5c?Ne zwmk7dS$NHnAnH<{$WB;_R1Tkv7*?_&sCW4oQ&p%p4@2(3;&Ycpijriosnw&`asW)m4daz+s1pF#%jQNz;Xd_7Iw9{Lm&+TihC5LlfXE zu@IK(_ktlE3=e0E^})$fZcp7vk(h$VE|v*=h%86V$#aCwQ&23*N1;pXc9QjmRX!00 zW+MQp6s$ozsY+jJ6!Pv7W@Dpvu|jqL5N~9>FgfIV3xcR*0sySx8!`3bW?h_Ab088E z8z5zQV>X-cBZ2AfRN=nw>U-NxWsAVJEm>jG80 z)>A!EcOrATzOs>ZhfbPh8?n{km92eWGTr)hEtehL7 z(Y`ore4mGQ zszgsUsLLr-%FvY@+|m`ZO)DxYVk+YCFyK_oVjfjkV8aQh(Ugzl32fQZT+UQ{eF?6H zD_`N8lO+kBfn*(2tUQFUGf1Y*TtoBrfH^{+c50c5$(l_e(qLK21V>1()w@Lwck@EC zOa)Pv!;Rd!b9VKRN=jUUQmi=aWYR{$tmZP3?Ev5)cb?YfuwA z>Gh*4+Zn=cL5z#xy=i8-6oCM{Y@{=Q?rLX1r|L(Hf#C_-PI<|UL}(~Kpemb_pvi17 z0uq%<0jgF8m0?XvP!AGCvT5>W3`jA}ta=hE-H7+79DQj6Uu0twG&|DgwOM_ktHFRi zoum{Ce3#LT|Ftw#_8}o$oD^+c<2?zw>6~sFhV#dbc61jL(r85FxuRV|1x}j9riC=B zTVPmMnFT>`Ra`dB8u3jL@K`RULfC(UD3^x{*ssBVke5g^6RBb=nh(?1WRx4`;XhJC zv4yKF4)@Bs9dfspJZnl4oQ+F8U7A+7B9z3QG8D4QY@Zn`q6%>j9+!h7uYigfy^bWoBpN zTi8ki3?(#33~7*;l#*x-YJj0UD6|d%N21NklQ=KKlrnU_E<9vK;VdtKrwSO5FKZg= zq!ji>!|9zDT^d5`Dg=mrPQhnHZnP9E5v-;NnsTr>jRtB-LRE!F3H@-PWvDx&upCJ@ z4fYd0lM3e48_Q3;O-Q{>;a!UO#f^^&4CdFQLfv4 zHsRBVIyc}4lS}C$&Gw*E1f};PNaW(d0r^NC_9RG_flX6A{*I<74SyAXKAJLS5wPp4 z*UC;gG^(Yv%BLVviKx;YBZwZ#l`$5U7_%5!w}TwATnh@*1QMWhRM1Y+8g33G0zBc> zMQSI|`b+7*0M?DV1GQDLPBmaI%Cd*3i)M_uE=a}KZ8VHUR4dqrfK3Ub0flrG?2xNL z;Wtgnbg>o?45J3kVagylfs`FRUf3ufEQA%jgSax#Si)EYv5y{nA)}o$=oth;_`*xR zghHwQN*DE*@x^eKp>Com3#ti#+6s1EW1(@PWO#G!1#2yl)Rg8LtmBdS3-oyPA8e>I zffu@SBiKBL1q?RI*$~T9zYUw5Lj-(9B+~m(_jJu9!(Nr~nn{+zyx)gO4m->1ZHQ5l zx?p=D!bTC15LDyv&l@(vE~RMNhVo_$I2>fdkp<<-gF@5}@5PYv+RN{Ouae={079+^ zZe(N`9`6-+2(;OSe$jwPb#o$aH)j-o873@pQ~^;wFRU>W(3(TE5gfr6GA^MUQfH(J zpj93vUUa(QH)&9xLRaw!1s;VAC7=*!(RLS~Bw+=ljp`0=5jCJc3m%+c0yX_9bnB7_ zg)&beTronF1H4Xz{Xx_@#QCsIf`3m!^wE-ZFk`Naq(s6+T2M}>z-Dc^il-JVLs^;7 zmL^g(9T7H+rwZ#phV>W+RuDnQehIH-3<|FSUGU_hed?)3!x34!DJ3BeE_yl%*vc-2 z6)t@B`ayUc3i1fr4ZmIB$5mQ`E1Ej>h8czv$_d)7d{Q~`btnEU_b)Wr3FQwrc96=2tc76?D%2t$B6y`r%16*^fMtqEAp~- zW&~JTBmCGQ!Up;S3LcGY9MW#N(1Cx81{Vrig>VTyiD41KcZK9GjwXZw^$%tn$Z;jY z(u!C+Gn3IYmO?lFG>~(T^XhG*`DJ z(owOk;6>%u=yxFNYArXxeS;;JTjL!!Jq&P!Cb@UC7Q}s{`6W<}zDW$DeZP=%^`QVW zI)rn&E3+V;hv%HIx{NHbMsSX=tb=nR!+?jOY6#a6SOV5DHoTw#ha@@{tieGC$@lNi zPkr+Sg8_Y1H@|#5zMtAur^pp}FF^be^CrG&1@5};3l`doMA~1&ZPTkqN`JVLyOOy}-diirgd^pcGCV z=qjlmP9R{v{@lKBfP)@*(t(5zq8X5=jQAKKibHLp&Wh}VF5>gU}ORynF_d=mn zycWiZ@nBy8$Pf&aXz++bL^lc%+XR|}c^JIIui;E6Gvi_tW8=}0HhkjXn>Cmd!OVn_ zX<2pUm>DlJO2Y)qI)Ry(=aHBx3I%3$Bbk}az-<+h#iyNhFfs|4mjWz|!;G|EW29zg zkA$-|^y@3=N|kB`1?wZyK@0vQgGMIDCZP-+t-1y4V_Y!6mD5K&DPiJ<)5j}6xw8r3 zBrPHn)Cbf8bqz)$L;fahUwDlSjLtM3RYEcIZv%tAnfq~NB z->)7@KAXdlpztP4Z&u?wu?-T2iE_{Od2<^6W_5q`J% ztn}KU1tnv-MX?_jBvUuS;HcsYhkm^@HR8XK2?``vPMg8*%$8J|XbsxF@_{t=q8fsl$`6bGF+P z-End1V>b7)-S>Qy(0bA5)x+;xQ&8~q;P%^k5*^>`w$%Rjf2>OE6_vVR1RourUo&KJ zhs_6uFHar#&#c|cAKJsOf2se8J_k1cx$?f9ao!CxF8_Pl;eDx}Eg0hH{o0!C>F26> z?jK(do;av-s?k*EQ;(Jz;DT%NY=p5ZUP zv*&i_-}>Kqy{S5Re_0DhU)%F{H$PN6$TPj!++%H%2NbmY=APK~X`}j@S`P2z2<$tN zk~XK~z3Q%qZf@6RR9ax)IO%TXqR-#>UhIXxrhIgJ?~D7+E!z9l!NUC`KfbVdjYICXJW{7-KrotV94 z>5s2JlXU*GvK@5st{JUoF0qfDa%kmiv!f^42l)EV8gzVIjdj|EPNwO5O1F$CyQP_L zs`;~ooA6@U%4uUiaBS#& z{7)~Acea>y;{@8Xl9;^w+-rL$CNQV_6WyZHYUXe6^7SK6U2d7MH>zmb^X*RN+E+|J z{?yTer`yHt?~_-u{im&y&AmSRIpMW+7oxOz2~)e+-`@&9ijF=VrCYicq{9|GtA63T zPJCQ!stT=mSrGb>Ia-g}yyN!Mk?y zrkg~(cmqlFNq`i+OCaby0>S*yCb%g`7f%i(i%2z`z>*BBAPN(-nMZ28*8T>)Jfu2! zAih~noBX0i+d5r^R!)VfA69MBfEsJBdqgJSEh0?{LH$r`w~^GSnQ=0IHJeJ2@P-RY z1iZ*ptesrtk(5DWOaZMAr&a7l4Mt<;1UyI76Ld;yjay3l)n;@wB9A?cAbFW>p=@bxx3N> z>WQ*M7%)P`59QeafY)(jw92*~^alj;{7R{w~fvkA_?$ z;Dm2?1MxG?Z+9p;>Q9@NYJ6;#KL&w62q^y{)a=-@PWy@*%hgy@sLtly%ypZMUsbW^1)l#i#fixLG(cZS2`#&#EPMEZrtu z0tykQFyj@hfXQfuH#g4H-qgD_KGa zmEaG|cm5bU$t2ZPl@{nm zflJxp0-yLXPs=iw`OFwKQ+dlX`9;oQ-Ct!z`Exsk-QInlg)Hd5k2*Q=HEV5B@a(B( zA{|*ieOj_K%h3a)W=kUx==SMM;Ow;K+taebGxB5$g&pWTxx+JD%<(8gZJuP}sm!2+ zA{r7pz}x*I;s=_bx3769!#^-s9$FYHEj79Z2jmRw{qcy^a%PkL=&)Cli*Qbt+Pn5c?D1l%!O3XPKKC)0_gvzKB zd98&Aa|*Y-GrDTdsQ=-J?0A0CDkBkjm{(dIochd=^+9__1pEhwkI}6qipU-_Yawih z0-0$lw)5Gw?H7+t*W%^~bhsBPtaO480q|$)$`qR)nWL@vsRt{%;Pe$Wa5~sfSyu>) z4i9fAcZ*n~_f3Odb~#!tubeZ_yVWpt9lY1$jUx5QE8{oB7&l0(3Qcy6sAXAOb9!7> zFOVJLlf^|h_QsQ-wJ!c0Rt%1^ExVXR?UD zH7_m^#wmpkDI;C;%5lV9I|lp(?sV4+&0yC~j>e$$68I?0%ie z-bkh<=}5!hu$*4iyRZkhtkufOnh)155L=qNKR&>gu{y13r_l!;e~7^mkOM@?%SX(4 zR!Ppy;g1T72fZ9l1n+*mAwwt8AHlN|W#%hne4kcT#ewLnj%qPPh-50=vHB@`c2T8i zistFTg|X9t+<~9D!tDe-O`LPVFBvZi8$-HJ#Ek=Uwv&ZgJ_8_*(5P9m=*3-`mnLh> z^ZU+>rBRrP6-B!PfKERG>kC#k^h5&#NcpjGX7H?@jrc37(M)_Y45-F!)kX!wbl6 zVI(uYmtOQ#j$vWRqfQJ|ITcl? zTHhS}07@?8Z;#dCNV?%wYTsMV!4d$Wb)`q7wB41j;Hi)`T>Rx{V-(Cb>T{9P7N z5pGZ*ceL|CHvi(ip1pDEC0B6x5x9FpDK>fV-jY-ZH#LD|b0k`!_bLLA zFsHYCK%TGyJ6iJJP_Qlz>wFu?D zC)uunBGx>6UJQoH%fU0tLoL(GY6Bn1p0fc4tj?CN#oV@U4$?TU#P1nFXA=r?1s|^o zo+;=@m<>{nskL-cV`03l2`k8Z_wZBqfw>i)Zt&xjIqdU5N1?(tL29ja>G7QQDOd*C z@QE&O8Ex-w-7CqMqO`~V)Smwew$M4B_37*9kGqxz(CgL4&05P7aC7(KwRZEg{V5Av zQq>XNGKA5)KjQh1g?I27OLyiao-vd!`1Eyutc?VdzD2e>0c^7}J(qXaSQ;ub7&4kwsBMQmw%np^(UiTJ2?4OTsbcnRF! zIW14?KdW&|;TdSj+ArI{>~?0j#pdp)?<*F=Vy)^kGN1?BNp%E*wrzWbTL(pwwB47- zOo@4#dV)gUFEB-XEJ*2cFd|AGIKD8~-MKaQIPc4FXo!^o*+q|XZucsly}aV&Unr>H z14SkmjQ1VjqFpW88jrfo=cw8n!IbCfBPTCCQoHxVn+h4XIU?6#2cC4V*&ik z6@|o^22wiRtskV0(MfDc7UI-eZlf!jVTiJw_nr2RP9Gygl@}+smkd61358_weTAL` z)QY|o&7pf%oL}!$Oh_A|_?~r6rZq!yQE#xG^6Jk1jcZbXe+gH~NBGTD*P8W=76iI= zP0HW8)!V_=#g_N)neQhvJI3m6Gs2V|yM~bGZH>;HKH=hu+mk!vf88W$lTT?@Ksd8$ zGi)+<+}!ba9cnpO{|0}%^-X<^48it?%WP-bR-z4aO2vK>Tiqm7>rA{5fTLWJw|=>X z_|OrwD$2M3;Y)U$Z9ym8Bq*3HmP(GO^$8SgA8Dv{4soY)jfE)Us(25ISWMtYsR%pg z1bW^+ND!252ofpe3YQ}?wm2uGQ)P!XUo6=It!CkLm%t*x6{^lqyEdc?fFDhuX<@VMv#?hMX3k#^W z{CKSVp~#&z!mEOcST0J6d-juNubPn$9PDmqhDCH1NV}`W;e#bLAAaEqn|YJ`k+bAM z`L>7g!&i&K(*#)R@=kH}^Ir~U4>=0;Nv08z8rC&0$Er!DSvrwNKw;b1Zk;}!tE!PH zBOz~1T?+*+{w)$3dallogPH6ca8k8es(sqHU9uLfF6WvL{NRT2su#HP#m;PCjsdL& z%2?xY))CZGDQ5q{`VSxx$S1Q^FFNx)#2EN1A3C=7P#a?vgPo5z|8nnOiqzfnYP0_` zUz1Kk^w||E&i7_75T+5hKerA@o?%^_uFOynzp940=kCp<7Fzbk?Ir;oC0Mt(Ehmv^qNN&tCk|uvYq$3d zVXPL!`j`PR{k;|nX1ViKlrQNUD_*m%9KtIZ{Mc9|-wh@MNW-mWqkGE*O42!7uO)Gb zleW$YD(LQ!8qZ9!HPaWxSG-WLqsL}b!YJ9@0L!t?BNI9I=Q`<%(HdA!6J&|4yeyRb zxdyA9MAPxIsa!HD>J%K-i8i#Y^y-s{sstYzk*#uKr5%WT{Z#6kADqPg{(hSWBs+m6SitsMhuP)`G}(NtmT9VH z(bQq0@vhX>+UVW!@-`c;L?`P8$q5XK9M61y`d8GFKmfJIGBRWQ@;vjRgXNW5f1S}P zr^r&{@$^}t30~p@%}LUGeH~d)?})0}q0+e@S{jvMjJNKbF=wQB1>drGUGNY)F+QaM z_gFqlMKW0jx_96AwXEE;c2%R6uCX36#PJx~be5HcCZ2Z7{#(sEp(aR)5_m$tMF(0{ z>4bcU1bj52?`!8Tk8gOH*9?zaaZF%`ZXRR5HxUp)1*S4cCx!4>=H<3!v6mH=aYRb- z4>?JR*R+x99w+*?7aECVTD$P_@{kxRFO)mgyilLOpu6{cT!HS%igkucn8XfSDwfB@ zfx_C#Cc>+8^Dz6&^R{cv@GTa+Wh?3fcyWbc4?w};u_^b_rEFa2aFLISIBK)DQ~;c`S8!&{~JAtT$JQJ_~wizT|k_HrQJ3$JQcQ9PZ%VQjR z;@1}xu#$d|tV=nF+P+{2$QD6xh-Isu^B5lL;c93Zm>Z@>Ub1U=!#DF-E zyJH^Tvtnd2H+qS%z0_4M(Lm0B-q_Sh@UP-oDr1uXx{>m)=X95@889+^(NI46K~vv5 zNR#7preP6#znwp$^3)rifgn0vKp0?^aDoO1t_A0H2RdNX0}`6Ma`(TPhrd$?LU?u4 zu?s9(4J_6oJ>A$Rhi*;YunoD(HMuozpDrUfM+F=39TiCkO*;`{YKzBN>!WfCSnaDx zZ}3vkpvfnBCSlQ9K{(oAQ7?TJfH3glQPtmD`}}*g51dsI&$+Jl9eDrpL+spKz4a`u zP+#4#ft^Q4%?LPI^zJdIih8>jSu|>Ka!J{DGwQuR(uhjk0Wr-#`|9|07Ex;~-~PKa zL3X##d3h3IWx4M^2iUokf4CP95tC2k;;?#VWkp*wvAXn5`P-C*33eQrlIa4uxE4$; zsULG@+(S)lRQ`!B*eOmAO?jg4WXg({uvwf)0BA%tAQRuV7E$rJr)wD3YLnJns5a|V z>et%bYl;5k-{3AuzPd99u}X*R*Xd1In|Sice(f3^Rz6{u)EfX z0yyu=cjCT?hL}3`IBMii-*qmd;-M;=eppm`$R!31h9f0zq$AoT>9vHUulw_d#QqC9mEZ6?$zgV#?7MoUN}( zRV#NfskGIRSJ}-A`HaneeMWv4B7jYHPRGEr^}yQ+Z6Aw_^-FcyMFbNct_wltfFyV6 zk4?l)0S<p=!WrFf77$mF|3U2urPIl9YJ)0qtte*jhAU@UnJ zzSqc{>Fl=BLXtczCo3!V?Yok$Fz;?Rei~P_+E*{`8c7cKXVl)ab4Te24ldkWeJW#d*1e&=M0uyhlIC$NCZu6ISKJ|3Z+?>D%e~lL z+NT~wZBBmBEyJWq%dH`N`>iRj5fdDGv&vU@52;a$&Pbu3q5fBh|LL4wKgU0xs_Rqw zFCP_^;KG4XkAZ4)(Z|5XLJ_%&v;BLY-;Kxr?(uUqy79LSsf7Sv_d36RShMzbWAGOQ z<63t8f1B_po(_}j`fGo)e~JJ9X#ZbuVd8(m|LFv@5H7zb z@lTcd3*L9#N&HXE`ey`x{vrL^f{X&S;7*1j^x(kwOo Date: Sun, 1 Mar 2015 18:49:44 +0300 Subject: [PATCH 10/16] Update Docx writer for 1cb601d reference.docx --- src/Text/Pandoc/Writers/Docx.hs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ebd060d38..eb7fa344b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -652,16 +652,16 @@ pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element pStyleM = flip fmap (gets stParaStyles) . pStyle --- rStyle :: String -> CharStyleMap -> Element --- rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () --- where --- sty' = getStyleId sty m +rStyle :: String -> CharStyleMap -> Element +rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () + where + sty' = getStyleId sty m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () --- rStyleM :: String -> WS XML.Element --- rStyleM = flip fmap (gets stCharStyles) . rStyle +rStyleM :: String -> WS XML.Element +rStyleM = flip fmap (gets stCharStyles) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -722,7 +722,7 @@ blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = do - p <- withParaPropM (pStyleM "Block Quote") $ blocksToOpenXML opts blocks + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do @@ -866,8 +866,8 @@ withTextProp d p = do popTextProp return res --- withTextPropM :: WS Element -> WS a -> WS a --- withTextPropM = (. flip withTextProp) . (>>=) +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do @@ -999,8 +999,9 @@ inlineToOpenXML opts (Code attrs str) = do inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -1017,15 +1018,15 @@ inlineToOpenXML opts (Note bs) = do let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rCustomStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rCustomStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i From 409111f647d3efa403ff1efff12eebc3173017b5 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 1 Mar 2015 22:57:35 +0300 Subject: [PATCH 11/16] Started moving StyleMap out of writer code --- pandoc.cabal | 4 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 26 ++---- src/Text/Pandoc/Readers/Docx/StyleMap.hs | 105 +++++++++++++++++++++++ src/Text/Pandoc/Readers/Docx/Util.hs | 26 ++++++ src/Text/Pandoc/Writers/Docx.hs | 71 ++++++--------- 5 files changed, 165 insertions(+), 67 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Docx/StyleMap.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Util.hs diff --git a/pandoc.cabal b/pandoc.cabal index 16106f896..32bbfbd26 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -338,7 +338,9 @@ Library Other-Modules: Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Reducible, Text.Pandoc.Readers.Docx.Parse, - Text.Pandoc.Readers.Docx.Fonts + Text.Pandoc.Readers.Docx.Fonts, + Text.Pandoc.Readers.Docx.Util, + Text.Pandoc.Readers.Docx.StyleMap Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index b644923c4..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) +import Text.Pandoc.Readers.Docx.Util import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -108,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -249,10 +248,6 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive @@ -269,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -288,7 +283,7 @@ archiveToStyles zf = case stylesElem of Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in ( M.fromList $ buildBasedOnList namespaces styElem (Nothing :: Maybe CharStyle), @@ -356,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -459,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -488,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2e3d6db95 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,105 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap + , ParaStyleMap + , CharStyleMap + , StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import Data.Maybe (fromMaybe) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => String -> String -> a -> a +insert k v = alterMap $ M.insert k v + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = StateT StyleMaps Maybe a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + insertPara key val = modify $ \s -> + s { sParaStyleMap = insert key val $ sParaStyleMap s } + insertChar key val = modify $ \s -> + s { sCharStyleMap = insert key val $ sCharStyleMap s } + genStyleItem e = do + styleType <- getStyleType e + nameVal <- getNameVal e + styleId <- getAttrStyleId e + let nameValLC = map toLower nameVal + case styleType of + ParaStyle -> insertPara nameValLC styleId + CharStyle -> insertChar nameValLC styleId + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +getStyleType :: Element -> StateM StyleType +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + "paragraph" -> return ParaStyle + "character" -> return CharStyle + _ -> lift Nothing + +getAttrType :: Element -> StateM String +getAttrType el = do + name <- elemName' "type" + lift $ findAttr name el + +getAttrStyleId :: Element -> StateM String +getAttrStyleId el = do + name <- elemName' "styleId" + lift $ findAttr name el + +getNameVal :: Element -> StateM String +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + lift $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index eb7fa344b..53065309b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -54,6 +54,8 @@ import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -64,7 +66,6 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) import Data.Maybe (fromMaybe, mapMaybe, maybeToList) -import Data.Char (toLower) data ListMarker = NoMarker | BulletMarker @@ -90,9 +91,6 @@ listMarkerToId (NumberMarker sty delim n) = OneParen -> '2' TwoParens -> '3' -newtype ParaStyleMap = ParaStyleMap (M.Map String String) deriving Show -newtype CharStyleMap = CharStyleMap (M.Map String String) deriving Show - data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -109,8 +107,7 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stParaStyles :: ParaStyleMap - , stCharStyles :: CharStyleMap + , stStyleMaps :: StyleMaps , stFirstPara :: Bool } @@ -131,8 +128,7 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stParaStyles = ParaStyleMap M.empty - , stCharStyles = CharStyleMap M.empty + , stStyleMaps = defaultStyleMaps , stFirstPara = False } @@ -220,28 +216,14 @@ writeDocx opts doc@(Pandoc meta _) = do styledoc <- parseXml refArchive distArchive stylepath -- parse styledoc for heading styles - let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . - filter ((==Just "xmlns") . qPrefix . attrKey) . - elAttribs $ styledoc - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getAttrType = findAttr (myName "type") - isParaStyle = (Just "paragraph" ==) . getAttrType - isCharStyle = (Just "character" ==) . getAttrType - getNameVal = findChild (myName "name") >=> findAttr (myName "val") >=> return . map toLower - genStyleItem f e | f e = liftM2 (,) <$> getNameVal <*> getAttrStyleId $ e - | otherwise = Nothing - genStyleMap f = M.fromList $ mapMaybe (genStyleItem f) $ findChildren (myName "style") styledoc - paraStyles = ParaStyleMap $ genStyleMap isParaStyle - charStyles = CharStyleMap $ genStyleMap isCharStyle + let styleMaps = getStyleMaps styledoc ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stParaStyles = paraStyles - , stCharStyles = charStyles} + , stStyleMaps = styleMaps + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -394,7 +376,7 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml charStyles paraStyles $ writerHighlightStyle opts + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } where modifyContent @@ -402,9 +384,10 @@ writeDocx opts doc@(Pandoc meta _) = do | otherwise = filter notTokStyle notTokStyle (Elem el) = notStyle el || notTokId el notTokStyle _ = True - notStyle = (/= myName "style") . elName - notTokId = maybe True (`notElem` tokStys) . getAttrStyleId + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -481,12 +464,11 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: CharStyleMap -> ParaStyleMap -> Style -> [Element] -styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - styleExists m styleName = M.member (map toLower styleName) m - toStyle toktype | styleExists csm $ show toktype = Nothing + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] @@ -509,7 +491,7 @@ styleToOpenXml (CharStyleMap csm) (ParaStyleMap psm) style = tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle | styleExists psm "Source Code" = Nothing + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] @@ -638,30 +620,27 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -getStyleId :: String -> M.Map String String -> String -getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) - -pStyle :: String -> ParaStyleMap -> Element -pStyle sty (ParaStyleMap m) = mknode "w:pStyle" [("w:val",sty')] () +pStyle :: String -> StyleMaps -> Element +pStyle sty m = mknode "w:pStyle" [("w:val",sty')] () where - sty' = getStyleId sty m + sty' = getStyleId sty $ sParaStyleMap m pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element -pStyleM = flip fmap (gets stParaStyles) . pStyle +pStyleM = (`fmap` gets stStyleMaps) . pStyle -rStyle :: String -> CharStyleMap -> Element -rStyle sty (CharStyleMap m) = mknode "w:rStyle" [("w:val",sty')] () +rStyle :: String -> StyleMaps -> Element +rStyle sty m = mknode "w:rStyle" [("w:val",sty')] () where - sty' = getStyleId sty m + sty' = getStyleId sty $ sCharStyleMap m rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () rStyleM :: String -> WS XML.Element -rStyleM = flip fmap (gets stCharStyles) . rStyle +rStyleM = (`fmap` gets stStyleMaps) . rStyle getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -710,10 +689,10 @@ blockToOpenXML opts (Para lst) = do paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False - pSM <- gets stParaStyles + sm <- gets stStyleMaps let paraProps' = case paraProps of [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [pStyle "Body Text" pSM]] + [] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst From 65c80822e7900e92b4bba67912da77062654cc26 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 3 Mar 2015 13:08:52 +0300 Subject: [PATCH 12/16] Code cleanup --- src/Text/Pandoc/Readers/Docx/StyleMap.hs | 24 ++++++++++++------------ src/Text/Pandoc/Writers/Docx.hs | 24 ++++++++++-------------- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 2e3d6db95..5a4e9cfc2 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -1,7 +1,4 @@ -module Text.Pandoc.Readers.Docx.StyleMap ( StyleMap - , ParaStyleMap - , CharStyleMap - , StyleMaps(..) +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) , defaultStyleMaps , getStyleMaps , getStyleId @@ -58,23 +55,26 @@ getStyleMaps :: Element -> StyleMaps getStyleMaps docElem = fromMaybe state' $ execStateT genStyleMap state' where state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} - insertPara key val = modify $ \s -> - s { sParaStyleMap = insert key val $ sParaStyleMap s } - insertChar key val = modify $ \s -> - s { sCharStyleMap = insert key val $ sCharStyleMap s } genStyleItem e = do styleType <- getStyleType e - nameVal <- getNameVal e styleId <- getAttrStyleId e - let nameValLC = map toLower nameVal + nameValLowercase <- map toLower `fmap` getNameVal e case styleType of - ParaStyle -> insertPara nameValLC styleId - CharStyle -> insertChar nameValLC styleId + ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId + CharStyle -> modCharStyleMap $ insert nameValLowercase styleId genStyleMap = do style <- elemName' "style" let styles = findChildren style docElem forM_ styles genStyleItem +modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () +modParaStyleMap f = modify $ \s -> + s {sParaStyleMap = f $ sParaStyleMap s} + +modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () +modCharStyleMap f = modify $ \s -> + s {sCharStyleMap = f $ sCharStyleMap s} + getStyleType :: Element -> StateM StyleType getStyleType e = do styleTypeStr <- getAttrType e diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 53065309b..c4de12d2f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -620,27 +620,23 @@ writeOpenXML opts (Pandoc meta blocks) = do blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> StyleMaps -> Element -pStyle sty m = mknode "w:pStyle" [("w:val",sty')] () - where - sty' = getStyleId sty $ sParaStyleMap m - pCustomStyle :: String -> Element pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () pStyleM :: String -> WS XML.Element -pStyleM = (`fmap` gets stStyleMaps) . pStyle - -rStyle :: String -> StyleMaps -> Element -rStyle sty m = mknode "w:rStyle" [("w:val",sty')] () - where - sty' = getStyleId sty $ sCharStyleMap m +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () rCustomStyle :: String -> Element rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () rStyleM :: String -> WS XML.Element -rStyleM = (`fmap` gets stStyleMaps) . rStyle +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -689,10 +685,10 @@ blockToOpenXML opts (Para lst) = do paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False - sm <- gets stStyleMaps + bodyTextStyle <- pStyleM "Body Text" let paraProps' = case paraProps of [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [pStyle "Body Text" sm]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst From ae07d5ed490b1d2f8cbb943a8d876a798d57f470 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 3 Mar 2015 14:37:02 +0300 Subject: [PATCH 13/16] Initial tests for writer --- tests/Tests/Writers/Docx.hs | 128 +++++++++++++++++++++ tests/docx/image_no_embed_writer.native | 2 + tests/docx/inline_formatting_writer.native | 5 + tests/docx/inline_images_writer.native | 2 + tests/docx/links_writer.native | 6 + tests/media/rId25.jpg | 0 tests/media/rId26.jpg | 0 tests/media/rId27.jpg | 0 tests/test-pandoc.hs | 2 + 9 files changed, 145 insertions(+) create mode 100644 tests/Tests/Writers/Docx.hs create mode 100644 tests/docx/image_no_embed_writer.native create mode 100644 tests/docx/inline_formatting_writer.native create mode 100644 tests/docx/inline_images_writer.native create mode 100644 tests/docx/links_writer.native create mode 100644 tests/media/rId25.jpg create mode 100644 tests/media/rId26.jpg create mode 100644 tests/media/rId27.jpg diff --git a/tests/Tests/Writers/Docx.hs b/tests/Tests/Writers/Docx.hs new file mode 100644 index 000000000..4a199755d --- /dev/null +++ b/tests/Tests/Writers/Docx.hs @@ -0,0 +1,128 @@ +module Tests.Writers.Docx (tests) where + +import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Definition +import Tests.Helpers +import Test.Framework +import Text.Pandoc.Readers.Docx +import Text.Pandoc.Writers.Docx + +type Options = (WriterOptions, ReaderOptions) + +compareOutput :: Options + -> FilePath + -> IO (Pandoc, Pandoc) +compareOutput opts nativeFile = do + nf <- Prelude.readFile nativeFile + df <- writeDocx (fst opts) (readNative nf) + let (p, _) = readDocx (snd opts) df + return (p, readNative nf) + +testCompareWithOptsIO :: Options -> String -> FilePath -> IO Test +testCompareWithOptsIO opts name nativeFile = do + (dp, np) <- compareOutput opts nativeFile + return $ test id name (dp, np) + +testCompareWithOpts :: Options -> String -> FilePath -> Test +testCompareWithOpts opts name nativeFile = + buildTest $ testCompareWithOptsIO opts name nativeFile + +testCompare :: String -> FilePath -> Test +testCompare = testCompareWithOpts def + +tests :: [Test] +tests = [ testGroup "inlines" + [ testCompare + "font formatting" + "docx/inline_formatting_writer.native" + , testCompare + "font formatting with character styles" + "docx/char_styles.native" + , testCompare + "hyperlinks" + "docx/links_writer.native" + , testCompare + "inline image" + "docx/image_no_embed_writer.native" + , testCompare + "inline image in links" + "docx/inline_images_writer.native" + , testCompare + "handling unicode input" + "docx/unicode.native" + , testCompare + "literal tabs" + "docx/tabs.native" + , testCompare + "normalizing inlines" + "docx/normalize.native" + , testCompare + "normalizing inlines deep inside blocks" + "docx/deep_normalize.native" + , testCompare + "move trailing spaces outside of formatting" + "docx/trailing_spaces_in_formatting.native" + , testCompare + "inline code (with VerbatimChar style)" + "docx/inline_code.native" + , testCompare + "inline code in subscript and superscript" + "docx/verbatim_subsuper.native" + ] + , testGroup "blocks" + [ testCompare + "headers" + "docx/headers.native" + , testCompare + "headers already having auto identifiers" + "docx/already_auto_ident.native" + , testCompare + "numbered headers automatically made into list" + "docx/numbered_header.native" + , testCompare + "i18n blocks (headers and blockquotes)" + "docx/i18n_blocks.native" + -- some level problems, look into that + -- , testCompare + -- "lists" + -- "docx/lists.native" + , testCompare + "definition lists" + "docx/definition_list.native" + , testCompare + "custom defined lists in styles" + "docx/german_styled_lists.native" + , testCompare + "footnotes and endnotes" + "docx/notes.native" + , testCompare + "blockquotes (parsing indent as blockquote)" + "docx/block_quotes_parse_indent.native" + , testCompare + "hanging indents" + "docx/hanging_indent.native" + -- tables headers do not survive round-trip, should look into that + -- , testCompare + -- "tables" + -- "docx/tables.native" + -- , testCompare + -- "tables with lists in cells" + -- "docx/table_with_list_cell.native" + , testCompare + "code block" + "docx/codeblock.native" + , testCompare + "dropcap paragraphs" + "docx/drop_cap.native" + ] + , testGroup "metadata" + [ testCompareWithOpts (def,def{readerStandalone=True}) + "metadata fields" + "docx/metadata.native" + , testCompareWithOpts (def,def{readerStandalone=True}) + "stop recording metadata with normal text" + "docx/metadata_after_normal.native" + ] + + ] diff --git a/tests/docx/image_no_embed_writer.native b/tests/docx/image_no_embed_writer.native new file mode 100644 index 000000000..21802ebd1 --- /dev/null +++ b/tests/docx/image_no_embed_writer.native @@ -0,0 +1,2 @@ +[Para [Str "An",Space,Str "image:"] +,Para [Image [] ("media/rId25.jpg","")]] diff --git a/tests/docx/inline_formatting_writer.native b/tests/docx/inline_formatting_writer.native new file mode 100644 index 000000000..be346204e --- /dev/null +++ b/tests/docx/inline_formatting_writer.native @@ -0,0 +1,5 @@ +[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."] +,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."] +,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Emph [Str "single",Space,Str "underlines",Space,Str "for",Space,Str "emphasis"],Str "."] +,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."] +,Para [Str "A",Space,Str "line",LineBreak,Str "break."]] diff --git a/tests/docx/inline_images_writer.native b/tests/docx/inline_images_writer.native new file mode 100644 index 000000000..da2a2709b --- /dev/null +++ b/tests/docx/inline_images_writer.native @@ -0,0 +1,2 @@ +[Para [Str "This",Space,Str "picture",Space,Image [] ("media/rId26.jpg",""),Space,Str "is",Space,Str "an",Space,Str "identicon."] +,Para [Str "Here",Space,Str "is",Space,Link [Str "one",Space,Image [] ("media/rId27.jpg",""),Space,Str "that"] ("http://www.google.com",""),Space,Str "links."]] diff --git a/tests/docx/links_writer.native b/tests/docx/links_writer.native new file mode 100644 index 000000000..cc00e4326 --- /dev/null +++ b/tests/docx/links_writer.native @@ -0,0 +1,6 @@ +[Header 2 ("an-internal-link-and-an-external-link",[],[]) [Str "An",Space,Str "internal",Space,Str "link",Space,Str "and",Space,Str "an",Space,Str "external",Space,Str "link"] +,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://google.com",""),Space,Str "to",Space,Str "a",Space,Str "popular",Space,Str "website."] +,Para [Str "An",Space,Link [Str "external",Space,Str "link"] ("http://johnmacfarlane.net/pandoc/README.html#synopsis",""),Space,Str "to",Space,Str "a",Space,Str "website",Space,Str "with",Space,Str "an",Space,Str "anchor."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#a-section-for-testing-link-targets",""),Space,Str "to",Space,Str "a",Space,Str "section",Space,Str "header."] +,Para [Str "An",Space,Link [Str "internal",Space,Str "link"] ("#my_bookmark",""),Space,Str "to",Space,Str "a",Space,Str "bookmark."] +,Header 2 ("a-section-for-testing-link-targets",[],[]) [Str "A",Space,Str "section",Space,Str "for",Space,Str "testing",Space,Str "link",Space,Str "targets"]] diff --git a/tests/media/rId25.jpg b/tests/media/rId25.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/media/rId26.jpg b/tests/media/rId26.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/media/rId27.jpg b/tests/media/rId27.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index b7b1c30b1..dd92a271a 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -20,6 +20,7 @@ import qualified Tests.Writers.Native import qualified Tests.Writers.Markdown import qualified Tests.Writers.Plain import qualified Tests.Writers.AsciiDoc +import qualified Tests.Writers.Docx import qualified Tests.Shared import qualified Tests.Walk import Text.Pandoc.Shared (inDirectory) @@ -38,6 +39,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Markdown" Tests.Writers.Markdown.tests , testGroup "Plain" Tests.Writers.Plain.tests , testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests + , testGroup "Docx" Tests.Writers.Docx.tests ] , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests From a82dedf1ff1b6fe345640843dad653386a8f9bc7 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 8 Mar 2015 03:59:48 +0300 Subject: [PATCH 14/16] Lists test --- tests/Tests/Writers/Docx.hs | 8 ++++---- tests/docx/lists_writer.native | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 tests/docx/lists_writer.native diff --git a/tests/Tests/Writers/Docx.hs b/tests/Tests/Writers/Docx.hs index 4a199755d..a22daadb8 100644 --- a/tests/Tests/Writers/Docx.hs +++ b/tests/Tests/Writers/Docx.hs @@ -83,10 +83,10 @@ tests = [ testGroup "inlines" , testCompare "i18n blocks (headers and blockquotes)" "docx/i18n_blocks.native" - -- some level problems, look into that - -- , testCompare - -- "lists" - -- "docx/lists.native" + -- Continuation does not survive round-trip + , testCompare + "lists" + "docx/lists_writer.native" , testCompare "definition lists" "docx/definition_list.native" diff --git a/tests/docx/lists_writer.native b/tests/docx/lists_writer.native new file mode 100644 index 000000000..4c44ea603 --- /dev/null +++ b/tests/docx/lists_writer.native @@ -0,0 +1,17 @@ +[Header 2 ("some-nested-lists",[],[]) [Str "Some",Space,Str "nested",Space,Str "lists"] +,OrderedList (1,Decimal,Period) + [[Para [Str "one"]] + ,[Para [Str "two"] + ,OrderedList (1,LowerAlpha,DefaultDelim) + [[Para [Str "a"]] + ,[Para [Str "b"]]]]] +,BulletList + [[Para [Str "one"]] + ,[Para [Str "two"] + ,BulletList + [[Para [Str "three"] + ,BulletList + [[Para [Str "four"]]]]]] + ,[Para [Str "Same",Space,Str "list"]]] +,BulletList + [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] From c0c9b313e6109e0f390cdda1bb868e394faae21b Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 8 Mar 2015 04:42:23 +0300 Subject: [PATCH 15/16] Docx Writer: set firstRow information in tables --- src/Text/Pandoc/Writers/Docx.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c4de12d2f..81369e278 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -733,25 +733,30 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' )] blockToOpenXML opts (BulletList lst) = do From 59c4d28d8c70e3d23428897d78a3c401e30612e5 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 8 Mar 2015 04:42:50 +0300 Subject: [PATCH 16/16] Docx Writer: Tables test --- tests/Tests/Writers/Docx.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/Tests/Writers/Docx.hs b/tests/Tests/Writers/Docx.hs index a22daadb8..80ce0014d 100644 --- a/tests/Tests/Writers/Docx.hs +++ b/tests/Tests/Writers/Docx.hs @@ -103,12 +103,12 @@ tests = [ testGroup "inlines" "hanging indents" "docx/hanging_indent.native" -- tables headers do not survive round-trip, should look into that - -- , testCompare - -- "tables" - -- "docx/tables.native" - -- , testCompare - -- "tables with lists in cells" - -- "docx/table_with_list_cell.native" + , testCompare + "tables" + "docx/tables.native" + , testCompare + "tables with lists in cells" + "docx/table_with_list_cell.native" , testCompare "code block" "docx/codeblock.native"