Merge pull request #6941 from tarleb/docx-raw

Docx writer: keep raw openxml strings verbatim
This commit is contained in:
John MacFarlane 2020-12-13 11:08:41 -08:00 committed by GitHub
commit 32902d0fad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 97 additions and 59 deletions

View file

@ -441,7 +441,7 @@ writeDocx opts doc = do
Nothing -> mknode "w:sectPr" [] ()
-- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
let contents' = contents ++ [sectpr]
let contents' = contents ++ [Elem sectpr]
let docContents = mknode "w:document" stdAttributes
$ mknode "w:body" [] contents'
@ -538,7 +538,8 @@ writeDocx opts doc = do
-- docProps/custom.xml
let customProperties :: [(String, String)]
customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta)
customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta)
| k <- M.keys (unMeta meta)
, k `notElem` (["title", "author", "keywords"]
++ extraCoreProps)]
let mkCustomProp (k, v) pid = mknode "property"
@ -788,7 +789,7 @@ makeTOC opts = do
mknode "w:docPartUnique" [] ()]
-- w:docPartObj
), -- w:sdtPr
mknode "w:sdtContent" [] (title++[
mknode "w:sdtContent" [] (title ++ [ Elem $
mknode "w:p" [] (
mknode "w:r" [] [
mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
@ -802,7 +803,9 @@ makeTOC opts = do
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element])
writeOpenXML :: (PandocMonad m)
=> WriterOptions -> Pandoc
-> WS m ([Content], [Element], [Element])
writeOpenXML opts (Pandoc meta blocks) = do
let tit = docTitle meta
let auths = docAuthors meta
@ -830,6 +833,7 @@ writeOpenXML opts (Pandoc meta blocks) = do
return $
mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs]
[ mknode "w:p" [] $
map Elem
[ mknode "w:pPr" []
[ mknode "w:pStyle" [("w:val", "CommentText")] () ]
, mknode "w:r" []
@ -844,11 +848,11 @@ writeOpenXML opts (Pandoc meta blocks) = do
toc <- if includeTOC
then makeTOC opts
else return []
let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ map Elem toc
return (meta' ++ doc', notes', comments')
-- | Convert a list of Pandoc blocks to OpenXML.
blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Content]
blocksToOpenXML opts = fmap concat . mapM (blockToOpenXML opts) . separateTables
-- Word combines adjacent tables unless you put an empty paragraph between
@ -884,10 +888,10 @@ dynamicStyleKey :: T.Text
dynamicStyleKey = "custom-style"
-- | Convert a Pandoc block element to OpenXML.
blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content]
blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk
blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content]
blockToOpenXML' _ Null = return []
blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do
stylemod <- case lookup dynamicStyleKey kvs of
@ -921,18 +925,18 @@ blockToOpenXML' opts (Header lev (ident,_,kvs) lst) = do
Just n -> do
num <- withTextPropM (rStyleM "SectionNumber")
(inlineToOpenXML opts (Str n))
return $ num ++ [mknode "w:r" [] [mknode "w:tab" [] ()]]
return $ num ++ [Elem $ mknode "w:r" [] [mknode "w:tab" [] ()]]
Nothing -> return []
else return []
contents <- (number ++) <$> inlinesToOpenXML opts lst
if T.null ident
then return [mknode "w:p" [] (paraProps ++ contents)]
then return [Elem $ mknode "w:p" [] (map Elem paraProps ++ contents)]
else do
let bookmarkName = ident
modify $ \s -> s{ stSectionIds = Set.insert bookmarkName
$ stSectionIds s }
bookmarkedContents <- wrapBookmark bookmarkName contents
return [mknode "w:p" [] (paraProps ++ bookmarkedContents)]
return [Elem $ mknode "w:p" [] (map Elem paraProps ++ bookmarkedContents)]
blockToOpenXML' opts (Plain lst) = do
isInTable <- gets stInTable
isInList <- gets stInList
@ -952,7 +956,9 @@ blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit
contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
captionNode <- withParaPropM (pStyleM "Image Caption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
return $
Elem (mknode "w:p" [] (map Elem paraProps ++ contents))
: captionNode
blockToOpenXML' opts (Para lst)
| null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
| otherwise = do
@ -969,10 +975,12 @@ blockToOpenXML' opts (Para lst)
ps -> ps
modify $ \s -> s { stFirstPara = False }
contents <- inlinesToOpenXML opts lst
return [mknode "w:p" [] (paraProps' ++ contents)]
return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)]
blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
blockToOpenXML' _ b@(RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
| format == Format "openxml" = return [
Text (CData CDataRaw (T.unpack str) Nothing)
]
| otherwise = do
report $ BlockNotRendered b
return []
@ -987,7 +995,7 @@ blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do
wrapBookmark ident p
blockToOpenXML' _ HorizontalRule = do
setFirstPara
return [
return [ Elem $
mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
$ mknode "v:rect" [("style","width:0;height:1.5pt"),
("o:hralign","center"),
@ -1006,26 +1014,28 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
-- Not in the spec but in Word 2007, 2010. See #4953.
let cellToOpenXML (al, cell) = do
es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
return $ if any (\e -> qName (elName e) == "p") es
return $ if any (\e -> qName (elName e) == "p") (onlyElems es)
then es
else es ++ [mknode "w:p" [] ()]
else es ++ [Elem $ mknode "w:p" [] ()]
headers' <- mapM cellToOpenXML $ zip aligns headers
rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
let borderProps = mknode "w:tcPr" []
let borderProps = Elem $ mknode "w:tcPr" []
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
compactStyle <- pStyleM "Compact"
let emptyCell' = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
then emptyCell'
else contents
let mkrow border cells = mknode "w:tr" [] $
[mknode "w:trPr" [] [
mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
++ map (mkcell border) cells
let mkrow border cells =
mknode "w:tr" [] $
[ mknode "w:trPr" []
[ mknode "w:cnfStyle" [("w:firstRow","1")] ()]
| border]
++ map (mkcell border) cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let fullrow = 5000 -- 100% specified in pct
let rowwidth = fullrow * sum widths
@ -1035,7 +1045,8 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
modify $ \s -> s { stInTable = False }
return $
caption' ++
[mknode "w:tbl" []
[Elem $
mknode "w:tbl" []
( mknode "w:tblPr" []
( mknode "w:tblStyle" [("w:val","Table")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
@ -1070,7 +1081,9 @@ blockToOpenXML' opts (DefinitionList items) = do
setFirstPara
return l
definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
definitionListItemToOpenXML :: (PandocMonad m)
=> WriterOptions -> ([Inline],[[Block]])
-> WS m [Content]
definitionListItemToOpenXML opts (term,defs) = do
term' <- withParaPropM (pStyleM "Definition Term")
$ blockToOpenXML opts (Para term)
@ -1083,8 +1096,11 @@ addList marker = do
lists <- gets stLists
modify $ \st -> st{ stLists = lists ++ [marker] }
listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element]
listItemToOpenXML _ _ [] = return []
listItemToOpenXML :: (PandocMonad m)
=> WriterOptions
-> Int -> [Block]
-> WS m [Content]
listItemToOpenXML _ _ [] = return []
listItemToOpenXML opts numid (first:rest) = do
oldInList <- gets stInList
modify $ \st -> st{ stInList = True }
@ -1111,7 +1127,7 @@ alignmentToString alignment = case alignment of
AlignDefault -> "left"
-- | Convert a list of inline elements to OpenXML.
inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element]
inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content]
inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a
@ -1186,12 +1202,12 @@ setFirstPara :: PandocMonad m => WS m ()
setFirstPara = modify $ \s -> s { stFirstPara = True }
-- | Convert an inline element to OpenXML.
inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
inlineToOpenXML' _ (Str str) =
formattedString str
map Elem <$> formattedString str
inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
@ -1199,10 +1215,11 @@ inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) =
inlinesToOpenXML opts ils
inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
([mknode "w:r" []
(mknode "w:t"
[("xml:space","preserve")]
("\t" :: String))] ++)
([Elem $
mknode "w:r" []
(mknode "w:t"
[("xml:space","preserve")]
("\t" :: String))] ++)
<$> inlinesToOpenXML opts ils
inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
inlinesToOpenXML opts ils
@ -1212,18 +1229,18 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
let ident' = fromMaybe ident (lookup "id" kvs)
kvs' = filter (("id" /=) . fst) kvs
modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
-- prefer the "id" in kvs, since that is the one produced by the docx
-- reader.
let ident' = fromMaybe ident (lookup "id" kvs)
in
return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
, mknode "w:r" []
[ mknode "w:rPr" []
[ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
, mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
]
in return . map Elem $
[ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
, mknode "w:r" []
[ mknode "w:rPr" []
[ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
, mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
]
inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
stylemod <- case lookup dynamicStyleKey kvs of
Just (fromString . T.unpack -> sty) -> do
@ -1255,8 +1272,9 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
modify $ \s -> s{stInsId = insId + 1}
return $ \f -> do
x <- f
return [ mknode "w:ins"
(("w:id", show insId) : changeAuthorDate) x]
return [Elem $
mknode "w:ins"
(("w:id", show insId) : changeAuthorDate) x]
else return id
delmod <- if "deletion" `elem` classes
then do
@ -1265,8 +1283,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
modify $ \s -> s{stDelId = delId + 1}
return $ \f -> local (\env->env{envInDel=True}) $ do
x <- f
return [mknode "w:del"
(("w:id", show delId) : changeAuthorDate) x]
return [Elem $ mknode "w:del"
(("w:id", show delId) : changeAuthorDate) x]
else return id
contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
$ inlinesToOpenXML opts ils
@ -1294,9 +1312,10 @@ inlineToOpenXML' opts (SmallCaps lst) =
inlineToOpenXML' opts (Strikeout lst) =
withTextProp (mknode "w:strike" [] ())
$ inlinesToOpenXML opts lst
inlineToOpenXML' _ LineBreak = return [br]
inlineToOpenXML' _ LineBreak = return [Elem br]
inlineToOpenXML' _ il@(RawInline f str)
| f == Format "openxml" = return [ x | Elem x <- parseXML str ]
| f == Format "openxml" = return
[Text (CData CDataRaw (T.unpack str) Nothing)]
| otherwise = do
report $ InlineNotRendered il
return []
@ -1309,13 +1328,13 @@ inlineToOpenXML' opts (Math mathType str) = do
when (mathType == DisplayMath) setFirstPara
res <- (lift . lift) (convertMath writeOMML mathType str)
case res of
Right r -> return [r]
Right r -> return [Elem r]
Left il -> inlineToOpenXML' opts il
inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML' opts (Code attrs str) = do
let alltoktypes = [KeywordTok ..]
tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes
let unhighlighted = intercalate [br] `fmap`
let unhighlighted = (map Elem . intercalate [br]) `fmap`
mapM formattedString (T.lines str)
formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
toHlTok (toktype,tok) =
@ -1328,7 +1347,7 @@ inlineToOpenXML' opts (Code attrs str) = do
then unhighlighted
else case highlight (writerSyntaxMap opts)
formatOpenXML attrs str of
Right h -> return h
Right h -> return (map Elem h)
Left msg -> do
unless (T.null msg) $ report $ CouldNotHighlight msg
unhighlighted
@ -1351,14 +1370,14 @@ inlineToOpenXML' opts (Note bs) = do
$ insertNoteRef bs)
let newnote = mknode "w:footnote" [("w:id", notenum)] contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
return [ Elem $ mknode "w:r" []
[ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
-- internal link:
inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
return
[ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
[ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
-- external link:
inlineToOpenXML' opts (Link _ txt (src,_)) = do
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
@ -1370,7 +1389,7 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do
modify $ \st -> st{ stExternalLinks =
M.insert (T.unpack src) i extlinks }
return i
return [ mknode "w:hyperlink" [("r:id",id')] contents ]
return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ]
inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
pageWidth <- asks envPrintWidth
imgs <- gets stImages
@ -1434,7 +1453,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
imgElt
wrapBookmark imgident =<< case stImage of
Just imgData -> return [generateImgElt imgData]
Just imgData -> return [Elem $ generateImgElt imgData]
Nothing -> ( do --try
(img, mt) <- P.fetchItem src
ident <- ("rId"++) `fmap` getUniqueId
@ -1462,7 +1481,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
else do
-- insert mime type to use in constructing [Content_Types].xml
modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st }
return [generateImgElt imgData]
return [Elem $ generateImgElt imgData]
)
`catchError` ( \e -> do
report $ CouldNotFetchResource src $ T.pack (show e)
@ -1512,7 +1531,7 @@ withDirection x = do
, envTextProperties = EnvProps textStyle textProps'
}
wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element]
wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content]
wrapBookmark "" contents = return contents
wrapBookmark ident contents = do
id' <- getUniqueId
@ -1520,7 +1539,7 @@ wrapBookmark ident contents = do
[("w:id", id')
,("w:name", T.unpack $ toBookmarkName ident)] ()
bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return $ bookmarkStart : contents ++ [bookmarkEnd]
return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd]
-- Word imposes a 40 character limit on bookmark names and requires
-- that they begin with a letter. So we just use a hash of the

View file

@ -128,6 +128,16 @@ tests = [ testGroup "inlines"
def
"docx/codeblock.native"
"docx/golden/codeblock.docx"
, docxTest
"raw OOXML blocks"
def
"docx/raw-blocks.native"
"docx/golden/raw-blocks.docx"
, docxTest
"raw bookmark markers"
def
"docx/raw-bookmarks.native"
"docx/golden/raw-bookmarks.docx"
]
, testGroup "track changes"
[ docxTest

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,6 @@
[Para [Str "Cell",Space,Str "compartments"]
,RawBlock (Format "openxml") "<w:tbl>\n<w:tblPr>\n<w:tblW w:w=\"2000\" w:type=\"pct\"/>\n<w:tblBorders>\n<w:top w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:start w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:bottom w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:end w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n</w:tblBorders>\n</w:tblPr>\n<w:tblGrid>\n<w:gridCol w:w=\"1871\" />\n<w:gridCol w:w=\"1872\" />\n</w:tblGrid>\n<w:tr>\n<w:tc>"
,Para [Str "Ribosome"]
,RawBlock (Format "openxml") "</w:tc>\n<w:tc>"
,Para [Str "Lysosome"]
,RawBlock (Format "openxml") "</w:tc>\n</w:tr>\n</w:tbl>"]

View file

@ -0,0 +1,3 @@
[Para [Str "Manual",Space,Str "endnotes."]
,Para [Str "Nullam",Space,Str "eu",Space,Str "ante",Space,Str "vel",Space,Str "est",Space,Str "convallis",Space,Str "dignissim.",Space,Str "Nunc",Space,Str "porta",Space,Str "vulputate",Space,Str "tellus.",Space,Str "Nunc",Space,Str "rutrum",Space,Str "turpis",Space,Str "sed",Space,Str "pede.",Space,Str "Sed",Space,Str "bibendum.",RawInline (Format "openxml") "<w:bookmarkStart w:id=\"0\" w:name=\"Aliquam\"/>",Str "Aliquam",Space,Str "posuere."]
,Para [Str "Nunc",Space,Str "aliquet,",Space,Str "augue",Space,Str "nec",Space,Str "adipiscing",Space,Str "interdum,",Space,Str "lacus",Space,Str "tellus",Space,Str "malesuada",Space,Str "massa,",Space,Str "quis",Space,Str "varius",Space,Str "mi",Space,Str "purus",Space,Str "non",Space,Str "odio.",RawInline (Format "openxml") "<w:bookmarkEnd w:id=\"0\"/>",Str "Pellentesque",Space,Str "condimentum,",Space,Str "magna",Space,Str "ut",Space,Str "suscipit",Space,Str "hendrerit,",Space,Str "ipsum",Space,Str "augue",Space,Str "ornare",Space,Str "nulla,",Space,Str "non",Space,Str "luctus",Space,Str "diam",Space,Str "neque",Space,Str "sit",Space,Str "amet",Space,Str "urna.",Space,Str "Curabitur",Space,Str "vulputate",Space,Str "vestibulum",Space,Str "lorem."]]