Docx reader: add simple comment functionality.

This adds simple track-changes comment parsing to the docx reader. It is
turned on with `--track-changes=all`. All comments are converted to
inlines, which can list some information. In the future a warning will
be added for comments with formatting that seems like it will be
excessively denatured.

Note that comments can extend across blocks. For that reason there are
two spans: `comment-start` and `comment-end`. `comment-start` will
contain the comment. `comment-end` will always be empty. The two will be
associated by a numeric id.
This commit is contained in:
Jesse Rosenthal 2016-06-22 13:49:19 -04:00
parent cbc2c15f0f
commit 8bb739f7ff
2 changed files with 73 additions and 1 deletions

View file

@ -339,6 +339,22 @@ parPartToInlines (Deletion _ author date runs) = do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines (CommentStart cmtId author date bodyParts) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
let ils = fromList $ blocksToInlines $ toList blks
attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
return $ spanWith attr ils
_ -> return mempty
parPartToInlines (CommentEnd cmtId) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
let attr = ("", ["comment-end"], [("id", cmtId)])
return $ spanWith attr mempty
_ -> return mempty
parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
return mempty
parPartToInlines (BookMark _ anchor) =

View file

@ -73,6 +73,7 @@ import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments
, envNumbering :: Numbering
, envRelationships :: [Relationship]
, envMedia :: Media
@ -160,6 +161,9 @@ data Notes = Notes NameSpaces
(Maybe (M.Map String Element))
deriving Show
data Comments = Comments NameSpaces (M.Map String Element)
deriving Show
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
, rightParIndent :: Maybe Integer
, hangingParIndent :: Maybe Integer}
@ -210,6 +214,8 @@ type Extent = Maybe (Double, Double)
data ParPart = PlainRun Run
| Insertion ChangeId Author ChangeDate [Run]
| Deletion ChangeId Author ChangeDate [Run]
| CommentStart CommentId Author CommentDate [BodyPart]
| CommentEnd CommentId
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
@ -259,8 +265,10 @@ type URL = String
type BookMarkId = String
type RelId = String
type ChangeId = String
type CommentId = String
type Author = String
type ChangeDate = String
type CommentDate = String
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
@ -268,12 +276,13 @@ archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
archiveToDocxWithWarnings archive = do
let notes = archiveToNotes archive
comments = archiveToComments archive
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
(styles, parstyles) = archiveToStyles archive
rEnv =
ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
rState = ReaderState { stateWarnings = [] }
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
@ -384,6 +393,20 @@ archiveToNotes zf =
in
Notes ns fn en
archiveToComments :: Archive -> Comments
archiveToComments zf =
let cmtsElem = findEntryByPath "word/comments.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
cmts_namespaces = case cmtsElem of
Just e -> elemToNameSpaces e
Nothing -> []
cmts = (elemToComments cmts_namespaces) <$> cmtsElem
in
case cmts of
Just c -> Comments cmts_namespaces c
Nothing -> Comments cmts_namespaces M.empty
filePathToRelType :: FilePath -> Maybe DocumentLocation
filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
@ -504,6 +527,18 @@ elemToNotes ns notetype element
Just $ M.fromList $ pairs
elemToNotes _ _ _ = Nothing
elemToComments :: NameSpaces -> Element -> M.Map String Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
(\e -> findAttr (elemName ns "w" "id") e >>=
(\a -> Just (a, e)))
(findChildren (elemName ns "w" "comment") element)
in
M.fromList $ pairs
elemToComments _ _ = M.empty
---------------------------------------------
---------------------------------------------
@ -696,11 +731,32 @@ elemToParPart ns element
, Just anchor <- findAttr (elemName ns "w" "anchor") element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
, Just cmtId <- findAttr (elemName ns "w" "id") element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
, Just cmtId <- findAttr (elemName ns "w" "id") element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
(eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
, Just cmtId <- findAttr (elemName ns "w" "id") element
, Just cmtAuthor <- findAttr (elemName ns "w" "author") element
, Just cmtDate <- findAttr (elemName ns "w" "date") element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
lookupFootnote :: String -> Notes -> Maybe Element
lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)