Docx reader: add simple comment functionality.
This adds simple track-changes comment parsing to the docx reader. It is turned on with `--track-changes=all`. All comments are converted to inlines, which can list some information. In the future a warning will be added for comments with formatting that seems like it will be excessively denatured. Note that comments can extend across blocks. For that reason there are two spans: `comment-start` and `comment-end`. `comment-start` will contain the comment. `comment-end` will always be empty. The two will be associated by a numeric id.
This commit is contained in:
parent
cbc2c15f0f
commit
8bb739f7ff
2 changed files with 73 additions and 1 deletions
|
@ -339,6 +339,22 @@ parPartToInlines (Deletion _ author date runs) = do
|
|||
ils <- smushInlines <$> mapM runToInlines runs
|
||||
let attr = ("", ["deletion"], [("author", author), ("date", date)])
|
||||
return $ spanWith attr ils
|
||||
parPartToInlines (CommentStart cmtId author date bodyParts) = do
|
||||
opts <- asks docxOptions
|
||||
case readerTrackChanges opts of
|
||||
AllChanges -> do
|
||||
blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
|
||||
let ils = fromList $ blocksToInlines $ toList blks
|
||||
attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
|
||||
return $ spanWith attr ils
|
||||
_ -> return mempty
|
||||
parPartToInlines (CommentEnd cmtId) = do
|
||||
opts <- asks docxOptions
|
||||
case readerTrackChanges opts of
|
||||
AllChanges -> do
|
||||
let attr = ("", ["comment-end"], [("id", cmtId)])
|
||||
return $ spanWith attr mempty
|
||||
_ -> return mempty
|
||||
parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
|
||||
return mempty
|
||||
parPartToInlines (BookMark _ anchor) =
|
||||
|
|
|
@ -73,6 +73,7 @@ import Text.Pandoc.Readers.Docx.Util
|
|||
import Data.Char (readLitChar, ord, chr, isDigit)
|
||||
|
||||
data ReaderEnv = ReaderEnv { envNotes :: Notes
|
||||
, envComments :: Comments
|
||||
, envNumbering :: Numbering
|
||||
, envRelationships :: [Relationship]
|
||||
, envMedia :: Media
|
||||
|
@ -160,6 +161,9 @@ data Notes = Notes NameSpaces
|
|||
(Maybe (M.Map String Element))
|
||||
deriving Show
|
||||
|
||||
data Comments = Comments NameSpaces (M.Map String Element)
|
||||
deriving Show
|
||||
|
||||
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
|
||||
, rightParIndent :: Maybe Integer
|
||||
, hangingParIndent :: Maybe Integer}
|
||||
|
@ -210,6 +214,8 @@ type Extent = Maybe (Double, Double)
|
|||
data ParPart = PlainRun Run
|
||||
| Insertion ChangeId Author ChangeDate [Run]
|
||||
| Deletion ChangeId Author ChangeDate [Run]
|
||||
| CommentStart CommentId Author CommentDate [BodyPart]
|
||||
| CommentEnd CommentId
|
||||
| BookMark BookMarkId Anchor
|
||||
| InternalHyperLink Anchor [Run]
|
||||
| ExternalHyperLink URL [Run]
|
||||
|
@ -259,8 +265,10 @@ type URL = String
|
|||
type BookMarkId = String
|
||||
type RelId = String
|
||||
type ChangeId = String
|
||||
type CommentId = String
|
||||
type Author = String
|
||||
type ChangeDate = String
|
||||
type CommentDate = String
|
||||
|
||||
archiveToDocx :: Archive -> Either DocxError Docx
|
||||
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
|
||||
|
@ -268,12 +276,13 @@ archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
|
|||
archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
|
||||
archiveToDocxWithWarnings archive = do
|
||||
let notes = archiveToNotes archive
|
||||
comments = archiveToComments archive
|
||||
numbering = archiveToNumbering archive
|
||||
rels = archiveToRelationships archive
|
||||
media = archiveToMedia archive
|
||||
(styles, parstyles) = archiveToStyles archive
|
||||
rEnv =
|
||||
ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
|
||||
ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
|
||||
rState = ReaderState { stateWarnings = [] }
|
||||
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
|
||||
case eitherDoc of
|
||||
|
@ -384,6 +393,20 @@ archiveToNotes zf =
|
|||
in
|
||||
Notes ns fn en
|
||||
|
||||
archiveToComments :: Archive -> Comments
|
||||
archiveToComments zf =
|
||||
let cmtsElem = findEntryByPath "word/comments.xml" zf
|
||||
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
|
||||
cmts_namespaces = case cmtsElem of
|
||||
Just e -> elemToNameSpaces e
|
||||
Nothing -> []
|
||||
cmts = (elemToComments cmts_namespaces) <$> cmtsElem
|
||||
in
|
||||
case cmts of
|
||||
Just c -> Comments cmts_namespaces c
|
||||
Nothing -> Comments cmts_namespaces M.empty
|
||||
|
||||
|
||||
filePathToRelType :: FilePath -> Maybe DocumentLocation
|
||||
filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
|
||||
filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
|
||||
|
@ -504,6 +527,18 @@ elemToNotes ns notetype element
|
|||
Just $ M.fromList $ pairs
|
||||
elemToNotes _ _ _ = Nothing
|
||||
|
||||
elemToComments :: NameSpaces -> Element -> M.Map String Element
|
||||
elemToComments ns element
|
||||
| isElem ns "w" "comments" element =
|
||||
let pairs = mapMaybe
|
||||
(\e -> findAttr (elemName ns "w" "id") e >>=
|
||||
(\a -> Just (a, e)))
|
||||
(findChildren (elemName ns "w" "comment") element)
|
||||
in
|
||||
M.fromList $ pairs
|
||||
elemToComments _ _ = M.empty
|
||||
|
||||
|
||||
---------------------------------------------
|
||||
---------------------------------------------
|
||||
|
||||
|
@ -696,11 +731,32 @@ elemToParPart ns element
|
|||
, Just anchor <- findAttr (elemName ns "w" "anchor") element = do
|
||||
runs <- mapD (elemToRun ns) (elChildren element)
|
||||
return $ InternalHyperLink anchor runs
|
||||
elemToParPart ns element
|
||||
| isElem ns "w" "commentRangeStart" element
|
||||
, Just cmtId <- findAttr (elemName ns "w" "id") element = do
|
||||
(Comments _ commentMap) <- asks envComments
|
||||
case M.lookup cmtId commentMap of
|
||||
Just cmtElem -> elemToCommentStart ns cmtElem
|
||||
Nothing -> throwError WrongElem
|
||||
elemToParPart ns element
|
||||
| isElem ns "w" "commentRangeEnd" element
|
||||
, Just cmtId <- findAttr (elemName ns "w" "id") element =
|
||||
return $ CommentEnd cmtId
|
||||
elemToParPart ns element
|
||||
| isElem ns "m" "oMath" element =
|
||||
(eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
|
||||
elemToParPart _ _ = throwError WrongElem
|
||||
|
||||
elemToCommentStart :: NameSpaces -> Element -> D ParPart
|
||||
elemToCommentStart ns element
|
||||
| isElem ns "w" "comment" element
|
||||
, Just cmtId <- findAttr (elemName ns "w" "id") element
|
||||
, Just cmtAuthor <- findAttr (elemName ns "w" "author") element
|
||||
, Just cmtDate <- findAttr (elemName ns "w" "date") element = do
|
||||
bps <- mapD (elemToBodyPart ns) (elChildren element)
|
||||
return $ CommentStart cmtId cmtAuthor cmtDate bps
|
||||
elemToCommentStart _ _ = throwError WrongElem
|
||||
|
||||
lookupFootnote :: String -> Notes -> Maybe Element
|
||||
lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
|
||||
|
||||
|
|
Loading…
Reference in a new issue