DOCX reader: Allow empty dates in comments and tracked changes (#6726)

For security reasons, some legal firms delete the date from comments and
tracked changes.

* Make date optional (Maybe) in tracked changes and comments datatypes
* Add tests
This commit is contained in:
Diego Balseiro 2020-10-06 23:03:00 -05:00 committed by GitHub
parent a27a0b5419
commit eda5540719
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 42 additions and 23 deletions

View file

@ -365,7 +365,7 @@ parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author dat
RejectChanges -> return mempty RejectChanges -> return mempty
AllChanges -> do AllChanges -> do
ils <- smushInlines <$> mapM runToInlines runs ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)]) let attr = ("", ["insertion"], addAuthorAndDate author date)
return $ spanWith attr ils return $ spanWith attr ils
parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions opts <- asks docxOptions
@ -374,7 +374,7 @@ parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date
RejectChanges -> smushInlines <$> mapM runToInlines runs RejectChanges -> smushInlines <$> mapM runToInlines runs
AllChanges -> do AllChanges -> do
ils <- smushInlines <$> mapM runToInlines runs ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)]) let attr = ("", ["deletion"], addAuthorAndDate author date)
return $ spanWith attr ils return $ spanWith attr ils
parPartToInlines' (CommentStart cmtId author date bodyParts) = do parPartToInlines' (CommentStart cmtId author date bodyParts) = do
opts <- asks docxOptions opts <- asks docxOptions
@ -382,7 +382,7 @@ parPartToInlines' (CommentStart cmtId author date bodyParts) = do
AllChanges -> do AllChanges -> do
blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
ils <- blocksToInlinesWarn cmtId blks ils <- blocksToInlinesWarn cmtId blks
let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)]) let attr = ("", ["comment-start"], ("id", cmtId) : addAuthorAndDate author date)
return $ spanWith attr ils return $ spanWith attr ils
_ -> return mempty _ -> return mempty
parPartToInlines' (CommentEnd cmtId) = do parPartToInlines' (CommentEnd cmtId) = do
@ -593,7 +593,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
return mempty return mempty
(Just (TrackedChange Insertion (ChangeInfo _ cAuthor cDate)) (Just (TrackedChange Insertion (ChangeInfo _ cAuthor cDate))
, AllChanges) -> do , AllChanges) -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)]) let attr = ("", ["paragraph-insertion"], addAuthorAndDate cAuthor cDate)
insertMark = spanWith attr mempty insertMark = spanWith attr mempty
transform <- parStyleToTransform pPr' transform <- parStyleToTransform pPr'
return $ transform $ return $ transform $
@ -605,7 +605,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
handleInsertion handleInsertion
(Just (TrackedChange Deletion (ChangeInfo _ cAuthor cDate)) (Just (TrackedChange Deletion (ChangeInfo _ cAuthor cDate))
, AllChanges) -> do , AllChanges) -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)]) let attr = ("", ["paragraph-deletion"], addAuthorAndDate cAuthor cDate)
insertMark = spanWith attr mempty insertMark = spanWith attr mempty
transform <- parStyleToTransform pPr' transform <- parStyleToTransform pPr'
return $ transform $ return $ transform $
@ -732,3 +732,8 @@ docxToOutput :: PandocMonad m
docxToOutput opts (Docx (Document _ body)) = docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def evalDocxContext (bodyToOutput body) dEnv def
addAuthorAndDate :: T.Text -> Maybe T.Text -> [(T.Text, T.Text)]
addAuthorAndDate author mdate =
("author", author) : maybe [] (\date -> [("date", date)]) mdate

View file

@ -213,7 +213,7 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
data ChangeType = Insertion | Deletion data ChangeType = Insertion | Deletion
deriving Show deriving Show
data ChangeInfo = ChangeInfo ChangeId Author ChangeDate data ChangeInfo = ChangeInfo ChangeId Author (Maybe ChangeDate)
deriving Show deriving Show
data TrackedChange = TrackedChange ChangeType ChangeInfo data TrackedChange = TrackedChange ChangeType ChangeInfo
@ -276,7 +276,7 @@ type Extent = Maybe (Double, Double)
data ParPart = PlainRun Run data ParPart = PlainRun Run
| ChangedRuns TrackedChange [Run] | ChangedRuns TrackedChange [Run]
| CommentStart CommentId Author CommentDate [BodyPart] | CommentStart CommentId Author (Maybe CommentDate) [BodyPart]
| CommentEnd CommentId | CommentEnd CommentId
| BookMark BookMarkId Anchor | BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run] | InternalHyperLink Anchor [Run]
@ -852,7 +852,7 @@ elemToCommentStart ns element
| isElem ns "w" "comment" element | isElem ns "w" "comment" element
, Just cmtId <- findAttrTextByName ns "w" "id" element , Just cmtId <- findAttrTextByName ns "w" "id" element
, Just cmtAuthor <- findAttrTextByName ns "w" "author" element , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
, Just cmtDate <- findAttrTextByName ns "w" "date" element = do , cmtDate <- findAttrTextByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element) bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem elemToCommentStart _ _ = throwError WrongElem
@ -958,14 +958,14 @@ getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
, Just cId <- findAttrTextByName ns "w" "id" element , Just cId <- findAttrTextByName ns "w" "id" element
, Just cAuthor <- findAttrTextByName ns "w" "author" element , Just cAuthor <- findAttrTextByName ns "w" "author" element
, Just cDate <- findAttrTextByName ns "w" "date" element = , mcDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
getTrackedChange ns element getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
, Just cId <- findAttrTextByName ns "w" "id" element , Just cId <- findAttrTextByName ns "w" "id" element
, Just cAuthor <- findAttrTextByName ns "w" "author" element , Just cAuthor <- findAttrTextByName ns "w" "author" element
, Just cDate <- findAttrTextByName ns "w" "date" element = , mcDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
getTrackedChange _ _ = Nothing getTrackedChange _ _ = Nothing
elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle

View file

@ -1244,33 +1244,29 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
else id) else id)
getChangeAuthorDate = do getChangeAuthorDate = do
defaultAuthor <- asks envChangesAuthor defaultAuthor <- asks envChangesAuthor
defaultDate <- asks envChangesDate
let author = fromMaybe defaultAuthor (lookup "author" kvs) let author = fromMaybe defaultAuthor (lookup "author" kvs)
date = fromMaybe defaultDate (lookup "date" kvs) let mdate = lookup "date" kvs
return (author, date) return $ ("w:author", T.unpack author) :
maybe [] (\date -> [("w:date", T.unpack date)]) mdate
insmod <- if "insertion" `elem` classes insmod <- if "insertion" `elem` classes
then do then do
(author, date) <- getChangeAuthorDate changeAuthorDate <- getChangeAuthorDate
insId <- gets stInsId insId <- gets stInsId
modify $ \s -> s{stInsId = insId + 1} modify $ \s -> s{stInsId = insId + 1}
return $ \f -> do return $ \f -> do
x <- f x <- f
return [ mknode "w:ins" return [ mknode "w:ins"
[("w:id", show insId), (("w:id", show insId) : changeAuthorDate) x]
("w:author", T.unpack author),
("w:date", T.unpack date)] x ]
else return id else return id
delmod <- if "deletion" `elem` classes delmod <- if "deletion" `elem` classes
then do then do
(author, date) <- getChangeAuthorDate changeAuthorDate <- getChangeAuthorDate
delId <- gets stDelId delId <- gets stDelId
modify $ \s -> s{stDelId = delId + 1} modify $ \s -> s{stDelId = delId + 1}
return $ \f -> local (\env->env{envInDel=True}) $ do return $ \f -> local (\env->env{envInDel=True}) $ do
x <- f x <- f
return [mknode "w:del" return [mknode "w:del"
[("w:id", show delId), (("w:id", show delId) : changeAuthorDate) x]
("w:author", T.unpack author),
("w:date", T.unpack date)] x]
else return id else return id
contents <- insmod $ delmod $ dirmod $ stylemod $ pmod contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
$ inlinesToOpenXML opts ils $ inlinesToOpenXML opts ils

View file

@ -405,6 +405,10 @@ tests = [ testGroup "document"
"paragraph insertion/deletion (all)" "paragraph insertion/deletion (all)"
"docx/paragraph_insertion_deletion.docx" "docx/paragraph_insertion_deletion.docx"
"docx/paragraph_insertion_deletion_all.native" "docx/paragraph_insertion_deletion_all.native"
, testCompareWithOpts def{readerTrackChanges=AllChanges}
"paragraph insertion/deletion (all)"
"docx/track_changes_scrubbed_metadata.docx"
"docx/track_changes_scrubbed_metadata.native"
, testForWarningsWithOpts def{readerTrackChanges=AcceptChanges} , testForWarningsWithOpts def{readerTrackChanges=AcceptChanges}
"comment warnings (accept -- no warnings)" "comment warnings (accept -- no warnings)"
"docx/comments_warning.docx" "docx/comments_warning.docx"

View file

@ -150,6 +150,11 @@ tests = [ testGroup "inlines"
def def
"docx/comments.native" "docx/comments.native"
"docx/golden/comments.docx" "docx/golden/comments.docx"
, docxTest
"scrubbed metadata"
def
"docx/track_changes_scrubbed_metadata.native"
"docx/golden/track_changes_scrubbed_metadata.docx"
] ]
, testGroup "custom styles" , testGroup "custom styles"
[ docxTest "custom styles without reference.docx" [ docxTest "custom styles without reference.docx"

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,9 @@
[Para [ Str "Here", Space, Str "is", Space, Str "a", Space
, Span ("",["deletion"],[("author","Author")]) [Str "dummy"]
, Span ("",["insertion"],[("author","Author")]) [Str "test"]
, Space
, Span ("",["comment-start"],[("id","3"),("author","Author")])
[Str "With",Space,Str "a",Space,Str "comment!"]
, Str "document",Span ("",["comment-end"],[("id","3")]) [],Str "."
]
]