From 4b2d07a642c9ab570a8da64deca32a4139b2268b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 16 Jul 2014 03:44:51 +0100 Subject: [PATCH] Docx Reader: Fix hdr auto-id when already auto-id. If header anchors (bookmarks in a header paragraph) already have an auto-id, which will happen if they're generated by pandoc, we don't want to rename it twice, and thus end up with an unnecessary number at the end. So we add a state value to check if we're in a header. If we are, we don't rename the bookmark -- wait until we rename it in our header handling. --- src/Text/Pandoc/Readers/Docx.hs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index a5237f2e1..196a3cec5 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -105,6 +105,7 @@ readDocx opts bytes = Left _ -> error $ "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String + , docxInHeaderBlock :: Bool , docxInTexSubscript :: Bool } data DEnv = DEnv { docxOptions :: ReaderOptions @@ -292,17 +293,22 @@ parPartToInlines (BookMark _ anchor) = -- We record these, so we can make sure not to overwrite -- user-defined anchor links with header auto ids. do + -- get whether we're in a header. + inHdrBool <- gets docxInHeaderBlock -- Get the anchor map. anchorMap <- gets docxAnchorMap - -- Check to see if the id is already in there. Rewrite if - -- necessary. This will have the possible effect of rewriting - -- user-defined anchor links. However, since these are not defined - -- in pandoc, it seems like a necessary evil to avoid an extra - -- pass. - let newAnchor = case anchor `elem` (M.elems anchorMap) of - True -> uniqueIdent [Str anchor] (M.elems anchorMap) - False -> anchor - return [Span (anchor, ["anchor"], []) []] + -- We don't want to rewrite if we're in a header, since we'll take + -- care of that later, when we make the header anchor. If the + -- bookmark were already in uniqueIdent form, this would lead to a + -- duplication. Otherwise, we check to see if the id is already in + -- there. Rewrite if necessary. This will have the possible effect + -- of rewriting user-defined anchor links. However, since these + -- are not defined in pandoc, it seems like a necessary evil to + -- avoid an extra pass. + let newAnchor = + if not inHdrBool && anchor `elem` (M.elems anchorMap) + then uniqueIdent [Str anchor] (M.elems anchorMap) + else anchor modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} return [Span (newAnchor, ["anchor"], []) []] parPartToInlines (Drawing fp bs) = do @@ -493,7 +499,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) do hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - updateDState $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap} + modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) -- Otherwise we just give it a name, and register that name (associate -- it with itself.) @@ -537,7 +543,8 @@ bodyPartToBlocks (Paragraph pPr parparts) [CodeBlock ("", [], []) (concatMap parPartToString parparts)] bodyPartToBlocks (Paragraph pPr parparts) | any isHeaderContainer (parStyleToContainers pPr) = do - ils <- parPartsToInlines parparts >>= (return . normalizeSpaces) + ils <-withDState (\s -> s{docxInHeaderBlock = True}) $ + parPartsToInlines parparts >>= (return . normalizeSpaces) let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr) Header n attr _ = hdrFun [] hdr <- makeHeaderAnchor $ Header n attr ils @@ -620,6 +627,7 @@ bodyToBlocks (Body bps) = do docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body)) = let dState = DState { docxAnchorMap = M.empty + , docxInHeaderBlock = False , docxInTexSubscript = False} dEnv = DEnv { docxOptions = opts , docxDocument = d}