Docx writer: Fix bookmarks to headers with long titles.

Word has a 40 character limit for bookmark names.  In
addition, bookmarks must begin with a letter.  Since
pandoc's auto-generated identifiers may not respect
these constraints, some internal links did not work.

With this change, pandoc uses a bookmark name based
on the SHA1 hash of the identifier when the identifier
isn't a legal bookmark name.

Closes #5091.
This commit is contained in:
John MacFarlane 2018-11-20 23:43:21 -05:00
parent 2d265917b0
commit d333c283cc
3 changed files with 19 additions and 5 deletions

View file

@ -2,7 +2,7 @@ version?=$(shell grep '^[Vv]ersion:' pandoc.cabal | awk '{print $$2;}')
pandoc=$(shell find dist -name pandoc -type f -exec ls -t {} \; | head -1)
SOURCEFILES?=$(shell find pandoc.hs src test -name '*.hs')
BRANCH?=master
RESOLVER=lts-12
RESOLVER?=lts-12
GHCOPTS=-fdiagnostics-color=always -Wall -fno-warn-unused-do-bind -Wincomplete-record-updates -Wnoncanonical-monad-instances -Wnoncanonical-monadfail-instances -Wincomplete-uni-patterns -Werror=missing-home-modules -Widentities -Wcpp-undef -fhide-source-paths -j +RTS -A32M -RTS
# Later:
# -Wpartial-fields (currently used in Powerpoint writer)

View file

@ -41,19 +41,21 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import Data.Char (isSpace, ord, toLower)
import Data.Char (isSpace, ord, toLower, isLetter)
import Data.List (intercalate, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Time.Clock.POSIX
import Data.Digest.Pure.SHA (sha1, showDigest)
import Skylighting
import System.Random (randomR, StdGen, mkStdGen)
import Text.Pandoc.BCP47 (getLang, renderLang)
import Text.Pandoc.Class (PandocMonad, report, toLang)
import qualified Text.Pandoc.Class as P
import Data.Time
import Text.Pandoc.UTF8 (fromStringLazy)
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Highlighting (highlight)
@ -1268,7 +1270,8 @@ inlineToOpenXML' opts (Note bs) = do
-- internal link:
inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
return
[ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ]
-- external link:
inlineToOpenXML' opts (Link _ txt (src,_)) = do
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
@ -1427,7 +1430,18 @@ wrapBookmark :: (PandocMonad m) => String -> [Element] -> WS m [Element]
wrapBookmark [] contents = return contents
wrapBookmark ident contents = do
id' <- getUniqueId
let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
,("w:name",ident)] ()
let bookmarkStart = mknode "w:bookmarkStart"
[("w:id", id')
,("w:name", toBookmarkName ident)] ()
bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return $ bookmarkStart : contents ++ [bookmarkEnd]
-- Word imposes a 40 character limit on bookmark names and requires
-- that they begin with a letter. So we just use a hash of the
-- identifer when otherwise we'd have an illegal bookmark name.
toBookmarkName :: String -> String
toBookmarkName s =
case s of
(c:_) | isLetter c
, length s <= 40 -> s
_ -> 'X' : drop 1 (showDigest (sha1 (fromStringLazy s)))