EPUB writer: Improved chapter splitting and internal link rewriting.
Closes #1887. Closes #2163. Closes #2162.
This commit is contained in:
parent
1daa26468c
commit
dbf2a63669
1 changed files with 34 additions and 53 deletions
|
@ -56,10 +56,10 @@ import Text.Pandoc.Options ( WriterOptions(..)
|
|||
, EPUBVersion(..)
|
||||
, ObfuscationMethod(NoObfuscation) )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Walk (walk, walkM)
|
||||
import Text.Pandoc.Walk (walk, walkM, query)
|
||||
import Data.Default
|
||||
import Text.Pandoc.Writers.Markdown (writePlain)
|
||||
import Control.Monad.State (modify, get, execState, State, put, evalState)
|
||||
import Control.Monad.State (modify, get, State, put, evalState)
|
||||
import Control.Monad (mplus, liftM, when)
|
||||
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
|
||||
, strContent, lookupAttr, Node(..), QName(..), parseXML
|
||||
|
@ -408,11 +408,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
(docTitle' meta) : blocks
|
||||
|
||||
let chapterHeaderLevel = writerEpubChapterLevel opts
|
||||
-- internal reference IDs change when we chunk the file,
|
||||
-- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
|
||||
-- the next two lines fix that:
|
||||
let reftable = correlateRefs chapterHeaderLevel blocks'
|
||||
let blocks'' = replaceRefs reftable blocks'
|
||||
|
||||
let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
|
||||
isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) =
|
||||
|
@ -443,7 +438,37 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let (xs,ys) = break isChapterHeader bs
|
||||
(Chapter Nothing (b:xs) :) `fmap` toChapters ys
|
||||
|
||||
let chapters = evalState (toChapters blocks'') []
|
||||
let chapters' = evalState (toChapters blocks') []
|
||||
|
||||
let extractLinkURL' :: Int -> Inline -> [(String, String)]
|
||||
extractLinkURL' num (Span (ident, _, _) _)
|
||||
| not (null ident) = [(ident, showChapter num ++ ('#':ident))]
|
||||
extractLinkURL' _ _ = []
|
||||
|
||||
let extractLinkURL :: Int -> Block -> [(String, String)]
|
||||
extractLinkURL num (Div (ident, _, _) _)
|
||||
| not (null ident) = [(ident, showChapter num ++ ('#':ident))]
|
||||
extractLinkURL num (Header _ (ident, _, _) _)
|
||||
| not (null ident) = [(ident, showChapter num ++ ('#':ident))]
|
||||
extractLinkURL num b = query (extractLinkURL' num) b
|
||||
|
||||
let reftable = concat $ zipWith (\(Chapter _ bs) num ->
|
||||
query (extractLinkURL num) bs)
|
||||
chapters' [1..]
|
||||
|
||||
let fixInternalReferences :: Inline -> Inline
|
||||
fixInternalReferences (Link lab ('#':xs, tit)) =
|
||||
case lookup xs reftable of
|
||||
Just ys -> Link lab (ys, tit)
|
||||
Nothing -> Link lab ('#':xs, tit)
|
||||
fixInternalReferences x = x
|
||||
|
||||
-- internal reference IDs change when we chunk the file,
|
||||
-- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
|
||||
-- this fixes that:
|
||||
let chapters = map (\(Chapter mbnum bs) ->
|
||||
Chapter mbnum $ walk fixInternalReferences bs)
|
||||
chapters'
|
||||
|
||||
let chapToEntry :: Int -> Chapter -> Entry
|
||||
chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
|
||||
|
@ -549,7 +574,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let contentsEntry = mkEntry "content.opf" contentsData
|
||||
|
||||
-- toc.ncx
|
||||
let secs = hierarchicalize blocks''
|
||||
let secs = hierarchicalize blocks'
|
||||
|
||||
let tocLevel = writerTOCDepth opts
|
||||
|
||||
|
@ -889,11 +914,6 @@ mediaTypeOf x =
|
|||
Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
|
||||
_ -> Nothing
|
||||
|
||||
data IdentState = IdentState{
|
||||
chapterNumber :: Int,
|
||||
identTable :: [(String,String)]
|
||||
} deriving (Read, Show)
|
||||
|
||||
-- Returns filename for chapter number.
|
||||
showChapter :: Int -> String
|
||||
showChapter = printf "ch%03d.xhtml"
|
||||
|
@ -910,45 +930,6 @@ addIdentifiers bs = evalState (mapM go bs) []
|
|||
return $ Header n (ident',classes,kvs) ils
|
||||
go x = return x
|
||||
|
||||
-- Go through a block list and construct a table
|
||||
-- correlating the automatically constructed references
|
||||
-- that would be used in a normal pandoc document with
|
||||
-- new URLs to be used in the EPUB. For example, what
|
||||
-- was "header-1" might turn into "ch006.xhtml#header".
|
||||
correlateRefs :: Int -> [Block] -> [(String,String)]
|
||||
correlateRefs chapterHeaderLevel bs =
|
||||
identTable $ execState (walkM goBlock bs >>= walkM goInline)
|
||||
IdentState{ chapterNumber = 0
|
||||
, identTable = [] }
|
||||
where goBlock :: Block -> State IdentState Block
|
||||
goBlock x@(Header n (ident,_,_) _) = x <$ addIdentifier (Just n) ident
|
||||
goBlock x@(Div (ident,_,_) _) = x <$ addIdentifier Nothing ident
|
||||
goBlock x = return x
|
||||
goInline :: Inline -> State IdentState Inline
|
||||
goInline x@(Span (ident,_,_) _) = x <$ addIdentifier Nothing ident
|
||||
goInline x = return x
|
||||
addIdentifier mbHeaderLevel ident = do
|
||||
case mbHeaderLevel of
|
||||
Just n | n <= chapterHeaderLevel ->
|
||||
modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
|
||||
_ -> return ()
|
||||
st <- get
|
||||
let chapterid = showChapter (chapterNumber st) ++
|
||||
case mbHeaderLevel of
|
||||
Just n | n <= chapterHeaderLevel -> ""
|
||||
_ -> '#' : ident
|
||||
modify $ \s -> s{ identTable = (ident, chapterid) : identTable st }
|
||||
|
||||
-- Replace internal link references using the table produced
|
||||
-- by correlateRefs.
|
||||
replaceRefs :: [(String,String)] -> [Block] -> [Block]
|
||||
replaceRefs refTable = walk replaceOneRef
|
||||
where replaceOneRef x@(Link lab ('#':xs,tit)) =
|
||||
case lookup xs refTable of
|
||||
Just url -> Link lab (url,tit)
|
||||
Nothing -> x
|
||||
replaceOneRef x = x
|
||||
|
||||
-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
|
||||
normalizeDate' :: String -> Maybe String
|
||||
normalizeDate' xs =
|
||||
|
|
Loading…
Reference in a new issue