EPUB writer: Improved chapter splitting and internal link rewriting.
Closes #1887. Closes #2163. Closes #2162.
This commit is contained in:
parent
1daa26468c
commit
dbf2a63669
1 changed files with 34 additions and 53 deletions
|
@ -56,10 +56,10 @@ import Text.Pandoc.Options ( WriterOptions(..)
|
||||||
, EPUBVersion(..)
|
, EPUBVersion(..)
|
||||||
, ObfuscationMethod(NoObfuscation) )
|
, ObfuscationMethod(NoObfuscation) )
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Walk (walk, walkM)
|
import Text.Pandoc.Walk (walk, walkM, query)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import Text.Pandoc.Writers.Markdown (writePlain)
|
import Text.Pandoc.Writers.Markdown (writePlain)
|
||||||
import Control.Monad.State (modify, get, execState, State, put, evalState)
|
import Control.Monad.State (modify, get, State, put, evalState)
|
||||||
import Control.Monad (mplus, liftM, when)
|
import Control.Monad (mplus, liftM, when)
|
||||||
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
|
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
|
||||||
, strContent, lookupAttr, Node(..), QName(..), parseXML
|
, strContent, lookupAttr, Node(..), QName(..), parseXML
|
||||||
|
@ -408,11 +408,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
||||||
(docTitle' meta) : blocks
|
(docTitle' meta) : blocks
|
||||||
|
|
||||||
let chapterHeaderLevel = writerEpubChapterLevel opts
|
let chapterHeaderLevel = writerEpubChapterLevel opts
|
||||||
-- internal reference IDs change when we chunk the file,
|
|
||||||
-- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
|
|
||||||
-- the next two lines fix that:
|
|
||||||
let reftable = correlateRefs chapterHeaderLevel blocks'
|
|
||||||
let blocks'' = replaceRefs reftable blocks'
|
|
||||||
|
|
||||||
let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
|
let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
|
||||||
isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) =
|
isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) =
|
||||||
|
@ -443,7 +438,37 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
||||||
let (xs,ys) = break isChapterHeader bs
|
let (xs,ys) = break isChapterHeader bs
|
||||||
(Chapter Nothing (b:xs) :) `fmap` toChapters ys
|
(Chapter Nothing (b:xs) :) `fmap` toChapters ys
|
||||||
|
|
||||||
let chapters = evalState (toChapters blocks'') []
|
let chapters' = evalState (toChapters blocks') []
|
||||||
|
|
||||||
|
let extractLinkURL' :: Int -> Inline -> [(String, String)]
|
||||||
|
extractLinkURL' num (Span (ident, _, _) _)
|
||||||
|
| not (null ident) = [(ident, showChapter num ++ ('#':ident))]
|
||||||
|
extractLinkURL' _ _ = []
|
||||||
|
|
||||||
|
let extractLinkURL :: Int -> Block -> [(String, String)]
|
||||||
|
extractLinkURL num (Div (ident, _, _) _)
|
||||||
|
| not (null ident) = [(ident, showChapter num ++ ('#':ident))]
|
||||||
|
extractLinkURL num (Header _ (ident, _, _) _)
|
||||||
|
| not (null ident) = [(ident, showChapter num ++ ('#':ident))]
|
||||||
|
extractLinkURL num b = query (extractLinkURL' num) b
|
||||||
|
|
||||||
|
let reftable = concat $ zipWith (\(Chapter _ bs) num ->
|
||||||
|
query (extractLinkURL num) bs)
|
||||||
|
chapters' [1..]
|
||||||
|
|
||||||
|
let fixInternalReferences :: Inline -> Inline
|
||||||
|
fixInternalReferences (Link lab ('#':xs, tit)) =
|
||||||
|
case lookup xs reftable of
|
||||||
|
Just ys -> Link lab (ys, tit)
|
||||||
|
Nothing -> Link lab ('#':xs, tit)
|
||||||
|
fixInternalReferences x = x
|
||||||
|
|
||||||
|
-- internal reference IDs change when we chunk the file,
|
||||||
|
-- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
|
||||||
|
-- this fixes that:
|
||||||
|
let chapters = map (\(Chapter mbnum bs) ->
|
||||||
|
Chapter mbnum $ walk fixInternalReferences bs)
|
||||||
|
chapters'
|
||||||
|
|
||||||
let chapToEntry :: Int -> Chapter -> Entry
|
let chapToEntry :: Int -> Chapter -> Entry
|
||||||
chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
|
chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
|
||||||
|
@ -549,7 +574,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
||||||
let contentsEntry = mkEntry "content.opf" contentsData
|
let contentsEntry = mkEntry "content.opf" contentsData
|
||||||
|
|
||||||
-- toc.ncx
|
-- toc.ncx
|
||||||
let secs = hierarchicalize blocks''
|
let secs = hierarchicalize blocks'
|
||||||
|
|
||||||
let tocLevel = writerTOCDepth opts
|
let tocLevel = writerTOCDepth opts
|
||||||
|
|
||||||
|
@ -889,11 +914,6 @@ mediaTypeOf x =
|
||||||
Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
|
Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|
||||||
data IdentState = IdentState{
|
|
||||||
chapterNumber :: Int,
|
|
||||||
identTable :: [(String,String)]
|
|
||||||
} deriving (Read, Show)
|
|
||||||
|
|
||||||
-- Returns filename for chapter number.
|
-- Returns filename for chapter number.
|
||||||
showChapter :: Int -> String
|
showChapter :: Int -> String
|
||||||
showChapter = printf "ch%03d.xhtml"
|
showChapter = printf "ch%03d.xhtml"
|
||||||
|
@ -910,45 +930,6 @@ addIdentifiers bs = evalState (mapM go bs) []
|
||||||
return $ Header n (ident',classes,kvs) ils
|
return $ Header n (ident',classes,kvs) ils
|
||||||
go x = return x
|
go x = return x
|
||||||
|
|
||||||
-- Go through a block list and construct a table
|
|
||||||
-- correlating the automatically constructed references
|
|
||||||
-- that would be used in a normal pandoc document with
|
|
||||||
-- new URLs to be used in the EPUB. For example, what
|
|
||||||
-- was "header-1" might turn into "ch006.xhtml#header".
|
|
||||||
correlateRefs :: Int -> [Block] -> [(String,String)]
|
|
||||||
correlateRefs chapterHeaderLevel bs =
|
|
||||||
identTable $ execState (walkM goBlock bs >>= walkM goInline)
|
|
||||||
IdentState{ chapterNumber = 0
|
|
||||||
, identTable = [] }
|
|
||||||
where goBlock :: Block -> State IdentState Block
|
|
||||||
goBlock x@(Header n (ident,_,_) _) = x <$ addIdentifier (Just n) ident
|
|
||||||
goBlock x@(Div (ident,_,_) _) = x <$ addIdentifier Nothing ident
|
|
||||||
goBlock x = return x
|
|
||||||
goInline :: Inline -> State IdentState Inline
|
|
||||||
goInline x@(Span (ident,_,_) _) = x <$ addIdentifier Nothing ident
|
|
||||||
goInline x = return x
|
|
||||||
addIdentifier mbHeaderLevel ident = do
|
|
||||||
case mbHeaderLevel of
|
|
||||||
Just n | n <= chapterHeaderLevel ->
|
|
||||||
modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
|
|
||||||
_ -> return ()
|
|
||||||
st <- get
|
|
||||||
let chapterid = showChapter (chapterNumber st) ++
|
|
||||||
case mbHeaderLevel of
|
|
||||||
Just n | n <= chapterHeaderLevel -> ""
|
|
||||||
_ -> '#' : ident
|
|
||||||
modify $ \s -> s{ identTable = (ident, chapterid) : identTable st }
|
|
||||||
|
|
||||||
-- Replace internal link references using the table produced
|
|
||||||
-- by correlateRefs.
|
|
||||||
replaceRefs :: [(String,String)] -> [Block] -> [Block]
|
|
||||||
replaceRefs refTable = walk replaceOneRef
|
|
||||||
where replaceOneRef x@(Link lab ('#':xs,tit)) =
|
|
||||||
case lookup xs refTable of
|
|
||||||
Just url -> Link lab (url,tit)
|
|
||||||
Nothing -> x
|
|
||||||
replaceOneRef x = x
|
|
||||||
|
|
||||||
-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
|
-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
|
||||||
normalizeDate' :: String -> Maybe String
|
normalizeDate' :: String -> Maybe String
|
||||||
normalizeDate' xs =
|
normalizeDate' xs =
|
||||||
|
|
Loading…
Reference in a new issue