From 30361308e7913eb82eb5e6f0cb7339153bd4ea32 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 4 Jan 2013 22:29:41 -0800 Subject: [PATCH] Added `--epub-chapter-level` and `--epub-toc-level` options. Also added writerEpubChapterLevel and writerEpubTOCLevel fields to WriterOptions. --- README | 14 ++++++++++++++ pandoc.hs | 30 ++++++++++++++++++++++++++++++ src/Text/Pandoc/Options.hs | 4 ++++ src/Text/Pandoc/Writers/EPUB.hs | 24 +++++++++++++----------- 4 files changed, 61 insertions(+), 11 deletions(-) diff --git a/README b/README index cf822d52f..b7bb05ebe 100644 --- a/README +++ b/README @@ -512,6 +512,20 @@ Options affecting specific writers } body { font-family: "DejaVuSans"; } +`--epub-chapter-level=`*NUMBER* +: Specify the header level at which to split the EPUB into separate + "chapter" files. The default is to split into chapters at level 1 + headers. This option only affects the internal composition of the + EPUB, not the way chapters and sections are displayed to users. Some + readers may be slow if the chapter files are too large, so for large + documents with few level 1 headers, one might want to use a chapter + level of 2 or 3. + +`--epub-toc-level=`*NUMBER* +: Specify the number of section levels to include in an EPUB's table + of contents. The default is 3 (which means that level 1, 2, and 3 + headers will be listed in the contents). + `--latex-engine=`*pdflatex|lualatex|xelatex* : Use the specified LaTeX engine when producing PDF output. The default is `pdflatex`. If the engine is not in your PATH, diff --git a/pandoc.hs b/pandoc.hs index c93cd62bc..a9d02431d 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -114,6 +114,8 @@ data Opt = Opt , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet , optEPUBMetadata :: String -- ^ EPUB metadata , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed + , optEPUBChapterLevel :: Int -- ^ Header level at which to split chapters + , optEPUBTOCLevel :: Int -- ^ Number of levels to include in TOC , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst @@ -166,6 +168,8 @@ defaultOpts = Opt , optEPUBStylesheet = Nothing , optEPUBMetadata = "" , optEPUBFonts = [] + , optEPUBChapterLevel = 1 + , optEPUBTOCLevel = 3 , optDumpArgs = False , optIgnoreArgs = False , optReferenceLinks = False @@ -554,6 +558,28 @@ options = "FILE") "" -- "Directory of fonts to embed" + , Option "" ["epub-chapter-level"] + (ReqArg + (\arg opt -> do + case safeRead arg of + Just t | t >= 1 && t <= 6 -> + return opt { optEPUBChapterLevel = t } + _ -> err 59 $ + "chapter level must be a number between 1 and 6") + "NUMBER") + "" -- "Header level at which to split chapters in EPUB" + + , Option "" ["epub-toc-level"] + (ReqArg + (\arg opt -> do + case safeRead arg of + Just t | t >= 1 && t <= 6 -> + return opt { optEPUBTOCLevel = t } + _ -> err 57 $ + "TOC level must be a number between 1 and 6") + "NUMBER") + "" -- "Number of levels to include in EPUB TOC" + , Option "" ["latex-engine"] (ReqArg (\arg opt -> do @@ -803,6 +829,8 @@ main = do , optEPUBStylesheet = epubStylesheet , optEPUBMetadata = epubMetadata , optEPUBFonts = epubFonts + , optEPUBChapterLevel = epubChapterLevel + , optEPUBTOCLevel = epubTOCLevel , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optReferenceLinks = referenceLinks @@ -992,6 +1020,8 @@ main = do writerTeXLigatures = texLigatures, writerEpubStylesheet = epubStylesheet, writerEpubFonts = epubFonts, + writerEpubChapterLevel = epubChapterLevel, + writerEpubTOCLevel = epubTOCLevel, writerReferenceODT = referenceODT, writerReferenceDocx = referenceDocx } diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 86b1f5b99..0424b434f 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -237,6 +237,8 @@ data WriterOptions = WriterOptions , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed + , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) + , writerEpubTOCLevel :: Int -- ^ Number of levels to include in TOC , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified } deriving Show @@ -275,6 +277,8 @@ instance Default WriterOptions where , writerTeXLigatures = True , writerEpubStylesheet = Nothing , writerEpubFonts = [] + , writerEpubChapterLevel = 1 + , writerEpubTOCLevel = 3 , writerReferenceODT = Nothing , writerReferenceDocx = Nothing } diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 5d3325ba9..024823b38 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -60,9 +60,6 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml) data EPUBVersion = EPUB2 | EPUB3 deriving Eq --- TODO - make an option -chapterHeaderLevel = 1 - writeEPUB2, writeEPUB3 :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString @@ -133,10 +130,12 @@ writeEPUB version opts doc@(Pandoc meta _) = do (Header 1 _ : _) -> blocks _ -> Header 1 (docTitle meta) : blocks + let chapterHeaderLevel = writerEpubChapterLevel opts + -- internal reference IDs change when we chunk the file, -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'. -- the next two lines fix that: - let reftable = correlateRefs blocks' + let reftable = correlateRefs chapterHeaderLevel blocks' let blocks'' = replaceRefs reftable blocks' let isChapterHeader (Header n _) = n <= chapterHeaderLevel @@ -230,6 +229,8 @@ writeEPUB version opts doc@(Pandoc meta _) = do -- toc.ncx let secs = hierarchicalize blocks'' + let tocLevel = writerEpubTOCLevel opts + let navPointNode :: (Int -> String -> String -> [Element] -> Element) -> Shared.Element -> State Int Element navPointNode formatter (Sec _ nums ident ils children) = do @@ -244,7 +245,7 @@ writeEPUB version opts doc@(Pandoc meta _) = do let src = case lookup ident reftable of Just x -> x Nothing -> error (ident ++ " not found in reftable") - let isSec (Sec lev _ _ _ _) = lev <= 3 -- only includes levels 1-3 + let isSec (Sec lev _ _ _ _) = lev <= tocLevel isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs @@ -443,12 +444,13 @@ showChapter = printf "ch%03d.xhtml" -- that would be used in a normal pandoc document with -- new URLs to be used in the EPUB. For example, what -- was "header-1" might turn into "ch006.xhtml#header". -correlateRefs :: [Block] -> [(String,String)] -correlateRefs bs = identTable $ execState (mapM_ go bs) - IdentState{ chapterNumber = 0 - , runningIdents = [] - , chapterIdents = [] - , identTable = [] } +correlateRefs :: Int -> [Block] -> [(String,String)] +correlateRefs chapterHeaderLevel bs = + identTable $ execState (mapM_ go bs) + IdentState{ chapterNumber = 0 + , runningIdents = [] + , chapterIdents = [] + , identTable = [] } where go :: Block -> State IdentState () go (Header n ils) = do when (n <= chapterHeaderLevel) $