From 30361308e7913eb82eb5e6f0cb7339153bd4ea32 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Fri, 4 Jan 2013 22:29:41 -0800
Subject: [PATCH] Added `--epub-chapter-level` and `--epub-toc-level` options.

Also added writerEpubChapterLevel and writerEpubTOCLevel fields
to WriterOptions.
---
 README                          | 14 ++++++++++++++
 pandoc.hs                       | 30 ++++++++++++++++++++++++++++++
 src/Text/Pandoc/Options.hs      |  4 ++++
 src/Text/Pandoc/Writers/EPUB.hs | 24 +++++++++++++-----------
 4 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/README b/README
index cf822d52f..b7bb05ebe 100644
--- a/README
+++ b/README
@@ -512,6 +512,20 @@ Options affecting specific writers
         }
         body { font-family: "DejaVuSans"; }
 
+`--epub-chapter-level=`*NUMBER*
+:   Specify the header level (1-6) at which to split the EPUB into
+    separate "chapter" files. The default is to split into chapters at
+    level 1 headers. This option only affects the internal composition
+    of the EPUB, not the way chapters and sections are displayed to
+    users. Some readers may be slow if the chapter files are too large,
+    so for large documents with few level 1 headers, one might want to
+    use a chapter level of 2 or 3.
+
+`--epub-toc-level=`*NUMBER*
+:   Specify the number of section levels (1-6) to include in an EPUB's
+    table of contents.  The default is 3 (which means that level 1, 2,
+    and 3 headers will be listed in the contents).
+
 `--latex-engine=`*pdflatex|lualatex|xelatex*
 :   Use the specified LaTeX engine when producing PDF output.
     The default is `pdflatex`.  If the engine is not in your PATH,
diff --git a/pandoc.hs b/pandoc.hs
index c93cd62bc..a9d02431d 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -114,6 +114,8 @@ data Opt = Opt
     , optEPUBStylesheet    :: Maybe String   -- ^ EPUB stylesheet
     , optEPUBMetadata      :: String  -- ^ EPUB metadata
     , optEPUBFonts         :: [FilePath] -- ^ EPUB fonts to embed
+    , optEPUBChapterLevel  :: Int     -- ^ Header level at which to split chapters
+    , optEPUBTOCLevel      :: Int     -- ^ Number of levels to include in TOC
     , optDumpArgs          :: Bool    -- ^ Output command-line arguments
     , optIgnoreArgs        :: Bool    -- ^ Ignore command-line arguments
     , optReferenceLinks    :: Bool    -- ^ Use reference links in writing markdown, rst
@@ -166,6 +168,8 @@ defaultOpts = Opt
     , optEPUBStylesheet    = Nothing
     , optEPUBMetadata      = ""
     , optEPUBFonts         = []
+    , optEPUBChapterLevel  = 1
+    , optEPUBTOCLevel      = 3
     , optDumpArgs          = False
     , optIgnoreArgs        = False
     , optReferenceLinks    = False
@@ -554,6 +558,28 @@ options =
                   "FILE")
                  "" -- "Directory of fonts to embed"
 
+    , Option "" ["epub-chapter-level"]
+                 (ReqArg
+                  (\arg opt -> do
+                      case safeRead arg of
+                           Just t | t >= 1 && t <= 6 ->
+                                    return opt { optEPUBChapterLevel = t }
+                           _      -> err 59 $
+                                    "chapter level must be a number between 1 and 6")
+                 "NUMBER")
+                 "" -- "Header level at which to split chapters in EPUB"
+
+    , Option "" ["epub-toc-level"]
+                 (ReqArg
+                  (\arg opt -> do
+                      case safeRead arg of
+                           Just t | t >= 1 && t <= 6 ->
+                                    return opt { optEPUBTOCLevel = t }
+                           _      -> err 57 $
+                                    "TOC level must be a number between 1 and 6")
+                 "NUMBER")
+                 "" -- "Number of levels to include in EPUB TOC"
+
     , Option "" ["latex-engine"]
                  (ReqArg
                   (\arg opt -> do
@@ -803,6 +829,8 @@ main = do
               , optEPUBStylesheet    = epubStylesheet
               , optEPUBMetadata      = epubMetadata
               , optEPUBFonts         = epubFonts
+              , optEPUBChapterLevel  = epubChapterLevel
+              , optEPUBTOCLevel      = epubTOCLevel
               , optDumpArgs          = dumpArgs
               , optIgnoreArgs        = ignoreArgs
               , optReferenceLinks    = referenceLinks
@@ -992,6 +1020,8 @@ main = do
                             writerTeXLigatures     = texLigatures,
                             writerEpubStylesheet   = epubStylesheet,
                             writerEpubFonts        = epubFonts,
+                            writerEpubChapterLevel = epubChapterLevel,
+                            writerEpubTOCLevel     = epubTOCLevel,
                             writerReferenceODT     = referenceODT,
                             writerReferenceDocx    = referenceDocx
                           }
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 86b1f5b99..0424b434f 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -237,6 +237,8 @@ data WriterOptions = WriterOptions
   , writerTeXLigatures     :: Bool       -- ^ Use tex ligatures quotes, dashes in latex
   , writerEpubStylesheet   :: Maybe String -- ^ EPUB stylesheet specified at command line
   , writerEpubFonts        :: [FilePath] -- ^ Paths to fonts to embed
+  , writerEpubChapterLevel :: Int            -- ^ Header level for chapters (separate files)
+  , writerEpubTOCLevel     :: Int            -- ^ Number of levels to include in TOC
   , writerReferenceODT     :: Maybe FilePath -- ^ Path to reference ODT if specified
   , writerReferenceDocx    :: Maybe FilePath -- ^ Ptah to reference DOCX if specified
   } deriving Show
@@ -275,6 +277,8 @@ instance Default WriterOptions where
                       , writerTeXLigatures     = True
                       , writerEpubStylesheet   = Nothing
                       , writerEpubFonts        = []
+                      , writerEpubChapterLevel = 1
+                      , writerEpubTOCLevel     = 3
                       , writerReferenceODT     = Nothing
                       , writerReferenceDocx    = Nothing
                       }
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 5d3325ba9..024823b38 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -60,9 +60,6 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 
 data EPUBVersion = EPUB2 | EPUB3 deriving Eq
 
--- TODO - make an option
-chapterHeaderLevel = 1
-
 writeEPUB2, writeEPUB3 :: WriterOptions   -- ^ Writer options
                        -> Pandoc          -- ^ Document to convert
                        -> IO B.ByteString
@@ -133,10 +130,12 @@ writeEPUB version opts doc@(Pandoc meta _) = do
                       (Header 1 _ : _) -> blocks
                       _                -> Header 1 (docTitle meta) : blocks
 
+  let chapterHeaderLevel = writerEpubChapterLevel opts
+
   -- internal reference IDs change when we chunk the file,
   -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
   -- the next two lines fix that:
-  let reftable = correlateRefs blocks'
+  let reftable = correlateRefs chapterHeaderLevel blocks'
   let blocks'' = replaceRefs reftable blocks'
 
   let isChapterHeader (Header n _) = n <= chapterHeaderLevel
@@ -230,6 +229,8 @@ writeEPUB version opts doc@(Pandoc meta _) = do
   -- toc.ncx
   let secs = hierarchicalize blocks''
 
+  let tocLevel = writerEpubTOCLevel opts
+
   let navPointNode :: (Int -> String -> String -> [Element] -> Element)
                    -> Shared.Element -> State Int Element
       navPointNode formatter (Sec _ nums ident ils children) = do
@@ -244,7 +245,7 @@ writeEPUB version opts doc@(Pandoc meta _) = do
         let src = case lookup ident reftable of
                        Just x  -> x
                        Nothing -> error (ident ++ " not found in reftable")
-        let isSec (Sec lev _ _ _ _) = lev <= 3  -- only includes levels 1-3
+        let isSec (Sec lev _ _ _ _) = lev <= tocLevel
             isSec _                 = False
         let subsecs = filter isSec children
         subs <- mapM (navPointNode formatter) subsecs
@@ -443,12 +444,13 @@ showChapter = printf "ch%03d.xhtml"
 -- that would be used in a normal pandoc document with
 -- new URLs to be used in the EPUB.  For example, what
 -- was "header-1" might turn into "ch006.xhtml#header".
-correlateRefs :: [Block] -> [(String,String)]
-correlateRefs bs = identTable $ execState (mapM_ go bs)
-                                IdentState{ chapterNumber = 0
-                                          , runningIdents = []
-                                          , chapterIdents = []
-                                          , identTable = [] }
+correlateRefs :: Int -> [Block] -> [(String,String)]
+correlateRefs chapterHeaderLevel bs =
+  identTable $ execState (mapM_ go bs)
+    IdentState{ chapterNumber = 0
+              , runningIdents = []
+              , chapterIdents = []
+              , identTable = [] }
  where go :: Block -> State IdentState ()
        go (Header n ils) = do
           when (n <= chapterHeaderLevel) $