Preliminary changes for epub3 format.
* EPUB writer now exports writeEPUB2 and writeEPUB3. * 'epub' output format is epub v2, while 'epub3' is v3.
This commit is contained in:
parent
7818033df9
commit
43448d7d53
5 changed files with 93 additions and 43 deletions
48
README
48
README
|
@ -17,7 +17,7 @@ another, and a command-line tool that uses this library. It can read
|
|||
text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX]
|
||||
(including [beamer] slide shows), [ConTeXt], [RTF], [DocBook XML],
|
||||
[OpenDocument XML], [ODT], [Word docx], [GNU Texinfo], [MediaWiki
|
||||
markup], [EPUB], [FictionBook2], [Textile], [groff man] pages, [Emacs
|
||||
markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs
|
||||
Org-Mode], [AsciiDoc], and [Slidy], [Slideous], [DZSlides], or [S5] HTML
|
||||
slide shows. It can also produce [PDF] output on systems where LaTeX is
|
||||
installed.
|
||||
|
@ -44,7 +44,7 @@ If no *input-file* is specified, input is read from *stdin*.
|
|||
Otherwise, the *input-files* are concatenated (with a blank
|
||||
line between each) and used as input. Output goes to *stdout* by
|
||||
default (though output to *stdout* is disabled for the `odt`, `docx`,
|
||||
and `epub` output formats). For output to a file, use the
|
||||
`epub`, and `epub3` output formats). For output to a file, use the
|
||||
`-o` option:
|
||||
|
||||
pandoc -o output.html input.txt
|
||||
|
@ -159,29 +159,27 @@ General options
|
|||
`json` (JSON version of native AST), `plain` (plain text),
|
||||
`markdown` (pandoc's extended markdown), `markdown_strict` (original
|
||||
unextended markdown), `rst` (reStructuredText), `html` (XHTML
|
||||
1), `html5` (HTML 5), `latex` (LaTeX), `beamer` (LaTeX beamer
|
||||
slide show), `context` (ConTeXt), `man` (groff man), `mediawiki`
|
||||
(MediaWiki markup), `textile` (Textile), `org` (Emacs Org-Mode),
|
||||
`texinfo` (GNU Texinfo), `docbook` (DocBook XML), `opendocument`
|
||||
(OpenDocument XML), `odt` (OpenOffice text document), `docx`
|
||||
(Word docx), `epub` (EPUB book), `fb2` (FictionBook2 e-book),
|
||||
`asciidoc` (AsciiDoc), `slidy` (Slidy HTML and javascript slide
|
||||
show), `slideous` (Slideous HTML and javascript slide show),
|
||||
`dzslides` (HTML5 + javascript slide show), `s5` (S5 HTML and
|
||||
javascript slide show), or `rtf` (rich text format). Note that
|
||||
`odt` and `epub` output will not be directed to *stdout*; an output
|
||||
filename must be specified using the `-o/--output` option. If `+lhs`
|
||||
is appended to `markdown`, `rst`, `latex`, `beamer`, `html`, or
|
||||
`html5`, the output will be rendered as literate Haskell source:
|
||||
see [Literate Haskell support](#literate-haskell-support), below.
|
||||
Markdown syntax extensions can be individually enabled or disabled
|
||||
by appending `+EXTENSION` or `-EXTENSION` to the format name, as
|
||||
described above under `-f`.
|
||||
1), `html5` (HTML 5), `latex` (LaTeX), `beamer` (LaTeX beamer slide show),
|
||||
`context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup),
|
||||
`textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo),
|
||||
`docbook` (DocBook XML), `opendocument` (OpenDocument XML), `odt`
|
||||
(OpenOffice text document), `docx` (Word docx), `epub` (EPUB book), `epub3`
|
||||
(EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), `slidy`
|
||||
(Slidy HTML and javascript slide show), `slideous` (Slideous HTML and
|
||||
javascript slide show), `dzslides` (HTML5 + javascript slide show), `s5`
|
||||
(S5 HTML and javascript slide show), or `rtf` (rich text format). Note that
|
||||
`odt`, `epub`, and `epub3` output will not be directed to *stdout*; an output
|
||||
filename must be specified using the `-o/--output` option. If `+lhs` is
|
||||
appended to `markdown`, `rst`, `latex`, `beamer`, `html`, or `html5`, the
|
||||
output will be rendered as literate Haskell source: see [Literate Haskell
|
||||
support](#literate-haskell-support), below. Markdown syntax extensions can
|
||||
be individually enabled or disabled by appending `+EXTENSION` or
|
||||
`-EXTENSION` to the format name, as described above under `-f`.
|
||||
|
||||
`-o` *FILE*, `--output=`*FILE*
|
||||
: Write output to *FILE* instead of *stdout*. If *FILE* is
|
||||
`-`, output will go to *stdout*. (Exception: if the output
|
||||
format is `odt`, `docx`, or `epub`, output to stdout is disabled.)
|
||||
format is `odt`, `docx`, `epub`, or `epub3`, output to stdout is disabled.)
|
||||
|
||||
`--data-dir=`*DIRECTORY*
|
||||
: Specify the user data directory to search for pandoc data files.
|
||||
|
@ -259,7 +257,7 @@ General writer options
|
|||
`-s`, `--standalone`
|
||||
: Produce output with an appropriate header and footer (e.g. a
|
||||
standalone HTML, LaTeX, or RTF file, not a fragment). This option
|
||||
is set automatically for `pdf`, `epub`, `fb2`, `docx`, and `odt`
|
||||
is set automatically for `pdf`, `epub`, `epub3`, `fb2`, `docx`, and `odt`
|
||||
output.
|
||||
|
||||
`--template=`*FILE*
|
||||
|
@ -662,9 +660,9 @@ the system default templates for a given output format `FORMAT`
|
|||
by putting a file `templates/default.FORMAT` in the user data
|
||||
directory (see `--data-dir`, above). *Exceptions:* For `odt` output,
|
||||
customize the `default.opendocument` template. For `pdf` output,
|
||||
customize the `default.latex` template. For `epub` output, customize
|
||||
the `epub-page.html`, `epub-coverimage.html`, and `epub-titlepage.html`
|
||||
templates.
|
||||
customize the `default.latex` template. For `epub` and `epub3` output,
|
||||
customize the `epub-page.html`, `epub-coverimage.html`, and
|
||||
`epub-titlepage.html` templates.
|
||||
|
||||
Templates may contain *variables*. Variable names are sequences of
|
||||
alphanumerics, `-`, and `_`, starting with a letter. A variable name
|
||||
|
|
|
@ -92,7 +92,8 @@ module Text.Pandoc
|
|||
, writeRTF
|
||||
, writeODT
|
||||
, writeDocx
|
||||
, writeEPUB
|
||||
, writeEPUB2
|
||||
, writeEPUB3
|
||||
, writeFB2
|
||||
, writeOrg
|
||||
, writeAsciiDoc
|
||||
|
@ -199,7 +200,8 @@ writers = [
|
|||
,("json" , PureStringWriter $ \_ -> encodeJSON)
|
||||
,("docx" , IOByteStringWriter writeDocx)
|
||||
,("odt" , IOByteStringWriter writeODT)
|
||||
,("epub" , IOByteStringWriter writeEPUB)
|
||||
,("epub" , IOByteStringWriter writeEPUB2)
|
||||
,("epub3" , IOByteStringWriter writeEPUB3)
|
||||
,("fb2" , IOStringWriter writeFB2)
|
||||
,("html" , PureStringWriter writeHtmlString)
|
||||
,("html5" , PureStringWriter $ \o ->
|
||||
|
|
|
@ -94,6 +94,7 @@ getDefaultTemplate user writer = do
|
|||
"json" -> return $ Right ""
|
||||
"docx" -> return $ Right ""
|
||||
"epub" -> return $ Right ""
|
||||
"epub3" -> return $ Right ""
|
||||
"odt" -> getDefaultTemplate user "opendocument"
|
||||
"markdown_strict" -> getDefaultTemplate user "markdown"
|
||||
"multimarkdown" -> getDefaultTemplate user "markdown"
|
||||
|
|
|
@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
|
||||
Conversion of 'Pandoc' documents to EPUB.
|
||||
-}
|
||||
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
|
||||
module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
|
||||
import Data.IORef
|
||||
import Data.Maybe ( fromMaybe, isNothing )
|
||||
import Data.List ( isPrefixOf, intercalate )
|
||||
|
@ -38,6 +38,8 @@ import qualified Data.ByteString.Lazy as B
|
|||
import Text.Pandoc.UTF8 ( fromStringLazy )
|
||||
import Codec.Archive.Zip
|
||||
import Data.Time.Clock.POSIX
|
||||
import Data.Time
|
||||
import System.Locale
|
||||
import Text.Pandoc.Shared hiding ( Element )
|
||||
import qualified Text.Pandoc.Shared as Shared
|
||||
import Text.Pandoc.Options
|
||||
|
@ -56,11 +58,20 @@ import Prelude hiding (catch)
|
|||
import Control.Exception (catch, SomeException)
|
||||
import Text.HTML.TagSoup
|
||||
|
||||
data EPUBVersion = EPUB2 | EPUB3 deriving Eq
|
||||
|
||||
writeEPUB2, writeEPUB3 :: WriterOptions -- ^ Writer options
|
||||
-> Pandoc -- ^ Document to convert
|
||||
-> IO B.ByteString
|
||||
writeEPUB2 = writeEPUB EPUB2
|
||||
writeEPUB3 = writeEPUB EPUB3
|
||||
|
||||
-- | Produce an EPUB file from a Pandoc document.
|
||||
writeEPUB :: WriterOptions -- ^ Writer options
|
||||
writeEPUB :: EPUBVersion
|
||||
-> WriterOptions -- ^ Writer options
|
||||
-> Pandoc -- ^ Document to convert
|
||||
-> IO B.ByteString
|
||||
writeEPUB opts doc@(Pandoc meta _) = do
|
||||
writeEPUB version opts doc@(Pandoc meta _) = do
|
||||
epochtime <- floor `fmap` getPOSIXTime
|
||||
let mkEntry path content = toEntry path epochtime content
|
||||
let opts' = opts{ writerEmailObfuscation = NoObfuscation
|
||||
|
@ -163,17 +174,23 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let plainTitle = plainify $ docTitle meta
|
||||
let plainAuthors = map plainify $ docAuthors meta
|
||||
let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta
|
||||
currentTime <- getCurrentTime
|
||||
let contentsData = fromStringLazy $ ppTopElement $
|
||||
unode "package" ! [("version","2.0")
|
||||
unode "package" ! [("version", case version of
|
||||
EPUB2 -> "2.0"
|
||||
EPUB3 -> "3.0")
|
||||
,("xmlns","http://www.idpf.org/2007/opf")
|
||||
,("unique-identifier","BookId")] $
|
||||
[ metadataElement (writerEPUBMetadata opts')
|
||||
uuid lang plainTitle plainAuthors plainDate mbCoverImage
|
||||
[ metadataElement version (writerEPUBMetadata opts')
|
||||
uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage
|
||||
, unode "manifest" $
|
||||
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
|
||||
,("media-type","application/x-dtbncx+xml")] $ ()
|
||||
, unode "item" ! [("id","style"), ("href","stylesheet.css")
|
||||
,("media-type","text/css")] $ ()
|
||||
, unode "item" ! [("id","nav"), ("href","nav.xhtml")
|
||||
,("properties","nav")
|
||||
,("media-type","application/xhtml+xml")] $ ()
|
||||
] ++
|
||||
map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
|
||||
map pictureNode (cpicEntry ++ picEntries) ++
|
||||
|
@ -190,8 +207,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
-- toc.ncx
|
||||
let secs = hierarchicalize blocks''
|
||||
|
||||
let navPointNode :: Shared.Element -> State Int Element
|
||||
navPointNode (Sec _ nums ident ils children) = do
|
||||
let navPointNode :: (Int -> String -> String -> [Element] -> Element)
|
||||
-> Shared.Element -> State Int Element
|
||||
navPointNode formatter (Sec _ nums ident ils children) = do
|
||||
n <- get
|
||||
modify (+1)
|
||||
let showNums :: [Int] -> String
|
||||
|
@ -206,14 +224,17 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let isSec (Sec lev _ _ _ _) = lev <= 3 -- only includes levels 1-3
|
||||
isSec _ = False
|
||||
let subsecs = filter isSec children
|
||||
subs <- mapM navPointNode subsecs
|
||||
return $ unode "navPoint" !
|
||||
subs <- mapM (navPointNode formatter) subsecs
|
||||
return $ formatter n tit src subs
|
||||
navPointNode _ (Blk _) = error "navPointNode encountered Blk"
|
||||
|
||||
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
|
||||
navMapFormatter n tit src subs = unode "navPoint" !
|
||||
[("id", "navPoint-" ++ show n)
|
||||
,("playOrder", show n)] $
|
||||
[ unode "navLabel" $ unode "text" tit
|
||||
, unode "content" ! [("src", src)] $ ()
|
||||
] ++ subs
|
||||
navPointNode (Blk _) = error "navPointNode encountered Blk"
|
||||
|
||||
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
|
||||
[ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
|
||||
|
@ -236,10 +257,31 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
Just _ -> [unode "meta" ! [("name","cover"),
|
||||
("content","cover-image")] $ ()]
|
||||
, unode "docTitle" $ unode "text" $ plainTitle
|
||||
, unode "navMap" $ tpNode : evalState (mapM navPointNode secs) 1
|
||||
, unode "navMap" $
|
||||
tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1
|
||||
]
|
||||
let tocEntry = mkEntry "toc.ncx" tocData
|
||||
|
||||
let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
|
||||
navXhtmlFormatter n tit src subs = unode "li" !
|
||||
[("id", "toc-li-" ++ show n)] $
|
||||
unode "a" ! [("href",src)] $
|
||||
unode "span" tit :
|
||||
case subs of
|
||||
[] -> []
|
||||
(_:_) -> [unode "ol" subs]
|
||||
|
||||
let navData = fromStringLazy $ ppTopElement $
|
||||
unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
|
||||
,("xmlns:epub","http://www.idpf.org/2007/ops")] $
|
||||
[ unode "head" $ unode "title" plainTitle
|
||||
, unode "body" $
|
||||
unode "nav" ! [("epub:type","toc")] $
|
||||
[ unode "h1" plainTitle
|
||||
, unode "ol" $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]
|
||||
]
|
||||
let navEntry = mkEntry "nav.xhtml" navData
|
||||
|
||||
-- mimetype
|
||||
let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"
|
||||
|
||||
|
@ -269,11 +311,13 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let archive = foldr addEntryToArchive emptyArchive
|
||||
(mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
|
||||
contentsEntry : tocEntry :
|
||||
(picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) )
|
||||
([navEntry | version == EPUB3] ++ picEntries ++ cpicEntry ++ cpgEntry ++
|
||||
chapterEntries ++ fontEntries) )
|
||||
return $ fromArchive archive
|
||||
|
||||
metadataElement :: String -> UUID -> String -> String -> [String] -> String -> Maybe a -> Element
|
||||
metadataElement metadataXML uuid lang title authors date mbCoverImage =
|
||||
metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String]
|
||||
-> String -> UTCTime -> Maybe a -> Element
|
||||
metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage =
|
||||
let userNodes = parseXML metadataXML
|
||||
elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
|
||||
,("xmlns:opf","http://www.idpf.org/2007/opf")] $
|
||||
|
@ -292,10 +336,15 @@ metadataElement metadataXML uuid lang title authors date mbCoverImage =
|
|||
not (elt `contains` "identifier") ] ++
|
||||
[ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++
|
||||
[ unode "dc:date" date | not (elt `contains` "date") ] ++
|
||||
[ unode "meta" ! [("property", "dcterms:modified")] $
|
||||
(showDateTimeISO8601 currentTime) | version == EPUB3 ] ++
|
||||
[ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
|
||||
not (isNothing mbCoverImage) ]
|
||||
in elt{ elContent = elContent elt ++ map Elem newNodes }
|
||||
|
||||
showDateTimeISO8601 :: UTCTime -> String
|
||||
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
|
||||
|
||||
transformInlines :: HTMLMathMethod
|
||||
-> FilePath
|
||||
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
|
||||
|
|
|
@ -83,7 +83,7 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
|
|||
else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs
|
||||
|
||||
isTextFormat :: String -> Bool
|
||||
isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub"]
|
||||
isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"]
|
||||
|
||||
-- | Data structure for command line options.
|
||||
data Opt = Opt
|
||||
|
|
Loading…
Add table
Reference in a new issue