diff --git a/epub.css b/epub.css new file mode 100644 index 000000000..a270e8d6e --- /dev/null +++ b/epub.css @@ -0,0 +1,30 @@ +/* This defines styles and classes used in the book */ +body { margin-left: 5%; margin-right: 5%; margin-top: 5%; margin-bottom: 5%; text-align: justify; font-size: medium; } +code { font-family: monospace; } +h1 { text-align: center; } +h2 { text-align: center; } +h3 { text-align: center; } +h4 { text-align: center; } +h5 { text-align: center; } +h6 { text-align: center; } + +/* for syntax highlighting produced by highlighting-kate */ +table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode, table.sourceCode pre + { margin: 0; padding: 0; border: 0; vertical-align: baseline; border: none; } +td.lineNumbers { text-align: right; background-color: #EBEBEB; color: black; padding-right: 5px; padding-left: 5px; } +td.sourceCode { padding-left: 5px; } +pre.sourceCode { } +pre.sourceCode span.Normal { } +pre.sourceCode span.Keyword { font-weight: bold; } +pre.sourceCode span.DataType { color: #800000; } +pre.sourceCode span.DecVal { color: #0000FF; } +pre.sourceCode span.BaseN { color: #0000FF; } +pre.sourceCode span.Float { color: #800080; } +pre.sourceCode span.Char { color: #FF00FF; } +pre.sourceCode span.String { color: #DD0000; } +pre.sourceCode span.Comment { color: #808080; font-style: italic; } +pre.sourceCode span.Others { } +pre.sourceCode span.Alert { color: green; font-weight: bold; } +pre.sourceCode span.Function { color: #000080; } +pre.sourceCode span.RegionMarker { } +pre.sourceCode span.Error { color: red; font-weight: bold; } diff --git a/pandoc.cabal b/pandoc.cabal index 93283b346..f5d88234b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -18,8 +18,8 @@ Description: Pandoc is a Haskell library for converting from one markup this library. It can read markdown and (subsets of) reStructuredText, HTML, and LaTeX, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, - OpenDocument, ODT, RTF, MediaWiki, groff man pages, and - S5 HTML slide shows. + OpenDocument, ODT, RTF, MediaWiki, groff man pages, EPUB, + and S5 HTML slide shows. . Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, definition lists, tables, and other @@ -44,6 +44,8 @@ Data-Files: templates/mediawiki.template, templates/rtf.template, -- data for ODT writer reference.odt, + -- stylesheet for EPUB writer + epub.css, -- data for LaTeXMathML writer data/LaTeXMathML.js, data/MathMLinHTML.js, @@ -146,7 +148,7 @@ Library bytestring >= 0.9, zip-archive >= 0.1.1.4, utf8-string >= 0.3, old-time >= 1, HTTP >= 4000.0.5, texmath, xml >= 1.3.5 && < 1.4, - extensible-exceptions + random, extensible-exceptions if impl(ghc >= 6.10) Build-depends: base >= 4 && < 5, syb else @@ -183,9 +185,11 @@ Library Text.Pandoc.Writers.RTF, Text.Pandoc.Writers.S5, Text.Pandoc.Writers.ODT, + Text.Pandoc.Writers.EPUB, Text.Pandoc.Templates Other-Modules: Text.Pandoc.XML, Text.Pandoc.UTF8, + Text.Pandoc.UUID, Paths_pandoc Extensions: CPP if impl(ghc >= 6.12) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 8cbaaa109..463befffc 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -87,6 +87,7 @@ module Text.Pandoc , writeMediaWiki , writeRTF , writeODT + , writeEPUB , prettyPandoc -- * Writer options used in writers , WriterOptions (..) @@ -111,6 +112,7 @@ import Text.Pandoc.Writers.Texinfo import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.S5 import Text.Pandoc.Writers.ODT +import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs new file mode 100644 index 000000000..082644eea --- /dev/null +++ b/src/Text/Pandoc/UUID.hs @@ -0,0 +1,77 @@ +{- +Copyright (C) 2010 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.UUID + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +UUID generation using Version 4 (random method) described +in RFC4122. See http://tools.ietf.org/html/rfc4122 +-} + +module Text.Pandoc.UUID ( UUID, getRandomUUID ) where + +import Text.Printf ( printf ) +import System.Random ( randomIO ) +import Data.Word +import Data.Bits ( setBit, clearBit ) +import Control.Monad ( liftM ) + +data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 + Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 + +instance Show UUID where + show (UUID a b c d e f g h i j k l m n o p) = + "urn:uuid:" ++ + printf "%02x" a ++ + printf "%02x" b ++ + printf "%02x" c ++ + printf "%02x" d ++ + "-" ++ + printf "%02x" e ++ + printf "%02x" f ++ + "-" ++ + printf "%02x" g ++ + printf "%02x" h ++ + "-" ++ + printf "%02x" i ++ + printf "%02x" j ++ + "-" ++ + printf "%02x" k ++ + printf "%02x" l ++ + printf "%02x" m ++ + printf "%02x" n ++ + printf "%02x" o ++ + printf "%02x" p + +getRandomUUID :: IO UUID +getRandomUUID = do + let getRN :: a -> IO Word8 + getRN _ = liftM fromIntegral (randomIO :: IO Int) + [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int]) + -- set variant + let i' = i `setBit` 7 `clearBit` 6 + -- set version (0100 for random) + let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 + return $ UUID a b c d e f g' h i' j k l m n o p + diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs new file mode 100644 index 000000000..05d9afa7c --- /dev/null +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -0,0 +1,247 @@ +{- +Copyright (C) 2010 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.EPUB + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to EPUB. +-} +module Text.Pandoc.Writers.EPUB ( writeEPUB ) where +import Data.IORef +import Data.Maybe ( fromMaybe, isNothing ) +import Data.List ( findIndices, isPrefixOf ) +import System.Environment ( getEnv ) +import System.FilePath ( (), takeBaseName, takeExtension ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Codec.Archive.Zip +import System.Time +import Text.Pandoc.Shared hiding ( Element ) +import Text.Pandoc.Definition +import Control.Monad (liftM) +import Text.XML.Light hiding (ppTopElement) +import Text.Pandoc.UUID +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.Markdown ( writePlain ) +import Data.Char ( toLower ) + +-- | Produce an EPUB file from a Pandoc document. +writeEPUB :: FilePath -- ^ Relative directory of source file + -> String -- ^ EPUB stylesheet + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeEPUB sourceDir stylesheet opts doc = do + (TOD epochtime _) <- getClockTime + let opts' = opts{ writerEmailObfuscation = NoObfuscation + , writerStandalone = True + , writerWrapText = False } + -- mimetype + let mimetypeEntry = toEntry "mimetype" epochtime $ fromString "application/epub+zip" + -- container.xml + let containerData = fromString $ ppTopElement $ + unode "container" ! [("version","1.0") + ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ + unode "rootfiles" $ + unode "rootfile" ! [("full-path","content.opf") + ,("media-type","application/oebps-package+xml")] $ () + let containerEntry = toEntry "META-INF/container.xml" epochtime containerData + -- stylesheet + let stylesheetEntry = toEntry "stylesheet.css" epochtime $ + fromString stylesheet + -- title page + let vars = writerVariables opts' + let tpContent = fromString $ + writeHtmlString opts'{writerTemplate = pageTemplate + ,writerVariables = ("titlepage","yes"):vars} doc + let tpEntry = toEntry "title_page.xhtml" epochtime tpContent + -- handle pictures + picEntriesRef <- newIORef ([] :: [Entry]) + Pandoc meta blocks <- liftM (processWith transformBlock) $ + processWithM (transformInline (writerHTMLMathMethod opts) + sourceDir picEntriesRef) doc + picEntries <- readIORef picEntriesRef + -- body pages + let isH1 (Header 1 _) = True + isH1 _ = False + let chunks = splitByIndices (dropWhile (==0) $ findIndices isH1 blocks) blocks + let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys + titleize xs = Pandoc meta xs + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate + , writerHTMLMathMethod = PlainMath} + let chapters = map titleize chunks + let chapterToEntry :: Int -> Pandoc -> Entry + chapterToEntry num chap = toEntry ("ch" ++ show num ++ ".xhtml") + epochtime $ fromString $ chapToHtml chap + let chapterEntries = zipWith chapterToEntry [1..] chapters + -- contents.opf + uuid <- getRandomUUID + let chapterNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", "application/xhtml+xml")] $ () + let chapterRefNode ent = unode "itemref" ! + [("idref", takeBaseName $ eRelativePath ent)] $ () + let pictureNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", fromMaybe "application/octet-stream" + $ imageTypeOf $ eRelativePath ent)] $ () + let plainify t = removeTrailingSpace $ + writePlain opts'{ writerStandalone = False } $ + Pandoc meta [Plain t] + let plainTitle = plainify $ docTitle meta + let plainAuthors = map plainify $ docAuthors meta + lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") + (\_ -> return "en-US") + let contentsData = fromString $ ppTopElement $ + unode "package" ! [("version","2.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","BookId")] $ + [ unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ + [ unode "dc:title" plainTitle + , unode "dc:language" lang + , unode "dc:identifier" ! [("id","BookId")] $ show uuid + ] ++ + map (unode "dc:creator" ! [("opf:role","aut")]) plainAuthors + , unode "manifest" $ + [ unode "item" ! [("id","ncx"), ("href","toc.ncx") + ,("media-type","application/x-dtbncx+xml")] $ () + , unode "item" ! [("id","style"), ("href","stylesheet.css") + ,("media-type","text/css")] $ () + ] ++ + map chapterNode (tpEntry : chapterEntries) ++ + map pictureNode picEntries + , unode "spine" ! [("toc","ncx")] $ + map chapterRefNode (tpEntry : chapterEntries) + ] + let contentsEntry = toEntry "content.opf" epochtime contentsData + -- toc.ncx + let navPointNode ent n tit = unode "navPoint" ! + [("id", "navPoint-" ++ show n) + ,("playOrder", show n)] $ + [ unode "navLabel" $ unode "text" tit + , unode "content" ! [("src", + eRelativePath ent)] $ () + ] + let tocData = fromString $ ppTopElement $ + unode "ncx" ! [("version","2005-1") + ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ + [ unode "head" + [ unode "meta" ! [("name","dtb:uid") + ,("content", show uuid)] $ () + , unode "meta" ! [("name","dtb:depth") + ,("content", "1")] $ () + , unode "meta" ! [("name","dtb:totalPageCount") + ,("content", "0")] $ () + , unode "meta" ! [("name","dtb:maxPageNumber") + ,("content", "0")] $ () + ] + , unode "docTitle" $ unode "text" $ plainTitle + , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries) + [1..(length chapterEntries + 1)] + ("Title Page" : map (\(Pandoc m _) -> + plainify $ docTitle m) chapters) + ] + let tocEntry = toEntry "toc.ncx" epochtime tocData + -- construct archive + let archive = foldr addEntryToArchive emptyArchive + (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : + contentsEntry : tocEntry : (picEntries ++ chapterEntries) ) + return $ fromArchive archive + +transformInline :: HTMLMathMethod + -> FilePath + -> IORef [Entry] + -> Inline + -> IO Inline +transformInline _ _ _ (Image lab (src,_)) | isNothing (imageTypeOf src) = + return (Emph lab) +transformInline _ sourceDir picsRef (Image lab (src,tit)) = do + entries <- readIORef picsRef + let newsrc = "images/img" ++ show (length entries) ++ takeExtension src + catch (readEntry [] (sourceDir src) >>= \entry -> + modifyIORef picsRef (entry{ eRelativePath = newsrc } :) >> + return (Image lab (newsrc, tit))) + (\_ -> return (Emph lab)) +transformInline (MathML _) _ _ x@(Math _ _) = do + let writeHtmlInline opts z = removeTrailingSpace $ + writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] + mathml = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = MathML Nothing } x + fallback = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = PlainMath } x + inOps = "" ++ + "" ++ + mathml ++ "" ++ fallback ++ "" ++ + "" + return $ HtmlInline $ if " Block +transformBlock (RawHtml _) = Null +transformBlock x = x + +(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element +(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) + +-- | Version of 'ppTopElement' that specifies UTF-8 encoding. +ppTopElement :: Element -> String +ppTopElement = ("\n" ++) . ppElement + +imageTypeOf :: FilePath -> Maybe String +imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of + "jpg" -> Just "image/jpeg" + "jpeg" -> Just "image/jpeg" + "jfif" -> Just "image/jpeg" + "png" -> Just "image/png" + "gif" -> Just "image/gif" + "svg" -> Just "image/svg+xml" + _ -> Nothing + +pageTemplate :: String +pageTemplate = unlines + [ "" + , "" + , "" + , "" + , "$title$" + , "" + , "" + , "" + , "$if(titlepage)$" + , "

$title$

" + , "$for(author)$" + , "

$author$

" + , "$endfor$" + , "$else$" + , "

$title$

" + , "$body$" + , "$endif$" + , "" + , "" + ] + diff --git a/src/pandoc.hs b/src/pandoc.hs index 3356a6d58..00df92e2c 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -109,6 +109,7 @@ writers = [("native" , writeDoc) ,("docbook" , writeDocbook) ,("opendocument" , writeOpenDocument) ,("odt" , \_ _ -> "") + ,("epub" , \_ _ -> "") ,("latex" , writeLaTeX) ,("latex+lhs" , writeLaTeX) ,("context" , writeConTeXt) @@ -124,7 +125,7 @@ writers = [("native" , writeDoc) ] isNonTextOutput :: String -> Bool -isNonTextOutput = (`elem` ["odt"]) +isNonTextOutput = (`elem` ["odt","epub"]) -- | Writer for Pandoc native format. writeDoc :: WriterOptions -> Pandoc -> String @@ -157,6 +158,7 @@ data Opt = Opt , optSmart :: Bool -- ^ Use smart typography , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt + , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optStrict :: Bool -- ^ Use strict markdown syntax @@ -197,6 +199,7 @@ defaultOpts = Opt , optSmart = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing + , optEPUBStylesheet = Nothing , optDumpArgs = False , optIgnoreArgs = False , optStrict = False @@ -467,6 +470,14 @@ options = "FILENAME") "" -- "Path of custom reference.odt" + , Option "" ["epub-stylesheet"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBStylesheet = Just text }) + "FILENAME") + "" -- "Path of epub.css" + , Option "D" ["print-default-template"] (ReqArg (\arg _ -> do @@ -580,6 +591,7 @@ defaultWriterName x = ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" + ".epub" -> "epub" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -628,6 +640,7 @@ main = do , optSmart = smart , optHTMLMathMethod = mathMethod , optReferenceODT = referenceODT + , optEPUBStylesheet = epubStylesheet , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optStrict = strict @@ -686,6 +699,11 @@ main = do Nothing -> error ("Unknown reader: " ++ readerName') writer <- case (lookup writerName' writers) of + Just _ | writerName' == "epub" -> do + epubstyle <- case epubStylesheet of + Just s -> return s + Nothing -> readDataFile datadir "epub.css" + return (writeEPUB sourceDirRelative epubstyle) Just _ | writerName' == "odt" -> return (writeODT datadir sourceDirRelative referenceODT) Just r -> return $ \o d ->