Merge pull request #1516 from mpickering/epubmetadata
EPUB improvements
This commit is contained in:
commit
e690fe4a3e
3 changed files with 72 additions and 26 deletions
18
README
18
README
|
@ -2972,43 +2972,60 @@ The following fields are recognized:
|
|||
`GTIN-13`, `UPC`, `ISMN-10`, `DOI`, `LCCN`, `GTIN-14`,
|
||||
`ISBN-13`, `Legal deposit number`, `URN`, `OCLC`,
|
||||
`ISMN-13`, `ISBN-A`, `JP`, `OLCC`.
|
||||
|
||||
`title`
|
||||
~ Either a string value, or an object with fields `file-as` and
|
||||
`type`, or a list of such objects. Valid values for `type` are
|
||||
`main`, `subtitle`, `short`, `collection`, `edition`, `extended`.
|
||||
|
||||
`creator`
|
||||
~ Either a string value, or an object with fields `role`, `file-as`,
|
||||
and `text`, or a list of such objects. Valid values for `role` are
|
||||
[marc relators](http://www.loc.gov/marc/relators/relaterm.html), but
|
||||
pandoc will attempt to translate the human-readable versions
|
||||
(like "author" and "editor") to the appropriate marc relators.
|
||||
|
||||
`contributor`
|
||||
~ Same format as `creator`.
|
||||
|
||||
`date`
|
||||
~ A string value in `YYYY-MM-DD` format. (Only the year is necessary.)
|
||||
Pandoc will attempt to convert other common date formats.
|
||||
|
||||
`language`
|
||||
~ A string value in [RFC5646] format. Pandoc will default to the local
|
||||
language if nothing is specified.
|
||||
|
||||
`subject`
|
||||
~ A string value or a list of such values.
|
||||
|
||||
`description`
|
||||
~ A string value.
|
||||
|
||||
`type`
|
||||
~ A string value.
|
||||
|
||||
`format`
|
||||
~ A string value.
|
||||
|
||||
`relation`
|
||||
~ A string value.
|
||||
|
||||
`coverage`
|
||||
~ A string value.
|
||||
|
||||
`rights`
|
||||
~ A string value.
|
||||
|
||||
`cover-image`
|
||||
~ A string value (path to cover image).
|
||||
|
||||
`stylesheet`
|
||||
~ A string value (path to CSS stylesheet).
|
||||
|
||||
`page-progression-direction`
|
||||
~ Either `ltr` or `rtl`. Specifies the synonymous spine [attribute][EPUBspine].
|
||||
|
||||
Literate Haskell support
|
||||
========================
|
||||
|
||||
|
@ -3126,3 +3143,4 @@ Rosenthal.
|
|||
[InDesign ICML]: https://www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
|
||||
[txt2tags]: http://txt2tags.org/
|
||||
[EPUB]: http://idpf.org/epub
|
||||
[EPUBspine]: http://www.idpf.org/epub/301/spec/epub-publications.html#sec-spine-elem
|
||||
|
|
|
@ -14,7 +14,7 @@ import Text.Pandoc.Walk (walk, query)
|
|||
import Text.Pandoc.Generic(bottomUp)
|
||||
import Text.Pandoc.Readers.HTML (readHtml)
|
||||
import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
|
||||
import Text.Pandoc.Shared (escapeURI, collapseFilePath)
|
||||
import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
|
||||
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
|
||||
import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
|
@ -155,7 +155,7 @@ parseMeta content = do
|
|||
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
|
||||
parseMetaItem :: Element -> Meta -> Meta
|
||||
parseMetaItem e@(stripNamespace . elName -> field) meta =
|
||||
B.setMeta (renameMeta field) (B.str $ strContent e) meta
|
||||
addMetaField (renameMeta field) (B.str $ strContent e) meta
|
||||
|
||||
renameMeta :: String -> String
|
||||
renameMeta "creator" = "author"
|
||||
|
@ -169,10 +169,10 @@ getManifest archive = do
|
|||
ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
|
||||
as <- liftM ((map attrToPair) . elAttribs)
|
||||
(findElementE (QName "rootfile" (Just ns) Nothing) docElem)
|
||||
root <- mkE "Root not found" (lookup "full-path" as)
|
||||
let rootdir = dropFileName root
|
||||
manifestFile <- mkE "Root not found" (lookup "full-path" as)
|
||||
let rootdir = dropFileName manifestFile
|
||||
--mime <- lookup "media-type" as
|
||||
manifest <- findEntryByPathE root archive
|
||||
manifest <- findEntryByPathE manifestFile archive
|
||||
liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
|
||||
|
||||
-- Fixup
|
||||
|
@ -272,7 +272,8 @@ findAttrE :: MonadError String m => QName -> Element -> m String
|
|||
findAttrE q e = mkE "findAttr" $ findAttr q e
|
||||
|
||||
findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
|
||||
findEntryByPathE path a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a
|
||||
findEntryByPathE (normalise -> path) a =
|
||||
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
|
||||
|
||||
parseXMLDocE :: MonadError String m => String -> m Element
|
||||
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
|
||||
{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-}
|
||||
{-
|
||||
Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
|
@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Conversion of 'Pandoc' documents to EPUB.
|
||||
-}
|
||||
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
|
||||
import Data.IORef
|
||||
import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import Data.List ( isPrefixOf, isInfixOf, intercalate )
|
||||
|
@ -40,27 +40,35 @@ import qualified Data.ByteString.Lazy as B
|
|||
import qualified Data.ByteString.Lazy.Char8 as B8
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Text.Pandoc.SelfContained ( makeSelfContained )
|
||||
import Codec.Archive.Zip
|
||||
import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
|
||||
import Control.Applicative ((<$>))
|
||||
import Data.Time.Clock.POSIX
|
||||
import Data.Time
|
||||
import System.Locale
|
||||
import Text.Pandoc.Shared hiding ( Element )
|
||||
import qualified Text.Pandoc.Shared as Shared
|
||||
import Data.Time.Clock.POSIX ( getPOSIXTime )
|
||||
import Data.Time (getCurrentTime,UTCTime, formatTime)
|
||||
import System.Locale ( defaultTimeLocale )
|
||||
import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim
|
||||
, normalizeDate, readDataFile, stringify, warn
|
||||
, hierarchicalize, fetchItem' )
|
||||
import qualified Text.Pandoc.Shared as S (Element(..))
|
||||
import Text.Pandoc.Builder (fromList, setMeta)
|
||||
import Text.Pandoc.Options
|
||||
import Text.Pandoc.Options ( WriterOptions(..)
|
||||
, HTMLMathMethod(..)
|
||||
, EPUBVersion(..)
|
||||
, ObfuscationMethod(NoObfuscation) )
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Walk
|
||||
import Control.Monad.State
|
||||
import Text.XML.Light hiding (ppTopElement)
|
||||
import Text.Pandoc.UUID
|
||||
import Text.Pandoc.Writers.HTML
|
||||
import Text.Pandoc.Walk (walk, walkM)
|
||||
import Control.Monad.State (modify, get, execState, State, put, evalState)
|
||||
import Control.Monad (foldM, when, mplus, liftM)
|
||||
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
|
||||
, strContent, lookupAttr, Node(..), QName(..), parseXML
|
||||
, onlyElems, node, ppElement)
|
||||
import Text.Pandoc.UUID (getRandomUUID)
|
||||
import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
|
||||
import Data.Char ( toLower, isDigit, isAlphaNum )
|
||||
import Network.URI ( unEscapeString )
|
||||
import Text.Pandoc.MIME (getMimeType)
|
||||
import qualified Control.Exception as E
|
||||
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
|
||||
import Text.HTML.TagSoup
|
||||
import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
|
||||
|
||||
-- A Chapter includes a list of blocks and maybe a section
|
||||
-- number offset. Note, some chapters are unnumbered. The section
|
||||
|
@ -86,6 +94,7 @@ data EPUBMetadata = EPUBMetadata{
|
|||
, epubRights :: Maybe String
|
||||
, epubCoverImage :: Maybe String
|
||||
, epubStylesheet :: Maybe Stylesheet
|
||||
, epubPageDirection :: ProgressionDirection
|
||||
} deriving Show
|
||||
|
||||
data Stylesheet = StylesheetPath FilePath
|
||||
|
@ -114,6 +123,8 @@ data Title = Title{
|
|||
, titleType :: Maybe String
|
||||
} deriving Show
|
||||
|
||||
data ProgressionDirection = LTR | RTL | Default deriving Show
|
||||
|
||||
dcName :: String -> QName
|
||||
dcName n = QName n Nothing (Just "dc")
|
||||
|
||||
|
@ -288,6 +299,7 @@ metadataFromMeta opts meta = EPUBMetadata{
|
|||
, epubRights = rights
|
||||
, epubCoverImage = coverImage
|
||||
, epubStylesheet = stylesheet
|
||||
, epubPageDirection = pageDirection
|
||||
}
|
||||
where identifiers = getIdentifier meta
|
||||
titles = getTitle meta
|
||||
|
@ -310,6 +322,14 @@ metadataFromMeta opts meta = EPUBMetadata{
|
|||
stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
|
||||
((StylesheetPath . metaValueToString) <$>
|
||||
lookupMeta "stylesheet" meta)
|
||||
pageDirection = maybe Default stringToPageDirection
|
||||
(lookupMeta "page-progression-direction" meta)
|
||||
stringToPageDirection (metaValueToString -> s) =
|
||||
case s of
|
||||
"ltr" -> LTR
|
||||
"rtl" -> RTL
|
||||
_ -> Default
|
||||
|
||||
|
||||
-- | Produce an EPUB file from a Pandoc document.
|
||||
writeEPUB :: WriterOptions -- ^ Writer options
|
||||
|
@ -374,6 +394,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
|
||||
fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'
|
||||
|
||||
-- set page progression direction
|
||||
let progressionDirection = case epubPageDirection metadata of
|
||||
LTR -> "ltr"
|
||||
RTL -> "rtl"
|
||||
Default -> "default"
|
||||
|
||||
-- body pages
|
||||
|
||||
-- add level 1 header to beginning if none there
|
||||
|
@ -493,7 +519,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
(pictureNode x)]) ++
|
||||
map pictureNode picEntries ++
|
||||
map fontNode fontEntries
|
||||
, unode "spine" ! [("toc","ncx")] $
|
||||
, unode "spine" ! [("toc","ncx")
|
||||
,("page-progression-direction", progressionDirection)] $
|
||||
case epubCoverImage metadata of
|
||||
Nothing -> []
|
||||
Just _ -> [ unode "itemref" !
|
||||
|
@ -524,8 +551,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let tocLevel = writerTOCDepth opts
|
||||
|
||||
let navPointNode :: (Int -> String -> String -> [Element] -> Element)
|
||||
-> Shared.Element -> State Int Element
|
||||
navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do
|
||||
-> S.Element -> State Int Element
|
||||
navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
|
||||
n <- get
|
||||
modify (+1)
|
||||
let showNums :: [Int] -> String
|
||||
|
@ -537,12 +564,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
let src = case lookup ident reftable of
|
||||
Just x -> x
|
||||
Nothing -> error (ident ++ " not found in reftable")
|
||||
let isSec (Sec lev _ _ _ _) = lev <= tocLevel
|
||||
let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
|
||||
isSec _ = False
|
||||
let subsecs = filter isSec children
|
||||
subs <- mapM (navPointNode formatter) subsecs
|
||||
return $ formatter n tit src subs
|
||||
navPointNode _ (Blk _) = error "navPointNode encountered Blk"
|
||||
navPointNode _ (S.Blk _) = error "navPointNode encountered Blk"
|
||||
|
||||
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
|
||||
navMapFormatter n tit src subs = unode "navPoint" !
|
||||
|
|
Loading…
Add table
Reference in a new issue