EPUB writer: Properly handle image URLs without an extension.
We now look at the mime type from the server and attach an appropriate extension. Closes #1855.
This commit is contained in:
parent
16133ed1ac
commit
0fa753b999
1 changed files with 41 additions and 35 deletions
|
@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to EPUB.
|
|||
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
|
||||
import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import Data.Maybe ( fromMaybe, catMaybes )
|
||||
import Data.List ( isPrefixOf, isInfixOf, intercalate )
|
||||
import System.Environment ( getEnv )
|
||||
import Text.Printf (printf)
|
||||
|
@ -60,14 +60,14 @@ import Text.Pandoc.Walk (walk, walkM)
|
|||
import Data.Default
|
||||
import Text.Pandoc.Writers.Markdown (writePlain)
|
||||
import Control.Monad.State (modify, get, execState, State, put, evalState)
|
||||
import Control.Monad (foldM, mplus, liftM, when)
|
||||
import Control.Monad (mplus, liftM, when)
|
||||
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
|
||||
, strContent, lookupAttr, Node(..), QName(..), parseXML
|
||||
, onlyElems, node, ppElement)
|
||||
import Text.Pandoc.UUID (getRandomUUID)
|
||||
import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
|
||||
import Data.Char ( toLower, isDigit, isAlphaNum )
|
||||
import Text.Pandoc.MIME (MimeType, getMimeType)
|
||||
import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
|
||||
import qualified Control.Exception as E
|
||||
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
|
||||
import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
|
||||
|
@ -378,17 +378,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
|
|||
mediaRef <- newIORef []
|
||||
Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>=
|
||||
walkM (transformBlock opts' mediaRef)
|
||||
pics <- readIORef mediaRef
|
||||
let readPicEntry entries (oldsrc, newsrc) = do
|
||||
res <- fetchItem' (writerMediaBag opts')
|
||||
(writerSourceURL opts') oldsrc
|
||||
case res of
|
||||
Left _ -> do
|
||||
warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
|
||||
return entries
|
||||
Right (img,_) -> return $
|
||||
(toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
|
||||
picEntries <- foldM readPicEntry [] pics
|
||||
picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef
|
||||
|
||||
-- handle fonts
|
||||
let matchingGlob f = do
|
||||
|
@ -794,59 +784,75 @@ metadataElement version md currentTime =
|
|||
showDateTimeISO8601 :: UTCTime -> String
|
||||
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
|
||||
|
||||
transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
|
||||
transformTag :: WriterOptions
|
||||
-> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
|
||||
-> Tag String
|
||||
-> IO (Tag String)
|
||||
transformTag mediaRef tag@(TagOpen name attr)
|
||||
transformTag opts mediaRef tag@(TagOpen name attr)
|
||||
| name `elem` ["video", "source", "img", "audio"] = do
|
||||
let src = fromAttrib "src" tag
|
||||
let poster = fromAttrib "poster" tag
|
||||
newsrc <- modifyMediaRef mediaRef src
|
||||
newposter <- modifyMediaRef mediaRef poster
|
||||
newsrc <- modifyMediaRef opts mediaRef src
|
||||
newposter <- modifyMediaRef opts mediaRef poster
|
||||
let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
|
||||
[("src", newsrc) | not (null newsrc)] ++
|
||||
[("poster", newposter) | not (null newposter)]
|
||||
return $ TagOpen name attr'
|
||||
transformTag _ tag = return tag
|
||||
transformTag _ _ tag = return tag
|
||||
|
||||
modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
|
||||
modifyMediaRef _ "" = return ""
|
||||
modifyMediaRef mediaRef oldsrc = do
|
||||
modifyMediaRef :: WriterOptions
|
||||
-> IORef [(FilePath, (FilePath, Maybe Entry))]
|
||||
-> FilePath
|
||||
-> IO FilePath
|
||||
modifyMediaRef _ _ "" = return ""
|
||||
modifyMediaRef opts mediaRef oldsrc = do
|
||||
media <- readIORef mediaRef
|
||||
case lookup oldsrc media of
|
||||
Just n -> return n
|
||||
Nothing -> do
|
||||
let new = "media/file" ++ show (length media) ++
|
||||
takeExtension (takeWhile (/='?') oldsrc) -- remove query
|
||||
modifyIORef mediaRef ( (oldsrc, new): )
|
||||
Just (n,_) -> return n
|
||||
Nothing -> do
|
||||
res <- fetchItem' (writerMediaBag opts)
|
||||
(writerSourceURL opts) oldsrc
|
||||
(new, mbEntry) <-
|
||||
case res of
|
||||
Left _ -> do
|
||||
warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
|
||||
return (oldsrc, Nothing)
|
||||
Right (img,mbMime) -> do
|
||||
let new = "media/file" ++ show (length media) ++
|
||||
fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
|
||||
(('.':) <$> (mbMime >>= extensionFromMimeType))
|
||||
epochtime <- floor `fmap` getPOSIXTime
|
||||
let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
|
||||
return (new, Just entry)
|
||||
modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): )
|
||||
return new
|
||||
|
||||
transformBlock :: WriterOptions
|
||||
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
|
||||
-> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
|
||||
-> Block
|
||||
-> IO Block
|
||||
transformBlock _ mediaRef (RawBlock fmt raw)
|
||||
transformBlock opts mediaRef (RawBlock fmt raw)
|
||||
| fmt == Format "html" = do
|
||||
let tags = parseTags raw
|
||||
tags' <- mapM (transformTag mediaRef) tags
|
||||
tags' <- mapM (transformTag opts mediaRef) tags
|
||||
return $ RawBlock fmt (renderTags' tags')
|
||||
transformBlock _ _ b = return b
|
||||
|
||||
transformInline :: WriterOptions
|
||||
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
|
||||
-> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media
|
||||
-> Inline
|
||||
-> IO Inline
|
||||
transformInline _ mediaRef (Image lab (src,tit)) = do
|
||||
newsrc <- modifyMediaRef mediaRef src
|
||||
transformInline opts mediaRef (Image lab (src,tit)) = do
|
||||
newsrc <- modifyMediaRef opts mediaRef src
|
||||
return $ Image lab (newsrc, tit)
|
||||
transformInline opts _ (x@(Math _ _))
|
||||
| WebTeX _ <- writerHTMLMathMethod opts = do
|
||||
raw <- makeSelfContained opts $ writeHtmlInline opts x
|
||||
return $ RawInline (Format "html") raw
|
||||
transformInline _ mediaRef (RawInline fmt raw)
|
||||
transformInline opts mediaRef (RawInline fmt raw)
|
||||
| fmt == Format "html" = do
|
||||
let tags = parseTags raw
|
||||
tags' <- mapM (transformTag mediaRef) tags
|
||||
tags' <- mapM (transformTag opts mediaRef) tags
|
||||
return $ RawInline fmt (renderTags' tags')
|
||||
transformInline _ _ x = return x
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue