Allow --self-contained to get content from MediaBag.

Added a parameter to makeSelfContained (API change).
This commit is contained in:
John MacFarlane 2014-07-30 15:26:40 -07:00
parent 23d806644f
commit e4913d6dba
3 changed files with 38 additions and 27 deletions

View file

@ -1281,7 +1281,9 @@ main = do
["html","html+lhs","html5","html5+lhs", ["html","html+lhs","html5","html5+lhs",
"s5","slidy","slideous","dzslides","revealjs"] "s5","slidy","slideous","dzslides","revealjs"]
selfcontain = if selfContained && htmlFormat selfcontain = if selfContained && htmlFormat
then makeSelfContained datadir then makeSelfContained
(writerMediaBag writerOptions')
(writerUserDataDir writerOptions')
else return else return
handleEntities = if htmlFormat && ascii handleEntities = if htmlFormat && ascii
then toEntities then toEntities

View file

@ -35,50 +35,53 @@ import Text.HTML.TagSoup
import Network.URI (isURI, escapeURIString) import Network.URI (isURI, escapeURIString)
import Data.ByteString.Base64 import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString as BS
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum) import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag)
import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import qualified Data.Map as M
isOk :: Char -> Bool isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c isOk c = isAscii c && isAlphaNum c
convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String)
convertTag userdata t@(TagOpen tagname as) convertTag media userdata t@(TagOpen tagname as)
| tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
as' <- mapM processAttribute as as' <- mapM processAttribute as
return $ TagOpen tagname as' return $ TagOpen tagname as'
where processAttribute (x,y) = where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster" if x == "src" || x == "href" || x == "poster"
then do then do
(raw, mime) <- getRaw userdata (fromAttrib "type" t) y (raw, mime) <- getRaw media userdata (fromAttrib "type" t) y
let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
return (x, enc) return (x, enc)
else return (x,y) else return (x,y)
convertTag userdata t@(TagOpen "script" as) = convertTag media userdata t@(TagOpen "script" as) =
case fromAttrib "src" t of case fromAttrib "src" t of
[] -> return t [] -> return t
src -> do src -> do
(raw, mime) <- getRaw userdata (fromAttrib "type" t) src (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
convertTag userdata t@(TagOpen "link" as) = convertTag media userdata t@(TagOpen "link" as) =
case fromAttrib "href" t of case fromAttrib "href" t of
[] -> return t [] -> return t
src -> do src -> do
(raw, mime) <- getRaw userdata (fromAttrib "type" t) src (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
convertTag _ t = return t convertTag _ _ t = return t
-- NOTE: This is really crude, it doesn't respect CSS comments. -- NOTE: This is really crude, it doesn't respect CSS comments.
cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString
cssURLs userdata d orig = -> IO ByteString
cssURLs media userdata d orig =
case B.breakSubstring "url(" orig of case B.breakSubstring "url(" orig of
(x,y) | B.null y -> return orig (x,y) | B.null y -> return orig
| otherwise -> do | otherwise -> do
@ -91,14 +94,15 @@ cssURLs userdata d orig =
let url' = if isURI url let url' = if isURI url
then url then url
else d </> url else d </> url
(raw, mime) <- getRaw userdata "" url' (raw, mime) <- getRaw media userdata "" url'
rest <- cssURLs userdata d v rest <- cssURLs media userdata d v
let enc = "data:" `B.append` fromString mime `B.append` let enc = "data:" `B.append` fromString mime `B.append`
";base64," `B.append` (encode raw) ";base64," `B.append` (encode raw)
return $ x `B.append` "url(" `B.append` enc `B.append` rest return $ x `B.append` "url(" `B.append` enc `B.append` rest
getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem :: MediaBag -> Maybe FilePath -> String
getItem userdata f = -> IO (ByteString, Maybe String)
getItem media userdata f =
if isURI f if isURI f
then openURL f >>= either handleErr return then openURL f >>= either handleErr return
else do else do
@ -110,14 +114,19 @@ getItem userdata f =
".gz" -> getMimeType $ dropExtension f' ".gz" -> getMimeType $ dropExtension f'
x -> getMimeType x x -> getMimeType x
exists <- doesFileExist f' exists <- doesFileExist f'
cont <- if exists then B.readFile f' else readDataFile userdata f' cont <- if exists
then B.readFile f'
else case M.lookup f media of
Just bs -> return $ BS.concat $ L.toChunks bs
Nothing -> readDataFile userdata f'
return (cont, mime) return (cont, mime)
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) getRaw :: MediaBag -> Maybe FilePath -> String -> String
getRaw userdata mimetype src = do -> IO (ByteString, String)
getRaw media userdata mimetype src = do
let ext = map toLower $ takeExtension src let ext = map toLower $ takeExtension src
(raw, respMime) <- getItem userdata src (raw, respMime) <- getItem media userdata src
let raw' = if ext == ".gz" let raw' = if ext == ".gz"
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
$ [raw] $ [raw]
@ -128,20 +137,20 @@ getRaw userdata mimetype src = do
(x, Nothing) -> x (x, Nothing) -> x
(_, Just x ) -> x (_, Just x ) -> x
result <- if mime == "text/css" result <- if mime == "text/css"
then cssURLs userdata (takeDirectory src) raw' then cssURLs media userdata (takeDirectory src) raw'
else return raw' else return raw'
return (result, mime) return (result, mime)
-- | Convert HTML into self-contained HTML, incorporating images, -- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs. Items specified using absolute -- scripts, and CSS using data: URIs. Items specified using absolute
-- URLs will be downloaded; those specified using relative URLs will -- URLs will be downloaded; those specified using relative URLs will
-- be sought first relative to the working directory, then relative -- be sought first relative to the working directory, then in the
-- media bag, then relative
-- to the user data directory (if the first parameter is 'Just' -- to the user data directory (if the first parameter is 'Just'
-- a directory), and finally relative to pandoc's default data -- a directory), and finally relative to pandoc's default data
-- directory. -- directory.
makeSelfContained :: Maybe FilePath -> String -> IO String makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String
makeSelfContained userdata inp = do makeSelfContained media userdata inp = do
let tags = parseTags inp let tags = parseTags inp
out' <- mapM (convertTag userdata) tags out' <- mapM (convertTag media userdata) tags
return $ renderTags' out' return $ renderTags' out'

View file

@ -793,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit) return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _)) transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do | WebTeX _ <- writerHTMLMathMethod opts = do
raw <- makeSelfContained Nothing $ writeHtmlInline opts x raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x
return $ RawInline (Format "html") raw return $ RawInline (Format "html") raw
transformInline opts mediaRef (RawInline fmt raw) transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do | fmt == Format "html" = do