Text.Pandoc.SelfContained changes.

* mkSelfContained now takes just two arguments, WriterOptions and
  the string.
* It no longer looks in data files.  This only made sense when we
  had copies of slidy and S5 code there.
* Shared.fetchItem' is used instead of the nearly duplicate getItem.
This commit is contained in:
John MacFarlane 2014-08-02 16:07:19 -07:00
parent cbaaa17d49
commit ce8922437d
3 changed files with 30 additions and 62 deletions

View file

@ -1148,6 +1148,7 @@ main = do
$ lines dztempl
return $ ("dzslides-core", dzcore) : variables'
else return variables'
let sourceURL = case sources of
[] -> Nothing
(x:_) -> case parseURI x of
@ -1288,9 +1289,7 @@ main = do
["html","html+lhs","html5","html5+lhs",
"s5","slidy","slideous","dzslides","revealjs"]
selfcontain = if selfContained && htmlFormat
then makeSelfContained
(writerMediaBag writerOptions)
(writerUserDataDir writerOptions)
then makeSelfContained writerOptions
else return
handleEntities = if htmlFormat && ascii
then toEntities

View file

@ -35,53 +35,51 @@ import Text.HTML.TagSoup
import Network.URI (isURI, escapeURIString)
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import System.FilePath (takeExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Shared (renderTags', err, fetchItem')
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
import Text.Pandoc.Options (WriterOptions(..))
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String)
convertTag media userdata t@(TagOpen tagname as)
convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
convertTag media sourceURL t@(TagOpen tagname as)
| tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
as' <- mapM processAttribute as
return $ TagOpen tagname as'
where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster"
then do
(raw, mime) <- getRaw media userdata (fromAttrib "type" t) y
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
return (x, enc)
else return (x,y)
convertTag media userdata t@(TagOpen "script" as) =
convertTag media sourceURL t@(TagOpen "script" as) =
case fromAttrib "src" t of
[] -> return t
src -> do
(raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
convertTag media userdata t@(TagOpen "link" as) =
convertTag media sourceURL t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
(raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
convertTag _ _ t = return t
-- NOTE: This is really crude, it doesn't respect CSS comments.
cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString
cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
-> IO ByteString
cssURLs media userdata d orig =
cssURLs media sourceURL d orig =
case B.breakSubstring "url(" orig of
(x,y) | B.null y -> return orig
| otherwise -> do
@ -94,43 +92,21 @@ cssURLs media userdata d orig =
let url' = if isURI url
then url
else d </> url
(raw, mime) <- getRaw media userdata "" url'
rest <- cssURLs media userdata d v
(raw, mime) <- getRaw media sourceURL "" url'
rest <- cssURLs media sourceURL d v
let enc = "data:" `B.append` fromString mime `B.append`
";base64," `B.append` (encode raw)
return $ x `B.append` "url(" `B.append` enc `B.append` rest
getItem :: MediaBag -> Maybe FilePath -> String
-> IO (ByteString, Maybe String)
getItem media userdata f =
if isURI f
then openURL f >>= either handleErr return
else do
-- strip off trailing query or fragment part, if relative URL.
-- this is needed for things like cmunrm.eot?#iefix,
-- which is used to get old versions of IE to work with web fonts.
let f' = takeWhile (\c -> c /= '?' && c /= '#') f
let mbMime = case takeExtension f' of
".gz" -> getMimeType $ dropExtension f'
x -> getMimeType x
exists <- doesFileExist f'
if exists
then do
cont <- B.readFile f'
return (cont, mbMime)
else case lookupMedia f media of
Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
Just mime)
Nothing -> do
cont <- readDataFile userdata f'
return (cont, mbMime)
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
getRaw :: MediaBag -> Maybe FilePath -> String -> String
getRaw :: MediaBag -> Maybe String -> String -> String
-> IO (ByteString, String)
getRaw media userdata mimetype src = do
getRaw media sourceURL mimetype src = do
let ext = map toLower $ takeExtension src
(raw, respMime) <- getItem media userdata src
fetchResult <- fetchItem' media sourceURL src
(raw, respMime) <- case fetchResult of
Left msg -> err 67 $ "Could not fetch " ++ src ++
"\n" ++ show msg
Right x -> return x
let raw' = if ext == ".gz"
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
$ [raw]
@ -141,20 +117,14 @@ getRaw media userdata mimetype src = do
(x, Nothing) -> x
(_, Just x ) -> x
result <- if mime == "text/css"
then cssURLs media userdata (takeDirectory src) raw'
then cssURLs media (Just src) (takeDirectory src) raw'
else return raw'
return (result, mime)
-- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs. Items specified using absolute
-- URLs will be downloaded; those specified using relative URLs will
-- be sought first relative to the working directory, then in the
-- media bag, then relative
-- to the user data directory (if the first parameter is 'Just'
-- a directory), and finally relative to pandoc's default data
-- directory.
makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String
makeSelfContained media userdata inp = do
-- scripts, and CSS using data: URIs.
makeSelfContained :: WriterOptions -> String -> IO String
makeSelfContained opts inp = do
let tags = parseTags inp
out' <- mapM (convertTag media userdata) tags
out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
return $ renderTags' out'

View file

@ -61,7 +61,6 @@ import Text.Pandoc.MIME (getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.HTML.TagSoup
import Data.Monoid
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@ -794,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do