Text.Pandoc.SelfContained changes.
* mkSelfContained now takes just two arguments, WriterOptions and the string. * It no longer looks in data files. This only made sense when we had copies of slidy and S5 code there. * Shared.fetchItem' is used instead of the nearly duplicate getItem.
This commit is contained in:
parent
cbaaa17d49
commit
ce8922437d
3 changed files with 30 additions and 62 deletions
|
@ -1148,6 +1148,7 @@ main = do
|
|||
$ lines dztempl
|
||||
return $ ("dzslides-core", dzcore) : variables'
|
||||
else return variables'
|
||||
|
||||
let sourceURL = case sources of
|
||||
[] -> Nothing
|
||||
(x:_) -> case parseURI x of
|
||||
|
@ -1288,9 +1289,7 @@ main = do
|
|||
["html","html+lhs","html5","html5+lhs",
|
||||
"s5","slidy","slideous","dzslides","revealjs"]
|
||||
selfcontain = if selfContained && htmlFormat
|
||||
then makeSelfContained
|
||||
(writerMediaBag writerOptions)
|
||||
(writerUserDataDir writerOptions)
|
||||
then makeSelfContained writerOptions
|
||||
else return
|
||||
handleEntities = if htmlFormat && ascii
|
||||
then toEntities
|
||||
|
|
|
@ -35,53 +35,51 @@ import Text.HTML.TagSoup
|
|||
import Network.URI (isURI, escapeURIString)
|
||||
import Data.ByteString.Base64
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import qualified Data.ByteString as BS
|
||||
import Data.ByteString (ByteString)
|
||||
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
|
||||
import System.FilePath (takeExtension, takeDirectory, (</>))
|
||||
import Data.Char (toLower, isAscii, isAlphaNum)
|
||||
import Codec.Compression.GZip as Gzip
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
|
||||
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
|
||||
import Text.Pandoc.Shared (renderTags', err, fetchItem')
|
||||
import Text.Pandoc.MediaBag (MediaBag)
|
||||
import Text.Pandoc.UTF8 (toString, fromString)
|
||||
import Text.Pandoc.MIME (getMimeType)
|
||||
import System.Directory (doesFileExist)
|
||||
import Text.Pandoc.Options (WriterOptions(..))
|
||||
|
||||
isOk :: Char -> Bool
|
||||
isOk c = isAscii c && isAlphaNum c
|
||||
|
||||
convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String)
|
||||
convertTag media userdata t@(TagOpen tagname as)
|
||||
convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
|
||||
convertTag media sourceURL t@(TagOpen tagname as)
|
||||
| tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
|
||||
as' <- mapM processAttribute as
|
||||
return $ TagOpen tagname as'
|
||||
where processAttribute (x,y) =
|
||||
if x == "src" || x == "href" || x == "poster"
|
||||
then do
|
||||
(raw, mime) <- getRaw media userdata (fromAttrib "type" t) y
|
||||
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
|
||||
let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
|
||||
return (x, enc)
|
||||
else return (x,y)
|
||||
convertTag media userdata t@(TagOpen "script" as) =
|
||||
convertTag media sourceURL t@(TagOpen "script" as) =
|
||||
case fromAttrib "src" t of
|
||||
[] -> return t
|
||||
src -> do
|
||||
(raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
|
||||
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
|
||||
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
|
||||
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
|
||||
convertTag media userdata t@(TagOpen "link" as) =
|
||||
convertTag media sourceURL t@(TagOpen "link" as) =
|
||||
case fromAttrib "href" t of
|
||||
[] -> return t
|
||||
src -> do
|
||||
(raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
|
||||
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
|
||||
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
|
||||
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
|
||||
convertTag _ _ t = return t
|
||||
|
||||
-- NOTE: This is really crude, it doesn't respect CSS comments.
|
||||
cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString
|
||||
cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
|
||||
-> IO ByteString
|
||||
cssURLs media userdata d orig =
|
||||
cssURLs media sourceURL d orig =
|
||||
case B.breakSubstring "url(" orig of
|
||||
(x,y) | B.null y -> return orig
|
||||
| otherwise -> do
|
||||
|
@ -94,43 +92,21 @@ cssURLs media userdata d orig =
|
|||
let url' = if isURI url
|
||||
then url
|
||||
else d </> url
|
||||
(raw, mime) <- getRaw media userdata "" url'
|
||||
rest <- cssURLs media userdata d v
|
||||
(raw, mime) <- getRaw media sourceURL "" url'
|
||||
rest <- cssURLs media sourceURL d v
|
||||
let enc = "data:" `B.append` fromString mime `B.append`
|
||||
";base64," `B.append` (encode raw)
|
||||
return $ x `B.append` "url(" `B.append` enc `B.append` rest
|
||||
|
||||
getItem :: MediaBag -> Maybe FilePath -> String
|
||||
-> IO (ByteString, Maybe String)
|
||||
getItem media userdata f =
|
||||
if isURI f
|
||||
then openURL f >>= either handleErr return
|
||||
else do
|
||||
-- strip off trailing query or fragment part, if relative URL.
|
||||
-- this is needed for things like cmunrm.eot?#iefix,
|
||||
-- which is used to get old versions of IE to work with web fonts.
|
||||
let f' = takeWhile (\c -> c /= '?' && c /= '#') f
|
||||
let mbMime = case takeExtension f' of
|
||||
".gz" -> getMimeType $ dropExtension f'
|
||||
x -> getMimeType x
|
||||
exists <- doesFileExist f'
|
||||
if exists
|
||||
then do
|
||||
cont <- B.readFile f'
|
||||
return (cont, mbMime)
|
||||
else case lookupMedia f media of
|
||||
Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
|
||||
Just mime)
|
||||
Nothing -> do
|
||||
cont <- readDataFile userdata f'
|
||||
return (cont, mbMime)
|
||||
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
|
||||
|
||||
getRaw :: MediaBag -> Maybe FilePath -> String -> String
|
||||
getRaw :: MediaBag -> Maybe String -> String -> String
|
||||
-> IO (ByteString, String)
|
||||
getRaw media userdata mimetype src = do
|
||||
getRaw media sourceURL mimetype src = do
|
||||
let ext = map toLower $ takeExtension src
|
||||
(raw, respMime) <- getItem media userdata src
|
||||
fetchResult <- fetchItem' media sourceURL src
|
||||
(raw, respMime) <- case fetchResult of
|
||||
Left msg -> err 67 $ "Could not fetch " ++ src ++
|
||||
"\n" ++ show msg
|
||||
Right x -> return x
|
||||
let raw' = if ext == ".gz"
|
||||
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
|
||||
$ [raw]
|
||||
|
@ -141,20 +117,14 @@ getRaw media userdata mimetype src = do
|
|||
(x, Nothing) -> x
|
||||
(_, Just x ) -> x
|
||||
result <- if mime == "text/css"
|
||||
then cssURLs media userdata (takeDirectory src) raw'
|
||||
then cssURLs media (Just src) (takeDirectory src) raw'
|
||||
else return raw'
|
||||
return (result, mime)
|
||||
|
||||
-- | Convert HTML into self-contained HTML, incorporating images,
|
||||
-- scripts, and CSS using data: URIs. Items specified using absolute
|
||||
-- URLs will be downloaded; those specified using relative URLs will
|
||||
-- be sought first relative to the working directory, then in the
|
||||
-- media bag, then relative
|
||||
-- to the user data directory (if the first parameter is 'Just'
|
||||
-- a directory), and finally relative to pandoc's default data
|
||||
-- directory.
|
||||
makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String
|
||||
makeSelfContained media userdata inp = do
|
||||
-- scripts, and CSS using data: URIs.
|
||||
makeSelfContained :: WriterOptions -> String -> IO String
|
||||
makeSelfContained opts inp = do
|
||||
let tags = parseTags inp
|
||||
out' <- mapM (convertTag media userdata) tags
|
||||
out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
|
||||
return $ renderTags' out'
|
||||
|
|
|
@ -61,7 +61,6 @@ import Text.Pandoc.MIME (getMimeType)
|
|||
import qualified Control.Exception as E
|
||||
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
|
||||
import Text.HTML.TagSoup
|
||||
import Data.Monoid
|
||||
|
||||
-- A Chapter includes a list of blocks and maybe a section
|
||||
-- number offset. Note, some chapters are unnumbered. The section
|
||||
|
@ -794,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
|
|||
return $ Image lab (newsrc, tit)
|
||||
transformInline opts _ (x@(Math _ _))
|
||||
| WebTeX _ <- writerHTMLMathMethod opts = do
|
||||
raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
|
||||
raw <- makeSelfContained opts $ writeHtmlInline opts x
|
||||
return $ RawInline (Format "html") raw
|
||||
transformInline opts mediaRef (RawInline fmt raw)
|
||||
| fmt == Format "html" = do
|
||||
|
|
Loading…
Reference in a new issue