Text.Pandoc.SelfContained changes.

* mkSelfContained now takes just two arguments, WriterOptions and the string. * It no longer looks in data files. This only made sense when we had copies of slidy and S5 code there. * Shared.fetchItem' is used instead of the nearly duplicate getItem.
2014-08-02 16:07:19 -07:00 · 2014-08-02 16:07:19 -07:00 · ce8922437d
commit ce8922437d
parent cbaaa17d49
3 changed files with 30 additions and 62 deletions
--- a/pandoc.hs
+++ b/pandoc.hs
@ -1148,6 +1148,7 @@ main = do
                                             $ lines dztempl
                        return $ ("dzslides-core", dzcore) : variables'
                    else return variables'
+
  let sourceURL = case sources of
                        []    -> Nothing
                        (x:_) -> case parseURI x of
@ -1288,9 +1289,7 @@ main = do
                               ["html","html+lhs","html5","html5+lhs",
                               "s5","slidy","slideous","dzslides","revealjs"]
                selfcontain = if selfContained && htmlFormat
-                                 then makeSelfContained
-                                      (writerMediaBag writerOptions)
-                                      (writerUserDataDir writerOptions)
+                                 then makeSelfContained writerOptions
                                 else return
                handleEntities = if htmlFormat && ascii
                                    then toEntities
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@ -35,53 +35,51 @@ import Text.HTML.TagSoup
 import Network.URI (isURI, escapeURIString)
 import Data.ByteString.Base64
 import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString as BS
 import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
+import System.FilePath (takeExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.Shared (renderTags', err, fetchItem')
+import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.UTF8 (toString,  fromString)
-import Text.Pandoc.MIME (getMimeType)
-import System.Directory (doesFileExist)
+import Text.Pandoc.Options (WriterOptions(..))

 isOk :: Char -> Bool
 isOk c = isAscii c && isAlphaNum c

-convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String)
-convertTag media userdata t@(TagOpen tagname as)
+convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
+convertTag media sourceURL t@(TagOpen tagname as)
  | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
       as' <- mapM processAttribute as
       return $ TagOpen tagname as'
  where processAttribute (x,y) =
           if x == "src" || x == "href" || x == "poster"
              then do
-                (raw, mime) <- getRaw media userdata (fromAttrib "type" t) y
+                (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
                let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
                return (x, enc)
              else return (x,y)
-convertTag media userdata t@(TagOpen "script" as) =
+convertTag media sourceURL t@(TagOpen "script" as) =
  case fromAttrib "src" t of
       []     -> return t
       src    -> do
-           (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
+           (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
           let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
           return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag media userdata t@(TagOpen "link" as) =
+convertTag media sourceURL t@(TagOpen "link" as) =
  case fromAttrib "href" t of
       []  -> return t
       src -> do
-           (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
+           (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
           let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
           return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
 convertTag _ _ t = return t

 -- NOTE: This is really crude, it doesn't respect CSS comments.
-cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString
+cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
        -> IO ByteString
-cssURLs media userdata d orig =
+cssURLs media sourceURL d orig =
  case B.breakSubstring "url(" orig of
       (x,y) | B.null y  -> return orig
             | otherwise -> do
@ -94,43 +92,21 @@ cssURLs media userdata d orig =
                  let url' = if isURI url
                                then url
                                else d </> url
-                  (raw, mime) <- getRaw media userdata "" url'
-                  rest <- cssURLs media userdata d v
+                  (raw, mime) <- getRaw media sourceURL "" url'
+                  rest <- cssURLs media sourceURL d v
                  let enc = "data:" `B.append` fromString mime `B.append`
                               ";base64," `B.append` (encode raw)
                  return $ x `B.append` "url(" `B.append` enc `B.append` rest

-getItem :: MediaBag -> Maybe FilePath -> String
-        -> IO (ByteString, Maybe String)
-getItem media userdata f =
-  if isURI f
-     then openURL f >>= either handleErr return
-     else do
-       -- strip off trailing query or fragment part, if relative URL.
-       -- this is needed for things like cmunrm.eot?#iefix,
-       -- which is used to get old versions of IE to work with web fonts.
-       let f' = takeWhile (\c -> c /= '?' && c /= '#') f
-       let mbMime = case takeExtension f' of
-                         ".gz" -> getMimeType $ dropExtension f'
-                         x     -> getMimeType x
-       exists <- doesFileExist f'
-       if exists
-          then do
-            cont <- B.readFile f'
-            return (cont, mbMime)
-          else case lookupMedia f media of
-                    Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
-                                              Just mime)
-                    Nothing        -> do
-                      cont <- readDataFile userdata f'
-                      return (cont, mbMime)
-  where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
-
-getRaw :: MediaBag -> Maybe FilePath -> String -> String
+getRaw :: MediaBag -> Maybe String -> String -> String
       -> IO (ByteString, String)
-getRaw media userdata mimetype src = do
+getRaw media sourceURL mimetype src = do
  let ext = map toLower $ takeExtension src
-  (raw, respMime) <- getItem media userdata src
+  fetchResult <- fetchItem' media sourceURL src
+  (raw, respMime) <- case fetchResult of
+                          Left msg -> err 67 $ "Could not fetch " ++ src ++
+                                               "\n" ++ show msg
+                          Right x  -> return x
  let raw' = if ext == ".gz"
                then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
                      $ [raw]
@ -141,20 +117,14 @@ getRaw media userdata mimetype src = do
                  (x, Nothing) -> x
                  (_, Just x ) -> x
  result <- if mime == "text/css"
-               then cssURLs media userdata (takeDirectory src) raw'
+               then cssURLs media (Just src) (takeDirectory src) raw'
               else return raw'
  return (result, mime)

 -- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs.  Items specified using absolute
-- URLs will be downloaded; those specified using relative URLs will
-- be sought first relative to the working directory, then in the
-- media bag, then relative
-- to the user data directory (if the first parameter is 'Just'
-- a directory), and finally relative to pandoc's default data
-- directory.
-makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String
-makeSelfContained media userdata inp = do
+-- scripts, and CSS using data: URIs.
+makeSelfContained :: WriterOptions -> String -> IO String
+makeSelfContained opts inp = do
  let tags = parseTags inp
-  out' <- mapM (convertTag media userdata) tags
+  out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
  return $ renderTags' out'
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@ -61,7 +61,6 @@ import Text.Pandoc.MIME (getMimeType)
 import qualified Control.Exception as E
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 import Text.HTML.TagSoup
-import Data.Monoid

 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
@ -794,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
    return $ Image lab (newsrc, tit)
 transformInline opts _ (x@(Math _ _))
  | WebTeX _ <- writerHTMLMathMethod opts = do
-    raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
+    raw <- makeSelfContained opts $ writeHtmlInline opts x
    return $ RawInline (Format "html") raw
 transformInline opts mediaRef  (RawInline fmt raw)
  | fmt == Format "html" = do