From cca9e8feb4b8c0638147e330cd7151bddb150c70 Mon Sep 17 00:00:00 2001 From: Artyom Kazak Date: Sun, 17 Aug 2014 20:42:30 +0400 Subject: [PATCH] MIME cleanup. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Create a type synonym for MIME type (instead of `String`). * Add `getMimeTypeDef` function. * Avoid recreating MIME type `Map`s every time. * Move “Formula-...” case handling into `getMimeType`. --- src/Text/Pandoc/MIME.hs | 40 ++++++++++++++++++++++++-------- src/Text/Pandoc/MediaBag.hs | 20 ++++++++-------- src/Text/Pandoc/SelfContained.hs | 5 ++-- src/Text/Pandoc/Shared.hs | 8 +++---- src/Text/Pandoc/Writers/Docx.hs | 16 ++++++------- src/Text/Pandoc/Writers/EPUB.hs | 4 ++-- src/Text/Pandoc/Writers/ODT.hs | 8 ++----- 7 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 6e6284b25..3b3b3b5b3 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -27,24 +27,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Mime type lookup for ODT writer. -} -module Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) -where +module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, + extensionFromMimeType )where import System.FilePath import Data.Char ( toLower ) +import Data.List (isPrefixOf, isSuffixOf) +import Data.Maybe (fromMaybe) import qualified Data.Map as M +type MimeType = String + -- | Determine mime type appropriate for file path. -getMimeType :: FilePath -> Maybe String -getMimeType "layout-cache" = Just "application/binary" -- in ODT -getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes - where mimeTypes = M.fromList mimeTypesList +getMimeType :: FilePath -> Maybe MimeType +getMimeType fp + -- ODT + | fp == "layout-cache" = + Just "application/binary" + | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp = + Just "application/vnd.oasis.opendocument.formula" + -- generic + | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes -extensionFromMimeType :: String -> Maybe String -extensionFromMimeType mimetype = M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes +-- | Determime mime type appropriate for file path, defaulting to +-- “application/octet-stream” if nothing else fits. +getMimeTypeDef :: FilePath -> MimeType +getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType + +extensionFromMimeType :: MimeType -> Maybe String +extensionFromMimeType mimetype = + M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes -- note: we just look up the basic mime type, dropping the content-encoding etc. - where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList -mimeTypesList :: [(String, String)] +reverseMimeTypes :: M.Map MimeType String +reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList + +mimeTypes :: M.Map String MimeType +mimeTypes = M.fromList mimeTypesList + +mimeTypesList :: [(String, MimeType)] mimeTypesList = -- List borrowed from happstack-server. [("gz","application/x-gzip") ,("cabal","application/x-cabal") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index b19804b5f..ed15edacb 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -42,7 +42,7 @@ import qualified Data.Map as M import qualified Data.ByteString.Lazy as BL import Data.Monoid (Monoid) import Control.Monad (when, MonadPlus(..)) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) import System.IO (stderr) @@ -51,7 +51,7 @@ import System.IO (stderr) -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map String (MimeType, BL.ByteString)) deriving (Monoid) instance Show MediaBag where @@ -59,27 +59,27 @@ instance Show MediaBag where -- | Insert a media item into a 'MediaBag', replacing any existing -- value with the same name. -insertMedia :: FilePath -- ^ relative path and canonical name of resource - -> Maybe String -- ^ mime type (Nothing = determine from extension) - -> BL.ByteString -- ^ contents of resource +insertMedia :: FilePath -- ^ relative path and canonical name of resource + -> Maybe MimeType -- ^ mime type (Nothing = determine from extension) + -> BL.ByteString -- ^ contents of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = MediaBag (M.insert fp (mime, contents) mediamap) - where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) + where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of - ".gz" -> getMimeType $ dropExtension fp - _ -> getMimeType fp + ".gz" -> getMimeTypeDef $ dropExtension fp + _ -> getMimeTypeDef fp -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag - -> Maybe (String, BL.ByteString) + -> Maybe (MimeType, BL.ByteString) lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. -mediaDirectory :: MediaBag -> [(String, String, Int)] +mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldWithKey (\fp (mime,contents) -> ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 1a4e037cf..36839ddd0 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,6 +42,7 @@ import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L import Text.Pandoc.Shared (renderTags', err, fetchItem') import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.MIME (MimeType) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.Options (WriterOptions(..)) @@ -98,8 +99,8 @@ cssURLs media sourceURL d orig = ";base64," `B.append` (encode raw) return $ x `B.append` "url(" `B.append` enc `B.append` rest -getRaw :: MediaBag -> Maybe String -> String -> String - -> IO (ByteString, String) +getRaw :: MediaBag -> Maybe String -> MimeType -> String + -> IO (ByteString, MimeType) getRaw media sourceURL mimetype src = do let ext = map toLower $ takeExtension src fetchResult <- fetchItem' media sourceURL src diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 3adf2a1f6..ced3a48db 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -107,7 +107,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, import qualified Data.Set as Set import System.Directory import System.FilePath (joinPath, splitDirectories) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType) import System.FilePath ( (), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S @@ -779,7 +779,7 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. fetchItem :: Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe String)) + -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) fetchItem sourceURL s = case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of (_, s') | isURI s' -> openURL s' @@ -800,14 +800,14 @@ fetchItem sourceURL s = -- | Like 'fetchItem', but also looks for items in a 'MediaBag'. fetchItem' :: MediaBag -> Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe String)) + -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) fetchItem' media sourceURL s = do case lookupMedia s media of Nothing -> fetchItem sourceURL s Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime) -- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) +openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) openURL u | Just u' <- stripPrefix "data:" u = let mime = takeWhile (/=',') u' diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 38031b7dc..21d440eb8 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -57,8 +57,9 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E -import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) -import Control.Applicative ((<|>), (<$>)) +import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, + extensionFromMimeType) +import Control.Applicative ((<$>), (<|>)) import Data.Maybe (mapMaybe) data ListMarker = NoMarker @@ -91,7 +92,7 @@ data WriterState = WriterState{ , stFootnotes :: [Element] , stSectionIds :: [String] , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString) + , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) , stListLevel :: Int , stListNumId :: Int , stLists :: [ListMarker] @@ -185,11 +186,10 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, - fromMaybe "application/octet-stream" mbMimeType) - let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath, - fromMaybe "application/octet-stream" - $ getMimeType imgpath) + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) + let mkMediaOverride imgpath = + mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 2aab7701f..62986df75 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -65,7 +65,7 @@ import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) @@ -851,7 +851,7 @@ ppTopElement = ("\n" ++) . unEntity . Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs -mediaTypeOf :: FilePath -> Maybe String +mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index feaa0167c..03f8e8ba4 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef -import Data.List ( isPrefixOf, isSuffixOf ) +import Data.List ( isPrefixOf ) import Data.Maybe ( fromMaybe ) import Text.XML.Light.Output import Text.TeXMath @@ -77,11 +77,7 @@ writeODT opts doc@(Pandoc meta _) = do $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of - Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp - then selfClosingTag "manifest:file-entry" - [("manifest:media-type","application/vnd.oasis.opendocument.formula") - ,("manifest:full-path",fp)] - else empty + Nothing -> empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp)