From 6dd24184765800bdedc1d28a87f9564f7f44d4f4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 31 Jul 2014 12:00:21 -0700
Subject: [PATCH] New module, Text.Pandoc.MediaBag.

Moved `MediaBag` definition and functions from Shared:
`lookupMedia`, `mediaDirectory`, `insertMedia`, `extractMediaBag`.
Removed `emptyMediaBag`; use `mempty` instead, since `MediaBag`
is a Monoid.
---
 pandoc.cabal                     |   1 +
 pandoc.hs                        |   7 +-
 src/Text/Pandoc.hs               |   3 +-
 src/Text/Pandoc/MediaBag.hs      | 107 +++++++++++++++++++++++++++++++
 src/Text/Pandoc/Options.hs       |   5 +-
 src/Text/Pandoc/Readers/Docx.hs  |   4 +-
 src/Text/Pandoc/SelfContained.hs |   4 +-
 src/Text/Pandoc/Shared.hs        |  79 ++---------------------
 src/Text/Pandoc/Writers/EPUB.hs  |   3 +-
 tests/Tests/Readers/Docx.hs      |   2 +-
 10 files changed, 129 insertions(+), 86 deletions(-)
 create mode 100644 src/Text/Pandoc/MediaBag.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index 377a3b6c4..47bdb6587 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -273,6 +273,7 @@ Library
                    Text.Pandoc.Options,
                    Text.Pandoc.Pretty,
                    Text.Pandoc.Shared,
+                   Text.Pandoc.MediaBag,
                    Text.Pandoc.Readers.HTML,
                    Text.Pandoc.Readers.LaTeX,
                    Text.Pandoc.Readers.Markdown,
diff --git a/pandoc.hs b/pandoc.hs
index 64128221c..607d0e964 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -37,8 +37,8 @@ import Text.Pandoc.Walk (walk)
 import Text.Pandoc.Readers.LaTeX (handleIncludes)
 import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
                             safeRead, headerShift, normalize, err, warn,
-                            openURL, mediaDirectory, extractMediaBag,
-                            emptyMediaBag )
+                            openURL )
+import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag )
 import Text.Pandoc.XML ( toEntities )
 import Text.Pandoc.SelfContained ( makeSelfContained )
 import Text.Pandoc.Process (pipeProcess)
@@ -69,6 +69,7 @@ import qualified Data.Yaml as Yaml
 import qualified Data.Text as T
 import Control.Applicative ((<$>))
 import Text.Pandoc.Readers.Txt2Tags (getT2TMeta)
+import Data.Monoid
 
 copyrightMessage :: String
 copyrightMessage = "\nCopyright (C) 2006-2014 John MacFarlane\n" ++
@@ -1217,7 +1218,7 @@ main = do
             inp <- readSources sources >>=
                        handleIncludes' . convertTabs . intercalate "\n"
             d <- r readerOpts inp
-            return (d, emptyMediaBag)
+            return (d, mempty)
           ByteStringReader r -> do
               (d, media) <- readFiles sources >>= r readerOpts
               d' <- case mbExtractMedia of
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 11553383c..77eb3e82f 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -160,7 +160,8 @@ import Text.Pandoc.Writers.Haddock
 import Text.Pandoc.Writers.Custom
 import Text.Pandoc.Templates
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (safeRead, warn, MediaBag)
+import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.MediaBag (MediaBag)
 import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.List (intercalate)
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
new file mode 100644
index 000000000..667089f55
--- /dev/null
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.MediaBag
+   Copyright   : Copyright (C) 2014 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Definition of a MediaBag object to hold binary resources, and an
+interface for interacting with it.
+-}
+module Text.Pandoc.MediaBag (
+                     MediaBag,
+                     lookupMedia,
+                     insertMedia,
+                     mediaDirectory,
+                     extractMediaBag
+                     ) where
+import System.FilePath
+import System.Directory (createDirectoryIfMissing)
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as BL
+import Data.Monoid (Monoid)
+import Control.Monad (when, MonadPlus(..))
+import Text.Pandoc.MIME (getMimeType)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Maybe (fromMaybe)
+import System.IO (stderr)
+
+-- | A container for a collection of binary resources, with names and
+-- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
+-- can be used for an empty 'MediaBag', and '<>' can be used to append
+-- two 'MediaBag's.
+newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+        deriving (Monoid)
+
+instance Show MediaBag where
+  show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath      -- ^ relative path and canonical name of resource
+            -> Maybe String  -- ^ mime type (Nothing = determine from extension)
+            -> BL.ByteString -- ^ contents of resource
+            -> MediaBag
+            -> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+  MediaBag (M.insert fp (mime, contents) mediamap)
+  where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+        fallback = case takeExtension fp of
+                        ".gz"   -> getMimeType $ dropExtension fp
+                        _       -> getMimeType fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
+            -> MediaBag
+            -> Maybe (String, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory (MediaBag mediamap) =
+  M.foldWithKey (\fp (mime,contents) ->
+      ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory.  Print informational
+-- messages if 'verbose' is true.
+extractMediaBag :: Bool
+                -> FilePath
+                -> MediaBag
+                -> IO ()
+extractMediaBag verbose dir (MediaBag mediamap) = do
+  sequence_ $ M.foldWithKey
+     (\fp (_ ,contents) ->
+        ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+  -- we join and split to convert a/b/c to a\b\c on Windows;
+  -- in zip containers all paths use /
+  let fullpath = dir </> joinPath (splitPath subpath)
+  createDirectoryIfMissing True $ takeDirectory fullpath
+  when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
+  BL.writeFile fullpath bs
+
+
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index bf6b3d910..85a6a3096 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -49,7 +49,8 @@ import Data.Set (Set)
 import qualified Data.Set as Set
 import Data.Default
 import Text.Pandoc.Highlighting (Style, pygments)
-import Text.Pandoc.Shared (MediaBag, emptyMediaBag)
+import Text.Pandoc.MediaBag (MediaBag)
+import Data.Monoid
 
 -- | Individually selectable syntax extensions.
 data Extension =
@@ -358,7 +359,7 @@ instance Default WriterOptions where
                       , writerTOCDepth         = 3
                       , writerReferenceODT     = Nothing
                       , writerReferenceDocx    = Nothing
-                      , writerMediaBag         = emptyMediaBag
+                      , writerMediaBag         = mempty
                       }
 
 -- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 2fb4da2d9..7a89c0b04 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,8 +84,10 @@ import Text.Pandoc.Readers.Docx.Lists
 import Text.Pandoc.Readers.Docx.Reducible
 import Text.Pandoc.Readers.Docx.TexChar
 import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
+import Data.Monoid
 import qualified Data.ByteString.Lazy as B
 import qualified Data.Map as M
 import Control.Monad.Reader
@@ -108,7 +110,7 @@ data DState = DState { docxAnchorMap :: M.Map String String
 
 defaultDState :: DState
 defaultDState = DState { docxAnchorMap = M.empty
-                       , docxMediaBag  = emptyMediaBag
+                       , docxMediaBag  = mempty
                        , docxInHeaderBlock = False
                        , docxInTexSubscript = False}
 
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 77f8b6530..adb2c0014 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -41,8 +41,8 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err,
-                           MediaBag, lookupMedia)
+import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
 import Text.Pandoc.UTF8 (toString,  fromString)
 import Text.Pandoc.MIME (getMimeType)
 import System.Directory (doesFileExist)
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index deab1abc0..d5769c1ab 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,5 +1,5 @@
 {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
-    FlexibleContexts, ScopedTypeVariables, GeneralizedNewtypeDeriving #-}
+    FlexibleContexts, ScopedTypeVariables #-}
 {-
 Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
@@ -48,13 +48,6 @@ module Text.Pandoc.Shared (
                      toRomanNumeral,
                      escapeURI,
                      tabFilter,
-                     -- * Media Handling
-                     MediaBag,
-                     emptyMediaBag,
-                     lookupMedia,
-                     insertMedia,
-                     mediaDirectory,
-                     extractMediaBag,
                      -- * Date/time
                      normalizeDate,
                      -- * Pandoc block and inline list processing
@@ -97,6 +90,7 @@ module Text.Pandoc.Shared (
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
 import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -106,18 +100,16 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
                    isLetter, isDigit, isSpace )
 import Data.List ( find, isPrefixOf, intercalate )
 import qualified Data.Map as M
-import Data.Maybe ( fromMaybe )
 import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
                      unEscapeString, parseURIReference )
 import qualified Data.Set as Set
 import System.Directory
 import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension, takeDirectory,
-                         splitPath, joinPath )
+import System.FilePath ( (</>), takeExtension, dropExtension)
 import Data.Generics (Typeable, Data)
 import qualified Control.Monad.State as S
 import qualified Control.Exception as E
-import Control.Monad (msum, unless, MonadPlus(..), when)
+import Control.Monad (msum, unless)
 import Text.Pandoc.Pretty (charWidth)
 import System.Locale (defaultTimeLocale)
 import Data.Time
@@ -127,7 +119,6 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
          renderOptions)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString.Lazy as BL
 import Text.Pandoc.Compat.Monoid
 import Data.ByteString.Base64 (decodeLenient)
 import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
@@ -294,68 +285,6 @@ tabFilter tabStop =
         x : go (spsToNextStop - 1) xs
   in  go tabStop
 
----
---- Media handling
----
-
--- | A container for a collection of binary resources, with names and
--- mime types.
-newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
-        deriving (Monoid)
-
-instance Show MediaBag where
-  show bag = "MediaBag " ++ show (mediaDirectory bag)
-
-emptyMediaBag :: MediaBag
-emptyMediaBag = MediaBag M.empty
-
--- | Insert a media item into a 'MediaBag', replacing any existing
--- value with the same name.
-insertMedia :: FilePath      -- ^ relative path and canonical name of resource
-            -> Maybe String  -- ^ mime type (Nothing = determine from extension)
-            -> BL.ByteString -- ^ contents of resource
-            -> MediaBag
-            -> MediaBag
-insertMedia fp mbMime contents (MediaBag mediamap) =
-  MediaBag (M.insert fp (mime, contents) mediamap)
-  where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
-        fallback = case takeExtension fp of
-                        ".gz"   -> getMimeType $ dropExtension fp
-                        _       -> getMimeType fp
-
--- | Lookup a media item in a 'MediaBag', returning mime type and contents.
-lookupMedia :: FilePath
-            -> MediaBag
-            -> Maybe (String, BL.ByteString)
-lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
-
--- | Get a list of the file paths stored in a 'MediaBag', with
--- their corresponding mime types and the lengths in bytes of the contents.
-mediaDirectory :: MediaBag -> [(String, String, Int)]
-mediaDirectory (MediaBag mediamap) =
-  M.foldWithKey (\fp (mime,contents) ->
-      ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
-
--- | Extract contents of MediaBag to a given directory.  Print informational
--- messages if 'verbose' is true.
-extractMediaBag :: Bool
-                -> FilePath
-                -> MediaBag
-                -> IO ()
-extractMediaBag verbose dir (MediaBag mediamap) = do
-  sequence_ $ M.foldWithKey
-     (\fp (_ ,contents) ->
-        ((writeMedia verbose dir (fp, contents)):)) [] mediamap
-
-writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
-writeMedia verbose dir (subpath, bs) = do
-  -- we join and split to convert a/b/c to a\b\c on Windows;
-  -- in zip containers all paths use /
-  let fullpath = dir </> joinPath (splitPath subpath)
-  createDirectoryIfMissing True $ takeDirectory fullpath
-  when verbose $ warn $ "extracting " ++ fullpath
-  BL.writeFile fullpath bs
-
 --
 -- Date/time
 --
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 62dd70e73..770b6f244 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -61,6 +61,7 @@ import Text.Pandoc.MIME (getMimeType)
 import qualified Control.Exception as E
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 import Text.HTML.TagSoup
+import Data.Monoid
 
 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
@@ -793,7 +794,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
     return $ Image lab (newsrc, tit)
 transformInline opts _ (x@(Math _ _))
   | WebTeX _ <- writerHTMLMathMethod opts = do
-    raw <- makeSelfContained emptyMediaBag Nothing $ writeHtmlInline opts x
+    raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
     return $ RawInline (Format "html") raw
 transformInline opts mediaRef  (RawInline fmt raw)
   | fmt == Format "html" = do
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 0eae20e22..85a02debd 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -12,7 +12,7 @@ import qualified Data.ByteString.Base64 as B64
 import Text.Pandoc.Readers.Docx
 import Text.Pandoc.Writers.Native (writeNative)
 import qualified Data.Map as M
-import Text.Pandoc.Shared (lookupMedia)
+import Text.Pandoc.MediaBag (lookupMedia)
 
 -- We define a wrapper around pandoc that doesn't normalize in the
 -- tests. Since we do our own normalization, we want to make sure