From ec49643d647080218bedfaf61b3eb9b9383e42c0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 15 Mar 2020 04:49:36 +0100
Subject: [PATCH] Subdivide Text.Pandoc.Class into small modules (#6106)

* Extract CommonState into submodule

* Extract PandocMonad into submodule

* PandocMonad: ensure all functions have Haddock documentation
---
 pandoc.cabal                         |   2 +
 src/Text/Pandoc/Class.hs             | 732 ++-------------------------
 src/Text/Pandoc/Class/CommonState.hs |  80 +++
 src/Text/Pandoc/Class/PandocMonad.hs | 706 ++++++++++++++++++++++++++
 4 files changed, 825 insertions(+), 695 deletions(-)
 create mode 100644 src/Text/Pandoc/Class/CommonState.hs
 create mode 100644 src/Text/Pandoc/Class/PandocMonad.hs

diff --git a/pandoc.cabal b/pandoc.cabal
index 59581add8..66a309515 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -569,6 +569,8 @@ library
                    Text.Pandoc.App.FormatHeuristics,
                    Text.Pandoc.App.Opt,
                    Text.Pandoc.App.OutputSettings,
+                   Text.Pandoc.Class.CommonState,
+                   Text.Pandoc.Class.PandocMonad,
                    Text.Pandoc.Filter.JSON,
                    Text.Pandoc.Filter.Lua,
                    Text.Pandoc.Filter.Path,
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 436238139..b125e3bab 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -74,356 +74,57 @@ module Text.Pandoc.Class ( PandocMonad(..)
                          , Translations
                          ) where
 
-import Prelude
-import System.Random (StdGen, next, mkStdGen)
-import qualified System.Random as IO (newStdGen)
 import Codec.Archive.Zip
-import qualified Data.CaseInsensitive as CI
-import Data.Unique (hashUnique)
-import qualified Data.Unique as IO (newUnique)
-import qualified Text.Pandoc.UTF8 as UTF8
-import qualified System.Directory as Directory
-import Data.Time (UTCTime)
-import Text.Pandoc.Logging
-import Text.Pandoc.Shared (uriPathToPath)
-import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
-import qualified Data.Time as IO (getCurrentTime)
-import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
-import Text.Pandoc.Definition
-import Data.Digest.Pure.SHA (sha1, showDigest)
-import Data.Maybe (fromMaybe)
-import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
-                             , posixSecondsToUTCTime
-                             , POSIXTime )
-import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
+import Control.Monad.Except
+import Control.Monad.State.Strict
 import Data.ByteString.Base64 (decodeLenient)
-import Network.URI ( escapeURIString, nonStrictRelativeTo,
-                     unEscapeString, parseURIReference, isAllowedInURI,
-                     parseURI, URI(..) )
+import Data.ByteString.Lazy (toChunks)
+import Data.Default
+import Data.Time (UTCTime)
+import Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
+import Data.Time.LocalTime (TimeZone, utc)
+import Data.Unique (hashUnique)
+import Data.Word (Word8)
 import Network.HTTP.Client
        (httpLbs, responseBody, responseHeaders,
         Request(port, host, requestHeaders), parseRequest, newManager)
 import Network.HTTP.Client.Internal (addProxy)
 import Network.HTTP.Client.TLS (tlsManagerSettings)
-import System.Environment (getEnv)
 import Network.HTTP.Types.Header ( hContentType )
 import Network.Socket (withSocketsDo)
-import Data.ByteString.Lazy (toChunks)
-import qualified Control.Exception as E
-import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import Network.URI ( unEscapeString )
+import Prelude
+import System.Directory (createDirectoryIfMissing, getDirectoryContents,
+                         doesDirectoryExist)
+import System.Environment (getEnv)
+import System.FilePath ((</>), takeDirectory, normalise)
+import System.FilePath.Glob (match, compile)
+import System.IO (stderr)
+import System.IO.Error
+import System.Random (StdGen, next, mkStdGen)
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
 import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
-import Text.Pandoc.Walk (walkM, walk)
-import qualified Text.Pandoc.MediaBag as MB
+import Text.Pandoc.Translations (Translations)
+import Text.Pandoc.Walk (walk)
+import qualified Control.Exception as E
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
-import qualified System.Environment as IO (lookupEnv)
-import System.FilePath.Glob (match, compile)
-import System.Directory (createDirectoryIfMissing, getDirectoryContents,
-                          doesDirectoryExist)
-import System.FilePath
-       ((</>), (<.>), takeDirectory, takeExtension, dropExtension,
-        isRelative, normalise, splitDirectories)
-import qualified System.FilePath.Glob as IO (glob)
-import qualified System.FilePath.Posix as Posix
-import qualified System.Directory as IO (getModificationTime)
-import Control.Monad.State.Strict
-import Control.Monad.Except
-import Data.Word (Word8)
-import Data.Default
-import System.IO.Error
-import System.IO (stderr)
+import qualified Data.CaseInsensitive as CI
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Text.Pandoc.Error
-import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
-import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
-                                 readTranslations)
-import qualified Debug.Trace
-#ifdef EMBED_DATA_FILES
-import Text.Pandoc.Data (dataFiles)
-#else
-import qualified Paths_pandoc as Paths
-#endif
-
--- | The PandocMonad typeclass contains all the potentially
--- IO-related functions used in pandoc's readers and writers.
--- Instances of this typeclass may implement these functions
--- in IO (as in 'PandocIO') or using an internal state that
--- represents a file system, time, and so on (as in 'PandocPure').
-class (Functor m, Applicative m, Monad m, MonadError PandocError m)
-      => PandocMonad m where
-  -- | Lookup an environment variable.
-  lookupEnv :: T.Text -> m (Maybe T.Text)
-  -- | Get the current (UTC) time.
-  getCurrentTime :: m UTCTime
-  -- | Get the locale's time zone.
-  getCurrentTimeZone :: m TimeZone
-  -- | Return a new generator for random numbers.
-  newStdGen :: m StdGen
-  -- | Return a new unique integer.
-  newUniqueHash :: m Int
-  -- | Retrieve contents and mime type from a URL, raising
-  -- an error on failure.
-  openURL :: T.Text -> m (B.ByteString, Maybe MimeType)
-  -- | Read the lazy ByteString contents from a file path,
-  -- raising an error on failure.
-  readFileLazy :: FilePath -> m BL.ByteString
-  -- | Read the strict ByteString contents from a file path,
-  -- raising an error on failure.
-  readFileStrict :: FilePath -> m B.ByteString
-  -- | Return a list of paths that match a glob, relative to
-  -- the working directory.  See 'System.FilePath.Glob' for
-  -- the glob syntax.
-  glob :: String -> m [FilePath]
-  -- | Returns True if file exists.
-  fileExists :: FilePath -> m Bool
-  -- | Returns the path of data file.
-  getDataFileName :: FilePath -> m FilePath
-  -- | Return the modification time of a file.
-  getModificationTime :: FilePath -> m UTCTime
-  -- | Get the value of the 'CommonState' used by all instances
-  -- of 'PandocMonad'.
-  getCommonState :: m CommonState
-  -- | Set the value of the 'CommonState' used by all instances
-  -- of 'PandocMonad'.
-  -- | Get the value of a specific field of 'CommonState'.
-  putCommonState :: CommonState -> m ()
-  -- | Get the value of a specific field of 'CommonState'.
-  getsCommonState :: (CommonState -> a) -> m a
-  getsCommonState f = f <$> getCommonState
-  -- | Modify the 'CommonState'.
-  modifyCommonState :: (CommonState -> CommonState) -> m ()
-  modifyCommonState f = getCommonState >>= putCommonState . f
-  -- Output a log message.
-  logOutput :: LogMessage -> m ()
-  -- Output a debug message to sterr, using 'Debug.Trace.trace',
-  -- if tracing is enabled.  Note: this writes to stderr even in
-  -- pure instances.
-  trace :: T.Text -> m ()
-  trace msg = do
-    tracing <- getsCommonState stTrace
-    when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ())
-
--- * Functions defined for all PandocMonad instances
-
--- | Set the verbosity level.
-setVerbosity :: PandocMonad m => Verbosity -> m ()
-setVerbosity verbosity =
-  modifyCommonState $ \st -> st{ stVerbosity = verbosity }
-
--- | Get the verbosity level.
-getVerbosity :: PandocMonad m => m Verbosity
-getVerbosity = getsCommonState stVerbosity
-
--- Get the accomulated log messages (in temporal order).
-getLog :: PandocMonad m => m [LogMessage]
-getLog = reverse <$> getsCommonState stLog
-
--- | Log a message using 'logOutput'.  Note that 'logOutput' is
--- called only if the verbosity level exceeds the level of the
--- message, but the message is added to the list of log messages
--- that will be retrieved by 'getLog' regardless of its verbosity level.
-report :: PandocMonad m => LogMessage -> m ()
-report msg = do
-  verbosity <- getsCommonState stVerbosity
-  let level = messageVerbosity msg
-  when (level <= verbosity) $ logOutput msg
-  modifyCommonState $ \st -> st{ stLog = msg : stLog st }
-
--- | Determine whether tracing is enabled.  This affects
--- the behavior of 'trace'.  If tracing is not enabled,
--- 'trace' does nothing.
-setTrace :: PandocMonad m => Bool -> m ()
-setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
-
--- | Set request header to use in HTTP requests.
-setRequestHeader :: PandocMonad m
-                 => T.Text  -- ^ Header name
-                 -> T.Text  -- ^ Value
-                 -> m ()
-setRequestHeader name val = modifyCommonState $ \st ->
-  st{ stRequestHeaders =
-       (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st)  }
-
--- | Initialize the media bag.
-setMediaBag :: PandocMonad m => MediaBag -> m ()
-setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
-
--- Retrieve the media bag.
-getMediaBag :: PandocMonad m => m MediaBag
-getMediaBag = getsCommonState stMediaBag
-
--- Insert an item into the media bag.
-insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
-insertMedia fp mime bs = do
-  mb <- getMediaBag
-  let mb' = MB.insertMedia fp mime bs mb
-  setMediaBag mb'
-
--- Retrieve the input filenames.
-getInputFiles :: PandocMonad m => m [FilePath]
-getInputFiles = getsCommonState stInputFiles
-
--- Set the input filenames.
-setInputFiles :: PandocMonad m => [FilePath] -> m ()
-setInputFiles fs = do
-  let sourceURL = case fs of
-                    []    -> Nothing
-                    (x:_) -> case parseURI x of
-                                Just u
-                                  | uriScheme u `elem` ["http:","https:"] ->
-                                      Just $ show u{ uriQuery = "",
-                                                     uriFragment = "" }
-                                _ -> Nothing
-
-  modifyCommonState $ \st -> st{ stInputFiles = fs
-                               , stSourceURL = T.pack <$> sourceURL }
-
--- Retrieve the output filename.
-getOutputFile :: PandocMonad m => m (Maybe FilePath)
-getOutputFile = getsCommonState stOutputFile
-
--- Set the output filename.
-setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
-setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf }
-
--- Retrieve the resource path searched by 'fetchItem'.
-getResourcePath :: PandocMonad m => m [FilePath]
-getResourcePath = getsCommonState stResourcePath
-
--- Set the resource path searched by 'fetchItem'.
-setResourcePath :: PandocMonad m => [FilePath] -> m ()
-setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
-
--- Get the POSIX time.
-getPOSIXTime :: PandocMonad m => m POSIXTime
-getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
-
--- Get the zoned time.
-getZonedTime :: PandocMonad m => m ZonedTime
-getZonedTime = do
-  t <- getCurrentTime
-  tz <- getCurrentTimeZone
-  return $ utcToZonedTime tz t
-
--- | Read file, checking in any number of directories.
-readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text)
-readFileFromDirs [] _ = return Nothing
-readFileFromDirs (d:ds) f = catchError
-    ((Just . T.pack . UTF8.toStringLazy) <$> readFileLazy (d </> f))
-    (\_ -> readFileFromDirs ds f)
-
--- | 'CommonState' represents state that is used by all
--- instances of 'PandocMonad'.  Normally users should not
--- need to interact with it directly; instead, auxiliary
--- functions like 'setVerbosity' and 'withMediaBag' should be used.
-data CommonState = CommonState { stLog          :: [LogMessage]
-                                 -- ^ A list of log messages in reverse order
-                               , stUserDataDir  :: Maybe FilePath
-                                 -- ^ Directory to search for data files
-                               , stSourceURL    :: Maybe T.Text
-                                 -- ^ Absolute URL + dir of 1st source file
-                               , stRequestHeaders :: [(T.Text, T.Text)]
-                                 -- ^ Headers to add for HTTP requests
-                               , stMediaBag     :: MediaBag
-                                 -- ^ Media parsed from binary containers
-                               , stTranslations :: Maybe
-                                                  (Lang, Maybe Translations)
-                                 -- ^ Translations for localization
-                               , stInputFiles   :: [FilePath]
-                                 -- ^ List of input files from command line
-                               , stOutputFile   :: Maybe FilePath
-                                 -- ^ Output file from command line
-                               , stResourcePath :: [FilePath]
-                                 -- ^ Path to search for resources like
-                                 -- included images
-                               , stVerbosity    :: Verbosity
-                                 -- ^ Verbosity level
-                               , stTrace        :: Bool
-                                 -- ^ Controls whether tracing messages are
-                                 -- issued.
-                               }
-
-instance Default CommonState where
-  def = CommonState { stLog = []
-                    , stUserDataDir = Nothing
-                    , stSourceURL = Nothing
-                    , stRequestHeaders = []
-                    , stMediaBag = mempty
-                    , stTranslations = Nothing
-                    , stInputFiles = []
-                    , stOutputFile = Nothing
-                    , stResourcePath = ["."]
-                    , stVerbosity = WARNING
-                    , stTrace = False
-                    }
-
--- | Convert BCP47 string to a Lang, issuing warning
--- if there are problems.
-toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
-toLang Nothing = return Nothing
-toLang (Just s) =
-  case parseBCP47 s of
-       Left _ -> do
-         report $ InvalidLang s
-         return Nothing
-       Right l -> return (Just l)
-
--- | Select the language to use with 'translateTerm'.
--- Note that this does not read a translation file;
--- that is only done the first time 'translateTerm' is
--- used.
-setTranslations :: PandocMonad m => Lang -> m ()
-setTranslations lang =
-  modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
-
--- | Load term map.
-getTranslations :: PandocMonad m => m Translations
-getTranslations = do
-  mbtrans <- getsCommonState stTranslations
-  case mbtrans of
-       Nothing -> return mempty  -- no language defined
-       Just (_, Just t) -> return t
-       Just (lang, Nothing) -> do  -- read from file
-         let translationFile = "translations/" <> renderLang lang <> ".yaml"
-         let fallbackFile = "translations/" <> langLanguage lang <> ".yaml"
-         let getTrans fp = do
-               bs <- readDataFile fp
-               case readTranslations (UTF8.toText bs) of
-                    Left e   -> do
-                      report $ CouldNotLoadTranslations (renderLang lang)
-                        (T.pack fp <> ": " <> e)
-                      -- make sure we don't try again...
-                      modifyCommonState $ \st ->
-                        st{ stTranslations = Nothing }
-                      return mempty
-                    Right t -> do
-                      modifyCommonState $ \st ->
-                                  st{ stTranslations = Just (lang, Just t) }
-                      return t
-         catchError (getTrans $ T.unpack translationFile)
-           (\_ ->
-             catchError (getTrans $ T.unpack fallbackFile)
-               (\e -> do
-                 report $ CouldNotLoadTranslations (renderLang lang)
-                          $ case e of
-                               PandocCouldNotFindDataFileError _ ->
-                                 "data file " <> fallbackFile <> " not found"
-                               _ -> ""
-                 -- make sure we don't try again...
-                 modifyCommonState $ \st -> st{ stTranslations = Nothing }
-                 return mempty))
-
--- | Get a translation from the current term map.
--- Issue a warning if the term is not defined.
-translateTerm :: PandocMonad m => Term -> m T.Text
-translateTerm term = do
-  translations <- getTranslations
-  case lookupTerm term translations of
-       Just s -> return s
-       Nothing -> do
-         report $ NoTranslation $ T.pack $ show term
-         return ""
+import qualified Data.Time as IO (getCurrentTime)
+import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import qualified Data.Unique as IO (newUnique)
+import qualified System.Directory as Directory
+import qualified System.Directory as IO (getModificationTime)
+import qualified System.Environment as IO (lookupEnv)
+import qualified System.FilePath.Glob as IO (glob)
+import qualified System.Random as IO (newStdGen)
+import qualified Text.Pandoc.UTF8 as UTF8
 
 -- | Evaluate a 'PandocIO' operation.
 runIO :: PandocIO a -> IO (Either PandocError a)
@@ -519,314 +220,6 @@ alertIndent (l:ls) = do
   where go l' = do UTF8.hPutStr stderr "  "
                    UTF8.hPutStrLn stderr $ T.unpack l'
 
--- | Specialized version of parseURIReference that disallows
--- single-letter schemes.  Reason:  these are usually windows absolute
--- paths.
-parseURIReference' :: T.Text -> Maybe URI
-parseURIReference' s = do
-  u <- parseURIReference (T.unpack s)
-  case uriScheme u of
-       [_] -> Nothing
-       _   -> Just u
-
--- | Set the user data directory in common state.
-setUserDataDir :: PandocMonad m
-               => Maybe FilePath
-               -> m ()
-setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
-
--- | Get the user data directory from common state.
-getUserDataDir :: PandocMonad m
-               => m (Maybe FilePath)
-getUserDataDir = getsCommonState stUserDataDir
-
--- | Fetch an image or other item from the local filesystem or the net.
--- Returns raw content and maybe mime type.
-fetchItem :: PandocMonad m
-          => T.Text
-          -> m (B.ByteString, Maybe MimeType)
-fetchItem s = do
-  mediabag <- getMediaBag
-  case lookupMedia (T.unpack s) mediabag of
-    Just (mime, bs) -> return (BL.toStrict bs, Just mime)
-    Nothing -> downloadOrRead s
-
-downloadOrRead :: PandocMonad m
-               => T.Text
-               -> m (B.ByteString, Maybe MimeType)
-downloadOrRead s = do
-  sourceURL <- getsCommonState stSourceURL
-  case (sourceURL >>= parseURIReference' .
-                       ensureEscaped, ensureEscaped s) of
-    (Just u, s') -> -- try fetching from relative path at source
-       case parseURIReference' s' of
-            Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u
-            Nothing -> openURL s' -- will throw error
-    (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' ->  -- protocol-relative URI
-                -- we exclude //? because of //?UNC/ on Windows
-       case parseURIReference' s' of
-            Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon
-            Nothing -> openURL s' -- will throw error
-    (Nothing, s') ->
-       case parseURI (T.unpack s') of  -- requires absolute URI
-            Just u' | uriScheme u' == "file:" ->
-                 readLocalFile $ uriPathToPath (T.pack $ uriPath u')
-            -- We don't want to treat C:/ as a scheme:
-            Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u')
-            _ -> readLocalFile fp -- get from local file system
-   where readLocalFile f = do
-             resourcePath <- getResourcePath
-             cont <- if isRelative f
-                        then withPaths resourcePath readFileStrict f
-                        else readFileStrict f
-             return (cont, mime)
-         httpcolon = URI{ uriScheme = "http:",
-                          uriAuthority = Nothing,
-                          uriPath = "",
-                          uriQuery = "",
-                          uriFragment = "" }
-         dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#')
-         fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s
-         mime = getMimeType $ case takeExtension fp of
-                     ".gz" -> dropExtension fp
-                     ".svgz" -> dropExtension fp ++ ".svg"
-                     x     -> x
-         ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash
-         convertSlash '\\' = '/'
-         convertSlash x    = x
-
--- Retrieve default reference.docx.
-getDefaultReferenceDocx :: PandocMonad m => m Archive
-getDefaultReferenceDocx = do
-  let paths = ["[Content_Types].xml",
-               "_rels/.rels",
-               "docProps/app.xml",
-               "docProps/core.xml",
-               "docProps/custom.xml",
-               "word/document.xml",
-               "word/fontTable.xml",
-               "word/footnotes.xml",
-               "word/comments.xml",
-               "word/numbering.xml",
-               "word/settings.xml",
-               "word/webSettings.xml",
-               "word/styles.xml",
-               "word/_rels/document.xml.rels",
-               "word/_rels/footnotes.xml.rels",
-               "word/theme/theme1.xml"]
-  let toLazy = BL.fromChunks . (:[])
-  let pathToEntry path = do
-        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
-        contents <- toLazy <$> readDataFile ("docx/" ++ path)
-        return $ toEntry path epochtime contents
-  datadir <- getUserDataDir
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- fileExists (d </> "reference.docx")
-                       if exists
-                          then return (Just (d </> "reference.docx"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> readFileLazy arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
--- Retrieve default reference.odt.
-getDefaultReferenceODT :: PandocMonad m => m Archive
-getDefaultReferenceODT = do
-  let paths = ["mimetype",
-               "manifest.rdf",
-               "styles.xml",
-               "content.xml",
-               "meta.xml",
-               "settings.xml",
-               "Configurations2/accelerator/current.xml",
-               "Thumbnails/thumbnail.png",
-               "META-INF/manifest.xml"]
-  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
-                            contents <- (BL.fromChunks . (:[])) `fmap`
-                                          readDataFile ("odt/" ++ path)
-                            return $ toEntry path epochtime contents
-  datadir <- getUserDataDir
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- fileExists (d </> "reference.odt")
-                       if exists
-                          then return (Just (d </> "reference.odt"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> readFileLazy arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
-getDefaultReferencePptx :: PandocMonad m => m Archive
-getDefaultReferencePptx = do
-  -- We're going to narrow this down substantially once we get it
-  -- working.
-  let paths = [ "[Content_Types].xml"
-              , "_rels/.rels"
-              , "docProps/app.xml"
-              , "docProps/core.xml"
-              , "ppt/_rels/presentation.xml.rels"
-              , "ppt/presProps.xml"
-              , "ppt/presentation.xml"
-              , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout5.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout6.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout7.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout8.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout9.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout10.xml.rels"
-              , "ppt/slideLayouts/_rels/slideLayout11.xml.rels"
-              , "ppt/slideLayouts/slideLayout1.xml"
-              , "ppt/slideLayouts/slideLayout10.xml"
-              , "ppt/slideLayouts/slideLayout11.xml"
-              , "ppt/slideLayouts/slideLayout2.xml"
-              , "ppt/slideLayouts/slideLayout3.xml"
-              , "ppt/slideLayouts/slideLayout4.xml"
-              , "ppt/slideLayouts/slideLayout5.xml"
-              , "ppt/slideLayouts/slideLayout6.xml"
-              , "ppt/slideLayouts/slideLayout7.xml"
-              , "ppt/slideLayouts/slideLayout8.xml"
-              , "ppt/slideLayouts/slideLayout9.xml"
-              , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
-              , "ppt/slideMasters/slideMaster1.xml"
-              , "ppt/slides/_rels/slide1.xml.rels"
-              , "ppt/slides/slide1.xml"
-              , "ppt/slides/_rels/slide2.xml.rels"
-              , "ppt/slides/slide2.xml"
-              , "ppt/slides/_rels/slide3.xml.rels"
-              , "ppt/slides/slide3.xml"
-              , "ppt/slides/_rels/slide4.xml.rels"
-              , "ppt/slides/slide4.xml"
-              , "ppt/tableStyles.xml"
-              , "ppt/theme/theme1.xml"
-              , "ppt/viewProps.xml"
-              -- These relate to notes slides.
-              , "ppt/notesMasters/notesMaster1.xml"
-              , "ppt/notesMasters/_rels/notesMaster1.xml.rels"
-              , "ppt/notesSlides/notesSlide1.xml"
-              , "ppt/notesSlides/_rels/notesSlide1.xml.rels"
-              , "ppt/notesSlides/notesSlide2.xml"
-              , "ppt/notesSlides/_rels/notesSlide2.xml.rels"
-              , "ppt/theme/theme2.xml"
-              ]
-  let toLazy = BL.fromChunks . (:[])
-  let pathToEntry path = do
-        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
-        contents <- toLazy <$> readDataFile ("pptx/" ++ path)
-        return $ toEntry path epochtime contents
-  datadir <- getUserDataDir
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- fileExists (d </> "reference.pptx")
-                       if exists
-                          then return (Just (d </> "reference.pptx"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> readFileLazy arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
-
--- | Read file from user data directory or,
--- if not found there, from Cabal data directory.
-readDataFile :: PandocMonad m => FilePath -> m B.ByteString
-readDataFile fname = do
-  datadir <- getUserDataDir
-  case datadir of
-       Nothing -> readDefaultDataFile fname
-       Just userDir -> do
-         exists <- fileExists (userDir </> fname)
-         if exists
-            then readFileStrict (userDir </> fname)
-            else readDefaultDataFile fname
-
--- | Read file from from Cabal data directory.
-readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
-readDefaultDataFile "reference.docx" =
-  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
-readDefaultDataFile "reference.pptx" =
-  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx
-readDefaultDataFile "reference.odt" =
-  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
-readDefaultDataFile fname =
-#ifdef EMBED_DATA_FILES
-  case lookup (makeCanonical fname) dataFiles of
-    Nothing       -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname
-    Just contents -> return contents
-#else
-  getDataFileName fname' >>= checkExistence >>= readFileStrict
-    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-
-checkExistence :: PandocMonad m => FilePath -> m FilePath
-checkExistence fn = do
-  exists <- fileExists fn
-  if exists
-     then return fn
-     else throwError $ PandocCouldNotFindDataFileError $ T.pack fn
-#endif
-
-makeCanonical :: FilePath -> FilePath
-makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- where  transformPathParts = reverse . foldl go []
-        go as     "."  = as
-        go (_:as) ".." = as
-        go as     x    = x : as
-
-withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
-withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp
-withPaths (p:ps) action fp =
-  catchError (action (p </> fp))
-             (\_ -> withPaths ps action fp)
-
--- | Fetch local or remote resource (like an image) and provide data suitable
--- for adding it to the MediaBag.
-fetchMediaResource :: PandocMonad m
-              => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
-fetchMediaResource src = do
-  (bs, mt) <- downloadOrRead src
-  let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
-                      (mt >>= extensionFromMimeType)
-  let bs' = BL.fromChunks [bs]
-  let basename = showDigest $ sha1 bs'
-  let fname = basename <.> T.unpack ext
-  return (fname, mt, bs')
-
--- | Traverse tree, filling media bag for any images that
--- aren't already in the media bag.
-fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
-fillMediaBag d = walkM handleImage d
-  where handleImage :: PandocMonad m => Inline -> m Inline
-        handleImage (Image attr lab (src, tit)) = catchError
-          (do mediabag <- getMediaBag
-              case lookupMedia (T.unpack src) mediabag of
-                Just (_, _) -> return $ Image attr lab (src, tit)
-                Nothing -> do
-                  (fname, mt, bs) <- fetchMediaResource src
-                  insertMedia fname mt bs
-                  return $ Image attr lab (T.pack fname, tit))
-          (\e ->
-              case e of
-                PandocResourceNotFound _ -> do
-                  report $ CouldNotFetchResource src
-                            "replacing image with description"
-                  -- emit alt text
-                  return $ Span ("",["image"],[]) lab
-                PandocHttpError u er -> do
-                  report $ CouldNotFetchResource u
-                            (T.pack $ show er ++ "\rReplacing image with description.")
-                  -- emit alt text
-                  return $ Span ("",["image"],[]) lab
-                _ -> throwError e)
-        handleImage x = return x
-
 -- | Extract media from the mediabag into a directory.
 extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
 extractMedia dir d = do
@@ -1015,54 +408,3 @@ instance PandocMonad PandocPure where
   putCommonState x = PandocPure $ lift $ put x
 
   logOutput _msg = return ()
-
--- This requires UndecidableInstances.  We could avoid that
--- by repeating the definitions below for every monad transformer
--- we use: ReaderT, WriterT, StateT, RWST.  But this seems to
--- be harmless.
-instance (MonadTrans t, PandocMonad m, Functor (t m),
-          MonadError PandocError (t m), Monad (t m),
-          Applicative (t m)) => PandocMonad (t m) where
-  lookupEnv = lift . lookupEnv
-  getCurrentTime = lift getCurrentTime
-  getCurrentTimeZone = lift getCurrentTimeZone
-  newStdGen = lift newStdGen
-  newUniqueHash = lift newUniqueHash
-  openURL = lift . openURL
-  readFileLazy = lift . readFileLazy
-  readFileStrict = lift . readFileStrict
-  glob = lift . glob
-  fileExists = lift . fileExists
-  getDataFileName = lift . getDataFileName
-  getModificationTime = lift . getModificationTime
-  getCommonState = lift getCommonState
-  putCommonState = lift . putCommonState
-  logOutput = lift . logOutput
-
-instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
-  lookupEnv = lift . lookupEnv
-  getCurrentTime = lift getCurrentTime
-  getCurrentTimeZone = lift getCurrentTimeZone
-  newStdGen = lift newStdGen
-  newUniqueHash = lift newUniqueHash
-  openURL = lift . openURL
-  readFileLazy = lift . readFileLazy
-  readFileStrict = lift . readFileStrict
-  glob = lift . glob
-  fileExists = lift . fileExists
-  getDataFileName = lift . getDataFileName
-  getModificationTime = lift . getModificationTime
-  getCommonState = lift getCommonState
-  putCommonState = lift . putCommonState
-  trace msg = do
-    tracing <- getsCommonState stTrace
-    when tracing $ do
-      pos <- getPosition
-      Debug.Trace.trace
-        ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++
-            show (sourceLine pos) ++
-            if sourceName pos == "chunk"
-               then " of chunk"
-               else "")
-        (return ())
-  logOutput = lift . logOutput
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
new file mode 100644
index 000000000..4a0f66859
--- /dev/null
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -0,0 +1,80 @@
+{- |
+Module      : Text.Pandoc.Class.CommonState
+Copyright   : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License     : GNU GPL, version 2 or above
+
+Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability   : alpha
+Portability : portable
+
+Common state shared by all pandoc-specific operations, including
+those in readers, writers, and Lua filters.
+-}
+
+module Text.Pandoc.Class.CommonState
+  ( CommonState(..)
+  , defaultCommonState
+  )
+where
+
+import Data.Default (Default (def))
+import Data.Text (Text)
+import Text.Pandoc.BCP47 (Lang)
+import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
+import Text.Pandoc.Translations (Translations)
+
+-- | 'CommonState' represents state that is used by all
+-- instances of 'PandocMonad'.  Normally users should not
+-- need to interact with it directly; instead, auxiliary
+-- functions like 'setVerbosity' and 'withMediaBag' should be used.
+data CommonState = CommonState
+  { stLog          :: [LogMessage]
+    -- ^ A list of log messages in reverse order
+  , stUserDataDir  :: Maybe FilePath
+    -- ^ Directory to search for data files
+  , stSourceURL    :: Maybe Text
+    -- ^ Absolute URL + dir of 1st source file
+  , stRequestHeaders :: [(Text, Text)]
+    -- ^ Headers to add for HTTP requests
+  , stMediaBag     :: MediaBag
+    -- ^ Media parsed from binary containers
+  , stTranslations :: Maybe (Lang, Maybe Translations)
+    -- ^ Translations for localization
+  , stInputFiles   :: [FilePath]
+    -- ^ List of input files from command line
+  , stOutputFile   :: Maybe FilePath
+    -- ^ Output file from command line
+  , stResourcePath :: [FilePath]
+    -- ^ Path to search for resources like
+    -- included images
+  , stVerbosity    :: Verbosity
+    -- ^ Verbosity level
+  , stTrace        :: Bool
+    -- ^ Controls whether tracing messages are
+    -- issued.
+  }
+
+-- | The default @'CommonState'@. All fields are initialized as the
+-- monoid identity of their resprective type, except for:
+--
+--   * @'stResourcePath'@, which is set to @["."]@,
+--   * @'stTrace'@, which is set to @'False'@, and
+--   * @'stVerbosity'@, which is set to @WARNING@.
+defaultCommonState :: CommonState
+defaultCommonState = CommonState
+  { stLog = []
+  , stUserDataDir = Nothing
+  , stSourceURL = Nothing
+  , stRequestHeaders = []
+  , stMediaBag = mempty
+  , stTranslations = Nothing
+  , stInputFiles = []
+  , stOutputFile = Nothing
+  , stResourcePath = ["."]
+  , stVerbosity = WARNING
+  , stTrace = False
+  }
+
+instance Default CommonState where
+  def = defaultCommonState
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
new file mode 100644
index 000000000..7f31eec96
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -0,0 +1,706 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ViewPatterns #-}
+#if MIN_VERSION_base(4,8,0)
+#else
+{-# LANGUAGE OverlappingInstances #-}
+#endif
+{- |
+Module      : Text.Pandoc.Class.PandocMonad
+Copyright   : Copyright (C) 2016-20 Jesse Rosenthal, John MacFarlane
+License     : GNU GPL, version 2 or above
+
+Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability   : alpha
+Portability : portable
+
+This module defines a type class, 'PandocMonad', for pandoc readers
+and writers.
+-}
+
+module Text.Pandoc.Class.PandocMonad
+  ( PandocMonad(..)
+  , getPOSIXTime
+  , getZonedTime
+  , readFileFromDirs
+  , report
+  , setTrace
+  , setRequestHeader
+  , getLog
+  , setVerbosity
+  , getVerbosity
+  , getMediaBag
+  , setMediaBag
+  , insertMedia
+  , setUserDataDir
+  , getUserDataDir
+  , fetchItem
+  , fetchMediaResource
+  , getInputFiles
+  , setInputFiles
+  , getOutputFile
+  , setOutputFile
+  , setResourcePath
+  , getResourcePath
+  , readDefaultDataFile
+  , readDataFile
+  , fillMediaBag
+  , toLang
+  , setTranslations
+  , translateTerm
+  , makeCanonical
+  ) where
+
+import Prelude
+import Codec.Archive.Zip
+import Control.Monad.Except (MonadError (catchError, throwError),
+                             MonadTrans, lift, when)
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (fromMaybe)
+import Data.Time (UTCTime)
+import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds)
+import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
+import Network.URI ( escapeURIString, nonStrictRelativeTo,
+                     unEscapeString, parseURIReference, isAllowedInURI,
+                     parseURI, URI(..) )
+import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
+                        isRelative, splitDirectories)
+import System.Random (StdGen)
+import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.Shared (uriPathToPath)
+import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
+                                 readTranslations)
+import Text.Pandoc.Walk (walkM)
+import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text as T
+import qualified Debug.Trace
+import qualified System.FilePath.Posix as Posix
+import qualified Text.Pandoc.MediaBag as MB
+import qualified Text.Pandoc.UTF8 as UTF8
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#endif
+
+-- | The PandocMonad typeclass contains all the potentially
+-- IO-related functions used in pandoc's readers and writers.
+-- Instances of this typeclass may implement these functions
+-- in IO (as in 'PandocIO') or using an internal state that
+-- represents a file system, time, and so on (as in 'PandocPure').
+class (Functor m, Applicative m, Monad m, MonadError PandocError m)
+      => PandocMonad m where
+  -- | Lookup an environment variable.
+  lookupEnv :: T.Text -> m (Maybe T.Text)
+  -- | Get the current (UTC) time.
+  getCurrentTime :: m UTCTime
+  -- | Get the locale's time zone.
+  getCurrentTimeZone :: m TimeZone
+  -- | Return a new generator for random numbers.
+  newStdGen :: m StdGen
+  -- | Return a new unique integer.
+  newUniqueHash :: m Int
+  -- | Retrieve contents and mime type from a URL, raising
+  -- an error on failure.
+  openURL :: T.Text -> m (B.ByteString, Maybe MimeType)
+  -- | Read the lazy ByteString contents from a file path,
+  -- raising an error on failure.
+  readFileLazy :: FilePath -> m BL.ByteString
+  -- | Read the strict ByteString contents from a file path,
+  -- raising an error on failure.
+  readFileStrict :: FilePath -> m B.ByteString
+  -- | Return a list of paths that match a glob, relative to
+  -- the working directory.  See 'System.FilePath.Glob' for
+  -- the glob syntax.
+  glob :: String -> m [FilePath]
+  -- | Returns True if file exists.
+  fileExists :: FilePath -> m Bool
+  -- | Returns the path of data file.
+  getDataFileName :: FilePath -> m FilePath
+  -- | Return the modification time of a file.
+  getModificationTime :: FilePath -> m UTCTime
+  -- | Get the value of the 'CommonState' used by all instances
+  -- of 'PandocMonad'.
+  getCommonState :: m CommonState
+  -- | Set the value of the 'CommonState' used by all instances
+  -- of 'PandocMonad'.
+  -- | Get the value of a specific field of 'CommonState'.
+  putCommonState :: CommonState -> m ()
+  -- | Get the value of a specific field of 'CommonState'.
+  getsCommonState :: (CommonState -> a) -> m a
+  getsCommonState f = f <$> getCommonState
+  -- | Modify the 'CommonState'.
+  modifyCommonState :: (CommonState -> CommonState) -> m ()
+  modifyCommonState f = getCommonState >>= putCommonState . f
+  -- | Output a log message.
+  logOutput :: LogMessage -> m ()
+  -- | Output a debug message to sterr, using 'Debug.Trace.trace',
+  -- if tracing is enabled.  Note: this writes to stderr even in
+  -- pure instances.
+  trace :: T.Text -> m ()
+  trace msg = do
+    tracing <- getsCommonState stTrace
+    when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ())
+
+-- * Functions defined for all PandocMonad instances
+
+-- | Set the verbosity level.
+setVerbosity :: PandocMonad m => Verbosity -> m ()
+setVerbosity verbosity =
+  modifyCommonState $ \st -> st{ stVerbosity = verbosity }
+
+-- | Get the verbosity level.
+getVerbosity :: PandocMonad m => m Verbosity
+getVerbosity = getsCommonState stVerbosity
+
+-- | Get the accomulated log messages (in temporal order).
+getLog :: PandocMonad m => m [LogMessage]
+getLog = reverse <$> getsCommonState stLog
+
+-- | Log a message using 'logOutput'.  Note that 'logOutput' is
+-- called only if the verbosity level exceeds the level of the
+-- message, but the message is added to the list of log messages
+-- that will be retrieved by 'getLog' regardless of its verbosity level.
+report :: PandocMonad m => LogMessage -> m ()
+report msg = do
+  verbosity <- getsCommonState stVerbosity
+  let level = messageVerbosity msg
+  when (level <= verbosity) $ logOutput msg
+  modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+
+-- | Determine whether tracing is enabled.  This affects
+-- the behavior of 'trace'.  If tracing is not enabled,
+-- 'trace' does nothing.
+setTrace :: PandocMonad m => Bool -> m ()
+setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
+
+-- | Set request header to use in HTTP requests.
+setRequestHeader :: PandocMonad m
+                 => T.Text  -- ^ Header name
+                 -> T.Text  -- ^ Value
+                 -> m ()
+setRequestHeader name val = modifyCommonState $ \st ->
+  st{ stRequestHeaders =
+       (name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st)  }
+
+-- | Initialize the media bag.
+setMediaBag :: PandocMonad m => MediaBag -> m ()
+setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
+
+-- | Retrieve the media bag.
+getMediaBag :: PandocMonad m => m MediaBag
+getMediaBag = getsCommonState stMediaBag
+
+-- | Insert an item into the media bag.
+insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
+insertMedia fp mime bs = do
+  mb <- getMediaBag
+  let mb' = MB.insertMedia fp mime bs mb
+  setMediaBag mb'
+
+-- | Retrieve the input filenames.
+getInputFiles :: PandocMonad m => m [FilePath]
+getInputFiles = getsCommonState stInputFiles
+
+-- | Set the input filenames.
+setInputFiles :: PandocMonad m => [FilePath] -> m ()
+setInputFiles fs = do
+  let sourceURL = case fs of
+                    []    -> Nothing
+                    (x:_) -> case parseURI x of
+                                Just u
+                                  | uriScheme u `elem` ["http:","https:"] ->
+                                      Just $ show u{ uriQuery = "",
+                                                     uriFragment = "" }
+                                _ -> Nothing
+
+  modifyCommonState $ \st -> st{ stInputFiles = fs
+                               , stSourceURL = T.pack <$> sourceURL }
+
+-- | Retrieve the output filename.
+getOutputFile :: PandocMonad m => m (Maybe FilePath)
+getOutputFile = getsCommonState stOutputFile
+
+-- | Set the output filename.
+setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
+setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf }
+
+-- | Retrieve the resource path searched by 'fetchItem'.
+getResourcePath :: PandocMonad m => m [FilePath]
+getResourcePath = getsCommonState stResourcePath
+
+-- | Set the resource path searched by 'fetchItem'.
+setResourcePath :: PandocMonad m => [FilePath] -> m ()
+setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
+
+-- | Get the POSIX time.
+getPOSIXTime :: PandocMonad m => m POSIXTime
+getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
+
+-- | Get the zoned time.
+getZonedTime :: PandocMonad m => m ZonedTime
+getZonedTime = do
+  t <- getCurrentTime
+  tz <- getCurrentTimeZone
+  return $ utcToZonedTime tz t
+
+-- | Read file, checking in any number of directories.
+readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text)
+readFileFromDirs [] _ = return Nothing
+readFileFromDirs (d:ds) f = catchError
+    (Just . T.pack . UTF8.toStringLazy <$> readFileLazy (d </> f))
+    (\_ -> readFileFromDirs ds f)
+
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+  case parseBCP47 s of
+       Left _ -> do
+         report $ InvalidLang s
+         return Nothing
+       Right l -> return (Just l)
+
+-- | Select the language to use with 'translateTerm'.
+-- Note that this does not read a translation file;
+-- that is only done the first time 'translateTerm' is
+-- used.
+setTranslations :: PandocMonad m => Lang -> m ()
+setTranslations lang =
+  modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
+
+-- | Load term map.
+getTranslations :: PandocMonad m => m Translations
+getTranslations = do
+  mbtrans <- getsCommonState stTranslations
+  case mbtrans of
+       Nothing -> return mempty  -- no language defined
+       Just (_, Just t) -> return t
+       Just (lang, Nothing) -> do  -- read from file
+         let translationFile = "translations/" <> renderLang lang <> ".yaml"
+         let fallbackFile = "translations/" <> langLanguage lang <> ".yaml"
+         let getTrans fp = do
+               bs <- readDataFile fp
+               case readTranslations (UTF8.toText bs) of
+                    Left e   -> do
+                      report $ CouldNotLoadTranslations (renderLang lang)
+                        (T.pack fp <> ": " <> e)
+                      -- make sure we don't try again...
+                      modifyCommonState $ \st ->
+                        st{ stTranslations = Nothing }
+                      return mempty
+                    Right t -> do
+                      modifyCommonState $ \st ->
+                                  st{ stTranslations = Just (lang, Just t) }
+                      return t
+         catchError (getTrans $ T.unpack translationFile)
+           (\_ ->
+             catchError (getTrans $ T.unpack fallbackFile)
+               (\e -> do
+                 report $ CouldNotLoadTranslations (renderLang lang)
+                          $ case e of
+                               PandocCouldNotFindDataFileError _ ->
+                                 "data file " <> fallbackFile <> " not found"
+                               _ -> ""
+                 -- make sure we don't try again...
+                 modifyCommonState $ \st -> st{ stTranslations = Nothing }
+                 return mempty))
+
+-- | Get a translation from the current term map.
+-- Issue a warning if the term is not defined.
+translateTerm :: PandocMonad m => Term -> m T.Text
+translateTerm term = do
+  translations <- getTranslations
+  case lookupTerm term translations of
+       Just s -> return s
+       Nothing -> do
+         report $ NoTranslation $ T.pack $ show term
+         return ""
+
+-- | Specialized version of parseURIReference that disallows
+-- single-letter schemes.  Reason:  these are usually windows absolute
+-- paths.
+parseURIReference' :: T.Text -> Maybe URI
+parseURIReference' s = do
+  u <- parseURIReference (T.unpack s)
+  case uriScheme u of
+       [_] -> Nothing
+       _   -> Just u
+
+-- | Set the user data directory in common state.
+setUserDataDir :: PandocMonad m
+               => Maybe FilePath
+               -> m ()
+setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
+
+-- | Get the user data directory from common state.
+getUserDataDir :: PandocMonad m
+               => m (Maybe FilePath)
+getUserDataDir = getsCommonState stUserDataDir
+
+-- | Fetch an image or other item from the local filesystem or the net.
+-- Returns raw content and maybe mime type.
+fetchItem :: PandocMonad m
+          => T.Text
+          -> m (B.ByteString, Maybe MimeType)
+fetchItem s = do
+  mediabag <- getMediaBag
+  case lookupMedia (T.unpack s) mediabag of
+    Just (mime, bs) -> return (BL.toStrict bs, Just mime)
+    Nothing -> downloadOrRead s
+
+-- | Returns the content and, if available, the MIME type of a resource.
+-- If the given resource location is a valid URI, then download the
+-- resource from that URI. Otherwise, treat the resource identifier as a
+-- local file name.
+--
+-- Note that resources are treated relative to the URL of the first
+-- input source, if any.
+downloadOrRead :: PandocMonad m
+               => T.Text
+               -> m (B.ByteString, Maybe MimeType)
+downloadOrRead s = do
+  sourceURL <- getsCommonState stSourceURL
+  case (sourceURL >>= parseURIReference' .
+                       ensureEscaped, ensureEscaped s) of
+    (Just u, s') -> -- try fetching from relative path at source
+       case parseURIReference' s' of
+            Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u
+            Nothing -> openURL s' -- will throw error
+    (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' ->  -- protocol-relative URI
+                -- we exclude //? because of //?UNC/ on Windows
+       case parseURIReference' s' of
+            Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon
+            Nothing -> openURL s' -- will throw error
+    (Nothing, s') ->
+       case parseURI (T.unpack s') of  -- requires absolute URI
+            Just u' | uriScheme u' == "file:" ->
+                 readLocalFile $ uriPathToPath (T.pack $ uriPath u')
+            -- We don't want to treat C:/ as a scheme:
+            Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u')
+            _ -> readLocalFile fp -- get from local file system
+   where readLocalFile f = do
+             resourcePath <- getResourcePath
+             cont <- if isRelative f
+                        then withPaths resourcePath readFileStrict f
+                        else readFileStrict f
+             return (cont, mime)
+         httpcolon = URI{ uriScheme = "http:",
+                          uriAuthority = Nothing,
+                          uriPath = "",
+                          uriQuery = "",
+                          uriFragment = "" }
+         dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#')
+         fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s
+         mime = getMimeType $ case takeExtension fp of
+                     ".gz" -> dropExtension fp
+                     ".svgz" -> dropExtension fp ++ ".svg"
+                     x     -> x
+         ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash
+         convertSlash '\\' = '/'
+         convertSlash x    = x
+
+-- | Retrieve default reference.docx.
+getDefaultReferenceDocx :: PandocMonad m => m Archive
+getDefaultReferenceDocx = do
+  let paths = ["[Content_Types].xml",
+               "_rels/.rels",
+               "docProps/app.xml",
+               "docProps/core.xml",
+               "docProps/custom.xml",
+               "word/document.xml",
+               "word/fontTable.xml",
+               "word/footnotes.xml",
+               "word/comments.xml",
+               "word/numbering.xml",
+               "word/settings.xml",
+               "word/webSettings.xml",
+               "word/styles.xml",
+               "word/_rels/document.xml.rels",
+               "word/_rels/footnotes.xml.rels",
+               "word/theme/theme1.xml"]
+  let toLazy = BL.fromChunks . (:[])
+  let pathToEntry path = do
+        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+        contents <- toLazy <$> readDataFile ("docx/" ++ path)
+        return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.docx")
+                       if exists
+                          then return (Just (d </> "reference.docx"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Retrieve default reference.odt.
+getDefaultReferenceODT :: PandocMonad m => m Archive
+getDefaultReferenceODT = do
+  let paths = ["mimetype",
+               "manifest.rdf",
+               "styles.xml",
+               "content.xml",
+               "meta.xml",
+               "settings.xml",
+               "Configurations2/accelerator/current.xml",
+               "Thumbnails/thumbnail.png",
+               "META-INF/manifest.xml"]
+  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+                            contents <- (BL.fromChunks . (:[])) `fmap`
+                                          readDataFile ("odt/" ++ path)
+                            return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.odt")
+                       if exists
+                          then return (Just (d </> "reference.odt"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Retrieve default reference.pptx.
+getDefaultReferencePptx :: PandocMonad m => m Archive
+getDefaultReferencePptx = do
+  -- We're going to narrow this down substantially once we get it
+  -- working.
+  let paths = [ "[Content_Types].xml"
+              , "_rels/.rels"
+              , "docProps/app.xml"
+              , "docProps/core.xml"
+              , "ppt/_rels/presentation.xml.rels"
+              , "ppt/presProps.xml"
+              , "ppt/presentation.xml"
+              , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout5.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout6.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout7.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout8.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout9.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout10.xml.rels"
+              , "ppt/slideLayouts/_rels/slideLayout11.xml.rels"
+              , "ppt/slideLayouts/slideLayout1.xml"
+              , "ppt/slideLayouts/slideLayout10.xml"
+              , "ppt/slideLayouts/slideLayout11.xml"
+              , "ppt/slideLayouts/slideLayout2.xml"
+              , "ppt/slideLayouts/slideLayout3.xml"
+              , "ppt/slideLayouts/slideLayout4.xml"
+              , "ppt/slideLayouts/slideLayout5.xml"
+              , "ppt/slideLayouts/slideLayout6.xml"
+              , "ppt/slideLayouts/slideLayout7.xml"
+              , "ppt/slideLayouts/slideLayout8.xml"
+              , "ppt/slideLayouts/slideLayout9.xml"
+              , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
+              , "ppt/slideMasters/slideMaster1.xml"
+              , "ppt/slides/_rels/slide1.xml.rels"
+              , "ppt/slides/slide1.xml"
+              , "ppt/slides/_rels/slide2.xml.rels"
+              , "ppt/slides/slide2.xml"
+              , "ppt/slides/_rels/slide3.xml.rels"
+              , "ppt/slides/slide3.xml"
+              , "ppt/slides/_rels/slide4.xml.rels"
+              , "ppt/slides/slide4.xml"
+              , "ppt/tableStyles.xml"
+              , "ppt/theme/theme1.xml"
+              , "ppt/viewProps.xml"
+              -- These relate to notes slides.
+              , "ppt/notesMasters/notesMaster1.xml"
+              , "ppt/notesMasters/_rels/notesMaster1.xml.rels"
+              , "ppt/notesSlides/notesSlide1.xml"
+              , "ppt/notesSlides/_rels/notesSlide1.xml.rels"
+              , "ppt/notesSlides/notesSlide2.xml"
+              , "ppt/notesSlides/_rels/notesSlide2.xml.rels"
+              , "ppt/theme/theme2.xml"
+              ]
+  let toLazy = BL.fromChunks . (:[])
+  let pathToEntry path = do
+        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+        contents <- toLazy <$> readDataFile ("pptx/" ++ path)
+        return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.pptx")
+                       if exists
+                          then return (Just (d </> "reference.pptx"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Read file from user data directory or,
+-- if not found there, from Cabal data directory.
+readDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDataFile fname = do
+  datadir <- getUserDataDir
+  case datadir of
+       Nothing -> readDefaultDataFile fname
+       Just userDir -> do
+         exists <- fileExists (userDir </> fname)
+         if exists
+            then readFileStrict (userDir </> fname)
+            else readDefaultDataFile fname
+
+-- | Read file from from Cabal data directory.
+readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDefaultDataFile "reference.docx" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
+readDefaultDataFile "reference.pptx" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferencePptx
+readDefaultDataFile "reference.odt" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+  case lookup (makeCanonical fname) dataFiles of
+    Nothing       -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname
+    Just contents -> return contents
+#else
+  getDataFileName fname' >>= checkExistence >>= readFileStrict
+    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+-- | Returns the input filename unchanged if the file exits, and throws
+-- a `PandocCouldNotFindDataFileError` if it doesn't.
+checkExistence :: PandocMonad m => FilePath -> m FilePath
+checkExistence fn = do
+  exists <- fileExists fn
+  if exists
+     then return fn
+     else throwError $ PandocCouldNotFindDataFileError $ T.pack fn
+#endif
+
+-- | Canonicalizes a file path by removing redundant @.@ and @..@.
+makeCanonical :: FilePath -> FilePath
+makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+ where  transformPathParts = reverse . foldl go []
+        go as     "."  = as
+        go (_:as) ".." = as
+        go as     x    = x : as
+
+-- | Trys to run an action on a file: for each directory given, a
+-- filepath is created from the given filename, and the action is run on
+-- that filepath. Returns the result of the first successful execution
+-- of the action, or throws a @PandocResourceNotFound@ exception if the
+-- action errors for all filepaths.
+withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
+withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp
+withPaths (p:ps) action fp =
+  catchError (action (p </> fp))
+             (\_ -> withPaths ps action fp)
+
+-- | Fetch local or remote resource (like an image) and provide data suitable
+-- for adding it to the MediaBag.
+fetchMediaResource :: PandocMonad m
+              => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
+fetchMediaResource src = do
+  (bs, mt) <- downloadOrRead src
+  let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
+                      (mt >>= extensionFromMimeType)
+  let bs' = BL.fromChunks [bs]
+  let basename = showDigest $ sha1 bs'
+  let fname = basename <.> T.unpack ext
+  return (fname, mt, bs')
+
+-- | Traverse tree, filling media bag for any images that
+-- aren't already in the media bag.
+fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
+fillMediaBag d = walkM handleImage d
+  where handleImage :: PandocMonad m => Inline -> m Inline
+        handleImage (Image attr lab (src, tit)) = catchError
+          (do mediabag <- getMediaBag
+              case lookupMedia (T.unpack src) mediabag of
+                Just (_, _) -> return $ Image attr lab (src, tit)
+                Nothing -> do
+                  (fname, mt, bs) <- fetchMediaResource src
+                  insertMedia fname mt bs
+                  return $ Image attr lab (T.pack fname, tit))
+          (\e ->
+              case e of
+                PandocResourceNotFound _ -> do
+                  report $ CouldNotFetchResource src
+                            "replacing image with description"
+                  -- emit alt text
+                  return $ Span ("",["image"],[]) lab
+                PandocHttpError u er -> do
+                  report $ CouldNotFetchResource u
+                            (T.pack $ show er ++ "\rReplacing image with description.")
+                  -- emit alt text
+                  return $ Span ("",["image"],[]) lab
+                _ -> throwError e)
+        handleImage x = return x
+
+-- This requires UndecidableInstances.  We could avoid that
+-- by repeating the definitions below for every monad transformer
+-- we use: ReaderT, WriterT, StateT, RWST.  But this seems to
+-- be harmless.
+instance (MonadTrans t, PandocMonad m, Functor (t m),
+          MonadError PandocError (t m), Monad (t m),
+          Applicative (t m)) => PandocMonad (t m) where
+  lookupEnv = lift . lookupEnv
+  getCurrentTime = lift getCurrentTime
+  getCurrentTimeZone = lift getCurrentTimeZone
+  newStdGen = lift newStdGen
+  newUniqueHash = lift newUniqueHash
+  openURL = lift . openURL
+  readFileLazy = lift . readFileLazy
+  readFileStrict = lift . readFileStrict
+  glob = lift . glob
+  fileExists = lift . fileExists
+  getDataFileName = lift . getDataFileName
+  getModificationTime = lift . getModificationTime
+  getCommonState = lift getCommonState
+  putCommonState = lift . putCommonState
+  logOutput = lift . logOutput
+
+instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
+  lookupEnv = lift . lookupEnv
+  getCurrentTime = lift getCurrentTime
+  getCurrentTimeZone = lift getCurrentTimeZone
+  newStdGen = lift newStdGen
+  newUniqueHash = lift newUniqueHash
+  openURL = lift . openURL
+  readFileLazy = lift . readFileLazy
+  readFileStrict = lift . readFileStrict
+  glob = lift . glob
+  fileExists = lift . fileExists
+  getDataFileName = lift . getDataFileName
+  getModificationTime = lift . getModificationTime
+  getCommonState = lift getCommonState
+  putCommonState = lift . putCommonState
+  trace msg = do
+    tracing <- getsCommonState stTrace
+    when tracing $ do
+      pos <- getPosition
+      Debug.Trace.trace
+        ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++
+            show (sourceLine pos) ++
+            if sourceName pos == "chunk"
+               then " of chunk"
+               else "")
+        (return ())
+  logOutput = lift . logOutput