diff --git a/pandoc.cabal b/pandoc.cabal index 43f3adf8b..9b7480f26 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -570,6 +570,8 @@ library Text.Pandoc.App.OutputSettings, Text.Pandoc.Class.CommonState, Text.Pandoc.Class.PandocMonad, + Text.Pandoc.Class.PandocIO, + Text.Pandoc.Class.PandocPure, Text.Pandoc.Filter.JSON, Text.Pandoc.Filter.Lua, Text.Pandoc.Filter.Path, diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index d2d58f0ed..2f28ac4dd 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -1,17 +1,6 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE UndecidableInstances #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Class - Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane + Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal @@ -25,388 +14,16 @@ whether they want conversions to perform IO operations (such as reading include files or images). -} -module Text.Pandoc.Class ( PandocMonad(..) - , CommonState(..) - , PureState(..) - , getPureState - , getsPureState - , putPureState - , modifyPureState - , getPOSIXTime - , getZonedTime - , readFileFromDirs - , report - , setTrace - , setRequestHeader - , getLog - , setVerbosity - , getVerbosity - , getMediaBag - , setMediaBag - , insertMedia - , setUserDataDir - , getUserDataDir - , fetchItem - , getInputFiles - , setInputFiles - , getOutputFile - , setOutputFile - , setResourcePath - , getResourcePath - , PandocIO(..) - , PandocPure(..) - , FileTree - , FileInfo(..) - , addToFileTree - , insertInFileTree - , runIO - , runIOorExplode - , runPure - , readDefaultDataFile - , readDataFile - , fetchMediaResource - , fillMediaBag - , extractMedia - , toLang - , setTranslations - , translateTerm - , Translations - ) where +module Text.Pandoc.Class + ( module Text.Pandoc.Class.CommonState + , module Text.Pandoc.Class.PandocIO + , module Text.Pandoc.Class.PandocMonad + , module Text.Pandoc.Class.PandocPure + , Translations + ) where -import Codec.Archive.Zip -import Control.Monad.Except -import Control.Monad.State.Strict -import Data.ByteString.Base64 (decodeLenient) -import Data.ByteString.Lazy (toChunks) -import Data.Default -import Data.Time (UTCTime) -import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) -import Data.Time.LocalTime (TimeZone, utc) -import Data.Unique (hashUnique) -import Data.Word (Word8) -import Network.HTTP.Client - (httpLbs, responseBody, responseHeaders, - Request(port, host, requestHeaders), parseRequest, newManager) -import Network.HTTP.Client.Internal (addProxy) -import Network.HTTP.Client.TLS (tlsManagerSettings) -import Network.HTTP.Types.Header ( hContentType ) -import Network.Socket (withSocketsDo) -import Network.URI ( unEscapeString ) -import Prelude -import System.Directory (createDirectoryIfMissing, getDirectoryContents, - doesDirectoryExist) -import System.Environment (getEnv) -import System.FilePath ((), takeDirectory, normalise) -import System.FilePath.Glob (match, compile) -import System.IO (stderr) -import System.IO.Error -import System.Random (StdGen, next, mkStdGen) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Class.PandocMonad -import Text.Pandoc.Definition -import Text.Pandoc.Error -import Text.Pandoc.Logging -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.Class.PandocIO +import Text.Pandoc.Class.PandocPure import Text.Pandoc.Translations (Translations) -import Text.Pandoc.Walk (walk) -import qualified Control.Exception as E -import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL -import qualified Data.CaseInsensitive as CI -import qualified Data.Map as M -import qualified Data.Text as T -import qualified Data.Time as IO (getCurrentTime) -import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) -import qualified Data.Unique as IO (newUnique) -import qualified System.Directory as Directory -import qualified System.Directory as IO (getModificationTime) -import qualified System.Environment as IO (lookupEnv) -import qualified System.FilePath.Glob as IO (glob) -import qualified System.Random as IO (newStdGen) -import qualified Text.Pandoc.UTF8 as UTF8 -#ifndef EMBED_DATA_FILES -import qualified Paths_pandoc as Paths -#endif - --- | Evaluate a 'PandocIO' operation. -runIO :: PandocIO a -> IO (Either PandocError a) -runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma - --- | Evaluate a 'PandocIO' operation, handling any errors --- by exiting with an appropriate message and error status. -runIOorExplode :: PandocIO a -> IO a -runIOorExplode ma = runIO ma >>= handleError - -newtype PandocIO a = PandocIO { - unPandocIO :: ExceptT PandocError (StateT CommonState IO) a - } deriving ( MonadIO - , Functor - , Applicative - , Monad - , MonadError PandocError - ) - --- | Utility function to lift IO errors into 'PandocError's. -liftIOError :: (String -> IO a) -> String -> PandocIO a -liftIOError f u = do - res <- liftIO $ tryIOError $ f u - case res of - Left e -> throwError $ PandocIOError (T.pack u) e - Right r -> return r - --- | Show potential IO errors to the user continuing execution anyway -logIOError :: IO () -> PandocIO () -logIOError f = do - res <- liftIO $ tryIOError f - case res of - Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e - Right _ -> pure () - -instance PandocMonad PandocIO where - lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack - getCurrentTime = liftIO IO.getCurrentTime - getCurrentTimeZone = liftIO IO.getCurrentTimeZone - newStdGen = liftIO IO.newStdGen - newUniqueHash = hashUnique <$> liftIO IO.newUnique - - openURL u - | Just u'' <- T.stripPrefix "data:" u = do - let mime = T.takeWhile (/=',') u'' - let contents = UTF8.fromString $ - unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u'' - return (decodeLenient contents, Just mime) - | otherwise = do - let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) - customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders - report $ Fetching u - res <- liftIO $ E.try $ withSocketsDo $ do - let parseReq = parseRequest - proxy <- tryIOError (getEnv "http_proxy") - let addProxy' x = case proxy of - Left _ -> return x - Right pr -> parseReq pr >>= \r -> - return (addProxy (host r) (port r) x) - req <- parseReq (T.unpack u) >>= addProxy' - let req' = req{requestHeaders = customHeaders ++ requestHeaders req} - resp <- newManager tlsManagerSettings >>= httpLbs req' - return (B.concat $ toChunks $ responseBody resp, - UTF8.toText `fmap` lookup hContentType (responseHeaders resp)) - - case res of - Right r -> return r - Left e -> throwError $ PandocHttpError u e - - readFileLazy s = liftIOError BL.readFile s - readFileStrict s = liftIOError B.readFile s - - glob = liftIOError IO.glob - fileExists = liftIOError Directory.doesFileExist -#ifdef EMBED_DATA_FILES - getDataFileName = return -#else - getDataFileName = liftIOError Paths.getDataFileName -#endif - getModificationTime = liftIOError IO.getModificationTime - getCommonState = PandocIO $ lift get - putCommonState x = PandocIO $ lift $ put x - logOutput msg = liftIO $ do - UTF8.hPutStr stderr $ - "[" ++ show (messageVerbosity msg) ++ "] " - alertIndent $ T.lines $ showLogMessage msg - -alertIndent :: [T.Text] -> IO () -alertIndent [] = return () -alertIndent (l:ls) = do - UTF8.hPutStrLn stderr $ T.unpack l - mapM_ go ls - where go l' = do UTF8.hPutStr stderr " " - UTF8.hPutStrLn stderr $ T.unpack l' - --- | Extract media from the mediabag into a directory. -extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc -extractMedia dir d = do - media <- getMediaBag - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - mapM_ (writeMedia dir media) fps - return $ walk (adjustImagePath dir fps) d - --- Write the contents of a media bag to a path. -writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO () -writeMedia dir mediabag subpath = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / - let fullpath = dir unEscapeString (normalise subpath) - let mbcontents = lookupMedia subpath mediabag - case mbcontents of - Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath - Just (_, bs) -> do - report $ Extracting $ T.pack fullpath - liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath bs - -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit) -adjustImagePath _ _ x = x - --- | The 'PureState' contains ersatz representations --- of things that would normally be obtained through IO. -data PureState = PureState { stStdGen :: StdGen - , stWord8Store :: [Word8] -- should be - -- infinite, - -- i.e. [1..] - , stUniqStore :: [Int] -- should be - -- infinite and - -- contain every - -- element at most - -- once, e.g. [1..] - , stEnv :: [(T.Text, T.Text)] - , stTime :: UTCTime - , stTimeZone :: TimeZone - , stReferenceDocx :: Archive - , stReferencePptx :: Archive - , stReferenceODT :: Archive - , stFiles :: FileTree - , stUserDataFiles :: FileTree - , stCabalDataFiles :: FileTree - } - -instance Default PureState where - def = PureState { stStdGen = mkStdGen 1848 - , stWord8Store = [1..] - , stUniqStore = [1..] - , stEnv = [("USER", "pandoc-user")] - , stTime = posixSecondsToUTCTime 0 - , stTimeZone = utc - , stReferenceDocx = emptyArchive - , stReferencePptx = emptyArchive - , stReferenceODT = emptyArchive - , stFiles = mempty - , stUserDataFiles = mempty - , stCabalDataFiles = mempty - } - - -getPureState :: PandocPure PureState -getPureState = PandocPure $ lift $ lift get - -getsPureState :: (PureState -> a) -> PandocPure a -getsPureState f = f <$> getPureState - -putPureState :: PureState -> PandocPure () -putPureState ps= PandocPure $ lift $ lift $ put ps - -modifyPureState :: (PureState -> PureState) -> PandocPure () -modifyPureState f = PandocPure $ lift $ lift $ modify f - - -data FileInfo = FileInfo { infoFileMTime :: UTCTime - , infoFileContents :: B.ByteString - } - -newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo} - deriving (Semigroup, Monoid) - -getFileInfo :: FilePath -> FileTree -> Maybe FileInfo -getFileInfo fp tree = - M.lookup (makeCanonical fp) (unFileTree tree) - --- | Add the specified file to the FileTree. If file --- is a directory, add its contents recursively. -addToFileTree :: FileTree -> FilePath -> IO FileTree -addToFileTree tree fp = do - isdir <- doesDirectoryExist fp - if isdir - then do -- recursively add contents of directories - let isSpecial ".." = True - isSpecial "." = True - isSpecial _ = False - fs <- (map (fp ) . filter (not . isSpecial)) <$> getDirectoryContents fp - foldM addToFileTree tree fs - else do - contents <- B.readFile fp - mtime <- IO.getModificationTime fp - return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime - , infoFileContents = contents } tree - --- | Insert an ersatz file into the 'FileTree'. -insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree -insertInFileTree fp info (FileTree treemap) = - FileTree $ M.insert (makeCanonical fp) info treemap - -newtype PandocPure a = PandocPure { - unPandocPure :: ExceptT PandocError - (StateT CommonState (State PureState)) a - } deriving ( Functor - , Applicative - , Monad - , MonadError PandocError - ) - --- Run a 'PandocPure' operation. -runPure :: PandocPure a -> Either PandocError a -runPure x = flip evalState def $ - flip evalStateT def $ - runExceptT $ - unPandocPure x - -instance PandocMonad PandocPure where - lookupEnv s = do - env <- getsPureState stEnv - return (lookup s env) - - getCurrentTime = getsPureState stTime - - getCurrentTimeZone = getsPureState stTimeZone - - newStdGen = do - g <- getsPureState stStdGen - let (_, nxtGen) = next g - modifyPureState $ \st -> st { stStdGen = nxtGen } - return g - - newUniqueHash = do - uniqs <- getsPureState stUniqStore - case uniqs of - u : us -> do - modifyPureState $ \st -> st { stUniqStore = us } - return u - _ -> throwError $ PandocShouldNeverHappenError - "uniq store ran out of elements" - openURL u = throwError $ PandocResourceNotFound u - readFileLazy fp = do - fps <- getsPureState stFiles - case infoFileContents <$> getFileInfo fp fps of - Just bs -> return (BL.fromStrict bs) - Nothing -> throwError $ PandocResourceNotFound $ T.pack fp - readFileStrict fp = do - fps <- getsPureState stFiles - case infoFileContents <$> getFileInfo fp fps of - Just bs -> return bs - Nothing -> throwError $ PandocResourceNotFound $ T.pack fp - - glob s = do - FileTree ftmap <- getsPureState stFiles - return $ filter (match (compile s)) $ M.keys ftmap - - fileExists fp = do - fps <- getsPureState stFiles - case getFileInfo fp fps of - Nothing -> return False - Just _ -> return True - - getDataFileName fp = return $ "data/" ++ fp - - getModificationTime fp = do - fps <- getsPureState stFiles - case infoFileMTime <$> getFileInfo fp fps of - Just tm -> return tm - Nothing -> throwError $ PandocIOError (T.pack fp) - (userError "Can't get modification time") - - getCommonState = PandocPure $ lift get - putCommonState x = PandocPure $ lift $ put x - - logOutput _msg = return () diff --git a/src/Text/Pandoc/Class/PandocIO.hs b/src/Text/Pandoc/Class/PandocIO.hs new file mode 100644 index 000000000..1cbfd680e --- /dev/null +++ b/src/Text/Pandoc/Class/PandocIO.hs @@ -0,0 +1,218 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{- | +Module : Text.Pandoc.Class.PandocIO +Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : Jesse Rosenthal +Stability : alpha +Portability : portable + +This module defines @'PandocIO'@, an IO-based instance of the +@'PandocMonad'@ type class. File, data, and network access all are run +using IO operators. +-} +module Text.Pandoc.Class.PandocIO + ( getPOSIXTime + , getZonedTime + , readFileFromDirs + , report + , setTrace + , setRequestHeader + , getLog + , setVerbosity + , getVerbosity + , getMediaBag + , setMediaBag + , insertMedia + , setUserDataDir + , getUserDataDir + , fetchItem + , getInputFiles + , setInputFiles + , getOutputFile + , setOutputFile + , setResourcePath + , getResourcePath + , PandocIO(..) + , runIO + , runIOorExplode + , extractMedia + ) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.ByteString.Base64 (decodeLenient) +import Data.ByteString.Lazy (toChunks) +import Data.Default +import Data.Text (Text) +import Data.Unique (hashUnique) +import Network.HTTP.Client + (httpLbs, responseBody, responseHeaders, + Request(port, host, requestHeaders), parseRequest, newManager) +import Network.HTTP.Client.Internal (addProxy) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import Network.HTTP.Types.Header ( hContentType ) +import Network.Socket (withSocketsDo) +import Network.URI ( unEscapeString ) +import Prelude +import System.Directory (createDirectoryIfMissing) +import System.Environment (getEnv) +import System.FilePath ((), takeDirectory, normalise) +import System.IO (stderr) +import System.IO.Error +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.Walk (walk) +import qualified Control.Exception as E +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.CaseInsensitive as CI +import qualified Data.Text as T +import qualified Data.Time as IO (getCurrentTime) +import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) +import qualified Data.Unique as IO (newUnique) +import qualified System.Directory as Directory +import qualified System.Directory as IO (getModificationTime) +import qualified System.Environment as IO (lookupEnv) +import qualified System.FilePath.Glob as IO (glob) +import qualified System.Random as IO (newStdGen) +import qualified Text.Pandoc.UTF8 as UTF8 +#ifndef EMBED_DATA_FILES +import qualified Paths_pandoc as Paths +#endif + +-- | Evaluate a 'PandocIO' operation. +runIO :: PandocIO a -> IO (Either PandocError a) +runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma + +-- | Evaluate a 'PandocIO' operation, handling any errors +-- by exiting with an appropriate message and error status. +runIOorExplode :: PandocIO a -> IO a +runIOorExplode ma = runIO ma >>= handleError + +newtype PandocIO a = PandocIO { + unPandocIO :: ExceptT PandocError (StateT CommonState IO) a + } deriving ( MonadIO + , Functor + , Applicative + , Monad + , MonadError PandocError + ) + +-- | Utility function to lift IO errors into 'PandocError's. +liftIOError :: (String -> IO a) -> String -> PandocIO a +liftIOError f u = do + res <- liftIO $ tryIOError $ f u + case res of + Left e -> throwError $ PandocIOError (T.pack u) e + Right r -> return r + +-- | Show potential IO errors to the user continuing execution anyway +logIOError :: IO () -> PandocIO () +logIOError f = do + res <- liftIO $ tryIOError f + case res of + Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e + Right _ -> pure () + +instance PandocMonad PandocIO where + lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack + getCurrentTime = liftIO IO.getCurrentTime + getCurrentTimeZone = liftIO IO.getCurrentTimeZone + newStdGen = liftIO IO.newStdGen + newUniqueHash = hashUnique <$> liftIO IO.newUnique + + openURL u + | Just u'' <- T.stripPrefix "data:" u = do + let mime = T.takeWhile (/=',') u'' + let contents = UTF8.fromString $ + unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u'' + return (decodeLenient contents, Just mime) + | otherwise = do + let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) + customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders + report $ Fetching u + res <- liftIO $ E.try $ withSocketsDo $ do + let parseReq = parseRequest + proxy <- tryIOError (getEnv "http_proxy") + let addProxy' x = case proxy of + Left _ -> return x + Right pr -> parseReq pr >>= \r -> + return (addProxy (host r) (port r) x) + req <- parseReq (T.unpack u) >>= addProxy' + let req' = req{requestHeaders = customHeaders ++ requestHeaders req} + resp <- newManager tlsManagerSettings >>= httpLbs req' + return (B.concat $ toChunks $ responseBody resp, + UTF8.toText `fmap` lookup hContentType (responseHeaders resp)) + + case res of + Right r -> return r + Left e -> throwError $ PandocHttpError u e + + readFileLazy s = liftIOError BL.readFile s + readFileStrict s = liftIOError B.readFile s + + glob = liftIOError IO.glob + fileExists = liftIOError Directory.doesFileExist +#ifdef EMBED_DATA_FILES + getDataFileName = return +#else + getDataFileName = liftIOError Paths.getDataFileName +#endif + getModificationTime = liftIOError IO.getModificationTime + getCommonState = PandocIO $ lift get + putCommonState x = PandocIO $ lift $ put x + logOutput msg = liftIO $ do + UTF8.hPutStr stderr $ + "[" ++ show (messageVerbosity msg) ++ "] " + alertIndent $ T.lines $ showLogMessage msg + +-- | Prints the list of lines to @stderr@, indenting every but the first +-- line by two spaces. +alertIndent :: [Text] -> IO () +alertIndent [] = return () +alertIndent (l:ls) = do + UTF8.hPutStrLn stderr $ T.unpack l + mapM_ go ls + where go l' = do UTF8.hPutStr stderr " " + UTF8.hPutStrLn stderr $ T.unpack l' + +-- | Extract media from the mediabag into a directory. +extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc +extractMedia dir d = do + media <- getMediaBag + case [fp | (fp, _, _) <- mediaDirectory media] of + [] -> return d + fps -> do + mapM_ (writeMedia dir media) fps + return $ walk (adjustImagePath dir fps) d + +-- | Write the contents of a media bag to a path. +writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO () +writeMedia dir mediabag subpath = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir unEscapeString (normalise subpath) + let mbcontents = lookupMedia subpath mediabag + case mbcontents of + Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath + Just (_, bs) -> do + report $ Extracting $ T.pack fullpath + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + logIOError $ BL.writeFile fullpath bs + +-- | If the given Inline element is an image with a @src@ path equal to +-- one in the list of @paths@, then prepends @dir@ to the image source; +-- returns the element unchanged otherwise. +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) + | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit) +adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocPure.hs b/src/Text/Pandoc/Class/PandocPure.hs new file mode 100644 index 000000000..010ead44f --- /dev/null +++ b/src/Text/Pandoc/Class/PandocPure.hs @@ -0,0 +1,218 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{- | +Module : Text.Pandoc.Class.Pure +Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : Jesse Rosenthal +Stability : alpha +Portability : portable + +This module defines a pure instance 'PandocPure' of the @'PandocMonad'@ +typeclass. This instance is useful for testing, or when all IO access is +prohibited for security reasons. +-} +module Text.Pandoc.Class.PandocPure + ( PureState(..) + , getPureState + , getsPureState + , putPureState + , modifyPureState + , PandocPure(..) + , FileTree + , FileInfo(..) + , addToFileTree + , insertInFileTree + , runPure + ) where + +import Codec.Archive.Zip +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Default +import Data.Text (Text) +import Data.Time (UTCTime) +import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) +import Data.Time.LocalTime (TimeZone, utc) +import Data.Word (Word8) +import System.Directory (doesDirectoryExist, getDirectoryContents) +import System.FilePath (()) +import System.FilePath.Glob (match, compile) +import System.Random (StdGen, next, mkStdGen) +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Error +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.Map as M +import qualified Data.Text as T +import qualified System.Directory as Directory (getModificationTime) + +-- | The 'PureState' contains ersatz representations +-- of things that would normally be obtained through IO. +data PureState = PureState + { stStdGen :: StdGen + , stWord8Store :: [Word8] -- ^ should be infinite, i.e. [1..] + , stUniqStore :: [Int] -- ^ should be infinite and contain every + -- element at most once, e.g. [1..] + , stEnv :: [(Text, Text)] + , stTime :: UTCTime + , stTimeZone :: TimeZone + , stReferenceDocx :: Archive + , stReferencePptx :: Archive + , stReferenceODT :: Archive + , stFiles :: FileTree + , stUserDataFiles :: FileTree + , stCabalDataFiles :: FileTree + } + +instance Default PureState where + def = PureState + { stStdGen = mkStdGen 1848 + , stWord8Store = [1..] + , stUniqStore = [1..] + , stEnv = [("USER", "pandoc-user")] + , stTime = posixSecondsToUTCTime 0 + , stTimeZone = utc + , stReferenceDocx = emptyArchive + , stReferencePptx = emptyArchive + , stReferenceODT = emptyArchive + , stFiles = mempty + , stUserDataFiles = mempty + , stCabalDataFiles = mempty + } + + +-- | Retrieve the underlying state of the @'PandocPure'@ type. +getPureState :: PandocPure PureState +getPureState = PandocPure $ lift $ lift get + +-- | Retrieve a value from the underlying state of the @'PandocPure'@ +-- type. +getsPureState :: (PureState -> a) -> PandocPure a +getsPureState f = f <$> getPureState + +-- | Set a new state for the @'PandocPure'@ type. +putPureState :: PureState -> PandocPure () +putPureState ps= PandocPure $ lift $ lift $ put ps + +-- | Modify the underlying state of the @'PandocPure'@ type. +modifyPureState :: (PureState -> PureState) -> PandocPure () +modifyPureState f = PandocPure $ lift $ lift $ modify f + +-- | Captures all file-level information necessary for a @'PandocMonad'@ +-- conforming mock file system. +data FileInfo = FileInfo + { infoFileMTime :: UTCTime + , infoFileContents :: B.ByteString + } + +-- | Basis of the mock file system used by @'PandocPure'@. +newtype FileTree = FileTree { unFileTree :: M.Map FilePath FileInfo } + deriving (Semigroup, Monoid) + +-- | Retrieve @'FileInfo'@ of the given @'FilePath'@ from a +-- @'FileTree'@. +getFileInfo :: FilePath -> FileTree -> Maybe FileInfo +getFileInfo fp tree = + M.lookup (makeCanonical fp) (unFileTree tree) + +-- | Add the specified file to the FileTree. If file +-- is a directory, add its contents recursively. +addToFileTree :: FileTree -> FilePath -> IO FileTree +addToFileTree tree fp = do + isdir <- doesDirectoryExist fp + if isdir + then do -- recursively add contents of directories + let isSpecial ".." = True + isSpecial "." = True + isSpecial _ = False + fs <- map (fp ) . filter (not . isSpecial) <$> getDirectoryContents fp + foldM addToFileTree tree fs + else do + contents <- B.readFile fp + mtime <- Directory.getModificationTime fp + return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime + , infoFileContents = contents } tree + +-- | Insert an ersatz file into the 'FileTree'. +insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree +insertInFileTree fp info (FileTree treemap) = + FileTree $ M.insert (makeCanonical fp) info treemap + +newtype PandocPure a = PandocPure { + unPandocPure :: ExceptT PandocError + (StateT CommonState (State PureState)) a + } deriving ( Functor + , Applicative + , Monad + , MonadError PandocError + ) + +-- | Run a 'PandocPure' operation. +runPure :: PandocPure a -> Either PandocError a +runPure x = flip evalState def $ + flip evalStateT def $ + runExceptT $ + unPandocPure x + +instance PandocMonad PandocPure where + lookupEnv s = do + env <- getsPureState stEnv + return (lookup s env) + + getCurrentTime = getsPureState stTime + + getCurrentTimeZone = getsPureState stTimeZone + + newStdGen = do + g <- getsPureState stStdGen + let (_, nxtGen) = next g + modifyPureState $ \st -> st { stStdGen = nxtGen } + return g + + newUniqueHash = do + uniqs <- getsPureState stUniqStore + case uniqs of + u : us -> do + modifyPureState $ \st -> st { stUniqStore = us } + return u + _ -> throwError $ PandocShouldNeverHappenError + "uniq store ran out of elements" + openURL u = throwError $ PandocResourceNotFound u + readFileLazy fp = do + fps <- getsPureState stFiles + case infoFileContents <$> getFileInfo fp fps of + Just bs -> return (BL.fromStrict bs) + Nothing -> throwError $ PandocResourceNotFound $ T.pack fp + readFileStrict fp = do + fps <- getsPureState stFiles + case infoFileContents <$> getFileInfo fp fps of + Just bs -> return bs + Nothing -> throwError $ PandocResourceNotFound $ T.pack fp + + glob s = do + FileTree ftmap <- getsPureState stFiles + return $ filter (match (compile s)) $ M.keys ftmap + + fileExists fp = do + fps <- getsPureState stFiles + case getFileInfo fp fps of + Nothing -> return False + Just _ -> return True + + getDataFileName fp = return $ "data/" ++ fp + + getModificationTime fp = do + fps <- getsPureState stFiles + case infoFileMTime <$> getFileInfo fp fps of + Just tm -> return tm + Nothing -> throwError $ PandocIOError (T.pack fp) + (userError "Can't get modification time") + + getCommonState = PandocPure $ lift get + putCommonState x = PandocPure $ lift $ put x + + logOutput _msg = return ()