Text.Pandoc.Class: extract submodules PandocIO, PandocPure
This commit is contained in:
parent
66375f3fc4
commit
2a042ff711
4 changed files with 448 additions and 393 deletions
|
@ -570,6 +570,8 @@ library
|
||||||
Text.Pandoc.App.OutputSettings,
|
Text.Pandoc.App.OutputSettings,
|
||||||
Text.Pandoc.Class.CommonState,
|
Text.Pandoc.Class.CommonState,
|
||||||
Text.Pandoc.Class.PandocMonad,
|
Text.Pandoc.Class.PandocMonad,
|
||||||
|
Text.Pandoc.Class.PandocIO,
|
||||||
|
Text.Pandoc.Class.PandocPure,
|
||||||
Text.Pandoc.Filter.JSON,
|
Text.Pandoc.Filter.JSON,
|
||||||
Text.Pandoc.Filter.Lua,
|
Text.Pandoc.Filter.Lua,
|
||||||
Text.Pandoc.Filter.Path,
|
Text.Pandoc.Filter.Path,
|
||||||
|
|
|
@ -1,17 +1,6 @@
|
||||||
{-# LANGUAGE CPP #-}
|
|
||||||
{-# LANGUAGE PatternGuards #-}
|
|
||||||
{-# LANGUAGE DeriveFunctor #-}
|
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
|
||||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
|
||||||
{-# LANGUAGE UndecidableInstances #-}
|
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
|
||||||
{-# LANGUAGE ViewPatterns #-}
|
|
||||||
{- |
|
{- |
|
||||||
Module : Text.Pandoc.Class
|
Module : Text.Pandoc.Class
|
||||||
Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane
|
Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
|
||||||
License : GNU GPL, version 2 or above
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
||||||
|
@ -25,388 +14,16 @@ whether they want conversions to perform IO operations (such as
|
||||||
reading include files or images).
|
reading include files or images).
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Text.Pandoc.Class ( PandocMonad(..)
|
module Text.Pandoc.Class
|
||||||
, CommonState(..)
|
( module Text.Pandoc.Class.CommonState
|
||||||
, PureState(..)
|
, module Text.Pandoc.Class.PandocIO
|
||||||
, getPureState
|
, module Text.Pandoc.Class.PandocMonad
|
||||||
, getsPureState
|
, module Text.Pandoc.Class.PandocPure
|
||||||
, putPureState
|
, Translations
|
||||||
, modifyPureState
|
) where
|
||||||
, getPOSIXTime
|
|
||||||
, getZonedTime
|
|
||||||
, readFileFromDirs
|
|
||||||
, report
|
|
||||||
, setTrace
|
|
||||||
, setRequestHeader
|
|
||||||
, getLog
|
|
||||||
, setVerbosity
|
|
||||||
, getVerbosity
|
|
||||||
, getMediaBag
|
|
||||||
, setMediaBag
|
|
||||||
, insertMedia
|
|
||||||
, setUserDataDir
|
|
||||||
, getUserDataDir
|
|
||||||
, fetchItem
|
|
||||||
, getInputFiles
|
|
||||||
, setInputFiles
|
|
||||||
, getOutputFile
|
|
||||||
, setOutputFile
|
|
||||||
, setResourcePath
|
|
||||||
, getResourcePath
|
|
||||||
, PandocIO(..)
|
|
||||||
, PandocPure(..)
|
|
||||||
, FileTree
|
|
||||||
, FileInfo(..)
|
|
||||||
, addToFileTree
|
|
||||||
, insertInFileTree
|
|
||||||
, runIO
|
|
||||||
, runIOorExplode
|
|
||||||
, runPure
|
|
||||||
, readDefaultDataFile
|
|
||||||
, readDataFile
|
|
||||||
, fetchMediaResource
|
|
||||||
, fillMediaBag
|
|
||||||
, extractMedia
|
|
||||||
, toLang
|
|
||||||
, setTranslations
|
|
||||||
, translateTerm
|
|
||||||
, Translations
|
|
||||||
) where
|
|
||||||
|
|
||||||
import Codec.Archive.Zip
|
|
||||||
import Control.Monad.Except
|
|
||||||
import Control.Monad.State.Strict
|
|
||||||
import Data.ByteString.Base64 (decodeLenient)
|
|
||||||
import Data.ByteString.Lazy (toChunks)
|
|
||||||
import Data.Default
|
|
||||||
import Data.Time (UTCTime)
|
|
||||||
import Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
|
|
||||||
import Data.Time.LocalTime (TimeZone, utc)
|
|
||||||
import Data.Unique (hashUnique)
|
|
||||||
import Data.Word (Word8)
|
|
||||||
import Network.HTTP.Client
|
|
||||||
(httpLbs, responseBody, responseHeaders,
|
|
||||||
Request(port, host, requestHeaders), parseRequest, newManager)
|
|
||||||
import Network.HTTP.Client.Internal (addProxy)
|
|
||||||
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
|
||||||
import Network.HTTP.Types.Header ( hContentType )
|
|
||||||
import Network.Socket (withSocketsDo)
|
|
||||||
import Network.URI ( unEscapeString )
|
|
||||||
import Prelude
|
|
||||||
import System.Directory (createDirectoryIfMissing, getDirectoryContents,
|
|
||||||
doesDirectoryExist)
|
|
||||||
import System.Environment (getEnv)
|
|
||||||
import System.FilePath ((</>), takeDirectory, normalise)
|
|
||||||
import System.FilePath.Glob (match, compile)
|
|
||||||
import System.IO (stderr)
|
|
||||||
import System.IO.Error
|
|
||||||
import System.Random (StdGen, next, mkStdGen)
|
|
||||||
import Text.Pandoc.Class.CommonState (CommonState (..))
|
import Text.Pandoc.Class.CommonState (CommonState (..))
|
||||||
import Text.Pandoc.Class.PandocMonad
|
import Text.Pandoc.Class.PandocMonad
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Class.PandocIO
|
||||||
import Text.Pandoc.Error
|
import Text.Pandoc.Class.PandocPure
|
||||||
import Text.Pandoc.Logging
|
|
||||||
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
|
|
||||||
import Text.Pandoc.Translations (Translations)
|
import Text.Pandoc.Translations (Translations)
|
||||||
import Text.Pandoc.Walk (walk)
|
|
||||||
import qualified Control.Exception as E
|
|
||||||
import qualified Data.ByteString as B
|
|
||||||
import qualified Data.ByteString.Lazy as BL
|
|
||||||
import qualified Data.CaseInsensitive as CI
|
|
||||||
import qualified Data.Map as M
|
|
||||||
import qualified Data.Text as T
|
|
||||||
import qualified Data.Time as IO (getCurrentTime)
|
|
||||||
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
|
||||||
import qualified Data.Unique as IO (newUnique)
|
|
||||||
import qualified System.Directory as Directory
|
|
||||||
import qualified System.Directory as IO (getModificationTime)
|
|
||||||
import qualified System.Environment as IO (lookupEnv)
|
|
||||||
import qualified System.FilePath.Glob as IO (glob)
|
|
||||||
import qualified System.Random as IO (newStdGen)
|
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8
|
|
||||||
#ifndef EMBED_DATA_FILES
|
|
||||||
import qualified Paths_pandoc as Paths
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-- | Evaluate a 'PandocIO' operation.
|
|
||||||
runIO :: PandocIO a -> IO (Either PandocError a)
|
|
||||||
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
|
|
||||||
|
|
||||||
-- | Evaluate a 'PandocIO' operation, handling any errors
|
|
||||||
-- by exiting with an appropriate message and error status.
|
|
||||||
runIOorExplode :: PandocIO a -> IO a
|
|
||||||
runIOorExplode ma = runIO ma >>= handleError
|
|
||||||
|
|
||||||
newtype PandocIO a = PandocIO {
|
|
||||||
unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
|
|
||||||
} deriving ( MonadIO
|
|
||||||
, Functor
|
|
||||||
, Applicative
|
|
||||||
, Monad
|
|
||||||
, MonadError PandocError
|
|
||||||
)
|
|
||||||
|
|
||||||
-- | Utility function to lift IO errors into 'PandocError's.
|
|
||||||
liftIOError :: (String -> IO a) -> String -> PandocIO a
|
|
||||||
liftIOError f u = do
|
|
||||||
res <- liftIO $ tryIOError $ f u
|
|
||||||
case res of
|
|
||||||
Left e -> throwError $ PandocIOError (T.pack u) e
|
|
||||||
Right r -> return r
|
|
||||||
|
|
||||||
-- | Show potential IO errors to the user continuing execution anyway
|
|
||||||
logIOError :: IO () -> PandocIO ()
|
|
||||||
logIOError f = do
|
|
||||||
res <- liftIO $ tryIOError f
|
|
||||||
case res of
|
|
||||||
Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e
|
|
||||||
Right _ -> pure ()
|
|
||||||
|
|
||||||
instance PandocMonad PandocIO where
|
|
||||||
lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack
|
|
||||||
getCurrentTime = liftIO IO.getCurrentTime
|
|
||||||
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
|
||||||
newStdGen = liftIO IO.newStdGen
|
|
||||||
newUniqueHash = hashUnique <$> liftIO IO.newUnique
|
|
||||||
|
|
||||||
openURL u
|
|
||||||
| Just u'' <- T.stripPrefix "data:" u = do
|
|
||||||
let mime = T.takeWhile (/=',') u''
|
|
||||||
let contents = UTF8.fromString $
|
|
||||||
unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
|
|
||||||
return (decodeLenient contents, Just mime)
|
|
||||||
| otherwise = do
|
|
||||||
let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
|
|
||||||
customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
|
|
||||||
report $ Fetching u
|
|
||||||
res <- liftIO $ E.try $ withSocketsDo $ do
|
|
||||||
let parseReq = parseRequest
|
|
||||||
proxy <- tryIOError (getEnv "http_proxy")
|
|
||||||
let addProxy' x = case proxy of
|
|
||||||
Left _ -> return x
|
|
||||||
Right pr -> parseReq pr >>= \r ->
|
|
||||||
return (addProxy (host r) (port r) x)
|
|
||||||
req <- parseReq (T.unpack u) >>= addProxy'
|
|
||||||
let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
|
|
||||||
resp <- newManager tlsManagerSettings >>= httpLbs req'
|
|
||||||
return (B.concat $ toChunks $ responseBody resp,
|
|
||||||
UTF8.toText `fmap` lookup hContentType (responseHeaders resp))
|
|
||||||
|
|
||||||
case res of
|
|
||||||
Right r -> return r
|
|
||||||
Left e -> throwError $ PandocHttpError u e
|
|
||||||
|
|
||||||
readFileLazy s = liftIOError BL.readFile s
|
|
||||||
readFileStrict s = liftIOError B.readFile s
|
|
||||||
|
|
||||||
glob = liftIOError IO.glob
|
|
||||||
fileExists = liftIOError Directory.doesFileExist
|
|
||||||
#ifdef EMBED_DATA_FILES
|
|
||||||
getDataFileName = return
|
|
||||||
#else
|
|
||||||
getDataFileName = liftIOError Paths.getDataFileName
|
|
||||||
#endif
|
|
||||||
getModificationTime = liftIOError IO.getModificationTime
|
|
||||||
getCommonState = PandocIO $ lift get
|
|
||||||
putCommonState x = PandocIO $ lift $ put x
|
|
||||||
logOutput msg = liftIO $ do
|
|
||||||
UTF8.hPutStr stderr $
|
|
||||||
"[" ++ show (messageVerbosity msg) ++ "] "
|
|
||||||
alertIndent $ T.lines $ showLogMessage msg
|
|
||||||
|
|
||||||
alertIndent :: [T.Text] -> IO ()
|
|
||||||
alertIndent [] = return ()
|
|
||||||
alertIndent (l:ls) = do
|
|
||||||
UTF8.hPutStrLn stderr $ T.unpack l
|
|
||||||
mapM_ go ls
|
|
||||||
where go l' = do UTF8.hPutStr stderr " "
|
|
||||||
UTF8.hPutStrLn stderr $ T.unpack l'
|
|
||||||
|
|
||||||
-- | Extract media from the mediabag into a directory.
|
|
||||||
extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
|
|
||||||
extractMedia dir d = do
|
|
||||||
media <- getMediaBag
|
|
||||||
case [fp | (fp, _, _) <- mediaDirectory media] of
|
|
||||||
[] -> return d
|
|
||||||
fps -> do
|
|
||||||
mapM_ (writeMedia dir media) fps
|
|
||||||
return $ walk (adjustImagePath dir fps) d
|
|
||||||
|
|
||||||
-- Write the contents of a media bag to a path.
|
|
||||||
writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
|
|
||||||
writeMedia dir mediabag subpath = do
|
|
||||||
-- we join and split to convert a/b/c to a\b\c on Windows;
|
|
||||||
-- in zip containers all paths use /
|
|
||||||
let fullpath = dir </> unEscapeString (normalise subpath)
|
|
||||||
let mbcontents = lookupMedia subpath mediabag
|
|
||||||
case mbcontents of
|
|
||||||
Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath
|
|
||||||
Just (_, bs) -> do
|
|
||||||
report $ Extracting $ T.pack fullpath
|
|
||||||
liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
|
|
||||||
logIOError $ BL.writeFile fullpath bs
|
|
||||||
|
|
||||||
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
|
||||||
adjustImagePath dir paths (Image attr lab (src, tit))
|
|
||||||
| T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit)
|
|
||||||
adjustImagePath _ _ x = x
|
|
||||||
|
|
||||||
-- | The 'PureState' contains ersatz representations
|
|
||||||
-- of things that would normally be obtained through IO.
|
|
||||||
data PureState = PureState { stStdGen :: StdGen
|
|
||||||
, stWord8Store :: [Word8] -- should be
|
|
||||||
-- infinite,
|
|
||||||
-- i.e. [1..]
|
|
||||||
, stUniqStore :: [Int] -- should be
|
|
||||||
-- infinite and
|
|
||||||
-- contain every
|
|
||||||
-- element at most
|
|
||||||
-- once, e.g. [1..]
|
|
||||||
, stEnv :: [(T.Text, T.Text)]
|
|
||||||
, stTime :: UTCTime
|
|
||||||
, stTimeZone :: TimeZone
|
|
||||||
, stReferenceDocx :: Archive
|
|
||||||
, stReferencePptx :: Archive
|
|
||||||
, stReferenceODT :: Archive
|
|
||||||
, stFiles :: FileTree
|
|
||||||
, stUserDataFiles :: FileTree
|
|
||||||
, stCabalDataFiles :: FileTree
|
|
||||||
}
|
|
||||||
|
|
||||||
instance Default PureState where
|
|
||||||
def = PureState { stStdGen = mkStdGen 1848
|
|
||||||
, stWord8Store = [1..]
|
|
||||||
, stUniqStore = [1..]
|
|
||||||
, stEnv = [("USER", "pandoc-user")]
|
|
||||||
, stTime = posixSecondsToUTCTime 0
|
|
||||||
, stTimeZone = utc
|
|
||||||
, stReferenceDocx = emptyArchive
|
|
||||||
, stReferencePptx = emptyArchive
|
|
||||||
, stReferenceODT = emptyArchive
|
|
||||||
, stFiles = mempty
|
|
||||||
, stUserDataFiles = mempty
|
|
||||||
, stCabalDataFiles = mempty
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
getPureState :: PandocPure PureState
|
|
||||||
getPureState = PandocPure $ lift $ lift get
|
|
||||||
|
|
||||||
getsPureState :: (PureState -> a) -> PandocPure a
|
|
||||||
getsPureState f = f <$> getPureState
|
|
||||||
|
|
||||||
putPureState :: PureState -> PandocPure ()
|
|
||||||
putPureState ps= PandocPure $ lift $ lift $ put ps
|
|
||||||
|
|
||||||
modifyPureState :: (PureState -> PureState) -> PandocPure ()
|
|
||||||
modifyPureState f = PandocPure $ lift $ lift $ modify f
|
|
||||||
|
|
||||||
|
|
||||||
data FileInfo = FileInfo { infoFileMTime :: UTCTime
|
|
||||||
, infoFileContents :: B.ByteString
|
|
||||||
}
|
|
||||||
|
|
||||||
newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
|
|
||||||
deriving (Semigroup, Monoid)
|
|
||||||
|
|
||||||
getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
|
|
||||||
getFileInfo fp tree =
|
|
||||||
M.lookup (makeCanonical fp) (unFileTree tree)
|
|
||||||
|
|
||||||
-- | Add the specified file to the FileTree. If file
|
|
||||||
-- is a directory, add its contents recursively.
|
|
||||||
addToFileTree :: FileTree -> FilePath -> IO FileTree
|
|
||||||
addToFileTree tree fp = do
|
|
||||||
isdir <- doesDirectoryExist fp
|
|
||||||
if isdir
|
|
||||||
then do -- recursively add contents of directories
|
|
||||||
let isSpecial ".." = True
|
|
||||||
isSpecial "." = True
|
|
||||||
isSpecial _ = False
|
|
||||||
fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp
|
|
||||||
foldM addToFileTree tree fs
|
|
||||||
else do
|
|
||||||
contents <- B.readFile fp
|
|
||||||
mtime <- IO.getModificationTime fp
|
|
||||||
return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime
|
|
||||||
, infoFileContents = contents } tree
|
|
||||||
|
|
||||||
-- | Insert an ersatz file into the 'FileTree'.
|
|
||||||
insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree
|
|
||||||
insertInFileTree fp info (FileTree treemap) =
|
|
||||||
FileTree $ M.insert (makeCanonical fp) info treemap
|
|
||||||
|
|
||||||
newtype PandocPure a = PandocPure {
|
|
||||||
unPandocPure :: ExceptT PandocError
|
|
||||||
(StateT CommonState (State PureState)) a
|
|
||||||
} deriving ( Functor
|
|
||||||
, Applicative
|
|
||||||
, Monad
|
|
||||||
, MonadError PandocError
|
|
||||||
)
|
|
||||||
|
|
||||||
-- Run a 'PandocPure' operation.
|
|
||||||
runPure :: PandocPure a -> Either PandocError a
|
|
||||||
runPure x = flip evalState def $
|
|
||||||
flip evalStateT def $
|
|
||||||
runExceptT $
|
|
||||||
unPandocPure x
|
|
||||||
|
|
||||||
instance PandocMonad PandocPure where
|
|
||||||
lookupEnv s = do
|
|
||||||
env <- getsPureState stEnv
|
|
||||||
return (lookup s env)
|
|
||||||
|
|
||||||
getCurrentTime = getsPureState stTime
|
|
||||||
|
|
||||||
getCurrentTimeZone = getsPureState stTimeZone
|
|
||||||
|
|
||||||
newStdGen = do
|
|
||||||
g <- getsPureState stStdGen
|
|
||||||
let (_, nxtGen) = next g
|
|
||||||
modifyPureState $ \st -> st { stStdGen = nxtGen }
|
|
||||||
return g
|
|
||||||
|
|
||||||
newUniqueHash = do
|
|
||||||
uniqs <- getsPureState stUniqStore
|
|
||||||
case uniqs of
|
|
||||||
u : us -> do
|
|
||||||
modifyPureState $ \st -> st { stUniqStore = us }
|
|
||||||
return u
|
|
||||||
_ -> throwError $ PandocShouldNeverHappenError
|
|
||||||
"uniq store ran out of elements"
|
|
||||||
openURL u = throwError $ PandocResourceNotFound u
|
|
||||||
readFileLazy fp = do
|
|
||||||
fps <- getsPureState stFiles
|
|
||||||
case infoFileContents <$> getFileInfo fp fps of
|
|
||||||
Just bs -> return (BL.fromStrict bs)
|
|
||||||
Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
|
|
||||||
readFileStrict fp = do
|
|
||||||
fps <- getsPureState stFiles
|
|
||||||
case infoFileContents <$> getFileInfo fp fps of
|
|
||||||
Just bs -> return bs
|
|
||||||
Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
|
|
||||||
|
|
||||||
glob s = do
|
|
||||||
FileTree ftmap <- getsPureState stFiles
|
|
||||||
return $ filter (match (compile s)) $ M.keys ftmap
|
|
||||||
|
|
||||||
fileExists fp = do
|
|
||||||
fps <- getsPureState stFiles
|
|
||||||
case getFileInfo fp fps of
|
|
||||||
Nothing -> return False
|
|
||||||
Just _ -> return True
|
|
||||||
|
|
||||||
getDataFileName fp = return $ "data/" ++ fp
|
|
||||||
|
|
||||||
getModificationTime fp = do
|
|
||||||
fps <- getsPureState stFiles
|
|
||||||
case infoFileMTime <$> getFileInfo fp fps of
|
|
||||||
Just tm -> return tm
|
|
||||||
Nothing -> throwError $ PandocIOError (T.pack fp)
|
|
||||||
(userError "Can't get modification time")
|
|
||||||
|
|
||||||
getCommonState = PandocPure $ lift get
|
|
||||||
putCommonState x = PandocPure $ lift $ put x
|
|
||||||
|
|
||||||
logOutput _msg = return ()
|
|
||||||
|
|
218
src/Text/Pandoc/Class/PandocIO.hs
Normal file
218
src/Text/Pandoc/Class/PandocIO.hs
Normal file
|
@ -0,0 +1,218 @@
|
||||||
|
{-# LANGUAGE CPP #-}
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Class.PandocIO
|
||||||
|
Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
||||||
|
Stability : alpha
|
||||||
|
Portability : portable
|
||||||
|
|
||||||
|
This module defines @'PandocIO'@, an IO-based instance of the
|
||||||
|
@'PandocMonad'@ type class. File, data, and network access all are run
|
||||||
|
using IO operators.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Class.PandocIO
|
||||||
|
( getPOSIXTime
|
||||||
|
, getZonedTime
|
||||||
|
, readFileFromDirs
|
||||||
|
, report
|
||||||
|
, setTrace
|
||||||
|
, setRequestHeader
|
||||||
|
, getLog
|
||||||
|
, setVerbosity
|
||||||
|
, getVerbosity
|
||||||
|
, getMediaBag
|
||||||
|
, setMediaBag
|
||||||
|
, insertMedia
|
||||||
|
, setUserDataDir
|
||||||
|
, getUserDataDir
|
||||||
|
, fetchItem
|
||||||
|
, getInputFiles
|
||||||
|
, setInputFiles
|
||||||
|
, getOutputFile
|
||||||
|
, setOutputFile
|
||||||
|
, setResourcePath
|
||||||
|
, getResourcePath
|
||||||
|
, PandocIO(..)
|
||||||
|
, runIO
|
||||||
|
, runIOorExplode
|
||||||
|
, extractMedia
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Control.Monad.Except
|
||||||
|
import Control.Monad.State.Strict
|
||||||
|
import Data.ByteString.Base64 (decodeLenient)
|
||||||
|
import Data.ByteString.Lazy (toChunks)
|
||||||
|
import Data.Default
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Unique (hashUnique)
|
||||||
|
import Network.HTTP.Client
|
||||||
|
(httpLbs, responseBody, responseHeaders,
|
||||||
|
Request(port, host, requestHeaders), parseRequest, newManager)
|
||||||
|
import Network.HTTP.Client.Internal (addProxy)
|
||||||
|
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
||||||
|
import Network.HTTP.Types.Header ( hContentType )
|
||||||
|
import Network.Socket (withSocketsDo)
|
||||||
|
import Network.URI ( unEscapeString )
|
||||||
|
import Prelude
|
||||||
|
import System.Directory (createDirectoryIfMissing)
|
||||||
|
import System.Environment (getEnv)
|
||||||
|
import System.FilePath ((</>), takeDirectory, normalise)
|
||||||
|
import System.IO (stderr)
|
||||||
|
import System.IO.Error
|
||||||
|
import Text.Pandoc.Class.CommonState (CommonState (..))
|
||||||
|
import Text.Pandoc.Class.PandocMonad
|
||||||
|
import Text.Pandoc.Definition
|
||||||
|
import Text.Pandoc.Error
|
||||||
|
import Text.Pandoc.Logging
|
||||||
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
|
||||||
|
import Text.Pandoc.Walk (walk)
|
||||||
|
import qualified Control.Exception as E
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
import qualified Data.CaseInsensitive as CI
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Time as IO (getCurrentTime)
|
||||||
|
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
||||||
|
import qualified Data.Unique as IO (newUnique)
|
||||||
|
import qualified System.Directory as Directory
|
||||||
|
import qualified System.Directory as IO (getModificationTime)
|
||||||
|
import qualified System.Environment as IO (lookupEnv)
|
||||||
|
import qualified System.FilePath.Glob as IO (glob)
|
||||||
|
import qualified System.Random as IO (newStdGen)
|
||||||
|
import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
|
#ifndef EMBED_DATA_FILES
|
||||||
|
import qualified Paths_pandoc as Paths
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-- | Evaluate a 'PandocIO' operation.
|
||||||
|
runIO :: PandocIO a -> IO (Either PandocError a)
|
||||||
|
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
|
||||||
|
|
||||||
|
-- | Evaluate a 'PandocIO' operation, handling any errors
|
||||||
|
-- by exiting with an appropriate message and error status.
|
||||||
|
runIOorExplode :: PandocIO a -> IO a
|
||||||
|
runIOorExplode ma = runIO ma >>= handleError
|
||||||
|
|
||||||
|
newtype PandocIO a = PandocIO {
|
||||||
|
unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
|
||||||
|
} deriving ( MonadIO
|
||||||
|
, Functor
|
||||||
|
, Applicative
|
||||||
|
, Monad
|
||||||
|
, MonadError PandocError
|
||||||
|
)
|
||||||
|
|
||||||
|
-- | Utility function to lift IO errors into 'PandocError's.
|
||||||
|
liftIOError :: (String -> IO a) -> String -> PandocIO a
|
||||||
|
liftIOError f u = do
|
||||||
|
res <- liftIO $ tryIOError $ f u
|
||||||
|
case res of
|
||||||
|
Left e -> throwError $ PandocIOError (T.pack u) e
|
||||||
|
Right r -> return r
|
||||||
|
|
||||||
|
-- | Show potential IO errors to the user continuing execution anyway
|
||||||
|
logIOError :: IO () -> PandocIO ()
|
||||||
|
logIOError f = do
|
||||||
|
res <- liftIO $ tryIOError f
|
||||||
|
case res of
|
||||||
|
Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e
|
||||||
|
Right _ -> pure ()
|
||||||
|
|
||||||
|
instance PandocMonad PandocIO where
|
||||||
|
lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack
|
||||||
|
getCurrentTime = liftIO IO.getCurrentTime
|
||||||
|
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
||||||
|
newStdGen = liftIO IO.newStdGen
|
||||||
|
newUniqueHash = hashUnique <$> liftIO IO.newUnique
|
||||||
|
|
||||||
|
openURL u
|
||||||
|
| Just u'' <- T.stripPrefix "data:" u = do
|
||||||
|
let mime = T.takeWhile (/=',') u''
|
||||||
|
let contents = UTF8.fromString $
|
||||||
|
unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
|
||||||
|
return (decodeLenient contents, Just mime)
|
||||||
|
| otherwise = do
|
||||||
|
let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
|
||||||
|
customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
|
||||||
|
report $ Fetching u
|
||||||
|
res <- liftIO $ E.try $ withSocketsDo $ do
|
||||||
|
let parseReq = parseRequest
|
||||||
|
proxy <- tryIOError (getEnv "http_proxy")
|
||||||
|
let addProxy' x = case proxy of
|
||||||
|
Left _ -> return x
|
||||||
|
Right pr -> parseReq pr >>= \r ->
|
||||||
|
return (addProxy (host r) (port r) x)
|
||||||
|
req <- parseReq (T.unpack u) >>= addProxy'
|
||||||
|
let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
|
||||||
|
resp <- newManager tlsManagerSettings >>= httpLbs req'
|
||||||
|
return (B.concat $ toChunks $ responseBody resp,
|
||||||
|
UTF8.toText `fmap` lookup hContentType (responseHeaders resp))
|
||||||
|
|
||||||
|
case res of
|
||||||
|
Right r -> return r
|
||||||
|
Left e -> throwError $ PandocHttpError u e
|
||||||
|
|
||||||
|
readFileLazy s = liftIOError BL.readFile s
|
||||||
|
readFileStrict s = liftIOError B.readFile s
|
||||||
|
|
||||||
|
glob = liftIOError IO.glob
|
||||||
|
fileExists = liftIOError Directory.doesFileExist
|
||||||
|
#ifdef EMBED_DATA_FILES
|
||||||
|
getDataFileName = return
|
||||||
|
#else
|
||||||
|
getDataFileName = liftIOError Paths.getDataFileName
|
||||||
|
#endif
|
||||||
|
getModificationTime = liftIOError IO.getModificationTime
|
||||||
|
getCommonState = PandocIO $ lift get
|
||||||
|
putCommonState x = PandocIO $ lift $ put x
|
||||||
|
logOutput msg = liftIO $ do
|
||||||
|
UTF8.hPutStr stderr $
|
||||||
|
"[" ++ show (messageVerbosity msg) ++ "] "
|
||||||
|
alertIndent $ T.lines $ showLogMessage msg
|
||||||
|
|
||||||
|
-- | Prints the list of lines to @stderr@, indenting every but the first
|
||||||
|
-- line by two spaces.
|
||||||
|
alertIndent :: [Text] -> IO ()
|
||||||
|
alertIndent [] = return ()
|
||||||
|
alertIndent (l:ls) = do
|
||||||
|
UTF8.hPutStrLn stderr $ T.unpack l
|
||||||
|
mapM_ go ls
|
||||||
|
where go l' = do UTF8.hPutStr stderr " "
|
||||||
|
UTF8.hPutStrLn stderr $ T.unpack l'
|
||||||
|
|
||||||
|
-- | Extract media from the mediabag into a directory.
|
||||||
|
extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
|
||||||
|
extractMedia dir d = do
|
||||||
|
media <- getMediaBag
|
||||||
|
case [fp | (fp, _, _) <- mediaDirectory media] of
|
||||||
|
[] -> return d
|
||||||
|
fps -> do
|
||||||
|
mapM_ (writeMedia dir media) fps
|
||||||
|
return $ walk (adjustImagePath dir fps) d
|
||||||
|
|
||||||
|
-- | Write the contents of a media bag to a path.
|
||||||
|
writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
|
||||||
|
writeMedia dir mediabag subpath = do
|
||||||
|
-- we join and split to convert a/b/c to a\b\c on Windows;
|
||||||
|
-- in zip containers all paths use /
|
||||||
|
let fullpath = dir </> unEscapeString (normalise subpath)
|
||||||
|
let mbcontents = lookupMedia subpath mediabag
|
||||||
|
case mbcontents of
|
||||||
|
Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath
|
||||||
|
Just (_, bs) -> do
|
||||||
|
report $ Extracting $ T.pack fullpath
|
||||||
|
liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
|
||||||
|
logIOError $ BL.writeFile fullpath bs
|
||||||
|
|
||||||
|
-- | If the given Inline element is an image with a @src@ path equal to
|
||||||
|
-- one in the list of @paths@, then prepends @dir@ to the image source;
|
||||||
|
-- returns the element unchanged otherwise.
|
||||||
|
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
||||||
|
adjustImagePath dir paths (Image attr lab (src, tit))
|
||||||
|
| T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit)
|
||||||
|
adjustImagePath _ _ x = x
|
218
src/Text/Pandoc/Class/PandocPure.hs
Normal file
218
src/Text/Pandoc/Class/PandocPure.hs
Normal file
|
@ -0,0 +1,218 @@
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Class.Pure
|
||||||
|
Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
||||||
|
Stability : alpha
|
||||||
|
Portability : portable
|
||||||
|
|
||||||
|
This module defines a pure instance 'PandocPure' of the @'PandocMonad'@
|
||||||
|
typeclass. This instance is useful for testing, or when all IO access is
|
||||||
|
prohibited for security reasons.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Class.PandocPure
|
||||||
|
( PureState(..)
|
||||||
|
, getPureState
|
||||||
|
, getsPureState
|
||||||
|
, putPureState
|
||||||
|
, modifyPureState
|
||||||
|
, PandocPure(..)
|
||||||
|
, FileTree
|
||||||
|
, FileInfo(..)
|
||||||
|
, addToFileTree
|
||||||
|
, insertInFileTree
|
||||||
|
, runPure
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Codec.Archive.Zip
|
||||||
|
import Control.Monad.Except
|
||||||
|
import Control.Monad.State.Strict
|
||||||
|
import Data.Default
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Data.Time (UTCTime)
|
||||||
|
import Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
|
||||||
|
import Data.Time.LocalTime (TimeZone, utc)
|
||||||
|
import Data.Word (Word8)
|
||||||
|
import System.Directory (doesDirectoryExist, getDirectoryContents)
|
||||||
|
import System.FilePath ((</>))
|
||||||
|
import System.FilePath.Glob (match, compile)
|
||||||
|
import System.Random (StdGen, next, mkStdGen)
|
||||||
|
import Text.Pandoc.Class.CommonState (CommonState (..))
|
||||||
|
import Text.Pandoc.Class.PandocMonad
|
||||||
|
import Text.Pandoc.Error
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified System.Directory as Directory (getModificationTime)
|
||||||
|
|
||||||
|
-- | The 'PureState' contains ersatz representations
|
||||||
|
-- of things that would normally be obtained through IO.
|
||||||
|
data PureState = PureState
|
||||||
|
{ stStdGen :: StdGen
|
||||||
|
, stWord8Store :: [Word8] -- ^ should be infinite, i.e. [1..]
|
||||||
|
, stUniqStore :: [Int] -- ^ should be infinite and contain every
|
||||||
|
-- element at most once, e.g. [1..]
|
||||||
|
, stEnv :: [(Text, Text)]
|
||||||
|
, stTime :: UTCTime
|
||||||
|
, stTimeZone :: TimeZone
|
||||||
|
, stReferenceDocx :: Archive
|
||||||
|
, stReferencePptx :: Archive
|
||||||
|
, stReferenceODT :: Archive
|
||||||
|
, stFiles :: FileTree
|
||||||
|
, stUserDataFiles :: FileTree
|
||||||
|
, stCabalDataFiles :: FileTree
|
||||||
|
}
|
||||||
|
|
||||||
|
instance Default PureState where
|
||||||
|
def = PureState
|
||||||
|
{ stStdGen = mkStdGen 1848
|
||||||
|
, stWord8Store = [1..]
|
||||||
|
, stUniqStore = [1..]
|
||||||
|
, stEnv = [("USER", "pandoc-user")]
|
||||||
|
, stTime = posixSecondsToUTCTime 0
|
||||||
|
, stTimeZone = utc
|
||||||
|
, stReferenceDocx = emptyArchive
|
||||||
|
, stReferencePptx = emptyArchive
|
||||||
|
, stReferenceODT = emptyArchive
|
||||||
|
, stFiles = mempty
|
||||||
|
, stUserDataFiles = mempty
|
||||||
|
, stCabalDataFiles = mempty
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
-- | Retrieve the underlying state of the @'PandocPure'@ type.
|
||||||
|
getPureState :: PandocPure PureState
|
||||||
|
getPureState = PandocPure $ lift $ lift get
|
||||||
|
|
||||||
|
-- | Retrieve a value from the underlying state of the @'PandocPure'@
|
||||||
|
-- type.
|
||||||
|
getsPureState :: (PureState -> a) -> PandocPure a
|
||||||
|
getsPureState f = f <$> getPureState
|
||||||
|
|
||||||
|
-- | Set a new state for the @'PandocPure'@ type.
|
||||||
|
putPureState :: PureState -> PandocPure ()
|
||||||
|
putPureState ps= PandocPure $ lift $ lift $ put ps
|
||||||
|
|
||||||
|
-- | Modify the underlying state of the @'PandocPure'@ type.
|
||||||
|
modifyPureState :: (PureState -> PureState) -> PandocPure ()
|
||||||
|
modifyPureState f = PandocPure $ lift $ lift $ modify f
|
||||||
|
|
||||||
|
-- | Captures all file-level information necessary for a @'PandocMonad'@
|
||||||
|
-- conforming mock file system.
|
||||||
|
data FileInfo = FileInfo
|
||||||
|
{ infoFileMTime :: UTCTime
|
||||||
|
, infoFileContents :: B.ByteString
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | Basis of the mock file system used by @'PandocPure'@.
|
||||||
|
newtype FileTree = FileTree { unFileTree :: M.Map FilePath FileInfo }
|
||||||
|
deriving (Semigroup, Monoid)
|
||||||
|
|
||||||
|
-- | Retrieve @'FileInfo'@ of the given @'FilePath'@ from a
|
||||||
|
-- @'FileTree'@.
|
||||||
|
getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
|
||||||
|
getFileInfo fp tree =
|
||||||
|
M.lookup (makeCanonical fp) (unFileTree tree)
|
||||||
|
|
||||||
|
-- | Add the specified file to the FileTree. If file
|
||||||
|
-- is a directory, add its contents recursively.
|
||||||
|
addToFileTree :: FileTree -> FilePath -> IO FileTree
|
||||||
|
addToFileTree tree fp = do
|
||||||
|
isdir <- doesDirectoryExist fp
|
||||||
|
if isdir
|
||||||
|
then do -- recursively add contents of directories
|
||||||
|
let isSpecial ".." = True
|
||||||
|
isSpecial "." = True
|
||||||
|
isSpecial _ = False
|
||||||
|
fs <- map (fp </>) . filter (not . isSpecial) <$> getDirectoryContents fp
|
||||||
|
foldM addToFileTree tree fs
|
||||||
|
else do
|
||||||
|
contents <- B.readFile fp
|
||||||
|
mtime <- Directory.getModificationTime fp
|
||||||
|
return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime
|
||||||
|
, infoFileContents = contents } tree
|
||||||
|
|
||||||
|
-- | Insert an ersatz file into the 'FileTree'.
|
||||||
|
insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree
|
||||||
|
insertInFileTree fp info (FileTree treemap) =
|
||||||
|
FileTree $ M.insert (makeCanonical fp) info treemap
|
||||||
|
|
||||||
|
newtype PandocPure a = PandocPure {
|
||||||
|
unPandocPure :: ExceptT PandocError
|
||||||
|
(StateT CommonState (State PureState)) a
|
||||||
|
} deriving ( Functor
|
||||||
|
, Applicative
|
||||||
|
, Monad
|
||||||
|
, MonadError PandocError
|
||||||
|
)
|
||||||
|
|
||||||
|
-- | Run a 'PandocPure' operation.
|
||||||
|
runPure :: PandocPure a -> Either PandocError a
|
||||||
|
runPure x = flip evalState def $
|
||||||
|
flip evalStateT def $
|
||||||
|
runExceptT $
|
||||||
|
unPandocPure x
|
||||||
|
|
||||||
|
instance PandocMonad PandocPure where
|
||||||
|
lookupEnv s = do
|
||||||
|
env <- getsPureState stEnv
|
||||||
|
return (lookup s env)
|
||||||
|
|
||||||
|
getCurrentTime = getsPureState stTime
|
||||||
|
|
||||||
|
getCurrentTimeZone = getsPureState stTimeZone
|
||||||
|
|
||||||
|
newStdGen = do
|
||||||
|
g <- getsPureState stStdGen
|
||||||
|
let (_, nxtGen) = next g
|
||||||
|
modifyPureState $ \st -> st { stStdGen = nxtGen }
|
||||||
|
return g
|
||||||
|
|
||||||
|
newUniqueHash = do
|
||||||
|
uniqs <- getsPureState stUniqStore
|
||||||
|
case uniqs of
|
||||||
|
u : us -> do
|
||||||
|
modifyPureState $ \st -> st { stUniqStore = us }
|
||||||
|
return u
|
||||||
|
_ -> throwError $ PandocShouldNeverHappenError
|
||||||
|
"uniq store ran out of elements"
|
||||||
|
openURL u = throwError $ PandocResourceNotFound u
|
||||||
|
readFileLazy fp = do
|
||||||
|
fps <- getsPureState stFiles
|
||||||
|
case infoFileContents <$> getFileInfo fp fps of
|
||||||
|
Just bs -> return (BL.fromStrict bs)
|
||||||
|
Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
|
||||||
|
readFileStrict fp = do
|
||||||
|
fps <- getsPureState stFiles
|
||||||
|
case infoFileContents <$> getFileInfo fp fps of
|
||||||
|
Just bs -> return bs
|
||||||
|
Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
|
||||||
|
|
||||||
|
glob s = do
|
||||||
|
FileTree ftmap <- getsPureState stFiles
|
||||||
|
return $ filter (match (compile s)) $ M.keys ftmap
|
||||||
|
|
||||||
|
fileExists fp = do
|
||||||
|
fps <- getsPureState stFiles
|
||||||
|
case getFileInfo fp fps of
|
||||||
|
Nothing -> return False
|
||||||
|
Just _ -> return True
|
||||||
|
|
||||||
|
getDataFileName fp = return $ "data/" ++ fp
|
||||||
|
|
||||||
|
getModificationTime fp = do
|
||||||
|
fps <- getsPureState stFiles
|
||||||
|
case infoFileMTime <$> getFileInfo fp fps of
|
||||||
|
Just tm -> return tm
|
||||||
|
Nothing -> throwError $ PandocIOError (T.pack fp)
|
||||||
|
(userError "Can't get modification time")
|
||||||
|
|
||||||
|
getCommonState = PandocPure $ lift get
|
||||||
|
putCommonState x = PandocPure $ lift $ put x
|
||||||
|
|
||||||
|
logOutput _msg = return ()
|
Loading…
Reference in a new issue