1003 lines
39 KiB
Haskell
1003 lines
39 KiB
Haskell
{-# LANGUAGE CPP #-}
|
|
{-# LANGUAGE PatternGuards #-}
|
|
{-# LANGUAGE DeriveFunctor #-}
|
|
{-# LANGUAGE TypeSynonymInstances #-}
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
{-# LANGUAGE UndecidableInstances #-}
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
#if MIN_VERSION_base(4,8,0)
|
|
#else
|
|
{-# LANGUAGE OverlappingInstances #-}
|
|
#endif
|
|
|
|
{-
|
|
Copyright (C) 2016-17 Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
and John MacFarlane.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Class
|
|
Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
This module defines a type class, 'PandocMonad', for pandoc readers
|
|
and writers. A pure instance 'PandocPure' and an impure instance
|
|
'PandocIO' are provided. This allows users of the library to choose
|
|
whether they want conversions to perform IO operations (such as
|
|
reading include files or images).
|
|
-}
|
|
|
|
module Text.Pandoc.Class ( PandocMonad(..)
|
|
, CommonState(..)
|
|
, PureState(..)
|
|
, getPureState
|
|
, getsPureState
|
|
, putPureState
|
|
, modifyPureState
|
|
, getPOSIXTime
|
|
, getZonedTime
|
|
, readFileFromDirs
|
|
, report
|
|
, setTrace
|
|
, setRequestHeader
|
|
, getLog
|
|
, setVerbosity
|
|
, getVerbosity
|
|
, getMediaBag
|
|
, setMediaBag
|
|
, insertMedia
|
|
, setUserDataDir
|
|
, getUserDataDir
|
|
, fetchItem
|
|
, getInputFiles
|
|
, setInputFiles
|
|
, getOutputFile
|
|
, setOutputFile
|
|
, setResourcePath
|
|
, getResourcePath
|
|
, PandocIO(..)
|
|
, PandocPure(..)
|
|
, FileTree(..)
|
|
, FileInfo(..)
|
|
, addToFileTree
|
|
, runIO
|
|
, runIOorExplode
|
|
, runPure
|
|
, readDefaultDataFile
|
|
, readDataFile
|
|
, fetchMediaResource
|
|
, fillMediaBag
|
|
, extractMedia
|
|
, toLang
|
|
, setTranslations
|
|
, translateTerm
|
|
, Translations
|
|
, Term(..)
|
|
) where
|
|
|
|
import Prelude hiding (readFile)
|
|
import System.Random (StdGen, next, mkStdGen)
|
|
import qualified System.Random as IO (newStdGen)
|
|
import Codec.Archive.Zip
|
|
import qualified Data.CaseInsensitive as CI
|
|
import Data.Unique (hashUnique)
|
|
import Data.List (stripPrefix)
|
|
import qualified Data.Unique as IO (newUnique)
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|
import qualified System.Directory as Directory
|
|
import Text.Pandoc.Compat.Time (UTCTime)
|
|
import Text.Pandoc.Logging
|
|
import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
|
|
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
|
|
import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
|
|
import Text.Pandoc.Definition
|
|
import Data.Digest.Pure.SHA (sha1, showDigest)
|
|
import Data.Maybe (fromMaybe)
|
|
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
|
|
, posixSecondsToUTCTime
|
|
, POSIXTime )
|
|
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
|
|
import Data.ByteString.Base64 (decodeLenient)
|
|
import Network.URI ( escapeURIString, nonStrictRelativeTo,
|
|
unEscapeString, parseURIReference, isAllowedInURI,
|
|
parseURI, URI(..) )
|
|
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
|
|
Request(port,host,requestHeaders))
|
|
import Network.HTTP.Client (parseRequest)
|
|
import Network.HTTP.Client (newManager)
|
|
import Network.HTTP.Client.Internal (addProxy)
|
|
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
|
import System.Environment (getEnv)
|
|
import Network.HTTP.Types.Header ( hContentType )
|
|
import Network (withSocketsDo)
|
|
import Data.ByteString.Lazy (toChunks)
|
|
import qualified Control.Exception as E
|
|
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
|
|
import Text.Pandoc.Walk (walkM, walk)
|
|
import qualified Text.Pandoc.MediaBag as MB
|
|
import qualified Data.ByteString as B
|
|
import qualified Data.ByteString.Lazy as BL
|
|
import qualified System.Environment as IO (lookupEnv)
|
|
import System.FilePath.Glob (match, compile)
|
|
import System.Directory (createDirectoryIfMissing, getDirectoryContents,
|
|
doesDirectoryExist)
|
|
import System.FilePath ((</>), (<.>), takeDirectory,
|
|
takeExtension, dropExtension, isRelative, normalise)
|
|
import qualified System.FilePath.Glob as IO (glob)
|
|
import qualified System.Directory as IO (getModificationTime)
|
|
import Control.Monad as M (fail)
|
|
import Control.Monad.State.Strict
|
|
import Control.Monad.Except
|
|
import Data.Word (Word8)
|
|
import Data.Default
|
|
import System.IO.Error
|
|
import System.IO (stderr)
|
|
import qualified Data.Map as M
|
|
import Text.Pandoc.Error
|
|
import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
|
|
import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
|
|
readTranslations)
|
|
import qualified Debug.Trace
|
|
#ifdef EMBED_DATA_FILES
|
|
import Text.Pandoc.Data (dataFiles)
|
|
import qualified System.FilePath.Posix as Posix
|
|
import System.FilePath (splitDirectories)
|
|
#else
|
|
import qualified Paths_pandoc as Paths
|
|
#endif
|
|
|
|
-- | The PandocMonad typeclass contains all the potentially
|
|
-- IO-related functions used in pandoc's readers and writers.
|
|
-- Instances of this typeclass may implement these functions
|
|
-- in IO (as in 'PandocIO') or using an internal state that
|
|
-- represents a file system, time, and so on (as in 'PandocPure').
|
|
class (Functor m, Applicative m, Monad m, MonadError PandocError m)
|
|
=> PandocMonad m where
|
|
-- | Lookup an environment variable.
|
|
lookupEnv :: String -> m (Maybe String)
|
|
-- | Get the current (UTC) time.
|
|
getCurrentTime :: m UTCTime
|
|
-- | Get the locale's time zone.
|
|
getCurrentTimeZone :: m TimeZone
|
|
-- | Return a new generator for random numbers.
|
|
newStdGen :: m StdGen
|
|
-- | Return a new unique integer.
|
|
newUniqueHash :: m Int
|
|
-- | Retrieve contents and mime type from a URL, raising
|
|
-- an error on failure.
|
|
openURL :: String -> m (B.ByteString, Maybe MimeType)
|
|
-- | Read the lazy ByteString contents from a file path,
|
|
-- raising an error on failure.
|
|
readFileLazy :: FilePath -> m BL.ByteString
|
|
-- | Read the strict ByteString contents from a file path,
|
|
-- raising an error on failure.
|
|
readFileStrict :: FilePath -> m B.ByteString
|
|
-- | Return a list of paths that match a glob, relative to
|
|
-- the working directory. See 'System.FilePath.Glob' for
|
|
-- the glob syntax.
|
|
glob :: String -> m [FilePath]
|
|
-- | Returns True if file exists.
|
|
fileExists :: FilePath -> m Bool
|
|
-- | Returns the path of data file.
|
|
getDataFileName :: FilePath -> m FilePath
|
|
-- | Return the modification time of a file.
|
|
getModificationTime :: FilePath -> m UTCTime
|
|
-- | Get the value of the 'CommonState' used by all instances
|
|
-- of 'PandocMonad'.
|
|
getCommonState :: m CommonState
|
|
-- | Set the value of the 'CommonState' used by all instances
|
|
-- of 'PandocMonad'.
|
|
-- | Get the value of a specific field of 'CommonState'.
|
|
putCommonState :: CommonState -> m ()
|
|
-- | Get the value of a specific field of 'CommonState'.
|
|
getsCommonState :: (CommonState -> a) -> m a
|
|
getsCommonState f = f <$> getCommonState
|
|
-- | Modify the 'CommonState'.
|
|
modifyCommonState :: (CommonState -> CommonState) -> m ()
|
|
modifyCommonState f = getCommonState >>= putCommonState . f
|
|
-- Output a log message.
|
|
logOutput :: LogMessage -> m ()
|
|
-- Output a debug message to sterr, using 'Debug.Trace.trace',
|
|
-- if tracing is enabled. Note: this writes to stderr even in
|
|
-- pure instances.
|
|
trace :: String -> m ()
|
|
trace msg = do
|
|
tracing <- getsCommonState stTrace
|
|
when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ())
|
|
|
|
-- * Functions defined for all PandocMonad instances
|
|
|
|
-- | Set the verbosity level.
|
|
setVerbosity :: PandocMonad m => Verbosity -> m ()
|
|
setVerbosity verbosity =
|
|
modifyCommonState $ \st -> st{ stVerbosity = verbosity }
|
|
|
|
-- | Get the verbosity level.
|
|
getVerbosity :: PandocMonad m => m Verbosity
|
|
getVerbosity = getsCommonState stVerbosity
|
|
|
|
-- Get the accomulated log messages (in temporal order).
|
|
getLog :: PandocMonad m => m [LogMessage]
|
|
getLog = reverse <$> getsCommonState stLog
|
|
|
|
-- | Log a message using 'logOutput'. Note that
|
|
-- 'logOutput' is called only if the verbosity
|
|
-- level exceeds the level of the message, but
|
|
-- the message is added to the list of log messages
|
|
-- that will be retrieved by 'getLog' regardless
|
|
-- of its verbosity level.
|
|
report :: PandocMonad m => LogMessage -> m ()
|
|
report msg = do
|
|
verbosity <- getsCommonState stVerbosity
|
|
let level = messageVerbosity msg
|
|
when (level <= verbosity) $ logOutput msg
|
|
modifyCommonState $ \st -> st{ stLog = msg : stLog st }
|
|
|
|
-- | Determine whether tracing is enabled. This affects
|
|
-- the behavior of 'trace'. If tracing is not enabled,
|
|
-- 'trace' does nothing.
|
|
setTrace :: PandocMonad m => Bool -> m ()
|
|
setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
|
|
|
|
-- | Set request header to use in HTTP requests.
|
|
setRequestHeader :: PandocMonad m
|
|
=> String -- ^ Header name
|
|
-> String -- ^ Value
|
|
-> m ()
|
|
setRequestHeader name val = modifyCommonState $ \st ->
|
|
st{ stRequestHeaders =
|
|
(name, val) : filter (\(n,_) -> n /= name) (stRequestHeaders st) }
|
|
|
|
-- | Initialize the media bag.
|
|
setMediaBag :: PandocMonad m => MediaBag -> m ()
|
|
setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
|
|
|
|
-- Retrieve the media bag.
|
|
getMediaBag :: PandocMonad m => m MediaBag
|
|
getMediaBag = getsCommonState stMediaBag
|
|
|
|
-- Insert an item into the media bag.
|
|
insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
|
|
insertMedia fp mime bs = do
|
|
mb <- getMediaBag
|
|
let mb' = MB.insertMedia fp mime bs mb
|
|
setMediaBag mb'
|
|
|
|
-- Retrieve the input filenames.
|
|
getInputFiles :: PandocMonad m => m [FilePath]
|
|
getInputFiles = getsCommonState stInputFiles
|
|
|
|
-- Set the input filenames.
|
|
setInputFiles :: PandocMonad m => [FilePath] -> m ()
|
|
setInputFiles fs = do
|
|
let sourceURL = case fs of
|
|
[] -> Nothing
|
|
(x:_) -> case parseURI x of
|
|
Just u
|
|
| uriScheme u `elem` ["http:","https:"] ->
|
|
Just $ show u{ uriQuery = "",
|
|
uriFragment = "" }
|
|
_ -> Nothing
|
|
|
|
modifyCommonState $ \st -> st{ stInputFiles = fs
|
|
, stSourceURL = sourceURL }
|
|
|
|
-- Retrieve the output filename.
|
|
getOutputFile :: PandocMonad m => m (Maybe FilePath)
|
|
getOutputFile = getsCommonState stOutputFile
|
|
|
|
-- Set the output filename.
|
|
setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
|
|
setOutputFile mbf = modifyCommonState $ \st -> st{ stOutputFile = mbf }
|
|
|
|
-- Retrieve the resource path searched by 'fetchItem'.
|
|
getResourcePath :: PandocMonad m => m [FilePath]
|
|
getResourcePath = getsCommonState stResourcePath
|
|
|
|
-- Set the resource path searched by 'fetchItem'.
|
|
setResourcePath :: PandocMonad m => [FilePath] -> m ()
|
|
setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
|
|
|
|
-- Get the POSIX time.
|
|
getPOSIXTime :: PandocMonad m => m POSIXTime
|
|
getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
|
|
|
|
-- Get the zoned time.
|
|
getZonedTime :: PandocMonad m => m ZonedTime
|
|
getZonedTime = do
|
|
t <- getCurrentTime
|
|
tz <- getCurrentTimeZone
|
|
return $ utcToZonedTime tz t
|
|
|
|
-- | Read file, checking in any number of directories.
|
|
readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
|
|
readFileFromDirs [] _ = return Nothing
|
|
readFileFromDirs (d:ds) f = catchError
|
|
((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
|
|
(\_ -> readFileFromDirs ds f)
|
|
|
|
--
|
|
|
|
-- | 'CommonState' represents state that is used by all
|
|
-- instances of 'PandocMonad'. Normally users should not
|
|
-- need to interact with it directly; instead, auxiliary
|
|
-- functions like 'setVerbosity' and 'withMediaBag' should be used.
|
|
data CommonState = CommonState { stLog :: [LogMessage]
|
|
-- ^ A list of log messages in reverse order
|
|
, stUserDataDir :: Maybe FilePath
|
|
-- ^ Directory to search for data files
|
|
, stSourceURL :: Maybe String
|
|
-- ^ Absolute URL + dir of 1st source file
|
|
, stRequestHeaders :: [(String, String)]
|
|
-- ^ Headers to add for HTTP requests
|
|
, stMediaBag :: MediaBag
|
|
-- ^ Media parsed from binary containers
|
|
, stTranslations :: Maybe
|
|
(Lang, Maybe Translations)
|
|
-- ^ Translations for localization
|
|
, stInputFiles :: [FilePath]
|
|
-- ^ List of input files from command line
|
|
, stOutputFile :: Maybe FilePath
|
|
-- ^ Output file from command line
|
|
, stResourcePath :: [FilePath]
|
|
-- ^ Path to search for resources like
|
|
-- included images
|
|
, stVerbosity :: Verbosity
|
|
-- ^ Verbosity level
|
|
, stTrace :: Bool
|
|
-- ^ Controls whether tracing messages are
|
|
-- issued.
|
|
}
|
|
|
|
instance Default CommonState where
|
|
def = CommonState { stLog = []
|
|
, stUserDataDir = Nothing
|
|
, stSourceURL = Nothing
|
|
, stRequestHeaders = []
|
|
, stMediaBag = mempty
|
|
, stTranslations = Nothing
|
|
, stInputFiles = []
|
|
, stOutputFile = Nothing
|
|
, stResourcePath = ["."]
|
|
, stVerbosity = WARNING
|
|
, stTrace = False
|
|
}
|
|
|
|
-- | Convert BCP47 string to a Lang, issuing warning
|
|
-- if there are problems.
|
|
toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
|
|
toLang Nothing = return Nothing
|
|
toLang (Just s) =
|
|
case parseBCP47 s of
|
|
Left _ -> do
|
|
report $ InvalidLang s
|
|
return Nothing
|
|
Right l -> return (Just l)
|
|
|
|
-- | Select the language to use with 'translateTerm'.
|
|
-- Note that this does not read a translation file;
|
|
-- that is only done the first time 'translateTerm' is
|
|
-- used.
|
|
setTranslations :: PandocMonad m => Lang -> m ()
|
|
setTranslations lang =
|
|
modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
|
|
|
|
-- | Load term map.
|
|
getTranslations :: PandocMonad m => m Translations
|
|
getTranslations = do
|
|
mbtrans <- getsCommonState stTranslations
|
|
case mbtrans of
|
|
Nothing -> return mempty -- no language defined
|
|
Just (_, Just t) -> return t
|
|
Just (lang, Nothing) -> do -- read from file
|
|
let translationFile = "translations/" ++ renderLang lang ++ ".yaml"
|
|
let fallbackFile = "translations/" ++ langLanguage lang ++ ".yaml"
|
|
let getTrans fp = do
|
|
bs <- readDataFile fp
|
|
case readTranslations (UTF8.toString bs) of
|
|
Left e -> do
|
|
report $ CouldNotLoadTranslations (renderLang lang)
|
|
(fp ++ ": " ++ e)
|
|
-- make sure we don't try again...
|
|
modifyCommonState $ \st ->
|
|
st{ stTranslations = Nothing }
|
|
return mempty
|
|
Right t -> do
|
|
modifyCommonState $ \st ->
|
|
st{ stTranslations = Just (lang, Just t) }
|
|
return t
|
|
catchError (getTrans translationFile)
|
|
(\_ ->
|
|
catchError (getTrans fallbackFile)
|
|
(\e -> do
|
|
report $ CouldNotLoadTranslations (renderLang lang)
|
|
$ case e of
|
|
PandocCouldNotFindDataFileError _ ->
|
|
("data file " ++ fallbackFile ++ " not found")
|
|
_ -> ""
|
|
-- make sure we don't try again...
|
|
modifyCommonState $ \st -> st{ stTranslations = Nothing }
|
|
return mempty))
|
|
|
|
-- | Get a translation from the current term map.
|
|
-- Issue a warning if the term is not defined.
|
|
translateTerm :: PandocMonad m => Term -> m String
|
|
translateTerm term = do
|
|
translations <- getTranslations
|
|
case lookupTerm term translations of
|
|
Just s -> return s
|
|
Nothing -> do
|
|
report $ NoTranslation (show term)
|
|
return ""
|
|
|
|
-- | Evaluate a 'PandocIO' operation.
|
|
runIO :: PandocIO a -> IO (Either PandocError a)
|
|
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
|
|
|
|
-- | Evaluate a 'PandocIO' operation, handling any errors
|
|
-- by exiting with an appropriate message and error status.
|
|
runIOorExplode :: PandocIO a -> IO a
|
|
runIOorExplode ma = runIO ma >>= handleError
|
|
|
|
newtype PandocIO a = PandocIO {
|
|
unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
|
|
} deriving ( MonadIO
|
|
, Functor
|
|
, Applicative
|
|
, Monad
|
|
, MonadError PandocError
|
|
)
|
|
|
|
-- | Utility function to lift IO errors into 'PandocError's.
|
|
liftIOError :: (String -> IO a) -> String -> PandocIO a
|
|
liftIOError f u = do
|
|
res <- liftIO $ tryIOError $ f u
|
|
case res of
|
|
Left e -> throwError $ PandocIOError u e
|
|
Right r -> return r
|
|
|
|
instance PandocMonad PandocIO where
|
|
lookupEnv = liftIO . IO.lookupEnv
|
|
getCurrentTime = liftIO IO.getCurrentTime
|
|
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
|
newStdGen = liftIO IO.newStdGen
|
|
newUniqueHash = hashUnique <$> liftIO IO.newUnique
|
|
|
|
openURL u
|
|
| Just u'' <- stripPrefix "data:" u = do
|
|
let mime = takeWhile (/=',') u''
|
|
let contents = UTF8.fromString $
|
|
unEscapeString $ drop 1 $ dropWhile (/=',') u''
|
|
return (decodeLenient contents, Just mime)
|
|
| otherwise = do
|
|
let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v)
|
|
customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
|
|
report $ Fetching u
|
|
res <- liftIO $ E.try $ withSocketsDo $ do
|
|
let parseReq = parseRequest
|
|
proxy <- tryIOError (getEnv "http_proxy")
|
|
let addProxy' x = case proxy of
|
|
Left _ -> return x
|
|
Right pr -> parseReq pr >>= \r ->
|
|
return (addProxy (host r) (port r) x)
|
|
req <- parseReq u >>= addProxy'
|
|
let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
|
|
resp <- newManager tlsManagerSettings >>= httpLbs req'
|
|
return (B.concat $ toChunks $ responseBody resp,
|
|
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
|
|
|
|
case res of
|
|
Right r -> return r
|
|
Left e -> throwError $ PandocHttpError u e
|
|
|
|
readFileLazy s = liftIOError BL.readFile s
|
|
readFileStrict s = liftIOError B.readFile s
|
|
|
|
glob = liftIOError IO.glob
|
|
fileExists = liftIOError Directory.doesFileExist
|
|
#ifdef EMBED_DATA_FILES
|
|
getDataFileName = return
|
|
#else
|
|
getDataFileName = liftIOError Paths.getDataFileName
|
|
#endif
|
|
getModificationTime = liftIOError IO.getModificationTime
|
|
getCommonState = PandocIO $ lift get
|
|
putCommonState x = PandocIO $ lift $ put x
|
|
logOutput msg = liftIO $ do
|
|
UTF8.hPutStr stderr $
|
|
"[" ++ show (messageVerbosity msg) ++ "] "
|
|
alertIndent $ lines $ showLogMessage msg
|
|
|
|
alertIndent :: [String] -> IO ()
|
|
alertIndent [] = return ()
|
|
alertIndent (l:ls) = do
|
|
UTF8.hPutStrLn stderr l
|
|
mapM_ go ls
|
|
where go l' = do UTF8.hPutStr stderr " "
|
|
UTF8.hPutStrLn stderr l'
|
|
|
|
-- | Specialized version of parseURIReference that disallows
|
|
-- single-letter schemes. Reason: these are usually windows absolute
|
|
-- paths.
|
|
parseURIReference' :: String -> Maybe URI
|
|
parseURIReference' s =
|
|
case parseURIReference s of
|
|
Just u
|
|
| length (uriScheme u) > 2 -> Just u
|
|
| null (uriScheme u) -> Just u -- protocol-relative
|
|
_ -> Nothing
|
|
|
|
-- | Set the user data directory in common state.
|
|
setUserDataDir :: PandocMonad m
|
|
=> Maybe FilePath
|
|
-> m ()
|
|
setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
|
|
|
|
-- | Get the user data directory from common state.
|
|
getUserDataDir :: PandocMonad m
|
|
=> m (Maybe FilePath)
|
|
getUserDataDir = getsCommonState stUserDataDir
|
|
|
|
-- | Fetch an image or other item from the local filesystem or the net.
|
|
-- Returns raw content and maybe mime type.
|
|
fetchItem :: PandocMonad m
|
|
=> String
|
|
-> m (B.ByteString, Maybe MimeType)
|
|
fetchItem s = do
|
|
mediabag <- getMediaBag
|
|
case lookupMedia s mediabag of
|
|
Just (mime, bs) -> return (BL.toStrict bs, Just mime)
|
|
Nothing -> downloadOrRead s
|
|
|
|
downloadOrRead :: PandocMonad m
|
|
=> String
|
|
-> m (B.ByteString, Maybe MimeType)
|
|
downloadOrRead s = do
|
|
sourceURL <- getsCommonState stSourceURL
|
|
case (sourceURL >>= parseURIReference' .
|
|
ensureEscaped, ensureEscaped s) of
|
|
(Just u, s') -> -- try fetching from relative path at source
|
|
case parseURIReference' s' of
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
|
|
Nothing -> openURL s' -- will throw error
|
|
(Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
|
|
case parseURIReference' s' of
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
|
|
Nothing -> openURL s' -- will throw error
|
|
(Nothing, s') ->
|
|
case parseURI s' of -- requires absolute URI
|
|
-- We don't want to treat C:/ as a scheme:
|
|
Just u' | length (uriScheme u') > 2 -> openURL (show u')
|
|
Just u' | uriScheme u' == "file:" ->
|
|
readLocalFile $ dropWhile (=='/') (uriPath u')
|
|
_ -> readLocalFile fp -- get from local file system
|
|
where readLocalFile f = do
|
|
resourcePath <- getResourcePath
|
|
cont <- if isRelative f
|
|
then withPaths resourcePath readFileStrict f
|
|
else readFileStrict f
|
|
return (cont, mime)
|
|
httpcolon = URI{ uriScheme = "http:",
|
|
uriAuthority = Nothing,
|
|
uriPath = "",
|
|
uriQuery = "",
|
|
uriFragment = "" }
|
|
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
|
|
fp = unEscapeString $ dropFragmentAndQuery s
|
|
mime = case takeExtension fp of
|
|
".gz" -> getMimeType $ dropExtension fp
|
|
".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
|
|
x -> getMimeType x
|
|
ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
|
|
convertSlash '\\' = '/'
|
|
convertSlash x = x
|
|
|
|
-- Retrieve default reference.docx.
|
|
getDefaultReferenceDocx :: PandocMonad m => m Archive
|
|
getDefaultReferenceDocx = do
|
|
let paths = ["[Content_Types].xml",
|
|
"_rels/.rels",
|
|
"docProps/app.xml",
|
|
"docProps/core.xml",
|
|
"word/document.xml",
|
|
"word/fontTable.xml",
|
|
"word/footnotes.xml",
|
|
"word/numbering.xml",
|
|
"word/settings.xml",
|
|
"word/webSettings.xml",
|
|
"word/styles.xml",
|
|
"word/_rels/document.xml.rels",
|
|
"word/_rels/footnotes.xml.rels",
|
|
"word/theme/theme1.xml"]
|
|
let toLazy = BL.fromChunks . (:[])
|
|
let pathToEntry path = do
|
|
epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
|
|
contents <- toLazy <$> readDataFile ("docx/" ++ path)
|
|
return $ toEntry path epochtime contents
|
|
datadir <- getUserDataDir
|
|
mbArchive <- case datadir of
|
|
Nothing -> return Nothing
|
|
Just d -> do
|
|
exists <- fileExists (d </> "reference.docx")
|
|
if exists
|
|
then return (Just (d </> "reference.docx"))
|
|
else return Nothing
|
|
case mbArchive of
|
|
Just arch -> toArchive <$> readFileLazy arch
|
|
Nothing -> foldr addEntryToArchive emptyArchive <$>
|
|
mapM pathToEntry paths
|
|
|
|
-- Retrieve default reference.odt.
|
|
getDefaultReferenceODT :: PandocMonad m => m Archive
|
|
getDefaultReferenceODT = do
|
|
let paths = ["mimetype",
|
|
"manifest.rdf",
|
|
"styles.xml",
|
|
"content.xml",
|
|
"meta.xml",
|
|
"settings.xml",
|
|
"Configurations2/accelerator/current.xml",
|
|
"Thumbnails/thumbnail.png",
|
|
"META-INF/manifest.xml"]
|
|
let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
|
|
contents <- (BL.fromChunks . (:[])) `fmap`
|
|
readDataFile ("odt/" ++ path)
|
|
return $ toEntry path epochtime contents
|
|
datadir <- getUserDataDir
|
|
mbArchive <- case datadir of
|
|
Nothing -> return Nothing
|
|
Just d -> do
|
|
exists <- fileExists (d </> "reference.odt")
|
|
if exists
|
|
then return (Just (d </> "reference.odt"))
|
|
else return Nothing
|
|
case mbArchive of
|
|
Just arch -> toArchive <$> readFileLazy arch
|
|
Nothing -> foldr addEntryToArchive emptyArchive <$>
|
|
mapM pathToEntry paths
|
|
|
|
-- | Read file from user data directory or,
|
|
-- if not found there, from Cabal data directory.
|
|
readDataFile :: PandocMonad m => FilePath -> m B.ByteString
|
|
readDataFile fname = do
|
|
datadir <- getUserDataDir
|
|
case datadir of
|
|
Nothing -> readDefaultDataFile fname
|
|
Just userDir -> do
|
|
exists <- fileExists (userDir </> fname)
|
|
if exists
|
|
then readFileStrict (userDir </> fname)
|
|
else readDefaultDataFile fname
|
|
|
|
-- | Read file from from Cabal data directory.
|
|
readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
|
|
readDefaultDataFile "reference.docx" =
|
|
(B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
|
|
readDefaultDataFile "reference.odt" =
|
|
(B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
|
|
readDefaultDataFile fname =
|
|
#ifdef EMBED_DATA_FILES
|
|
case lookup (makeCanonical fname) dataFiles of
|
|
Nothing -> throwError $ PandocCouldNotFindDataFileError fname
|
|
Just contents -> return contents
|
|
where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
|
|
transformPathParts = reverse . foldl go []
|
|
go as "." = as
|
|
go (_:as) ".." = as
|
|
go as x = x : as
|
|
#else
|
|
getDataFileName fname' >>= checkExistence >>= readFileStrict
|
|
where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
|
|
|
|
checkExistence :: PandocMonad m => FilePath -> m FilePath
|
|
checkExistence fn = do
|
|
exists <- fileExists fn
|
|
if exists
|
|
then return fn
|
|
else throwError $ PandocCouldNotFindDataFileError fn
|
|
#endif
|
|
|
|
withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
|
|
withPaths [] _ fp = throwError $ PandocResourceNotFound fp
|
|
withPaths (p:ps) action fp =
|
|
catchError (action (p </> fp))
|
|
(\_ -> withPaths ps action fp)
|
|
|
|
-- | Fetch local or remote resource (like an image) and provide data suitable
|
|
-- for adding it to the MediaBag.
|
|
fetchMediaResource :: PandocMonad m
|
|
=> String -> m (FilePath, Maybe MimeType, BL.ByteString)
|
|
fetchMediaResource src = do
|
|
(bs, mt) <- downloadOrRead src
|
|
let ext = fromMaybe (takeExtension src)
|
|
(mt >>= extensionFromMimeType)
|
|
let bs' = BL.fromChunks [bs]
|
|
let basename = showDigest $ sha1 bs'
|
|
let fname = basename <.> ext
|
|
return (fname, mt, bs')
|
|
|
|
-- | Traverse tree, filling media bag for any images that
|
|
-- aren't already in the media bag.
|
|
fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
|
|
fillMediaBag d = walkM handleImage d
|
|
where handleImage :: PandocMonad m => Inline -> m Inline
|
|
handleImage (Image attr lab (src, tit)) = catchError
|
|
(do mediabag <- getMediaBag
|
|
case lookupMedia src mediabag of
|
|
Just (_, _) -> return $ Image attr lab (src, tit)
|
|
Nothing -> do
|
|
(fname, mt, bs) <- fetchMediaResource src
|
|
insertMedia fname mt bs
|
|
return $ Image attr lab (fname, tit))
|
|
(\e ->
|
|
case e of
|
|
PandocResourceNotFound _ -> do
|
|
report $ CouldNotFetchResource src
|
|
"replacing image with description"
|
|
-- emit alt text
|
|
return $ Span ("",["image"],[]) lab
|
|
PandocHttpError u er -> do
|
|
report $ CouldNotFetchResource u
|
|
(show er ++ "\rReplacing image with description.")
|
|
-- emit alt text
|
|
return $ Span ("",["image"],[]) lab
|
|
_ -> throwError e)
|
|
handleImage x = return x
|
|
|
|
-- | Extract media from the mediabag into a directory.
|
|
extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
|
|
extractMedia dir d = do
|
|
media <- getMediaBag
|
|
case [fp | (fp, _, _) <- mediaDirectory media] of
|
|
[] -> return d
|
|
fps -> do
|
|
mapM_ (writeMedia dir media) fps
|
|
return $ walk (adjustImagePath dir fps) d
|
|
|
|
-- Write the contents of a media bag to a path.
|
|
writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
|
|
writeMedia dir mediabag subpath = do
|
|
-- we join and split to convert a/b/c to a\b\c on Windows;
|
|
-- in zip containers all paths use /
|
|
let fullpath = dir </> normalise subpath
|
|
let mbcontents = lookupMedia subpath mediabag
|
|
case mbcontents of
|
|
Nothing -> throwError $ PandocResourceNotFound subpath
|
|
Just (_, bs) -> do
|
|
report $ Extracting fullpath
|
|
liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
|
|
liftIOError (\p -> BL.writeFile p bs) fullpath
|
|
|
|
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
|
adjustImagePath dir paths (Image attr lab (src, tit))
|
|
| src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
|
|
adjustImagePath _ _ x = x
|
|
|
|
-- | The 'PureState' contains ersatz representations
|
|
-- of things that would normally be obtained through IO.
|
|
data PureState = PureState { stStdGen :: StdGen
|
|
, stWord8Store :: [Word8] -- should be
|
|
-- inifinite,
|
|
-- i.e. [1..]
|
|
, stUniqStore :: [Int] -- should be
|
|
-- inifinite and
|
|
-- contain every
|
|
-- element at most
|
|
-- once, e.g. [1..]
|
|
, stEnv :: [(String, String)]
|
|
, stTime :: UTCTime
|
|
, stTimeZone :: TimeZone
|
|
, stReferenceDocx :: Archive
|
|
, stReferenceODT :: Archive
|
|
, stFiles :: FileTree
|
|
, stUserDataFiles :: FileTree
|
|
, stCabalDataFiles :: FileTree
|
|
}
|
|
|
|
instance Default PureState where
|
|
def = PureState { stStdGen = mkStdGen 1848
|
|
, stWord8Store = [1..]
|
|
, stUniqStore = [1..]
|
|
, stEnv = [("USER", "pandoc-user")]
|
|
, stTime = posixSecondsToUTCTime 0
|
|
, stTimeZone = utc
|
|
, stReferenceDocx = emptyArchive
|
|
, stReferenceODT = emptyArchive
|
|
, stFiles = mempty
|
|
, stUserDataFiles = mempty
|
|
, stCabalDataFiles = mempty
|
|
}
|
|
|
|
|
|
getPureState :: PandocPure PureState
|
|
getPureState = PandocPure $ lift $ lift get
|
|
|
|
getsPureState :: (PureState -> a) -> PandocPure a
|
|
getsPureState f = f <$> getPureState
|
|
|
|
putPureState :: PureState -> PandocPure ()
|
|
putPureState ps= PandocPure $ lift $ lift $ put ps
|
|
|
|
modifyPureState :: (PureState -> PureState) -> PandocPure ()
|
|
modifyPureState f = PandocPure $ lift $ lift $ modify f
|
|
|
|
|
|
data FileInfo = FileInfo { infoFileMTime :: UTCTime
|
|
, infoFileContents :: B.ByteString
|
|
}
|
|
|
|
newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
|
|
deriving (Monoid)
|
|
|
|
getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
|
|
getFileInfo fp tree = M.lookup fp $ unFileTree tree
|
|
|
|
-- | Add the specified file to the FileTree. If file
|
|
-- is a directory, add its contents recursively.
|
|
addToFileTree :: FileTree -> FilePath -> IO FileTree
|
|
addToFileTree (FileTree treemap) fp = do
|
|
isdir <- doesDirectoryExist fp
|
|
if isdir
|
|
then do -- recursively add contents of directories
|
|
let isSpecial ".." = True
|
|
isSpecial "." = True
|
|
isSpecial _ = False
|
|
fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp
|
|
foldM addToFileTree (FileTree treemap) fs
|
|
else do
|
|
contents <- B.readFile fp
|
|
mtime <- IO.getModificationTime fp
|
|
return $ FileTree $
|
|
M.insert fp FileInfo{ infoFileMTime = mtime
|
|
, infoFileContents = contents } treemap
|
|
|
|
newtype PandocPure a = PandocPure {
|
|
unPandocPure :: ExceptT PandocError
|
|
(StateT CommonState (State PureState)) a
|
|
} deriving ( Functor
|
|
, Applicative
|
|
, Monad
|
|
, MonadError PandocError
|
|
)
|
|
|
|
-- Run a 'PandocPure' operation.
|
|
runPure :: PandocPure a -> Either PandocError a
|
|
runPure x = flip evalState def $
|
|
flip evalStateT def $
|
|
runExceptT $
|
|
unPandocPure x
|
|
|
|
instance PandocMonad PandocPure where
|
|
lookupEnv s = do
|
|
env <- getsPureState stEnv
|
|
return (lookup s env)
|
|
|
|
getCurrentTime = getsPureState stTime
|
|
|
|
getCurrentTimeZone = getsPureState stTimeZone
|
|
|
|
newStdGen = do
|
|
g <- getsPureState stStdGen
|
|
let (_, nxtGen) = next g
|
|
modifyPureState $ \st -> st { stStdGen = nxtGen }
|
|
return g
|
|
|
|
newUniqueHash = do
|
|
uniqs <- getsPureState stUniqStore
|
|
case uniqs of
|
|
u : us -> do
|
|
modifyPureState $ \st -> st { stUniqStore = us }
|
|
return u
|
|
_ -> M.fail "uniq store ran out of elements"
|
|
openURL u = throwError $ PandocResourceNotFound u
|
|
readFileLazy fp = do
|
|
fps <- getsPureState stFiles
|
|
case infoFileContents <$> getFileInfo fp fps of
|
|
Just bs -> return (BL.fromStrict bs)
|
|
Nothing -> throwError $ PandocResourceNotFound fp
|
|
readFileStrict fp = do
|
|
fps <- getsPureState stFiles
|
|
case infoFileContents <$> getFileInfo fp fps of
|
|
Just bs -> return bs
|
|
Nothing -> throwError $ PandocResourceNotFound fp
|
|
|
|
glob s = do
|
|
FileTree ftmap <- getsPureState stFiles
|
|
return $ filter (match (compile s)) $ M.keys ftmap
|
|
|
|
fileExists fp = do
|
|
fps <- getsPureState stFiles
|
|
case getFileInfo fp fps of
|
|
Nothing -> return False
|
|
Just _ -> return True
|
|
|
|
getDataFileName fp = return $ "data/" ++ fp
|
|
|
|
getModificationTime fp = do
|
|
fps <- getsPureState stFiles
|
|
case infoFileMTime <$> getFileInfo fp fps of
|
|
Just tm -> return tm
|
|
Nothing -> throwError $ PandocIOError fp
|
|
(userError "Can't get modification time")
|
|
|
|
getCommonState = PandocPure $ lift get
|
|
putCommonState x = PandocPure $ lift $ put x
|
|
|
|
logOutput _msg = return ()
|
|
|
|
-- This requires UndecidableInstances. We could avoid that
|
|
-- by repeating the definitions below for every monad transformer
|
|
-- we use: ReaderT, WriterT, StateT, RWST. But this seems to
|
|
-- be harmless.
|
|
instance (MonadTrans t, PandocMonad m, Functor (t m),
|
|
MonadError PandocError (t m), Monad (t m),
|
|
Applicative (t m)) => PandocMonad (t m) where
|
|
lookupEnv = lift . lookupEnv
|
|
getCurrentTime = lift getCurrentTime
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
newStdGen = lift newStdGen
|
|
newUniqueHash = lift newUniqueHash
|
|
openURL = lift . openURL
|
|
readFileLazy = lift . readFileLazy
|
|
readFileStrict = lift . readFileStrict
|
|
glob = lift . glob
|
|
fileExists = lift . fileExists
|
|
getDataFileName = lift . getDataFileName
|
|
getModificationTime = lift . getModificationTime
|
|
getCommonState = lift getCommonState
|
|
putCommonState = lift . putCommonState
|
|
logOutput = lift . logOutput
|
|
|
|
#if MIN_VERSION_base(4,8,0)
|
|
instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
|
|
#else
|
|
instance PandocMonad m => PandocMonad (ParsecT s st m) where
|
|
#endif
|
|
lookupEnv = lift . lookupEnv
|
|
getCurrentTime = lift getCurrentTime
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
newStdGen = lift newStdGen
|
|
newUniqueHash = lift newUniqueHash
|
|
openURL = lift . openURL
|
|
readFileLazy = lift . readFileLazy
|
|
readFileStrict = lift . readFileStrict
|
|
glob = lift . glob
|
|
fileExists = lift . fileExists
|
|
getDataFileName = lift . getDataFileName
|
|
getModificationTime = lift . getModificationTime
|
|
getCommonState = lift getCommonState
|
|
putCommonState = lift . putCommonState
|
|
trace msg = do
|
|
tracing <- getsCommonState stTrace
|
|
when tracing $ do
|
|
pos <- getPosition
|
|
Debug.Trace.trace
|
|
("[trace] Parsed " ++ msg ++ " at line " ++
|
|
show (sourceLine pos) ++
|
|
if sourceName pos == "chunk"
|
|
then " of chunk"
|
|
else "")
|
|
(return ())
|
|
logOutput = lift . logOutput
|
|
|