2016-11-24 09:54:18 -05:00
|
|
|
{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances,
|
2017-02-24 13:48:07 +01:00
|
|
|
FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts,
|
|
|
|
StandaloneDeriving #-}
|
2016-11-21 09:30:08 -05:00
|
|
|
|
|
|
|
{-
|
|
|
|
Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Class
|
|
|
|
Copyright : Copyright (C) 2016 Jesse Rosenthal
|
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
|
|
|
Typeclass for pandoc readers and writers, allowing both IO and pure instances.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Text.Pandoc.Class ( PandocMonad(..)
|
2016-12-01 15:39:54 -05:00
|
|
|
, CommonState(..)
|
2016-11-27 15:31:17 -05:00
|
|
|
, PureState(..)
|
2016-12-03 23:39:01 -05:00
|
|
|
, getPureState
|
|
|
|
, getsPureState
|
|
|
|
, putPureState
|
|
|
|
, modifyPureState
|
2016-11-21 09:30:08 -05:00
|
|
|
, getPOSIXTime
|
2016-11-30 09:46:08 -05:00
|
|
|
, getZonedTime
|
2017-02-07 22:33:05 +01:00
|
|
|
, readFileFromDirs
|
2017-01-22 23:49:05 +01:00
|
|
|
, report
|
2017-01-22 22:10:11 +01:00
|
|
|
, getLog
|
|
|
|
, setVerbosity
|
2016-12-01 15:21:49 -05:00
|
|
|
, getMediaBag
|
|
|
|
, setMediaBag
|
|
|
|
, insertMedia
|
2016-12-11 23:10:46 +01:00
|
|
|
, fetchItem
|
2016-12-01 15:21:49 -05:00
|
|
|
, getInputFiles
|
|
|
|
, getOutputFile
|
2017-02-24 13:48:07 +01:00
|
|
|
, setResourcePath
|
|
|
|
, getResourcePath
|
2016-11-24 09:54:18 -05:00
|
|
|
, PandocIO(..)
|
|
|
|
, PandocPure(..)
|
2017-01-27 11:15:42 +01:00
|
|
|
, FileTree(..)
|
2016-11-30 09:21:21 -05:00
|
|
|
, FileInfo(..)
|
2016-11-24 09:54:18 -05:00
|
|
|
, runIO
|
|
|
|
, runIOorExplode
|
2016-11-24 11:52:06 -05:00
|
|
|
, runPure
|
2016-12-01 10:00:21 -05:00
|
|
|
, withMediaBag
|
2016-11-21 09:30:08 -05:00
|
|
|
) where
|
|
|
|
|
2016-12-10 23:41:37 +01:00
|
|
|
import Prelude hiding (readFile)
|
2016-11-24 11:52:06 -05:00
|
|
|
import System.Random (StdGen, next, mkStdGen)
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified System.Random as IO (newStdGen)
|
2016-11-24 11:52:06 -05:00
|
|
|
import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
|
2016-11-21 09:30:08 -05:00
|
|
|
import Data.Unique (hashUnique)
|
|
|
|
import qualified Data.Unique as IO (newUnique)
|
2016-12-11 23:10:46 +01:00
|
|
|
import qualified Text.Pandoc.Shared as IO ( readDataFile
|
|
|
|
, openURL )
|
2017-01-22 23:49:05 +01:00
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
2016-11-21 09:30:08 -05:00
|
|
|
import Text.Pandoc.Compat.Time (UTCTime)
|
2017-02-10 20:59:54 +01:00
|
|
|
import Text.Pandoc.Logging
|
2017-02-10 23:59:47 +01:00
|
|
|
import Text.Parsec (ParsecT)
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
|
2016-12-11 23:10:46 +01:00
|
|
|
import Text.Pandoc.MIME (MimeType, getMimeType)
|
2016-11-24 11:52:06 -05:00
|
|
|
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
|
|
|
|
, posixSecondsToUTCTime
|
|
|
|
, POSIXTime )
|
2016-11-30 09:46:08 -05:00
|
|
|
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
|
2016-12-11 23:10:46 +01:00
|
|
|
import Network.URI ( escapeURIString, nonStrictRelativeTo,
|
|
|
|
unEscapeString, parseURIReference, isAllowedInURI,
|
|
|
|
parseURI, URI(..) )
|
2016-11-30 09:46:08 -05:00
|
|
|
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
2016-12-11 23:10:46 +01:00
|
|
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
|
2016-11-27 15:29:46 -05:00
|
|
|
import qualified Text.Pandoc.MediaBag as MB
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified Data.ByteString as B
|
|
|
|
import qualified Data.ByteString.Lazy as BL
|
|
|
|
import qualified System.Environment as IO (lookupEnv)
|
|
|
|
import System.FilePath.Glob (match, compile)
|
2017-02-24 13:48:07 +01:00
|
|
|
import System.FilePath ((</>), takeExtension, dropExtension, isRelative)
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified System.FilePath.Glob as IO (glob)
|
2016-11-30 09:21:21 -05:00
|
|
|
import qualified System.Directory as IO (getModificationTime)
|
2016-12-10 23:41:37 +01:00
|
|
|
import Control.Monad as M (fail)
|
2016-12-08 22:12:04 +01:00
|
|
|
import Control.Monad.Reader (ReaderT)
|
2016-12-10 23:41:37 +01:00
|
|
|
import Control.Monad.State
|
|
|
|
import Control.Monad.Except
|
2016-12-08 22:12:04 +01:00
|
|
|
import Control.Monad.Writer (WriterT)
|
|
|
|
import Control.Monad.RWS (RWST)
|
2016-11-21 09:30:08 -05:00
|
|
|
import Data.Word (Word8)
|
2016-11-24 09:54:18 -05:00
|
|
|
import Data.Default
|
|
|
|
import System.IO.Error
|
2017-01-22 23:49:05 +01:00
|
|
|
import System.IO (stderr)
|
2016-11-30 09:21:21 -05:00
|
|
|
import qualified Data.Map as M
|
2016-12-01 12:13:51 -05:00
|
|
|
import Text.Pandoc.Error
|
2017-01-22 23:49:05 +01:00
|
|
|
import Text.Printf (printf)
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-08 22:12:04 +01:00
|
|
|
class (Functor m, Applicative m, Monad m, MonadError PandocError m)
|
|
|
|
=> PandocMonad m where
|
2016-11-21 09:30:08 -05:00
|
|
|
lookupEnv :: String -> m (Maybe String)
|
|
|
|
getCurrentTime :: m UTCTime
|
2016-11-30 09:46:08 -05:00
|
|
|
getCurrentTimeZone :: m TimeZone
|
2016-11-21 09:30:08 -05:00
|
|
|
newStdGen :: m StdGen
|
|
|
|
newUniqueHash :: m Int
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL :: String -> m (B.ByteString, Maybe MimeType)
|
2016-11-21 09:30:08 -05:00
|
|
|
readFileLazy :: FilePath -> m BL.ByteString
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict :: FilePath -> m B.ByteString
|
2017-02-23 16:21:59 +01:00
|
|
|
readDataFile :: Maybe FilePath -> FilePath -> m B.ByteString
|
2016-11-21 09:30:08 -05:00
|
|
|
glob :: String -> m [FilePath]
|
2016-11-30 12:55:30 -05:00
|
|
|
getModificationTime :: FilePath -> m UTCTime
|
2016-12-08 21:32:25 +01:00
|
|
|
getCommonState :: m CommonState
|
|
|
|
putCommonState :: CommonState -> m ()
|
2016-11-30 12:55:30 -05:00
|
|
|
|
2016-12-08 21:32:25 +01:00
|
|
|
getsCommonState :: (CommonState -> a) -> m a
|
|
|
|
getsCommonState f = f <$> getCommonState
|
2016-12-05 11:30:55 +01:00
|
|
|
|
2016-12-08 21:32:25 +01:00
|
|
|
modifyCommonState :: (CommonState -> CommonState) -> m ()
|
|
|
|
modifyCommonState f = getCommonState >>= putCommonState . f
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput :: LogMessage -> m ()
|
2017-01-22 22:10:11 +01:00
|
|
|
|
2016-12-02 09:11:24 -05:00
|
|
|
-- Functions defined for all PandocMonad instances
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2017-01-22 22:10:11 +01:00
|
|
|
setVerbosity :: PandocMonad m => Verbosity -> m ()
|
|
|
|
setVerbosity verbosity =
|
|
|
|
modifyCommonState $ \st -> st{ stVerbosity = verbosity }
|
|
|
|
|
2017-02-10 23:59:47 +01:00
|
|
|
getLog :: PandocMonad m => m [LogMessage]
|
2017-01-22 23:49:05 +01:00
|
|
|
getLog = reverse <$> getsCommonState stLog
|
2017-01-22 22:10:11 +01:00
|
|
|
|
2017-02-10 23:59:47 +01:00
|
|
|
report :: PandocMonad m => LogMessage -> m ()
|
|
|
|
report msg = do
|
2017-01-22 23:49:05 +01:00
|
|
|
verbosity <- getsCommonState stVerbosity
|
2017-02-10 23:59:47 +01:00
|
|
|
let level = messageVerbosity msg
|
2017-01-22 23:49:05 +01:00
|
|
|
when (level <= verbosity) $ do
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput msg
|
2017-02-11 09:06:49 +01:00
|
|
|
unless (level == DEBUG) $
|
|
|
|
modifyCommonState $ \st -> st{ stLog = msg : stLog st }
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
setMediaBag :: PandocMonad m => MediaBag -> m ()
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
getMediaBag :: PandocMonad m => m MediaBag
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
getMediaBag = getsCommonState stMediaBag
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
|
2016-12-13 20:22:50 -05:00
|
|
|
insertMedia fp mime bs = do
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
mb <- getsCommonState stMediaBag
|
2016-12-13 20:22:50 -05:00
|
|
|
let mb' = MB.insertMedia fp mime bs mb
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
modifyCommonState $ \st -> st{stMediaBag = mb' }
|
2016-12-13 21:02:57 -05:00
|
|
|
|
2016-12-01 15:21:49 -05:00
|
|
|
getInputFiles :: PandocMonad m => m (Maybe [FilePath])
|
2016-12-08 21:32:25 +01:00
|
|
|
getInputFiles = getsCommonState stInputFiles
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
getOutputFile :: PandocMonad m => m (Maybe FilePath)
|
2016-12-08 21:32:25 +01:00
|
|
|
getOutputFile = getsCommonState stOutputFile
|
2016-12-01 15:21:49 -05:00
|
|
|
|
2017-02-24 13:48:07 +01:00
|
|
|
setResourcePath :: PandocMonad m => [FilePath] -> m ()
|
|
|
|
setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
|
|
|
|
|
|
|
|
getResourcePath :: PandocMonad m => m [FilePath]
|
|
|
|
getResourcePath = getsCommonState stResourcePath
|
|
|
|
|
2016-11-21 09:30:08 -05:00
|
|
|
getPOSIXTime :: (PandocMonad m) => m POSIXTime
|
|
|
|
getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
|
|
|
|
|
2016-11-30 09:46:08 -05:00
|
|
|
getZonedTime :: (PandocMonad m) => m ZonedTime
|
|
|
|
getZonedTime = do
|
|
|
|
t <- getCurrentTime
|
|
|
|
tz <- getCurrentTimeZone
|
|
|
|
return $ utcToZonedTime tz t
|
|
|
|
|
2017-02-07 21:42:35 +01:00
|
|
|
-- | Read file, checking in any number of directories.
|
2017-02-10 23:59:47 +01:00
|
|
|
readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
|
|
|
|
readFileFromDirs [] _ = return Nothing
|
2017-02-07 21:42:35 +01:00
|
|
|
readFileFromDirs (d:ds) f = catchError
|
2017-02-10 23:59:47 +01:00
|
|
|
((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
|
2017-02-07 21:42:35 +01:00
|
|
|
(\_ -> readFileFromDirs ds f)
|
|
|
|
|
2016-12-02 09:11:24 -05:00
|
|
|
--
|
|
|
|
|
2017-02-24 13:48:07 +01:00
|
|
|
data CommonState = CommonState { stLog :: [LogMessage]
|
|
|
|
, stMediaBag :: MediaBag
|
|
|
|
, stInputFiles :: Maybe [FilePath]
|
|
|
|
, stOutputFile :: Maybe FilePath
|
|
|
|
, stResourcePath :: [FilePath]
|
|
|
|
, stVerbosity :: Verbosity
|
2016-11-30 12:55:30 -05:00
|
|
|
}
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
instance Default CommonState where
|
2017-01-22 22:10:11 +01:00
|
|
|
def = CommonState { stLog = []
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
, stMediaBag = mempty
|
2016-12-01 15:21:49 -05:00
|
|
|
, stInputFiles = Nothing
|
|
|
|
, stOutputFile = Nothing
|
2017-02-24 13:48:07 +01:00
|
|
|
, stResourcePath = ["."]
|
2017-01-22 22:10:11 +01:00
|
|
|
, stVerbosity = WARNING
|
2016-11-30 12:55:30 -05:00
|
|
|
}
|
|
|
|
|
2016-12-01 12:13:51 -05:00
|
|
|
runIO :: PandocIO a -> IO (Either PandocError a)
|
2016-12-01 15:21:49 -05:00
|
|
|
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
|
2016-11-24 09:54:18 -05:00
|
|
|
|
2016-12-01 10:00:21 -05:00
|
|
|
withMediaBag :: PandocMonad m => m a -> m (a, MediaBag)
|
|
|
|
withMediaBag ma = ((,)) <$> ma <*> getMediaBag
|
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
runIOorExplode :: PandocIO a -> IO a
|
2016-12-03 22:35:58 +01:00
|
|
|
runIOorExplode ma = runIO ma >>= handleError
|
2016-12-01 10:00:21 -05:00
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
newtype PandocIO a = PandocIO {
|
2016-12-01 15:21:49 -05:00
|
|
|
unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
|
2016-11-27 15:32:28 -05:00
|
|
|
} deriving ( MonadIO
|
|
|
|
, Functor
|
|
|
|
, Applicative
|
|
|
|
, Monad
|
2016-12-01 12:13:51 -05:00
|
|
|
, MonadError PandocError
|
2016-11-27 15:32:28 -05:00
|
|
|
)
|
2016-11-24 09:54:18 -05:00
|
|
|
|
2017-02-23 16:21:59 +01:00
|
|
|
liftIOError :: (String -> IO a) -> String -> PandocIO a
|
|
|
|
liftIOError f u = do
|
|
|
|
res <- liftIO $ tryIOError $ f u
|
|
|
|
case res of
|
|
|
|
Left e -> throwError $ PandocIOError u e
|
|
|
|
Right r -> return r
|
|
|
|
|
2016-11-27 15:45:08 +01:00
|
|
|
instance PandocMonad PandocIO where
|
2016-11-24 09:54:18 -05:00
|
|
|
lookupEnv = liftIO . IO.lookupEnv
|
|
|
|
getCurrentTime = liftIO IO.getCurrentTime
|
2016-11-30 09:46:08 -05:00
|
|
|
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
2016-11-24 09:54:18 -05:00
|
|
|
newStdGen = liftIO IO.newStdGen
|
|
|
|
newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
|
2017-02-23 16:24:20 +01:00
|
|
|
openURL u = do
|
|
|
|
report $ Fetching u
|
|
|
|
liftIOError IO.openURL u
|
2017-02-23 16:21:59 +01:00
|
|
|
readFileLazy s = liftIOError BL.readFile s
|
|
|
|
readFileStrict s = liftIOError B.readFile s
|
|
|
|
readDataFile mfp fname = liftIOError (IO.readDataFile mfp) fname
|
2016-11-24 09:54:18 -05:00
|
|
|
glob = liftIO . IO.glob
|
2017-02-23 16:21:59 +01:00
|
|
|
getModificationTime fp = liftIOError IO.getModificationTime fp
|
2016-12-08 21:32:25 +01:00
|
|
|
getCommonState = PandocIO $ lift get
|
|
|
|
putCommonState x = PandocIO $ lift $ put x
|
2017-03-04 11:26:49 +01:00
|
|
|
logOutput msg = liftIO $ do
|
|
|
|
UTF8.hPutStr stderr $ printf "%-7s " (show (messageVerbosity msg))
|
|
|
|
hangingIndent 2 $ lines $ showLogMessage msg
|
|
|
|
|
|
|
|
hangingIndent :: Int -> [String] -> IO ()
|
|
|
|
hangingIndent _level [] = return ()
|
|
|
|
hangingIndent level (l:ls) = do
|
|
|
|
UTF8.hPutStrLn stderr l
|
|
|
|
mapM_ go ls
|
|
|
|
where go l' = do UTF8.hPutStr stderr ind
|
|
|
|
UTF8.hPutStrLn stderr l'
|
|
|
|
ind = replicate level ' '
|
2016-12-11 23:10:46 +01:00
|
|
|
|
|
|
|
-- | Specialized version of parseURIReference that disallows
|
|
|
|
-- single-letter schemes. Reason: these are usually windows absolute
|
|
|
|
-- paths.
|
|
|
|
parseURIReference' :: String -> Maybe URI
|
|
|
|
parseURIReference' s =
|
|
|
|
case parseURIReference s of
|
|
|
|
Just u
|
|
|
|
| length (uriScheme u) > 2 -> Just u
|
|
|
|
| null (uriScheme u) -> Just u -- protocol-relative
|
|
|
|
_ -> Nothing
|
|
|
|
|
|
|
|
-- | Fetch an image or other item from the local filesystem or the net.
|
|
|
|
-- Returns raw content and maybe mime type.
|
|
|
|
fetchItem :: PandocMonad m
|
|
|
|
=> Maybe String
|
|
|
|
-> String
|
|
|
|
-> m (B.ByteString, Maybe MimeType)
|
|
|
|
fetchItem sourceURL s = do
|
2016-12-13 21:44:02 -05:00
|
|
|
mediabag <- getMediaBag
|
2016-12-11 23:10:46 +01:00
|
|
|
case lookupMedia s mediabag of
|
2016-12-13 21:44:02 -05:00
|
|
|
Just (mime, bs) -> return $ (BL.toStrict bs, Just mime)
|
|
|
|
Nothing -> downloadOrRead sourceURL s
|
|
|
|
|
|
|
|
downloadOrRead :: PandocMonad m
|
|
|
|
=> Maybe String
|
|
|
|
-> String
|
|
|
|
-> m (B.ByteString, Maybe MimeType)
|
|
|
|
downloadOrRead sourceURL s = do
|
|
|
|
case (sourceURL >>= parseURIReference' .
|
|
|
|
ensureEscaped, ensureEscaped s) of
|
|
|
|
(Just u, s') -> -- try fetching from relative path at source
|
|
|
|
case parseURIReference' s' of
|
|
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
|
|
|
|
Nothing -> openURL s' -- will throw error
|
|
|
|
(Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
|
|
|
|
case parseURIReference' s' of
|
|
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
|
|
|
|
Nothing -> openURL s' -- will throw error
|
|
|
|
(Nothing, s') ->
|
|
|
|
case parseURI s' of -- requires absolute URI
|
|
|
|
-- We don't want to treat C:/ as a scheme:
|
|
|
|
Just u' | length (uriScheme u') > 2 -> openURL (show u')
|
|
|
|
Just u' | uriScheme u' == "file:" ->
|
|
|
|
readLocalFile $ dropWhile (=='/') (uriPath u')
|
|
|
|
_ -> readLocalFile fp -- get from local file system
|
|
|
|
where readLocalFile f = do
|
2017-02-24 13:48:07 +01:00
|
|
|
resourcePath <- getResourcePath
|
|
|
|
cont <- if isRelative f
|
|
|
|
then withPaths resourcePath readFileStrict f
|
|
|
|
else readFileStrict f
|
2017-02-23 16:21:59 +01:00
|
|
|
return (cont, mime)
|
2016-12-13 21:44:02 -05:00
|
|
|
httpcolon = URI{ uriScheme = "http:",
|
|
|
|
uriAuthority = Nothing,
|
|
|
|
uriPath = "",
|
|
|
|
uriQuery = "",
|
|
|
|
uriFragment = "" }
|
|
|
|
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
|
|
|
|
fp = unEscapeString $ dropFragmentAndQuery s
|
|
|
|
mime = case takeExtension fp of
|
|
|
|
".gz" -> getMimeType $ dropExtension fp
|
|
|
|
".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
|
|
|
|
x -> getMimeType x
|
|
|
|
ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
|
|
|
|
convertSlash '\\' = '/'
|
|
|
|
convertSlash x = x
|
2016-12-11 23:10:46 +01:00
|
|
|
|
2017-02-24 13:48:07 +01:00
|
|
|
withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
|
|
|
|
withPaths [] _ fp = throwError $ PandocIOError fp
|
|
|
|
(userError "file not found in resource path")
|
|
|
|
withPaths (p:ps) action fp =
|
2017-02-24 14:29:56 +01:00
|
|
|
catchError (action (p </> fp))
|
2017-02-24 13:48:07 +01:00
|
|
|
(\_ -> withPaths ps action fp)
|
|
|
|
|
2016-11-27 15:31:17 -05:00
|
|
|
data PureState = PureState { stStdGen :: StdGen
|
2016-11-21 09:30:08 -05:00
|
|
|
, stWord8Store :: [Word8] -- should be
|
|
|
|
-- inifinite,
|
|
|
|
-- i.e. [1..]
|
|
|
|
, stUniqStore :: [Int] -- should be
|
|
|
|
-- inifinite and
|
|
|
|
-- contain every
|
|
|
|
-- element at most
|
|
|
|
-- once, e.g. [1..]
|
2016-12-03 23:25:36 -05:00
|
|
|
, stEnv :: [(String, String)]
|
|
|
|
, stTime :: UTCTime
|
|
|
|
, stTimeZone :: TimeZone
|
|
|
|
, stReferenceDocx :: Archive
|
|
|
|
, stReferenceODT :: Archive
|
|
|
|
, stFiles :: FileTree
|
|
|
|
, stUserDataDir :: FileTree
|
|
|
|
, stCabalDataDir :: FileTree
|
2016-12-08 21:16:57 +01:00
|
|
|
, stFontFiles :: [FilePath]
|
2016-11-21 09:30:08 -05:00
|
|
|
}
|
|
|
|
|
2016-11-27 15:31:17 -05:00
|
|
|
instance Default PureState where
|
|
|
|
def = PureState { stStdGen = mkStdGen 1848
|
2016-11-24 11:52:06 -05:00
|
|
|
, stWord8Store = [1..]
|
|
|
|
, stUniqStore = [1..]
|
2016-12-03 23:25:36 -05:00
|
|
|
, stEnv = [("USER", "pandoc-user")]
|
|
|
|
, stTime = posixSecondsToUTCTime 0
|
|
|
|
, stTimeZone = utc
|
|
|
|
, stReferenceDocx = emptyArchive
|
|
|
|
, stReferenceODT = emptyArchive
|
|
|
|
, stFiles = mempty
|
|
|
|
, stUserDataDir = mempty
|
|
|
|
, stCabalDataDir = mempty
|
|
|
|
, stFontFiles = []
|
2016-11-24 11:52:06 -05:00
|
|
|
}
|
2016-12-03 23:39:01 -05:00
|
|
|
|
|
|
|
|
|
|
|
getPureState :: PandocPure PureState
|
|
|
|
getPureState = PandocPure $ lift $ lift $ get
|
|
|
|
|
|
|
|
getsPureState :: (PureState -> a) -> PandocPure a
|
|
|
|
getsPureState f = f <$> getPureState
|
|
|
|
|
|
|
|
putPureState :: PureState -> PandocPure ()
|
|
|
|
putPureState ps= PandocPure $ lift $ lift $ put ps
|
|
|
|
|
|
|
|
modifyPureState :: (PureState -> PureState) -> PandocPure ()
|
|
|
|
modifyPureState f = PandocPure $ lift $ lift $ modify f
|
2016-12-08 21:16:57 +01:00
|
|
|
|
|
|
|
|
2016-11-30 09:21:21 -05:00
|
|
|
data FileInfo = FileInfo { infoFileMTime :: UTCTime
|
|
|
|
, infoFileContents :: B.ByteString
|
|
|
|
}
|
|
|
|
|
|
|
|
newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
|
|
|
|
deriving (Monoid)
|
|
|
|
|
|
|
|
getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
|
|
|
|
getFileInfo fp tree = M.lookup fp $ unFileTree tree
|
2016-11-24 11:52:06 -05:00
|
|
|
|
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
newtype PandocPure a = PandocPure {
|
2016-12-01 12:13:51 -05:00
|
|
|
unPandocPure :: ExceptT PandocError
|
2016-12-01 18:35:05 -05:00
|
|
|
(StateT CommonState (State PureState)) a
|
2016-11-27 15:32:28 -05:00
|
|
|
} deriving ( Functor
|
|
|
|
, Applicative
|
|
|
|
, Monad
|
2016-12-01 12:13:51 -05:00
|
|
|
, MonadError PandocError
|
2016-11-27 15:32:28 -05:00
|
|
|
)
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-01 12:13:51 -05:00
|
|
|
runPure :: PandocPure a -> Either PandocError a
|
2016-12-01 15:21:49 -05:00
|
|
|
runPure x = flip evalState def $
|
|
|
|
flip evalStateT def $
|
|
|
|
runExceptT $
|
|
|
|
unPandocPure x
|
2016-11-24 11:52:06 -05:00
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
instance PandocMonad PandocPure where
|
2016-12-03 23:39:01 -05:00
|
|
|
lookupEnv s = do
|
|
|
|
env <- getsPureState stEnv
|
2016-11-21 09:30:08 -05:00
|
|
|
return (lookup s env)
|
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
getCurrentTime = getsPureState stTime
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
getCurrentTimeZone = getsPureState stTimeZone
|
2016-11-30 09:46:08 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
newStdGen = do
|
|
|
|
g <- getsPureState stStdGen
|
2016-11-21 09:30:08 -05:00
|
|
|
let (_, nxtGen) = next g
|
2016-12-03 23:39:01 -05:00
|
|
|
modifyPureState $ \st -> st { stStdGen = nxtGen }
|
2016-11-21 09:30:08 -05:00
|
|
|
return g
|
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
newUniqueHash = do
|
|
|
|
uniqs <- getsPureState stUniqStore
|
2016-11-21 09:30:08 -05:00
|
|
|
case uniqs of
|
|
|
|
u : us -> do
|
2016-12-03 23:39:01 -05:00
|
|
|
modifyPureState $ \st -> st { stUniqStore = us }
|
2016-11-21 09:30:08 -05:00
|
|
|
return u
|
|
|
|
_ -> M.fail "uniq store ran out of elements"
|
2017-02-23 16:21:59 +01:00
|
|
|
openURL u = throwError $ PandocIOError u $
|
|
|
|
userError "Cannot open URL in PandocPure"
|
2016-12-03 23:39:01 -05:00
|
|
|
readFileLazy fp = do
|
|
|
|
fps <- getsPureState stFiles
|
2016-11-30 09:21:21 -05:00
|
|
|
case infoFileContents <$> getFileInfo fp fps of
|
2016-11-21 09:30:08 -05:00
|
|
|
Just bs -> return (BL.fromStrict bs)
|
2017-02-23 16:21:59 +01:00
|
|
|
Nothing -> throwError $ PandocIOError fp
|
|
|
|
(userError "File not found in PureState")
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict fp = do
|
|
|
|
fps <- getsPureState stFiles
|
|
|
|
case infoFileContents <$> getFileInfo fp fps of
|
|
|
|
Just bs -> return bs
|
2017-02-23 16:21:59 +01:00
|
|
|
Nothing -> throwError $ PandocIOError fp
|
|
|
|
(userError "File not found in PureState")
|
2016-11-21 09:30:08 -05:00
|
|
|
readDataFile Nothing "reference.docx" = do
|
2016-12-11 16:21:08 +01:00
|
|
|
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
|
2016-11-21 09:30:08 -05:00
|
|
|
readDataFile Nothing "reference.odt" = do
|
2016-12-11 16:21:08 +01:00
|
|
|
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
|
2016-11-21 09:30:08 -05:00
|
|
|
readDataFile Nothing fname = do
|
|
|
|
let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict fname'
|
2016-12-03 23:39:01 -05:00
|
|
|
readDataFile (Just userDir) fname = do
|
|
|
|
userDirFiles <- getsPureState stUserDataDir
|
2016-11-30 09:21:21 -05:00
|
|
|
case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
|
2016-11-21 09:30:08 -05:00
|
|
|
Just bs -> return bs
|
2016-12-03 23:39:01 -05:00
|
|
|
Nothing -> readDataFile Nothing fname
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
glob s = do
|
|
|
|
fontFiles <- getsPureState stFontFiles
|
2016-11-21 09:30:08 -05:00
|
|
|
return (filter (match (compile s)) fontFiles)
|
2016-11-27 15:29:46 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
getModificationTime fp = do
|
|
|
|
fps <- getsPureState stFiles
|
2016-11-30 12:55:30 -05:00
|
|
|
case infoFileMTime <$> (getFileInfo fp fps) of
|
|
|
|
Just tm -> return tm
|
2017-02-23 16:21:59 +01:00
|
|
|
Nothing -> throwError $ PandocIOError fp
|
|
|
|
(userError "Can't get modification time")
|
2016-11-30 12:55:30 -05:00
|
|
|
|
2016-12-08 21:32:25 +01:00
|
|
|
getCommonState = PandocPure $ lift $ get
|
|
|
|
putCommonState x = PandocPure $ lift $ put x
|
|
|
|
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput _msg = return ()
|
2017-01-22 23:49:05 +01:00
|
|
|
|
2017-02-07 22:33:05 +01:00
|
|
|
instance PandocMonad m => PandocMonad (ParsecT s st m) where
|
2016-12-05 11:30:55 +01:00
|
|
|
lookupEnv = lift . lookupEnv
|
2016-12-05 11:36:23 +01:00
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-05 11:30:55 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-05 11:30:55 +01:00
|
|
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
|
|
|
glob = lift . glob
|
|
|
|
getModificationTime = lift . getModificationTime
|
2016-12-08 21:32:25 +01:00
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 21:32:25 +01:00
|
|
|
|
2016-12-08 22:12:04 +01:00
|
|
|
instance PandocMonad m => PandocMonad (ReaderT r m) where
|
|
|
|
lookupEnv = lift . lookupEnv
|
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-08 22:12:04 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-08 22:12:04 +01:00
|
|
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
|
|
|
glob = lift . glob
|
|
|
|
getModificationTime = lift . getModificationTime
|
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 22:12:04 +01:00
|
|
|
|
|
|
|
instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
|
|
|
|
lookupEnv = lift . lookupEnv
|
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-08 22:12:04 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-08 22:12:04 +01:00
|
|
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
|
|
|
glob = lift . glob
|
|
|
|
getModificationTime = lift . getModificationTime
|
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 22:12:04 +01:00
|
|
|
|
|
|
|
instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
|
|
|
|
lookupEnv = lift . lookupEnv
|
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-08 22:12:04 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-08 22:12:04 +01:00
|
|
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
|
|
|
glob = lift . glob
|
|
|
|
getModificationTime = lift . getModificationTime
|
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 22:12:04 +01:00
|
|
|
|
|
|
|
instance PandocMonad m => PandocMonad (StateT st m) where
|
|
|
|
lookupEnv = lift . lookupEnv
|
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-08 22:12:04 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-08 22:12:04 +01:00
|
|
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
|
|
|
glob = lift . glob
|
|
|
|
getModificationTime = lift . getModificationTime
|
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 22:12:04 +01:00
|
|
|
|