2017-08-11 11:56:54 -07:00
|
|
|
{-# LANGUAGE CPP #-}
|
2017-06-01 14:19:43 +02:00
|
|
|
{-# LANGUAGE DeriveFunctor #-}
|
|
|
|
{-# LANGUAGE TypeSynonymInstances #-}
|
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
2017-08-19 16:39:22 -07:00
|
|
|
{-# LANGUAGE UndecidableInstances #-}
|
2017-08-20 10:43:31 -07:00
|
|
|
#if MIN_VERSION_base(4,8,0)
|
|
|
|
#else
|
|
|
|
{-# LANGUAGE OverlappingInstances #-}
|
|
|
|
#endif
|
2016-11-21 09:30:08 -05:00
|
|
|
|
|
|
|
{-
|
2017-07-19 21:31:46 +02:00
|
|
|
Copyright (C) 2016-17 Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
|
|
and John MacFarlane.
|
2016-11-21 09:30:08 -05:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
-}
|
|
|
|
|
|
|
|
{- |
|
|
|
|
Module : Text.Pandoc.Class
|
2017-07-19 21:31:46 +02:00
|
|
|
Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane
|
2016-11-21 09:30:08 -05:00
|
|
|
License : GNU GPL, version 2 or above
|
|
|
|
|
|
|
|
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
|
|
|
|
Stability : alpha
|
|
|
|
Portability : portable
|
|
|
|
|
2017-07-29 20:54:25 +02:00
|
|
|
This module defines a type class, 'PandocMonad', for pandoc readers
|
|
|
|
and writers. A pure instance 'PandocPure' and an impure instance
|
|
|
|
'PandocIO' are provided. This allows users of the library to choose
|
|
|
|
whether they want conversions to perform IO operations (such as
|
|
|
|
reading include files or images).
|
2016-11-21 09:30:08 -05:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Text.Pandoc.Class ( PandocMonad(..)
|
2016-12-01 15:39:54 -05:00
|
|
|
, CommonState(..)
|
2016-11-27 15:31:17 -05:00
|
|
|
, PureState(..)
|
2016-12-03 23:39:01 -05:00
|
|
|
, getPureState
|
|
|
|
, getsPureState
|
|
|
|
, putPureState
|
|
|
|
, modifyPureState
|
2016-11-21 09:30:08 -05:00
|
|
|
, getPOSIXTime
|
2016-11-30 09:46:08 -05:00
|
|
|
, getZonedTime
|
2017-02-07 22:33:05 +01:00
|
|
|
, readFileFromDirs
|
2017-01-22 23:49:05 +01:00
|
|
|
, report
|
2017-06-19 22:04:01 +02:00
|
|
|
, setTrace
|
2017-01-22 22:10:11 +01:00
|
|
|
, getLog
|
|
|
|
, setVerbosity
|
2016-12-01 15:21:49 -05:00
|
|
|
, getMediaBag
|
|
|
|
, setMediaBag
|
|
|
|
, insertMedia
|
2017-08-10 23:16:10 -07:00
|
|
|
, setUserDataDir
|
|
|
|
, getUserDataDir
|
2016-12-11 23:10:46 +01:00
|
|
|
, fetchItem
|
2016-12-01 15:21:49 -05:00
|
|
|
, getInputFiles
|
|
|
|
, getOutputFile
|
2017-02-24 13:48:07 +01:00
|
|
|
, setResourcePath
|
|
|
|
, getResourcePath
|
2016-11-24 09:54:18 -05:00
|
|
|
, PandocIO(..)
|
|
|
|
, PandocPure(..)
|
2017-01-27 11:15:42 +01:00
|
|
|
, FileTree(..)
|
2016-11-30 09:21:21 -05:00
|
|
|
, FileInfo(..)
|
2017-07-14 16:24:30 +02:00
|
|
|
, addToFileTree
|
2016-11-24 09:54:18 -05:00
|
|
|
, runIO
|
|
|
|
, runIOorExplode
|
2016-11-24 11:52:06 -05:00
|
|
|
, runPure
|
2017-08-11 11:56:54 -07:00
|
|
|
, readDefaultDataFile
|
|
|
|
, readDataFile
|
2017-05-07 21:03:18 +02:00
|
|
|
, fillMediaBag
|
2017-05-07 20:42:32 +02:00
|
|
|
, extractMedia
|
2017-08-11 11:56:54 -07:00
|
|
|
, toLang
|
|
|
|
, setTranslations
|
|
|
|
, translateTerm
|
2017-08-12 12:17:38 -07:00
|
|
|
, Translations
|
2017-08-11 11:56:54 -07:00
|
|
|
, Term(..)
|
2016-11-21 09:30:08 -05:00
|
|
|
) where
|
|
|
|
|
2016-12-10 23:41:37 +01:00
|
|
|
import Prelude hiding (readFile)
|
2016-11-24 11:52:06 -05:00
|
|
|
import System.Random (StdGen, next, mkStdGen)
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified System.Random as IO (newStdGen)
|
2017-08-11 23:24:55 -07:00
|
|
|
import Codec.Archive.Zip
|
2016-11-21 09:30:08 -05:00
|
|
|
import Data.Unique (hashUnique)
|
|
|
|
import qualified Data.Unique as IO (newUnique)
|
2017-08-11 11:56:54 -07:00
|
|
|
import qualified Text.Pandoc.Shared as Shared
|
2017-01-22 23:49:05 +01:00
|
|
|
import qualified Text.Pandoc.UTF8 as UTF8
|
2017-08-11 11:56:54 -07:00
|
|
|
import qualified System.Directory as Directory
|
2016-11-21 09:30:08 -05:00
|
|
|
import Text.Pandoc.Compat.Time (UTCTime)
|
2017-02-10 20:59:54 +01:00
|
|
|
import Text.Pandoc.Logging
|
2017-06-19 22:41:09 +02:00
|
|
|
import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
|
2017-05-07 20:42:32 +02:00
|
|
|
import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
|
|
|
|
import Text.Pandoc.Definition
|
2017-03-09 10:30:57 +01:00
|
|
|
import Data.Char (toLower)
|
2017-05-07 20:42:32 +02:00
|
|
|
import Data.Digest.Pure.SHA (sha1, showDigest)
|
|
|
|
import Data.Maybe (fromMaybe)
|
2016-11-24 11:52:06 -05:00
|
|
|
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
|
|
|
|
, posixSecondsToUTCTime
|
|
|
|
, POSIXTime )
|
2016-11-30 09:46:08 -05:00
|
|
|
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
|
2016-12-11 23:10:46 +01:00
|
|
|
import Network.URI ( escapeURIString, nonStrictRelativeTo,
|
|
|
|
unEscapeString, parseURIReference, isAllowedInURI,
|
|
|
|
parseURI, URI(..) )
|
2016-11-30 09:46:08 -05:00
|
|
|
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
2017-06-12 15:28:39 +02:00
|
|
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
|
2017-05-07 20:42:32 +02:00
|
|
|
import Text.Pandoc.Walk (walkM, walk)
|
2016-11-27 15:29:46 -05:00
|
|
|
import qualified Text.Pandoc.MediaBag as MB
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified Data.ByteString as B
|
|
|
|
import qualified Data.ByteString.Lazy as BL
|
|
|
|
import qualified System.Environment as IO (lookupEnv)
|
|
|
|
import System.FilePath.Glob (match, compile)
|
2017-07-15 11:38:43 +02:00
|
|
|
import System.Directory (createDirectoryIfMissing, getDirectoryContents,
|
2017-07-14 17:28:13 +02:00
|
|
|
doesDirectoryExist)
|
2017-08-11 23:11:37 -07:00
|
|
|
import System.FilePath ((</>), (<.>), takeDirectory,
|
2017-06-12 15:28:39 +02:00
|
|
|
takeExtension, dropExtension, isRelative, normalise)
|
2016-11-21 09:30:08 -05:00
|
|
|
import qualified System.FilePath.Glob as IO (glob)
|
2016-11-30 09:21:21 -05:00
|
|
|
import qualified System.Directory as IO (getModificationTime)
|
2016-12-10 23:41:37 +01:00
|
|
|
import Control.Monad as M (fail)
|
2017-06-16 23:29:37 +02:00
|
|
|
import Control.Monad.State.Strict
|
2016-12-10 23:41:37 +01:00
|
|
|
import Control.Monad.Except
|
2016-11-21 09:30:08 -05:00
|
|
|
import Data.Word (Word8)
|
2016-11-24 09:54:18 -05:00
|
|
|
import Data.Default
|
|
|
|
import System.IO.Error
|
2017-01-22 23:49:05 +01:00
|
|
|
import System.IO (stderr)
|
2016-11-30 09:21:21 -05:00
|
|
|
import qualified Data.Map as M
|
2016-12-01 12:13:51 -05:00
|
|
|
import Text.Pandoc.Error
|
2017-08-11 11:56:54 -07:00
|
|
|
import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
|
2017-08-12 12:17:38 -07:00
|
|
|
import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
|
|
|
|
readTranslations)
|
2017-06-19 22:04:01 +02:00
|
|
|
import qualified Debug.Trace
|
2017-08-11 11:56:54 -07:00
|
|
|
#ifdef EMBED_DATA_FILES
|
|
|
|
import Text.Pandoc.Data (dataFiles)
|
2017-08-11 23:11:37 -07:00
|
|
|
import qualified System.FilePath.Posix as Posix
|
2017-08-11 23:57:35 -07:00
|
|
|
import System.FilePath (splitDirectories)
|
2017-08-11 11:56:54 -07:00
|
|
|
#else
|
2017-08-11 23:09:51 -07:00
|
|
|
import qualified Paths_pandoc as Paths
|
2017-08-11 11:56:54 -07:00
|
|
|
#endif
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | The PandocMonad typeclass contains all the potentially
|
|
|
|
-- IO-related functions used in pandoc's readers and writers.
|
|
|
|
-- Instances of this typeclass may implement these functions
|
|
|
|
-- in IO (as in 'PandocIO') or using an internal state that
|
|
|
|
-- represents a file system, time, and so on (as in 'PandocPure').
|
2016-12-08 22:12:04 +01:00
|
|
|
class (Functor m, Applicative m, Monad m, MonadError PandocError m)
|
|
|
|
=> PandocMonad m where
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Lookup an environment variable.
|
2016-11-21 09:30:08 -05:00
|
|
|
lookupEnv :: String -> m (Maybe String)
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Get the current (UTC) time.
|
2016-11-21 09:30:08 -05:00
|
|
|
getCurrentTime :: m UTCTime
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Get the locale's time zone.
|
2016-11-30 09:46:08 -05:00
|
|
|
getCurrentTimeZone :: m TimeZone
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Return a new generator for random numbers.
|
2016-11-21 09:30:08 -05:00
|
|
|
newStdGen :: m StdGen
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Return a new unique integer.
|
2016-11-21 09:30:08 -05:00
|
|
|
newUniqueHash :: m Int
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Retrieve contents and mime type from a URL, raising
|
|
|
|
-- an error on failure.
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL :: String -> m (B.ByteString, Maybe MimeType)
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Read the lazy ByteString contents from a file path,
|
|
|
|
-- raising an error on failure.
|
2016-11-21 09:30:08 -05:00
|
|
|
readFileLazy :: FilePath -> m BL.ByteString
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Read the strict ByteString contents from a file path,
|
|
|
|
-- raising an error on failure.
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict :: FilePath -> m B.ByteString
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Return a list of paths that match a glob, relative to
|
|
|
|
-- the working directory. See 'System.FilePath.Glob' for
|
|
|
|
-- the glob syntax.
|
2016-11-21 09:30:08 -05:00
|
|
|
glob :: String -> m [FilePath]
|
2017-08-11 11:56:54 -07:00
|
|
|
-- | Returns True if file exists.
|
|
|
|
fileExists :: FilePath -> m Bool
|
2017-08-11 23:09:51 -07:00
|
|
|
-- | Returns the path of data file.
|
|
|
|
getDataFileName :: FilePath -> m FilePath
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Return the modification time of a file.
|
2016-11-30 12:55:30 -05:00
|
|
|
getModificationTime :: FilePath -> m UTCTime
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Get the value of the 'CommonState' used by all instances
|
|
|
|
-- of 'PandocMonad'.
|
2016-12-08 21:32:25 +01:00
|
|
|
getCommonState :: m CommonState
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Set the value of the 'CommonState' used by all instances
|
|
|
|
-- of 'PandocMonad'.
|
|
|
|
-- | Get the value of a specific field of 'CommonState'.
|
2016-12-08 21:32:25 +01:00
|
|
|
putCommonState :: CommonState -> m ()
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Get the value of a specific field of 'CommonState'.
|
2016-12-08 21:32:25 +01:00
|
|
|
getsCommonState :: (CommonState -> a) -> m a
|
|
|
|
getsCommonState f = f <$> getCommonState
|
2017-07-19 21:31:46 +02:00
|
|
|
-- | Modify the 'CommonState'.
|
2016-12-08 21:32:25 +01:00
|
|
|
modifyCommonState :: (CommonState -> CommonState) -> m ()
|
|
|
|
modifyCommonState f = getCommonState >>= putCommonState . f
|
2017-07-19 21:31:46 +02:00
|
|
|
-- Output a log message.
|
|
|
|
logOutput :: LogMessage -> m ()
|
2017-07-29 20:54:25 +02:00
|
|
|
-- Output a debug message to sterr, using 'Debug.Trace.trace',
|
|
|
|
-- if tracing is enabled. Note: this writes to stderr even in
|
|
|
|
-- pure instances.
|
2017-06-19 22:04:01 +02:00
|
|
|
trace :: String -> m ()
|
|
|
|
trace msg = do
|
|
|
|
tracing <- getsCommonState stTrace
|
|
|
|
when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ())
|
|
|
|
|
2017-07-19 21:31:46 +02:00
|
|
|
-- * Functions defined for all PandocMonad instances
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2017-07-29 21:04:25 +02:00
|
|
|
-- | Set the verbosity level.
|
2017-01-22 22:10:11 +01:00
|
|
|
setVerbosity :: PandocMonad m => Verbosity -> m ()
|
|
|
|
setVerbosity verbosity =
|
|
|
|
modifyCommonState $ \st -> st{ stVerbosity = verbosity }
|
|
|
|
|
2017-07-29 21:04:25 +02:00
|
|
|
-- Get the accomulated log messages (in temporal order).
|
2017-02-10 23:59:47 +01:00
|
|
|
getLog :: PandocMonad m => m [LogMessage]
|
2017-01-22 23:49:05 +01:00
|
|
|
getLog = reverse <$> getsCommonState stLog
|
2017-01-22 22:10:11 +01:00
|
|
|
|
2017-07-29 21:04:25 +02:00
|
|
|
-- | Log a message using 'logOutput'. Note that
|
|
|
|
-- 'logOutput' is called only if the verbosity
|
|
|
|
-- level exceeds the level of the message, but
|
|
|
|
-- the message is added to the list of log messages
|
|
|
|
-- that will be retrieved by 'getLog' regardless
|
|
|
|
-- of its verbosity level.
|
2017-02-10 23:59:47 +01:00
|
|
|
report :: PandocMonad m => LogMessage -> m ()
|
|
|
|
report msg = do
|
2017-01-22 23:49:05 +01:00
|
|
|
verbosity <- getsCommonState stVerbosity
|
2017-02-10 23:59:47 +01:00
|
|
|
let level = messageVerbosity msg
|
2017-06-19 22:04:01 +02:00
|
|
|
when (level <= verbosity) $ logOutput msg
|
|
|
|
modifyCommonState $ \st -> st{ stLog = msg : stLog st }
|
|
|
|
|
2017-07-29 21:04:25 +02:00
|
|
|
-- | Determine whether tracing is enabled. This affects
|
|
|
|
-- the behavior of 'trace'. If tracing is not enabled,
|
|
|
|
-- 'trace' does nothing.
|
2017-06-19 22:04:01 +02:00
|
|
|
setTrace :: PandocMonad m => Bool -> m ()
|
|
|
|
setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
|
2016-12-01 15:21:49 -05:00
|
|
|
|
2017-07-29 21:04:25 +02:00
|
|
|
-- | Initialize the media bag.
|
2016-12-01 15:21:49 -05:00
|
|
|
setMediaBag :: PandocMonad m => MediaBag -> m ()
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
getMediaBag :: PandocMonad m => m MediaBag
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
getMediaBag = getsCommonState stMediaBag
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
|
2016-12-13 20:22:50 -05:00
|
|
|
insertMedia fp mime bs = do
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
mb <- getsCommonState stMediaBag
|
2016-12-13 20:22:50 -05:00
|
|
|
let mb' = MB.insertMedia fp mime bs mb
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
modifyCommonState $ \st -> st{stMediaBag = mb' }
|
2016-12-13 21:02:57 -05:00
|
|
|
|
2016-12-01 15:21:49 -05:00
|
|
|
getInputFiles :: PandocMonad m => m (Maybe [FilePath])
|
2016-12-08 21:32:25 +01:00
|
|
|
getInputFiles = getsCommonState stInputFiles
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
getOutputFile :: PandocMonad m => m (Maybe FilePath)
|
2016-12-08 21:32:25 +01:00
|
|
|
getOutputFile = getsCommonState stOutputFile
|
2016-12-01 15:21:49 -05:00
|
|
|
|
2017-02-24 13:48:07 +01:00
|
|
|
setResourcePath :: PandocMonad m => [FilePath] -> m ()
|
|
|
|
setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps}
|
|
|
|
|
|
|
|
getResourcePath :: PandocMonad m => m [FilePath]
|
|
|
|
getResourcePath = getsCommonState stResourcePath
|
|
|
|
|
2017-07-19 21:31:46 +02:00
|
|
|
getPOSIXTime :: PandocMonad m => m POSIXTime
|
2016-11-21 09:30:08 -05:00
|
|
|
getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
|
|
|
|
|
2017-07-19 21:31:46 +02:00
|
|
|
getZonedTime :: PandocMonad m => m ZonedTime
|
2016-11-30 09:46:08 -05:00
|
|
|
getZonedTime = do
|
|
|
|
t <- getCurrentTime
|
|
|
|
tz <- getCurrentTimeZone
|
|
|
|
return $ utcToZonedTime tz t
|
|
|
|
|
2017-02-07 21:42:35 +01:00
|
|
|
-- | Read file, checking in any number of directories.
|
2017-02-10 23:59:47 +01:00
|
|
|
readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
|
|
|
|
readFileFromDirs [] _ = return Nothing
|
2017-02-07 21:42:35 +01:00
|
|
|
readFileFromDirs (d:ds) f = catchError
|
2017-02-10 23:59:47 +01:00
|
|
|
((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
|
2017-02-07 21:42:35 +01:00
|
|
|
(\_ -> readFileFromDirs ds f)
|
|
|
|
|
2016-12-02 09:11:24 -05:00
|
|
|
--
|
|
|
|
|
2017-07-29 20:54:25 +02:00
|
|
|
-- | 'CommonState' represents state that is used by all
|
|
|
|
-- instances of 'PandocMonad'. Normally users should not
|
|
|
|
-- need to interact with it directly; instead, auxiliary
|
|
|
|
-- functions like 'setVerbosity' and 'withMediaBag' should be used.
|
2017-02-24 13:48:07 +01:00
|
|
|
data CommonState = CommonState { stLog :: [LogMessage]
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ A list of log messages in reverse order
|
2017-08-10 23:16:10 -07:00
|
|
|
, stUserDataDir :: Maybe FilePath
|
|
|
|
-- ^ Directory to search for data files
|
2017-02-24 13:48:07 +01:00
|
|
|
, stMediaBag :: MediaBag
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ Media parsed from binary containers
|
2017-08-11 11:56:54 -07:00
|
|
|
, stTranslations :: Maybe
|
|
|
|
(Lang, Maybe Translations)
|
|
|
|
-- ^ Translations for localization
|
2017-02-24 13:48:07 +01:00
|
|
|
, stInputFiles :: Maybe [FilePath]
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ List of input files from command line
|
2017-02-24 13:48:07 +01:00
|
|
|
, stOutputFile :: Maybe FilePath
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ Output file from command line
|
2017-02-24 13:48:07 +01:00
|
|
|
, stResourcePath :: [FilePath]
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ Path to search for resources like
|
|
|
|
-- included images
|
2017-02-24 13:48:07 +01:00
|
|
|
, stVerbosity :: Verbosity
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ Verbosity level
|
2017-06-19 22:04:01 +02:00
|
|
|
, stTrace :: Bool
|
2017-07-29 20:54:25 +02:00
|
|
|
-- ^ Controls whether tracing messages are
|
|
|
|
-- issued.
|
2016-11-30 12:55:30 -05:00
|
|
|
}
|
2016-12-01 15:21:49 -05:00
|
|
|
|
|
|
|
instance Default CommonState where
|
2017-01-22 22:10:11 +01:00
|
|
|
def = CommonState { stLog = []
|
2017-08-10 23:16:10 -07:00
|
|
|
, stUserDataDir = Nothing
|
Reverted deferred media bag code.
This was not actually being used. Since it adds considerable
complexity, it's best not to include it unless we are
actually going to use it.
The original thought was that we could do all loading in the
readers, always deferred and thus costless. This was supposed
to eliminate the need to traverse trees loading resources in
the docx, epub, odt writers and in PDF and SelfContained.
(It would also have the side effect that --extract-media could
be used with all input formats. This wasn't an intended side
effect, and it could be debated whether it's desirable, since
--extract-media was originally designed to extract the media
contained in a docx or odt or epub container.)
However, we never actually took the step of moving all of this
work to the readers, for a couple of reasons. The main reason
is that we'd still need to fetch resources in the docx,
epub, odt, pdf and self-contained writers, since the Pandoc AST might
have been built programatically and hence not generated by a reader.
So it's not clear that doing lazy loading in the readers would have
any real advantage.
I'm still not completely sure about this --- if we change our
minds it would be easy to undo this commit.
@jkr comments welcome.
2017-02-09 21:26:24 +01:00
|
|
|
, stMediaBag = mempty
|
2017-08-11 11:56:54 -07:00
|
|
|
, stTranslations = Nothing
|
2016-12-01 15:21:49 -05:00
|
|
|
, stInputFiles = Nothing
|
|
|
|
, stOutputFile = Nothing
|
2017-02-24 13:48:07 +01:00
|
|
|
, stResourcePath = ["."]
|
2017-01-22 22:10:11 +01:00
|
|
|
, stVerbosity = WARNING
|
2017-06-19 22:04:01 +02:00
|
|
|
, stTrace = False
|
2016-11-30 12:55:30 -05:00
|
|
|
}
|
|
|
|
|
2017-08-11 11:56:54 -07:00
|
|
|
-- | Convert BCP47 string to a Lang, issuing warning
|
|
|
|
-- if there are problems.
|
|
|
|
toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
|
|
|
|
toLang Nothing = return Nothing
|
|
|
|
toLang (Just s) =
|
|
|
|
case parseBCP47 s of
|
|
|
|
Left _ -> do
|
|
|
|
report $ InvalidLang s
|
|
|
|
return Nothing
|
|
|
|
Right l -> return (Just l)
|
|
|
|
|
|
|
|
-- | Select the language to use with 'translateTerm'.
|
|
|
|
-- Note that this does not read a translation file;
|
|
|
|
-- that is only done the first time 'translateTerm' is
|
|
|
|
-- used.
|
|
|
|
setTranslations :: PandocMonad m => Lang -> m ()
|
|
|
|
setTranslations lang =
|
|
|
|
modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
|
|
|
|
|
|
|
|
-- | Load term map.
|
|
|
|
getTranslations :: PandocMonad m => m Translations
|
|
|
|
getTranslations = do
|
|
|
|
mbtrans <- getsCommonState stTranslations
|
|
|
|
case mbtrans of
|
|
|
|
Nothing -> return mempty -- no language defined
|
|
|
|
Just (_, Just t) -> return t
|
|
|
|
Just (lang, Nothing) -> do -- read from file
|
2017-08-12 12:17:38 -07:00
|
|
|
let translationFile = "translations/" ++ renderLang lang ++ ".yaml"
|
|
|
|
let fallbackFile = "translations/" ++ langLanguage lang ++ ".yaml"
|
2017-08-12 12:25:33 -07:00
|
|
|
let getTrans fp = do
|
|
|
|
bs <- readDataFile fp
|
2017-08-11 11:56:54 -07:00
|
|
|
case readTranslations (UTF8.toString bs) of
|
|
|
|
Left e -> do
|
2017-08-12 12:25:33 -07:00
|
|
|
report $ CouldNotLoadTranslations (renderLang lang)
|
|
|
|
(fp ++ ": " ++ e)
|
2017-08-11 11:56:54 -07:00
|
|
|
-- make sure we don't try again...
|
|
|
|
modifyCommonState $ \st ->
|
|
|
|
st{ stTranslations = Nothing }
|
|
|
|
return mempty
|
|
|
|
Right t -> do
|
|
|
|
modifyCommonState $ \st ->
|
|
|
|
st{ stTranslations = Just (lang, Just t) }
|
|
|
|
return t
|
2017-08-12 12:25:33 -07:00
|
|
|
catchError (getTrans translationFile)
|
2017-08-11 11:56:54 -07:00
|
|
|
(\_ ->
|
2017-08-12 12:25:33 -07:00
|
|
|
catchError (getTrans fallbackFile)
|
2017-08-11 11:56:54 -07:00
|
|
|
(\e -> do
|
|
|
|
report $ CouldNotLoadTranslations (renderLang lang)
|
|
|
|
$ case e of
|
|
|
|
PandocCouldNotFindDataFileError _ ->
|
|
|
|
("data file " ++ fallbackFile ++ " not found")
|
|
|
|
_ -> ""
|
|
|
|
-- make sure we don't try again...
|
|
|
|
modifyCommonState $ \st -> st{ stTranslations = Nothing }
|
|
|
|
return mempty))
|
|
|
|
|
|
|
|
-- | Get a translation from the current term map.
|
|
|
|
-- Issue a warning if the term is not defined.
|
|
|
|
translateTerm :: PandocMonad m => Term -> m String
|
|
|
|
translateTerm term = do
|
2017-08-12 12:17:38 -07:00
|
|
|
translations <- getTranslations
|
|
|
|
case lookupTerm term translations of
|
2017-08-11 11:56:54 -07:00
|
|
|
Just s -> return s
|
|
|
|
Nothing -> do
|
|
|
|
report $ NoTranslation (show term)
|
|
|
|
return ""
|
|
|
|
|
2017-07-29 20:54:25 +02:00
|
|
|
-- | Evaluate a 'PandocIO' operation.
|
2016-12-01 12:13:51 -05:00
|
|
|
runIO :: PandocIO a -> IO (Either PandocError a)
|
2016-12-01 15:21:49 -05:00
|
|
|
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
|
2016-11-24 09:54:18 -05:00
|
|
|
|
|
|
|
runIOorExplode :: PandocIO a -> IO a
|
2016-12-03 22:35:58 +01:00
|
|
|
runIOorExplode ma = runIO ma >>= handleError
|
2016-12-01 10:00:21 -05:00
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
newtype PandocIO a = PandocIO {
|
2016-12-01 15:21:49 -05:00
|
|
|
unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
|
2016-11-27 15:32:28 -05:00
|
|
|
} deriving ( MonadIO
|
|
|
|
, Functor
|
|
|
|
, Applicative
|
|
|
|
, Monad
|
2016-12-01 12:13:51 -05:00
|
|
|
, MonadError PandocError
|
2016-11-27 15:32:28 -05:00
|
|
|
)
|
2016-11-24 09:54:18 -05:00
|
|
|
|
2017-02-23 16:21:59 +01:00
|
|
|
liftIOError :: (String -> IO a) -> String -> PandocIO a
|
|
|
|
liftIOError f u = do
|
|
|
|
res <- liftIO $ tryIOError $ f u
|
|
|
|
case res of
|
2017-08-11 11:56:54 -07:00
|
|
|
Left e -> throwError $ PandocIOError u e
|
2017-02-23 16:21:59 +01:00
|
|
|
Right r -> return r
|
|
|
|
|
2016-11-27 15:45:08 +01:00
|
|
|
instance PandocMonad PandocIO where
|
2016-11-24 09:54:18 -05:00
|
|
|
lookupEnv = liftIO . IO.lookupEnv
|
|
|
|
getCurrentTime = liftIO IO.getCurrentTime
|
2016-11-30 09:46:08 -05:00
|
|
|
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
2016-11-24 09:54:18 -05:00
|
|
|
newStdGen = liftIO IO.newStdGen
|
2017-06-01 15:09:38 +02:00
|
|
|
newUniqueHash = hashUnique <$> liftIO IO.newUnique
|
2017-02-23 16:24:20 +01:00
|
|
|
openURL u = do
|
|
|
|
report $ Fetching u
|
2017-08-11 11:56:54 -07:00
|
|
|
res <- liftIOError Shared.openURL u
|
2017-05-07 13:11:04 +02:00
|
|
|
case res of
|
|
|
|
Right r -> return r
|
|
|
|
Left e -> throwError $ PandocHttpError u e
|
2017-02-23 16:21:59 +01:00
|
|
|
readFileLazy s = liftIOError BL.readFile s
|
|
|
|
readFileStrict s = liftIOError B.readFile s
|
2017-08-11 11:56:54 -07:00
|
|
|
|
|
|
|
glob = liftIOError IO.glob
|
|
|
|
fileExists = liftIOError Directory.doesFileExist
|
2017-08-11 23:09:51 -07:00
|
|
|
#ifdef EMBED_DATA_FILES
|
|
|
|
getDataFileName = return
|
|
|
|
#else
|
|
|
|
getDataFileName = liftIOError Paths.getDataFileName
|
|
|
|
#endif
|
|
|
|
getModificationTime = liftIOError IO.getModificationTime
|
2016-12-08 21:32:25 +01:00
|
|
|
getCommonState = PandocIO $ lift get
|
|
|
|
putCommonState x = PandocIO $ lift $ put x
|
2017-03-04 11:26:49 +01:00
|
|
|
logOutput msg = liftIO $ do
|
2017-03-09 10:30:57 +01:00
|
|
|
UTF8.hPutStr stderr $ "[" ++
|
2017-06-01 15:09:38 +02:00
|
|
|
map toLower (show (messageVerbosity msg)) ++ "] "
|
2017-03-09 10:30:57 +01:00
|
|
|
alertIndent $ lines $ showLogMessage msg
|
2017-03-04 11:26:49 +01:00
|
|
|
|
2017-03-09 10:30:57 +01:00
|
|
|
alertIndent :: [String] -> IO ()
|
|
|
|
alertIndent [] = return ()
|
|
|
|
alertIndent (l:ls) = do
|
2017-03-04 11:26:49 +01:00
|
|
|
UTF8.hPutStrLn stderr l
|
|
|
|
mapM_ go ls
|
2017-03-09 10:30:57 +01:00
|
|
|
where go l' = do UTF8.hPutStr stderr "! "
|
2017-03-04 11:26:49 +01:00
|
|
|
UTF8.hPutStrLn stderr l'
|
2016-12-11 23:10:46 +01:00
|
|
|
|
|
|
|
-- | Specialized version of parseURIReference that disallows
|
|
|
|
-- single-letter schemes. Reason: these are usually windows absolute
|
|
|
|
-- paths.
|
|
|
|
parseURIReference' :: String -> Maybe URI
|
|
|
|
parseURIReference' s =
|
|
|
|
case parseURIReference s of
|
|
|
|
Just u
|
|
|
|
| length (uriScheme u) > 2 -> Just u
|
|
|
|
| null (uriScheme u) -> Just u -- protocol-relative
|
|
|
|
_ -> Nothing
|
|
|
|
|
2017-08-10 23:16:10 -07:00
|
|
|
-- | Set the user data directory in common state.
|
|
|
|
setUserDataDir :: PandocMonad m
|
|
|
|
=> Maybe FilePath
|
|
|
|
-> m ()
|
|
|
|
setUserDataDir mbfp = modifyCommonState $ \st -> st{ stUserDataDir = mbfp }
|
|
|
|
|
|
|
|
-- | Get the user data directory from common state.
|
|
|
|
getUserDataDir :: PandocMonad m
|
|
|
|
=> m (Maybe FilePath)
|
|
|
|
getUserDataDir = getsCommonState stUserDataDir
|
|
|
|
|
2016-12-11 23:10:46 +01:00
|
|
|
-- | Fetch an image or other item from the local filesystem or the net.
|
|
|
|
-- Returns raw content and maybe mime type.
|
|
|
|
fetchItem :: PandocMonad m
|
|
|
|
=> Maybe String
|
|
|
|
-> String
|
|
|
|
-> m (B.ByteString, Maybe MimeType)
|
|
|
|
fetchItem sourceURL s = do
|
2016-12-13 21:44:02 -05:00
|
|
|
mediabag <- getMediaBag
|
2016-12-11 23:10:46 +01:00
|
|
|
case lookupMedia s mediabag of
|
2017-06-01 15:09:38 +02:00
|
|
|
Just (mime, bs) -> return (BL.toStrict bs, Just mime)
|
2016-12-13 21:44:02 -05:00
|
|
|
Nothing -> downloadOrRead sourceURL s
|
|
|
|
|
|
|
|
downloadOrRead :: PandocMonad m
|
|
|
|
=> Maybe String
|
|
|
|
-> String
|
|
|
|
-> m (B.ByteString, Maybe MimeType)
|
2017-06-01 15:09:38 +02:00
|
|
|
downloadOrRead sourceURL s =
|
2016-12-13 21:44:02 -05:00
|
|
|
case (sourceURL >>= parseURIReference' .
|
|
|
|
ensureEscaped, ensureEscaped s) of
|
|
|
|
(Just u, s') -> -- try fetching from relative path at source
|
|
|
|
case parseURIReference' s' of
|
|
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
|
|
|
|
Nothing -> openURL s' -- will throw error
|
|
|
|
(Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
|
|
|
|
case parseURIReference' s' of
|
|
|
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
|
|
|
|
Nothing -> openURL s' -- will throw error
|
|
|
|
(Nothing, s') ->
|
|
|
|
case parseURI s' of -- requires absolute URI
|
|
|
|
-- We don't want to treat C:/ as a scheme:
|
|
|
|
Just u' | length (uriScheme u') > 2 -> openURL (show u')
|
|
|
|
Just u' | uriScheme u' == "file:" ->
|
|
|
|
readLocalFile $ dropWhile (=='/') (uriPath u')
|
|
|
|
_ -> readLocalFile fp -- get from local file system
|
|
|
|
where readLocalFile f = do
|
2017-02-24 13:48:07 +01:00
|
|
|
resourcePath <- getResourcePath
|
|
|
|
cont <- if isRelative f
|
|
|
|
then withPaths resourcePath readFileStrict f
|
|
|
|
else readFileStrict f
|
2017-02-23 16:21:59 +01:00
|
|
|
return (cont, mime)
|
2016-12-13 21:44:02 -05:00
|
|
|
httpcolon = URI{ uriScheme = "http:",
|
|
|
|
uriAuthority = Nothing,
|
|
|
|
uriPath = "",
|
|
|
|
uriQuery = "",
|
|
|
|
uriFragment = "" }
|
|
|
|
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
|
|
|
|
fp = unEscapeString $ dropFragmentAndQuery s
|
|
|
|
mime = case takeExtension fp of
|
|
|
|
".gz" -> getMimeType $ dropExtension fp
|
|
|
|
".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
|
|
|
|
x -> getMimeType x
|
|
|
|
ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
|
|
|
|
convertSlash '\\' = '/'
|
|
|
|
convertSlash x = x
|
2016-12-11 23:10:46 +01:00
|
|
|
|
2017-08-11 11:56:54 -07:00
|
|
|
getDefaultReferenceDocx :: PandocMonad m => m Archive
|
|
|
|
getDefaultReferenceDocx = do
|
|
|
|
let paths = ["[Content_Types].xml",
|
|
|
|
"_rels/.rels",
|
|
|
|
"docProps/app.xml",
|
|
|
|
"docProps/core.xml",
|
|
|
|
"word/document.xml",
|
|
|
|
"word/fontTable.xml",
|
|
|
|
"word/footnotes.xml",
|
|
|
|
"word/numbering.xml",
|
|
|
|
"word/settings.xml",
|
|
|
|
"word/webSettings.xml",
|
|
|
|
"word/styles.xml",
|
|
|
|
"word/_rels/document.xml.rels",
|
|
|
|
"word/_rels/footnotes.xml.rels",
|
|
|
|
"word/theme/theme1.xml"]
|
|
|
|
let toLazy = BL.fromChunks . (:[])
|
|
|
|
let pathToEntry path = do
|
|
|
|
epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
|
|
|
|
contents <- toLazy <$> readDataFile ("docx/" ++ path)
|
|
|
|
return $ toEntry path epochtime contents
|
|
|
|
datadir <- getUserDataDir
|
|
|
|
mbArchive <- case datadir of
|
|
|
|
Nothing -> return Nothing
|
|
|
|
Just d -> do
|
|
|
|
exists <- fileExists (d </> "reference.docx")
|
|
|
|
if exists
|
|
|
|
then return (Just (d </> "reference.docx"))
|
|
|
|
else return Nothing
|
|
|
|
case mbArchive of
|
|
|
|
Just arch -> toArchive <$> readFileLazy arch
|
|
|
|
Nothing -> foldr addEntryToArchive emptyArchive <$>
|
|
|
|
mapM pathToEntry paths
|
|
|
|
|
|
|
|
getDefaultReferenceODT :: PandocMonad m => m Archive
|
|
|
|
getDefaultReferenceODT = do
|
|
|
|
let paths = ["mimetype",
|
|
|
|
"manifest.rdf",
|
|
|
|
"styles.xml",
|
|
|
|
"content.xml",
|
|
|
|
"meta.xml",
|
|
|
|
"settings.xml",
|
|
|
|
"Configurations2/accelerator/current.xml",
|
|
|
|
"Thumbnails/thumbnail.png",
|
|
|
|
"META-INF/manifest.xml"]
|
|
|
|
let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
|
|
|
|
contents <- (BL.fromChunks . (:[])) `fmap`
|
|
|
|
readDataFile ("odt/" ++ path)
|
|
|
|
return $ toEntry path epochtime contents
|
|
|
|
datadir <- getUserDataDir
|
|
|
|
mbArchive <- case datadir of
|
|
|
|
Nothing -> return Nothing
|
|
|
|
Just d -> do
|
|
|
|
exists <- fileExists (d </> "reference.odt")
|
|
|
|
if exists
|
|
|
|
then return (Just (d </> "reference.odt"))
|
|
|
|
else return Nothing
|
|
|
|
case mbArchive of
|
|
|
|
Just arch -> toArchive <$> readFileLazy arch
|
|
|
|
Nothing -> foldr addEntryToArchive emptyArchive <$>
|
|
|
|
mapM pathToEntry paths
|
|
|
|
|
|
|
|
-- | Read file from user data directory or,
|
|
|
|
-- if not found there, from Cabal data directory.
|
|
|
|
readDataFile :: PandocMonad m => FilePath -> m B.ByteString
|
|
|
|
readDataFile fname = do
|
|
|
|
datadir <- getUserDataDir
|
|
|
|
case datadir of
|
|
|
|
Nothing -> readDefaultDataFile fname
|
|
|
|
Just userDir -> do
|
|
|
|
exists <- fileExists (userDir </> fname)
|
|
|
|
if exists
|
|
|
|
then readFileStrict (userDir </> fname)
|
|
|
|
else readDefaultDataFile fname
|
|
|
|
|
|
|
|
-- | Read file from from Cabal data directory.
|
|
|
|
readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
|
|
|
|
readDefaultDataFile "reference.docx" =
|
|
|
|
(B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
|
|
|
|
readDefaultDataFile "reference.odt" =
|
|
|
|
(B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
|
|
|
|
readDefaultDataFile fname =
|
|
|
|
#ifdef EMBED_DATA_FILES
|
|
|
|
case lookup (makeCanonical fname) dataFiles of
|
|
|
|
Nothing -> throwError $ PandocCouldNotFindDataFileError fname
|
|
|
|
Just contents -> return contents
|
|
|
|
where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
|
|
|
|
transformPathParts = reverse . foldl go []
|
|
|
|
go as "." = as
|
|
|
|
go (_:as) ".." = as
|
|
|
|
go as x = x : as
|
|
|
|
#else
|
|
|
|
getDataFileName fname' >>= checkExistence >>= readFileStrict
|
|
|
|
where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
|
|
|
|
|
|
|
|
checkExistence :: PandocMonad m => FilePath -> m FilePath
|
|
|
|
checkExistence fn = do
|
|
|
|
exists <- fileExists fn
|
|
|
|
if exists
|
|
|
|
then return fn
|
|
|
|
else throwError $ PandocCouldNotFindDataFileError fn
|
|
|
|
#endif
|
|
|
|
|
2017-02-24 13:48:07 +01:00
|
|
|
withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
|
2017-05-02 16:00:04 +02:00
|
|
|
withPaths [] _ fp = throwError $ PandocResourceNotFound fp
|
2017-02-24 13:48:07 +01:00
|
|
|
withPaths (p:ps) action fp =
|
2017-02-24 14:29:56 +01:00
|
|
|
catchError (action (p </> fp))
|
2017-02-24 13:48:07 +01:00
|
|
|
(\_ -> withPaths ps action fp)
|
|
|
|
|
2017-05-18 13:38:19 +02:00
|
|
|
-- | Traverse tree, filling media bag for any images that
|
|
|
|
-- aren't already in the media bag.
|
2017-05-07 21:03:18 +02:00
|
|
|
fillMediaBag :: PandocMonad m => Maybe String -> Pandoc -> m Pandoc
|
|
|
|
fillMediaBag sourceURL d = walkM handleImage d
|
2017-05-07 20:42:32 +02:00
|
|
|
where handleImage :: PandocMonad m => Inline -> m Inline
|
|
|
|
handleImage (Image attr lab (src, tit)) = catchError
|
2017-05-18 13:38:19 +02:00
|
|
|
(do mediabag <- getMediaBag
|
|
|
|
case lookupMedia src mediabag of
|
|
|
|
Just (_, _) -> return $ Image attr lab (src, tit)
|
|
|
|
Nothing -> do
|
|
|
|
(bs, mt) <- downloadOrRead sourceURL src
|
|
|
|
let ext = fromMaybe (takeExtension src)
|
|
|
|
(mt >>= extensionFromMimeType)
|
|
|
|
let bs' = BL.fromChunks [bs]
|
|
|
|
let basename = showDigest $ sha1 bs'
|
|
|
|
let fname = basename <.> ext
|
|
|
|
insertMedia fname mt bs'
|
|
|
|
return $ Image attr lab (fname, tit))
|
2017-06-02 15:06:14 +02:00
|
|
|
(\e ->
|
2017-05-07 20:42:32 +02:00
|
|
|
case e of
|
|
|
|
PandocResourceNotFound _ -> do
|
|
|
|
report $ CouldNotFetchResource src
|
|
|
|
"replacing image with description"
|
|
|
|
-- emit alt text
|
|
|
|
return $ Span ("",["image"],[]) lab
|
|
|
|
PandocHttpError u er -> do
|
|
|
|
report $ CouldNotFetchResource u
|
|
|
|
(show er ++ "\rReplacing image with description.")
|
|
|
|
-- emit alt text
|
|
|
|
return $ Span ("",["image"],[]) lab
|
|
|
|
_ -> throwError e)
|
|
|
|
handleImage x = return x
|
|
|
|
|
|
|
|
-- | Extract media from the mediabag into a directory.
|
|
|
|
extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
|
|
|
|
extractMedia dir d = do
|
|
|
|
media <- getMediaBag
|
|
|
|
case [fp | (fp, _, _) <- mediaDirectory media] of
|
|
|
|
[] -> return d
|
|
|
|
fps -> do
|
2017-06-12 15:28:39 +02:00
|
|
|
mapM_ (writeMedia dir media) fps
|
2017-05-07 20:42:32 +02:00
|
|
|
return $ walk (adjustImagePath dir fps) d
|
|
|
|
|
2017-06-12 15:28:39 +02:00
|
|
|
writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
|
|
|
|
writeMedia dir mediabag subpath = do
|
|
|
|
-- we join and split to convert a/b/c to a\b\c on Windows;
|
|
|
|
-- in zip containers all paths use /
|
|
|
|
let fullpath = dir </> normalise subpath
|
|
|
|
let mbcontents = lookupMedia subpath mediabag
|
|
|
|
case mbcontents of
|
|
|
|
Nothing -> throwError $ PandocResourceNotFound subpath
|
|
|
|
Just (_, bs) -> do
|
|
|
|
report $ Extracting fullpath
|
2017-08-11 11:56:54 -07:00
|
|
|
liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
|
|
|
|
liftIOError (\p -> BL.writeFile p bs) fullpath
|
2017-06-12 15:28:39 +02:00
|
|
|
|
2017-05-07 20:42:32 +02:00
|
|
|
adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
|
|
|
|
adjustImagePath dir paths (Image attr lab (src, tit))
|
|
|
|
| src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
|
|
|
|
adjustImagePath _ _ x = x
|
|
|
|
|
2016-11-27 15:31:17 -05:00
|
|
|
data PureState = PureState { stStdGen :: StdGen
|
2016-11-21 09:30:08 -05:00
|
|
|
, stWord8Store :: [Word8] -- should be
|
|
|
|
-- inifinite,
|
|
|
|
-- i.e. [1..]
|
|
|
|
, stUniqStore :: [Int] -- should be
|
|
|
|
-- inifinite and
|
|
|
|
-- contain every
|
|
|
|
-- element at most
|
|
|
|
-- once, e.g. [1..]
|
2016-12-03 23:25:36 -05:00
|
|
|
, stEnv :: [(String, String)]
|
|
|
|
, stTime :: UTCTime
|
|
|
|
, stTimeZone :: TimeZone
|
|
|
|
, stReferenceDocx :: Archive
|
|
|
|
, stReferenceODT :: Archive
|
|
|
|
, stFiles :: FileTree
|
2017-08-10 23:16:10 -07:00
|
|
|
, stUserDataFiles :: FileTree
|
|
|
|
, stCabalDataFiles :: FileTree
|
2016-11-21 09:30:08 -05:00
|
|
|
}
|
|
|
|
|
2016-11-27 15:31:17 -05:00
|
|
|
instance Default PureState where
|
|
|
|
def = PureState { stStdGen = mkStdGen 1848
|
2016-11-24 11:52:06 -05:00
|
|
|
, stWord8Store = [1..]
|
|
|
|
, stUniqStore = [1..]
|
2016-12-03 23:25:36 -05:00
|
|
|
, stEnv = [("USER", "pandoc-user")]
|
|
|
|
, stTime = posixSecondsToUTCTime 0
|
|
|
|
, stTimeZone = utc
|
|
|
|
, stReferenceDocx = emptyArchive
|
|
|
|
, stReferenceODT = emptyArchive
|
|
|
|
, stFiles = mempty
|
2017-08-10 23:16:10 -07:00
|
|
|
, stUserDataFiles = mempty
|
|
|
|
, stCabalDataFiles = mempty
|
2016-11-24 11:52:06 -05:00
|
|
|
}
|
2016-12-03 23:39:01 -05:00
|
|
|
|
|
|
|
|
|
|
|
getPureState :: PandocPure PureState
|
2017-06-01 15:09:38 +02:00
|
|
|
getPureState = PandocPure $ lift $ lift get
|
2016-12-03 23:39:01 -05:00
|
|
|
|
|
|
|
getsPureState :: (PureState -> a) -> PandocPure a
|
|
|
|
getsPureState f = f <$> getPureState
|
|
|
|
|
|
|
|
putPureState :: PureState -> PandocPure ()
|
|
|
|
putPureState ps= PandocPure $ lift $ lift $ put ps
|
|
|
|
|
|
|
|
modifyPureState :: (PureState -> PureState) -> PandocPure ()
|
|
|
|
modifyPureState f = PandocPure $ lift $ lift $ modify f
|
2016-12-08 21:16:57 +01:00
|
|
|
|
|
|
|
|
2016-11-30 09:21:21 -05:00
|
|
|
data FileInfo = FileInfo { infoFileMTime :: UTCTime
|
|
|
|
, infoFileContents :: B.ByteString
|
|
|
|
}
|
|
|
|
|
|
|
|
newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
|
|
|
|
deriving (Monoid)
|
|
|
|
|
|
|
|
getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
|
|
|
|
getFileInfo fp tree = M.lookup fp $ unFileTree tree
|
2016-11-24 11:52:06 -05:00
|
|
|
|
2017-07-14 17:28:13 +02:00
|
|
|
-- | Add the specified file to the FileTree. If file
|
|
|
|
-- is a directory, add its contents recursively.
|
2017-07-14 16:24:30 +02:00
|
|
|
addToFileTree :: FileTree -> FilePath -> IO FileTree
|
|
|
|
addToFileTree (FileTree treemap) fp = do
|
2017-07-14 17:28:13 +02:00
|
|
|
isdir <- doesDirectoryExist fp
|
|
|
|
if isdir
|
|
|
|
then do -- recursively add contents of directories
|
2017-07-15 11:38:43 +02:00
|
|
|
let isSpecial ".." = True
|
|
|
|
isSpecial "." = True
|
|
|
|
isSpecial _ = False
|
|
|
|
fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp
|
2017-07-14 17:28:13 +02:00
|
|
|
foldM addToFileTree (FileTree treemap) fs
|
|
|
|
else do
|
|
|
|
contents <- B.readFile fp
|
|
|
|
mtime <- IO.getModificationTime fp
|
|
|
|
return $ FileTree $
|
|
|
|
M.insert fp FileInfo{ infoFileMTime = mtime
|
|
|
|
, infoFileContents = contents } treemap
|
2016-11-24 11:52:06 -05:00
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
newtype PandocPure a = PandocPure {
|
2016-12-01 12:13:51 -05:00
|
|
|
unPandocPure :: ExceptT PandocError
|
2016-12-01 18:35:05 -05:00
|
|
|
(StateT CommonState (State PureState)) a
|
2016-11-27 15:32:28 -05:00
|
|
|
} deriving ( Functor
|
|
|
|
, Applicative
|
|
|
|
, Monad
|
2016-12-01 12:13:51 -05:00
|
|
|
, MonadError PandocError
|
2016-11-27 15:32:28 -05:00
|
|
|
)
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-01 12:13:51 -05:00
|
|
|
runPure :: PandocPure a -> Either PandocError a
|
2016-12-01 15:21:49 -05:00
|
|
|
runPure x = flip evalState def $
|
|
|
|
flip evalStateT def $
|
|
|
|
runExceptT $
|
|
|
|
unPandocPure x
|
2016-11-24 11:52:06 -05:00
|
|
|
|
2016-11-24 09:54:18 -05:00
|
|
|
instance PandocMonad PandocPure where
|
2016-12-03 23:39:01 -05:00
|
|
|
lookupEnv s = do
|
|
|
|
env <- getsPureState stEnv
|
2016-11-21 09:30:08 -05:00
|
|
|
return (lookup s env)
|
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
getCurrentTime = getsPureState stTime
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
getCurrentTimeZone = getsPureState stTimeZone
|
2016-11-30 09:46:08 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
newStdGen = do
|
|
|
|
g <- getsPureState stStdGen
|
2016-11-21 09:30:08 -05:00
|
|
|
let (_, nxtGen) = next g
|
2016-12-03 23:39:01 -05:00
|
|
|
modifyPureState $ \st -> st { stStdGen = nxtGen }
|
2016-11-21 09:30:08 -05:00
|
|
|
return g
|
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
newUniqueHash = do
|
|
|
|
uniqs <- getsPureState stUniqStore
|
2016-11-21 09:30:08 -05:00
|
|
|
case uniqs of
|
|
|
|
u : us -> do
|
2016-12-03 23:39:01 -05:00
|
|
|
modifyPureState $ \st -> st { stUniqStore = us }
|
2016-11-21 09:30:08 -05:00
|
|
|
return u
|
|
|
|
_ -> M.fail "uniq store ran out of elements"
|
2017-05-02 16:00:04 +02:00
|
|
|
openURL u = throwError $ PandocResourceNotFound u
|
2016-12-03 23:39:01 -05:00
|
|
|
readFileLazy fp = do
|
|
|
|
fps <- getsPureState stFiles
|
2016-11-30 09:21:21 -05:00
|
|
|
case infoFileContents <$> getFileInfo fp fps of
|
2016-11-21 09:30:08 -05:00
|
|
|
Just bs -> return (BL.fromStrict bs)
|
2017-05-02 16:00:04 +02:00
|
|
|
Nothing -> throwError $ PandocResourceNotFound fp
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict fp = do
|
|
|
|
fps <- getsPureState stFiles
|
|
|
|
case infoFileContents <$> getFileInfo fp fps of
|
|
|
|
Just bs -> return bs
|
2017-05-02 16:00:04 +02:00
|
|
|
Nothing -> throwError $ PandocResourceNotFound fp
|
2016-11-21 09:30:08 -05:00
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
glob s = do
|
2017-07-14 16:35:29 +02:00
|
|
|
FileTree ftmap <- getsPureState stFiles
|
|
|
|
return $ filter (match (compile s)) $ M.keys ftmap
|
2016-11-27 15:29:46 -05:00
|
|
|
|
2017-08-11 11:56:54 -07:00
|
|
|
fileExists fp = do
|
|
|
|
fps <- getsPureState stFiles
|
|
|
|
case getFileInfo fp fps of
|
|
|
|
Nothing -> return False
|
|
|
|
Just _ -> return True
|
|
|
|
|
2017-08-11 23:09:51 -07:00
|
|
|
getDataFileName fp = return $ "data/" ++ fp
|
|
|
|
|
2016-12-03 23:39:01 -05:00
|
|
|
getModificationTime fp = do
|
|
|
|
fps <- getsPureState stFiles
|
2017-06-01 15:09:38 +02:00
|
|
|
case infoFileMTime <$> getFileInfo fp fps of
|
2016-11-30 12:55:30 -05:00
|
|
|
Just tm -> return tm
|
2017-02-23 16:21:59 +01:00
|
|
|
Nothing -> throwError $ PandocIOError fp
|
|
|
|
(userError "Can't get modification time")
|
2016-11-30 12:55:30 -05:00
|
|
|
|
2017-06-01 15:09:38 +02:00
|
|
|
getCommonState = PandocPure $ lift get
|
2016-12-08 21:32:25 +01:00
|
|
|
putCommonState x = PandocPure $ lift $ put x
|
|
|
|
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput _msg = return ()
|
2017-01-22 23:49:05 +01:00
|
|
|
|
2017-08-19 16:39:22 -07:00
|
|
|
-- This requires UndecidableInstances. We could avoid that
|
|
|
|
-- by repeating the definitions below for every monad transformer
|
|
|
|
-- we use: ReaderT, WriterT, StateT, RWST. But this seems to
|
|
|
|
-- be harmless.
|
|
|
|
instance (MonadTrans t, PandocMonad m, Functor (t m),
|
|
|
|
MonadError PandocError (t m), Monad (t m),
|
|
|
|
Applicative (t m)) => PandocMonad (t m) where
|
2016-12-08 22:12:04 +01:00
|
|
|
lookupEnv = lift . lookupEnv
|
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-08 22:12:04 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-08 22:12:04 +01:00
|
|
|
glob = lift . glob
|
2017-08-11 11:56:54 -07:00
|
|
|
fileExists = lift . fileExists
|
2017-08-11 23:09:51 -07:00
|
|
|
getDataFileName = lift . getDataFileName
|
2016-12-08 22:12:04 +01:00
|
|
|
getModificationTime = lift . getModificationTime
|
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 22:12:04 +01:00
|
|
|
|
2017-08-19 16:39:22 -07:00
|
|
|
instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
|
2016-12-08 22:12:04 +01:00
|
|
|
lookupEnv = lift . lookupEnv
|
|
|
|
getCurrentTime = lift getCurrentTime
|
|
|
|
getCurrentTimeZone = lift getCurrentTimeZone
|
|
|
|
newStdGen = lift newStdGen
|
|
|
|
newUniqueHash = lift newUniqueHash
|
2016-12-11 23:10:46 +01:00
|
|
|
openURL = lift . openURL
|
2016-12-08 22:12:04 +01:00
|
|
|
readFileLazy = lift . readFileLazy
|
2016-12-11 23:10:46 +01:00
|
|
|
readFileStrict = lift . readFileStrict
|
2016-12-08 22:12:04 +01:00
|
|
|
glob = lift . glob
|
2017-08-11 11:56:54 -07:00
|
|
|
fileExists = lift . fileExists
|
2017-08-11 23:09:51 -07:00
|
|
|
getDataFileName = lift . getDataFileName
|
2016-12-08 22:12:04 +01:00
|
|
|
getModificationTime = lift . getModificationTime
|
|
|
|
getCommonState = lift getCommonState
|
|
|
|
putCommonState = lift . putCommonState
|
2017-08-19 16:39:22 -07:00
|
|
|
trace msg = do
|
|
|
|
tracing <- getsCommonState stTrace
|
|
|
|
when tracing $ do
|
|
|
|
pos <- getPosition
|
|
|
|
Debug.Trace.trace
|
|
|
|
("[trace] Parsed " ++ msg ++ " at line " ++
|
|
|
|
show (sourceLine pos) ++
|
|
|
|
if sourceName pos == "chunk"
|
|
|
|
then " of chunk"
|
|
|
|
else "")
|
|
|
|
(return ())
|
2017-02-10 23:59:47 +01:00
|
|
|
logOutput = lift . logOutput
|
2016-12-08 22:12:04 +01:00
|
|
|
|