Add --sandbox option.

+ Add sandbox feature for readers.  When this option is used,
  readers and writers only have access to input files (and
  other files specified directly on command line).  This restriction
  is enforced in the type system.
+ Filters, PDF production, custom writers are unaffected.  This
  feature only insulates the actual readers and writers, not
  the pipeline around them in Text.Pandoc.App.
+ Note that when `--sandboxed` is specified, readers won't have
  access to the resource path, nor will anything have access to
  the user data directory.
+ Add module Text.Pandoc.Class.Sandbox, defining
  `sandbox`.  Exported via Text.Pandoc.Class. [API change]

Closes #5045.
This commit is contained in:
John MacFarlane 2021-08-21 15:30:13 -07:00
parent b76796eae8
commit d6d7c9620a
9 changed files with 120 additions and 85 deletions

View file

@ -730,6 +730,16 @@ header when requesting a document from a URL:
document in standalone mode. If no *VAL* is specified, the
key will be given the value `true`.
`--sandbox`
: Run pandoc in a sandbox, limiting IO operations in readers
and writers to reading the files specified on the command line.
Note that this option does not limit IO operations by
filters or in the production of PDF documents. But it does
offer security against, for example, disclosure of files
through the use of `include` directives. Anyone using
pandoc on untrusted user input should use this option.
`-D` *FORMAT*, `--print-default-template=`*FORMAT*
: Print the system default template for an output *FORMAT*. (See `-t`
@ -6543,7 +6553,8 @@ application, here are some things to keep in mind:
2. Several input formats (including HTML, Org, and RST) support `include`
directives that allow the contents of a file to be included in the
output. An untrusted attacker could use these to view the contents of
files on the file system.
files on the file system. (Using the `--sandbox` option can
protect against this threat.)
3. If your application uses pandoc as a Haskell library (rather than
shelling out to the executable), it is possible to use it in a mode

View file

@ -622,6 +622,7 @@ library
Text.Pandoc.Class.PandocMonad,
Text.Pandoc.Class.PandocIO,
Text.Pandoc.Class.PandocPure,
Text.Pandoc.Class.Sandbox,
Text.Pandoc.Filter.JSON,
Text.Pandoc.Filter.Lua,
Text.Pandoc.Filter.Path,

View file

@ -139,7 +139,26 @@ convertWithOpts opts = do
<> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
_ -> return ()
(reader, readerExts) <- getReader readerName
let makeSandboxed pureReader =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
maybe id (:) (optEpubCoverImage opts) .
maybe id (:) (optCSL opts) .
maybe id (:) (optCitationAbbreviations opts) $
optEpubFonts opts ++
optBibliography opts
in case pureReader of
TextReader r -> TextReader $ \o t -> sandbox files (r o t)
ByteStringReader r
-> ByteStringReader $ \o t -> sandbox files (r o t)
(reader, readerExts) <-
if optSandbox opts
then case runPure (getReader readerName) of
Left e -> throwError e
Right (r, rexts) -> return (makeSandboxed r, rexts)
else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
@ -274,8 +293,9 @@ convertWithOpts opts = do
ByteStringReader r ->
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
>>=
( (if isJust (optExtractMedia opts)
|| writerNameBase == "docx" -- for fallback png creation
( (if not (optSandbox opts) &&
(isJust (optExtractMedia opts)
|| writerNameBase == "docx") -- for fallback pngs
then fillMediaBag
else return)
>=> return . adjustMetadata (metadataFromFile <>)
@ -286,7 +306,8 @@ convertWithOpts opts = do
>=> maybe return extractMedia (optExtractMedia opts)
)
when (writerNameBase == "docx") $ do -- create fallback pngs for svgs
when (writerNameBase == "docx" && not (optSandbox opts)) $ do
-- create fallback pngs for svgs
items <- mediaItems <$> getMediaBag
forM_ items $ \(fp, mt, bs) ->
case T.takeWhile (/=';') mt of

View file

@ -188,6 +188,11 @@ options =
(\opt -> return opt { optFileScope = True }))
"" -- "Parse input files before combining"
, Option "" ["sandbox"]
(NoArg
(\opt -> return opt { optSandbox = True }))
""
, Option "s" ["standalone"]
(NoArg
(\opt -> return opt { optStandalone = True }))

View file

@ -160,6 +160,7 @@ data Opt = Opt
, optCSL :: Maybe FilePath -- ^ CSL stylesheet
, optBibliography :: [FilePath] -- ^ Bibliography files
, optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations
, optSandbox :: Bool
} deriving (Generic, Show)
instance FromYAML (Opt -> Opt) where
@ -595,6 +596,8 @@ doOpt (k',v) = do
parseYAML v >>= \x -> return (\o -> o{ optEol = x })
"strip-comments" ->
parseYAML v >>= \x -> return (\o -> o { optStripComments = x })
"sandbox" ->
parseYAML v >>= \x -> return (\o -> o { optSandbox = x })
_ -> failAtNode k' $ "Unknown option " ++ show k
-- | Defaults for command-line options.
@ -673,6 +676,7 @@ defaultOpts = Opt
, optCSL = Nothing
, optBibliography = []
, optCitationAbbreviations = Nothing
, optSandbox = False
}
parseStringKey :: Node Pos -> Parser Text

View file

@ -90,11 +90,31 @@ optToOutputSettings opts = do
then writerName
else T.toLower $ baseWriterName writerName
let makeSandboxed pureWriter =
let files = maybe id (:) (optReferenceDoc opts) .
maybe id (:) (optEpubMetadata opts) .
maybe id (:) (optEpubCoverImage opts) .
maybe id (:) (optCSL opts) .
maybe id (:) (optCitationAbbreviations opts) $
optEpubFonts opts ++
optBibliography opts
in case pureWriter of
TextWriter w -> TextWriter $ \o d -> sandbox files (w o d)
ByteStringWriter w
-> ByteStringWriter $ \o d -> sandbox files (w o d)
(writer, writerExts) <-
if ".lua" `T.isSuffixOf` format
then return (TextWriter
(\o d -> writeCustom (T.unpack writerName) o d), mempty)
else getWriter (T.toLower writerName)
else if optSandbox opts
then
case runPure (getWriter writerName) of
Left e -> throwError e
Right (w, wexts) ->
return (makeSandboxed w, wexts)
else getWriter (T.toLower writerName)
let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput

View file

@ -19,6 +19,7 @@ module Text.Pandoc.Class
, module Text.Pandoc.Class.PandocIO
, module Text.Pandoc.Class.PandocMonad
, module Text.Pandoc.Class.PandocPure
, module Text.Pandoc.Class.Sandbox
, Translations
) where
@ -27,3 +28,4 @@ import Text.Pandoc.Class.PandocMonad
import Text.Pandoc.Class.PandocIO
import Text.Pandoc.Class.PandocPure
import Text.Pandoc.Translations (Translations)
import Text.Pandoc.Class.Sandbox

View file

@ -1,79 +0,0 @@
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
Module : Text.Pandoc.Class.PandocIO
Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
Stability : alpha
Portability : portable
This module defines @'PandocIO'@, an IO-based instance of the
@'PandocMonad'@ type class. File, data, and network access all are run
using IO operators.
-}
module Text.Pandoc.Class.PandocIO
( PandocIO(..)
, runIO
, runIOorExplode
, extractMedia
) where
import Control.Monad.Except (ExceptT, MonadError, runExceptT)
import Control.Monad.IO.Class (MonadIO)
import Control.Monad.State (StateT, evalStateT, lift, get, put)
import Data.Default (Default (def))
import Text.Pandoc.Class.CommonState (CommonState (..))
import Text.Pandoc.Class.PandocMonad
import Text.Pandoc.Definition
import Text.Pandoc.Error
import qualified Text.Pandoc.Class.IO as IO
import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow)
-- | Evaluate a 'PandocIO' operation.
runIO :: PandocIO a -> IO (Either PandocError a)
runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
-- | Evaluate a 'PandocIO' operation, handling any errors
-- by exiting with an appropriate message and error status.
runIOorExplode :: PandocIO a -> IO a
runIOorExplode ma = runIO ma >>= handleError
newtype PandocIO a = PandocIO {
unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
} deriving ( MonadIO
, Functor
, Applicative
, Monad
, MonadCatch
, MonadMask
, MonadThrow
, MonadError PandocError
)
instance PandocMonad PandocIO where
lookupEnv = IO.lookupEnv
getCurrentTime = IO.getCurrentTime
getCurrentTimeZone = IO.getCurrentTimeZone
newStdGen = IO.newStdGen
newUniqueHash = IO.newUniqueHash
openURL = IO.openURL
readFileLazy = IO.readFileLazy
readFileStrict = IO.readFileStrict
readStdinStrict = IO.readStdinStrict
glob = IO.glob
fileExists = IO.fileExists
getDataFileName = IO.getDataFileName
getModificationTime = IO.getModificationTime
getCommonState = PandocIO $ lift get
putCommonState = PandocIO . lift . put
logOutput = IO.logOutput
-- | Extract media from the mediabag into a directory.
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
extractMedia = IO.extractMedia

View file

@ -0,0 +1,50 @@
{- |
Module : Text.Pandoc.Class.Sandbox
Copyright : Copyright (C) 2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane (<jgm@berkeley.edu>)
Stability : alpha
Portability : portable
This module provides a way to run PandocMonad actions in a sandbox
(pure context, with no IO allowed and access only to designated files).
-}
module Text.Pandoc.Class.Sandbox
( sandbox )
where
import Control.Monad (foldM)
import Control.Monad.Except (throwError)
import Control.Monad.IO.Class (MonadIO (liftIO))
import Text.Pandoc.Class.PandocMonad
import Text.Pandoc.Class.PandocPure
import Text.Pandoc.Class.CommonState (CommonState(..))
import Text.Pandoc.Logging (messageVerbosity)
-- | Lift a PandocPure action into any instance of PandocMonad.
-- The main computation is done purely, but CommonState is preserved
-- continuously, and warnings are emitted after the action completes.
-- The parameter is a list of FilePaths which will be added to the
-- ersatz file system and be available for reading.
sandbox :: (PandocMonad m, MonadIO m) => [FilePath] -> PandocPure a -> m a
sandbox files action = do
oldState <- getCommonState
tree <- liftIO $ foldM addToFileTree mempty files
case runPure (do putCommonState oldState
modifyPureState $ \ps -> ps{ stFiles = tree }
result <- action
st <- getCommonState
return (st, result)) of
Left e -> throwError e
Right (st, result) -> do
putCommonState st
let verbosity = stVerbosity st
-- emit warnings, since these are not printed in runPure
let newMessages = reverse $ take
(length (stLog st) - length (stLog oldState)) (stLog st)
mapM_ logOutput
(filter ((<= verbosity) . messageVerbosity) newMessages)
return result