diff --git a/MANUAL.txt b/MANUAL.txt index 5dc35c8ff..75e74f1cd 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -730,6 +730,16 @@ header when requesting a document from a URL: document in standalone mode. If no *VAL* is specified, the key will be given the value `true`. +`--sandbox` + +: Run pandoc in a sandbox, limiting IO operations in readers + and writers to reading the files specified on the command line. + Note that this option does not limit IO operations by + filters or in the production of PDF documents. But it does + offer security against, for example, disclosure of files + through the use of `include` directives. Anyone using + pandoc on untrusted user input should use this option. + `-D` *FORMAT*, `--print-default-template=`*FORMAT* : Print the system default template for an output *FORMAT*. (See `-t` @@ -6543,7 +6553,8 @@ application, here are some things to keep in mind: 2. Several input formats (including HTML, Org, and RST) support `include` directives that allow the contents of a file to be included in the output. An untrusted attacker could use these to view the contents of - files on the file system. + files on the file system. (Using the `--sandbox` option can + protect against this threat.) 3. If your application uses pandoc as a Haskell library (rather than shelling out to the executable), it is possible to use it in a mode diff --git a/pandoc.cabal b/pandoc.cabal index b90a61942..da53cb1cd 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -622,6 +622,7 @@ library Text.Pandoc.Class.PandocMonad, Text.Pandoc.Class.PandocIO, Text.Pandoc.Class.PandocPure, + Text.Pandoc.Class.Sandbox, Text.Pandoc.Filter.JSON, Text.Pandoc.Filter.Lua, Text.Pandoc.Filter.Path, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 15236896c..f7c1f218d 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -139,7 +139,26 @@ convertWithOpts opts = do <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`." _ -> return () - (reader, readerExts) <- getReader readerName + + let makeSandboxed pureReader = + let files = maybe id (:) (optReferenceDoc opts) . + maybe id (:) (optEpubMetadata opts) . + maybe id (:) (optEpubCoverImage opts) . + maybe id (:) (optCSL opts) . + maybe id (:) (optCitationAbbreviations opts) $ + optEpubFonts opts ++ + optBibliography opts + in case pureReader of + TextReader r -> TextReader $ \o t -> sandbox files (r o t) + ByteStringReader r + -> ByteStringReader $ \o t -> sandbox files (r o t) + + (reader, readerExts) <- + if optSandbox opts + then case runPure (getReader readerName) of + Left e -> throwError e + Right (r, rexts) -> return (makeSandboxed r, rexts) + else getReader readerName outputSettings <- optToOutputSettings opts let format = outputFormat outputSettings @@ -274,8 +293,9 @@ convertWithOpts opts = do ByteStringReader r -> mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs) >>= - ( (if isJust (optExtractMedia opts) - || writerNameBase == "docx" -- for fallback png creation + ( (if not (optSandbox opts) && + (isJust (optExtractMedia opts) + || writerNameBase == "docx") -- for fallback pngs then fillMediaBag else return) >=> return . adjustMetadata (metadataFromFile <>) @@ -286,7 +306,8 @@ convertWithOpts opts = do >=> maybe return extractMedia (optExtractMedia opts) ) - when (writerNameBase == "docx") $ do -- create fallback pngs for svgs + when (writerNameBase == "docx" && not (optSandbox opts)) $ do + -- create fallback pngs for svgs items <- mediaItems <$> getMediaBag forM_ items $ \(fp, mt, bs) -> case T.takeWhile (/=';') mt of diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index d2c12573c..99017000a 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -188,6 +188,11 @@ options = (\opt -> return opt { optFileScope = True })) "" -- "Parse input files before combining" + , Option "" ["sandbox"] + (NoArg + (\opt -> return opt { optSandbox = True })) + "" + , Option "s" ["standalone"] (NoArg (\opt -> return opt { optStandalone = True })) diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index d54d932b7..48eb15fdf 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -160,6 +160,7 @@ data Opt = Opt , optCSL :: Maybe FilePath -- ^ CSL stylesheet , optBibliography :: [FilePath] -- ^ Bibliography files , optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations + , optSandbox :: Bool } deriving (Generic, Show) instance FromYAML (Opt -> Opt) where @@ -595,6 +596,8 @@ doOpt (k',v) = do parseYAML v >>= \x -> return (\o -> o{ optEol = x }) "strip-comments" -> parseYAML v >>= \x -> return (\o -> o { optStripComments = x }) + "sandbox" -> + parseYAML v >>= \x -> return (\o -> o { optSandbox = x }) _ -> failAtNode k' $ "Unknown option " ++ show k -- | Defaults for command-line options. @@ -673,6 +676,7 @@ defaultOpts = Opt , optCSL = Nothing , optBibliography = [] , optCitationAbbreviations = Nothing + , optSandbox = False } parseStringKey :: Node Pos -> Parser Text diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 3f83f4b21..7b057713b 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -90,11 +90,31 @@ optToOutputSettings opts = do then writerName else T.toLower $ baseWriterName writerName + let makeSandboxed pureWriter = + let files = maybe id (:) (optReferenceDoc opts) . + maybe id (:) (optEpubMetadata opts) . + maybe id (:) (optEpubCoverImage opts) . + maybe id (:) (optCSL opts) . + maybe id (:) (optCitationAbbreviations opts) $ + optEpubFonts opts ++ + optBibliography opts + in case pureWriter of + TextWriter w -> TextWriter $ \o d -> sandbox files (w o d) + ByteStringWriter w + -> ByteStringWriter $ \o d -> sandbox files (w o d) + + (writer, writerExts) <- if ".lua" `T.isSuffixOf` format then return (TextWriter (\o d -> writeCustom (T.unpack writerName) o d), mempty) - else getWriter (T.toLower writerName) + else if optSandbox opts + then + case runPure (getWriter writerName) of + Left e -> throwError e + Right (w, wexts) -> + return (makeSandboxed w, wexts) + else getWriter (T.toLower writerName) let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 2f28ac4dd..6394df251 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -19,6 +19,7 @@ module Text.Pandoc.Class , module Text.Pandoc.Class.PandocIO , module Text.Pandoc.Class.PandocMonad , module Text.Pandoc.Class.PandocPure + , module Text.Pandoc.Class.Sandbox , Translations ) where @@ -27,3 +28,4 @@ import Text.Pandoc.Class.PandocMonad import Text.Pandoc.Class.PandocIO import Text.Pandoc.Class.PandocPure import Text.Pandoc.Translations (Translations) +import Text.Pandoc.Class.Sandbox diff --git a/src/Text/Pandoc/Class/PandocSandboxed.hs b/src/Text/Pandoc/Class/PandocSandboxed.hs deleted file mode 100644 index 61ee1f1c6..000000000 --- a/src/Text/Pandoc/Class/PandocSandboxed.hs +++ /dev/null @@ -1,79 +0,0 @@ -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{- | -Module : Text.Pandoc.Class.PandocIO -Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane -License : GNU GPL, version 2 or above - -Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> -Stability : alpha -Portability : portable - -This module defines @'PandocIO'@, an IO-based instance of the -@'PandocMonad'@ type class. File, data, and network access all are run -using IO operators. --} -module Text.Pandoc.Class.PandocIO - ( PandocIO(..) - , runIO - , runIOorExplode - , extractMedia - ) where - -import Control.Monad.Except (ExceptT, MonadError, runExceptT) -import Control.Monad.IO.Class (MonadIO) -import Control.Monad.State (StateT, evalStateT, lift, get, put) -import Data.Default (Default (def)) -import Text.Pandoc.Class.CommonState (CommonState (..)) -import Text.Pandoc.Class.PandocMonad -import Text.Pandoc.Definition -import Text.Pandoc.Error -import qualified Text.Pandoc.Class.IO as IO -import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) - --- | Evaluate a 'PandocIO' operation. -runIO :: PandocIO a -> IO (Either PandocError a) -runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma - --- | Evaluate a 'PandocIO' operation, handling any errors --- by exiting with an appropriate message and error status. -runIOorExplode :: PandocIO a -> IO a -runIOorExplode ma = runIO ma >>= handleError - -newtype PandocIO a = PandocIO { - unPandocIO :: ExceptT PandocError (StateT CommonState IO) a - } deriving ( MonadIO - , Functor - , Applicative - , Monad - , MonadCatch - , MonadMask - , MonadThrow - , MonadError PandocError - ) - -instance PandocMonad PandocIO where - lookupEnv = IO.lookupEnv - getCurrentTime = IO.getCurrentTime - getCurrentTimeZone = IO.getCurrentTimeZone - newStdGen = IO.newStdGen - newUniqueHash = IO.newUniqueHash - - openURL = IO.openURL - readFileLazy = IO.readFileLazy - readFileStrict = IO.readFileStrict - readStdinStrict = IO.readStdinStrict - - glob = IO.glob - fileExists = IO.fileExists - getDataFileName = IO.getDataFileName - getModificationTime = IO.getModificationTime - - getCommonState = PandocIO $ lift get - putCommonState = PandocIO . lift . put - - logOutput = IO.logOutput - --- | Extract media from the mediabag into a directory. -extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc -extractMedia = IO.extractMedia diff --git a/src/Text/Pandoc/Class/Sandbox.hs b/src/Text/Pandoc/Class/Sandbox.hs new file mode 100644 index 000000000..8bc0f1e77 --- /dev/null +++ b/src/Text/Pandoc/Class/Sandbox.hs @@ -0,0 +1,50 @@ +{- | +Module : Text.Pandoc.Class.Sandbox +Copyright : Copyright (C) 2021 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane (<jgm@berkeley.edu>) +Stability : alpha +Portability : portable + +This module provides a way to run PandocMonad actions in a sandbox +(pure context, with no IO allowed and access only to designated files). +-} + +module Text.Pandoc.Class.Sandbox + ( sandbox ) +where + +import Control.Monad (foldM) +import Control.Monad.Except (throwError) +import Control.Monad.IO.Class (MonadIO (liftIO)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Class.PandocPure +import Text.Pandoc.Class.CommonState (CommonState(..)) +import Text.Pandoc.Logging (messageVerbosity) + +-- | Lift a PandocPure action into any instance of PandocMonad. +-- The main computation is done purely, but CommonState is preserved +-- continuously, and warnings are emitted after the action completes. +-- The parameter is a list of FilePaths which will be added to the +-- ersatz file system and be available for reading. +sandbox :: (PandocMonad m, MonadIO m) => [FilePath] -> PandocPure a -> m a +sandbox files action = do + oldState <- getCommonState + tree <- liftIO $ foldM addToFileTree mempty files + case runPure (do putCommonState oldState + modifyPureState $ \ps -> ps{ stFiles = tree } + result <- action + st <- getCommonState + return (st, result)) of + Left e -> throwError e + Right (st, result) -> do + putCommonState st + let verbosity = stVerbosity st + -- emit warnings, since these are not printed in runPure + let newMessages = reverse $ take + (length (stLog st) - length (stLog oldState)) (stLog st) + mapM_ logOutput + (filter ((<= verbosity) . messageVerbosity) newMessages) + return result +