Add openURL and readFileStrict to PandocMonad.
Removed fetchItem and fetchItem'. Provide fetchItem in PandocMonad (it uses openURL and readFileStrict). TODO: - PandocPure instance for openURL. - Fix places where fetchItem is used so that we trap the exception instead of checking for a Left value. (At least in the places where we want a warning rather than a failure.)
This commit is contained in:
parent
be140ab496
commit
4cb124d147
1 changed files with 98 additions and 38 deletions
|
@ -46,6 +46,7 @@ module Text.Pandoc.Class ( PandocMonad(..)
|
||||||
, getMediaBag
|
, getMediaBag
|
||||||
, setMediaBag
|
, setMediaBag
|
||||||
, insertMedia
|
, insertMedia
|
||||||
|
, fetchItem
|
||||||
, getInputFiles
|
, getInputFiles
|
||||||
, getOutputFile
|
, getOutputFile
|
||||||
, PandocIO(..)
|
, PandocIO(..)
|
||||||
|
@ -64,27 +65,28 @@ import qualified System.Random as IO (newStdGen)
|
||||||
import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
|
import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
|
||||||
import Data.Unique (hashUnique)
|
import Data.Unique (hashUnique)
|
||||||
import qualified Data.Unique as IO (newUnique)
|
import qualified Data.Unique as IO (newUnique)
|
||||||
import qualified Text.Pandoc.Shared as IO ( fetchItem
|
import qualified Text.Pandoc.Shared as IO ( readDataFile
|
||||||
, fetchItem'
|
, warn
|
||||||
, readDataFile
|
, openURL )
|
||||||
, warn)
|
|
||||||
import Text.Pandoc.Compat.Time (UTCTime)
|
import Text.Pandoc.Compat.Time (UTCTime)
|
||||||
import Text.Pandoc.Parsing (ParserT, SourcePos)
|
import Text.Pandoc.Parsing (ParserT, SourcePos)
|
||||||
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
|
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
|
||||||
|
import Text.Pandoc.MIME (MimeType, getMimeType)
|
||||||
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
|
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
|
||||||
, posixSecondsToUTCTime
|
, posixSecondsToUTCTime
|
||||||
, POSIXTime )
|
, POSIXTime )
|
||||||
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
|
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
|
||||||
|
import Network.URI ( escapeURIString, nonStrictRelativeTo,
|
||||||
|
unEscapeString, parseURIReference, isAllowedInURI,
|
||||||
|
parseURI, URI(..) )
|
||||||
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
|
||||||
import Text.Pandoc.MIME (MimeType, getMimeType)
|
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
|
||||||
import Text.Pandoc.MediaBag (MediaBag)
|
|
||||||
import qualified Text.Pandoc.MediaBag as MB
|
import qualified Text.Pandoc.MediaBag as MB
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Lazy as BL
|
import qualified Data.ByteString.Lazy as BL
|
||||||
import qualified Control.Exception as E
|
|
||||||
import qualified System.Environment as IO (lookupEnv)
|
import qualified System.Environment as IO (lookupEnv)
|
||||||
import System.FilePath.Glob (match, compile)
|
import System.FilePath.Glob (match, compile)
|
||||||
import System.FilePath ((</>))
|
import System.FilePath ((</>), takeExtension, dropExtension)
|
||||||
import qualified System.FilePath.Glob as IO (glob)
|
import qualified System.FilePath.Glob as IO (glob)
|
||||||
import qualified System.Directory as IO (getModificationTime)
|
import qualified System.Directory as IO (getModificationTime)
|
||||||
import Control.Monad as M (fail)
|
import Control.Monad as M (fail)
|
||||||
|
@ -106,17 +108,12 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m)
|
||||||
getCurrentTimeZone :: m TimeZone
|
getCurrentTimeZone :: m TimeZone
|
||||||
newStdGen :: m StdGen
|
newStdGen :: m StdGen
|
||||||
newUniqueHash :: m Int
|
newUniqueHash :: m Int
|
||||||
|
openURL :: String -> m (B.ByteString, Maybe MimeType)
|
||||||
readFileLazy :: FilePath -> m BL.ByteString
|
readFileLazy :: FilePath -> m BL.ByteString
|
||||||
|
readFileStrict :: FilePath -> m B.ByteString
|
||||||
readDataFile :: Maybe FilePath
|
readDataFile :: Maybe FilePath
|
||||||
-> FilePath
|
-> FilePath
|
||||||
-> m B.ByteString
|
-> m B.ByteString
|
||||||
fetchItem :: Maybe String
|
|
||||||
-> String
|
|
||||||
-> m (Either E.SomeException (B.ByteString, Maybe MimeType))
|
|
||||||
fetchItem' :: MediaBag
|
|
||||||
-> Maybe String
|
|
||||||
-> String
|
|
||||||
-> m (Either E.SomeException (B.ByteString, Maybe MimeType))
|
|
||||||
glob :: String -> m [FilePath]
|
glob :: String -> m [FilePath]
|
||||||
getModificationTime :: FilePath -> m UTCTime
|
getModificationTime :: FilePath -> m UTCTime
|
||||||
getCommonState :: m CommonState
|
getCommonState :: m CommonState
|
||||||
|
@ -213,19 +210,28 @@ instance PandocMonad PandocIO where
|
||||||
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
|
||||||
newStdGen = liftIO IO.newStdGen
|
newStdGen = liftIO IO.newStdGen
|
||||||
newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
|
newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
|
||||||
|
openURL u = do
|
||||||
|
eitherRes <- liftIO $ (tryIOError $ IO.openURL u)
|
||||||
|
case eitherRes of
|
||||||
|
Right (Right res) -> return res
|
||||||
|
Right (Left _) -> throwError $ PandocFileReadError u
|
||||||
|
Left _ -> throwError $ PandocFileReadError u
|
||||||
readFileLazy s = do
|
readFileLazy s = do
|
||||||
eitherBS <- liftIO (tryIOError $ BL.readFile s)
|
eitherBS <- liftIO (tryIOError $ BL.readFile s)
|
||||||
case eitherBS of
|
case eitherBS of
|
||||||
Right bs -> return bs
|
Right bs -> return bs
|
||||||
Left _ -> throwError $ PandocFileReadError s
|
Left _ -> throwError $ PandocFileReadError s
|
||||||
|
readFileStrict s = do
|
||||||
|
eitherBS <- liftIO (tryIOError $ B.readFile s)
|
||||||
|
case eitherBS of
|
||||||
|
Right bs -> return bs
|
||||||
|
Left _ -> throwError $ PandocFileReadError s
|
||||||
-- TODO: Make this more sensitive to the different sorts of failure
|
-- TODO: Make this more sensitive to the different sorts of failure
|
||||||
readDataFile mfp fname = do
|
readDataFile mfp fname = do
|
||||||
eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname)
|
eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname)
|
||||||
case eitherBS of
|
case eitherBS of
|
||||||
Right bs -> return bs
|
Right bs -> return bs
|
||||||
Left _ -> throwError $ PandocFileReadError fname
|
Left _ -> throwError $ PandocFileReadError fname
|
||||||
fetchItem ms s = liftIO $ IO.fetchItem ms s
|
|
||||||
fetchItem' mb ms s = liftIO $ IO.fetchItem' mb ms s
|
|
||||||
glob = liftIO . IO.glob
|
glob = liftIO . IO.glob
|
||||||
getModificationTime fp = do
|
getModificationTime fp = do
|
||||||
eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp)
|
eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp)
|
||||||
|
@ -235,6 +241,64 @@ instance PandocMonad PandocIO where
|
||||||
getCommonState = PandocIO $ lift get
|
getCommonState = PandocIO $ lift get
|
||||||
putCommonState x = PandocIO $ lift $ put x
|
putCommonState x = PandocIO $ lift $ put x
|
||||||
|
|
||||||
|
|
||||||
|
-- | Specialized version of parseURIReference that disallows
|
||||||
|
-- single-letter schemes. Reason: these are usually windows absolute
|
||||||
|
-- paths.
|
||||||
|
parseURIReference' :: String -> Maybe URI
|
||||||
|
parseURIReference' s =
|
||||||
|
case parseURIReference s of
|
||||||
|
Just u
|
||||||
|
| length (uriScheme u) > 2 -> Just u
|
||||||
|
| null (uriScheme u) -> Just u -- protocol-relative
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
-- | Fetch an image or other item from the local filesystem or the net.
|
||||||
|
-- Returns raw content and maybe mime type.
|
||||||
|
fetchItem :: PandocMonad m
|
||||||
|
=> Maybe String
|
||||||
|
-> String
|
||||||
|
-> m (B.ByteString, Maybe MimeType)
|
||||||
|
fetchItem sourceURL s = do
|
||||||
|
mediabag <- getMediaBag
|
||||||
|
case lookupMedia s mediabag of
|
||||||
|
Just (mime, bs) -> return $ (BL.toStrict bs, Just mime)
|
||||||
|
Nothing ->
|
||||||
|
case (sourceURL >>= parseURIReference' .
|
||||||
|
ensureEscaped, ensureEscaped s) of
|
||||||
|
(Just u, s') -> -- try fetching from relative path at source
|
||||||
|
case parseURIReference' s' of
|
||||||
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
|
||||||
|
Nothing -> openURL s' -- will throw error
|
||||||
|
(Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
|
||||||
|
case parseURIReference' s' of
|
||||||
|
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
|
||||||
|
Nothing -> openURL s' -- will throw error
|
||||||
|
(Nothing, s') ->
|
||||||
|
case parseURI s' of -- requires absolute URI
|
||||||
|
-- We don't want to treat C:/ as a scheme:
|
||||||
|
Just u' | length (uriScheme u') > 2 -> openURL (show u')
|
||||||
|
Just u' | uriScheme u' == "file:" ->
|
||||||
|
readLocalFile $ dropWhile (=='/') (uriPath u')
|
||||||
|
_ -> readLocalFile fp -- get from local file system
|
||||||
|
where readLocalFile f = do
|
||||||
|
cont <- readFileStrict f
|
||||||
|
return (cont, mime)
|
||||||
|
httpcolon = URI{ uriScheme = "http:",
|
||||||
|
uriAuthority = Nothing,
|
||||||
|
uriPath = "",
|
||||||
|
uriQuery = "",
|
||||||
|
uriFragment = "" }
|
||||||
|
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
|
||||||
|
fp = unEscapeString $ dropFragmentAndQuery s
|
||||||
|
mime = case takeExtension fp of
|
||||||
|
".gz" -> getMimeType $ dropExtension fp
|
||||||
|
".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
|
||||||
|
x -> getMimeType x
|
||||||
|
ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
|
||||||
|
convertSlash '\\' = '/'
|
||||||
|
convertSlash x = x
|
||||||
|
|
||||||
data PureState = PureState { stStdGen :: StdGen
|
data PureState = PureState { stStdGen :: StdGen
|
||||||
, stWord8Store :: [Word8] -- should be
|
, stWord8Store :: [Word8] -- should be
|
||||||
-- inifinite,
|
-- inifinite,
|
||||||
|
@ -332,33 +396,29 @@ instance PandocMonad PandocPure where
|
||||||
modifyPureState $ \st -> st { stUniqStore = us }
|
modifyPureState $ \st -> st { stUniqStore = us }
|
||||||
return u
|
return u
|
||||||
_ -> M.fail "uniq store ran out of elements"
|
_ -> M.fail "uniq store ran out of elements"
|
||||||
|
openURL _ = undefined -- TODO
|
||||||
readFileLazy fp = do
|
readFileLazy fp = do
|
||||||
fps <- getsPureState stFiles
|
fps <- getsPureState stFiles
|
||||||
case infoFileContents <$> getFileInfo fp fps of
|
case infoFileContents <$> getFileInfo fp fps of
|
||||||
Just bs -> return (BL.fromStrict bs)
|
Just bs -> return (BL.fromStrict bs)
|
||||||
Nothing -> throwError $ PandocFileReadError fp
|
Nothing -> throwError $ PandocFileReadError fp
|
||||||
|
readFileStrict fp = do
|
||||||
|
fps <- getsPureState stFiles
|
||||||
|
case infoFileContents <$> getFileInfo fp fps of
|
||||||
|
Just bs -> return bs
|
||||||
|
Nothing -> throwError $ PandocFileReadError fp
|
||||||
readDataFile Nothing "reference.docx" = do
|
readDataFile Nothing "reference.docx" = do
|
||||||
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
|
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
|
||||||
readDataFile Nothing "reference.odt" = do
|
readDataFile Nothing "reference.odt" = do
|
||||||
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
|
(B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
|
||||||
readDataFile Nothing fname = do
|
readDataFile Nothing fname = do
|
||||||
let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
|
let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
|
||||||
BL.toStrict <$> (readFileLazy fname')
|
readFileStrict fname'
|
||||||
readDataFile (Just userDir) fname = do
|
readDataFile (Just userDir) fname = do
|
||||||
userDirFiles <- getsPureState stUserDataDir
|
userDirFiles <- getsPureState stUserDataDir
|
||||||
case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
|
case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
|
||||||
Just bs -> return bs
|
Just bs -> return bs
|
||||||
Nothing -> readDataFile Nothing fname
|
Nothing -> readDataFile Nothing fname
|
||||||
fetchItem _ fp = do
|
|
||||||
fps <- getsPureState stFiles
|
|
||||||
case infoFileContents <$> (getFileInfo fp fps) of
|
|
||||||
Just bs -> return (Right (bs, getMimeType fp))
|
|
||||||
Nothing -> return (Left $ E.toException $ PandocFileReadError fp)
|
|
||||||
|
|
||||||
fetchItem' media sourceUrl nm = do
|
|
||||||
case MB.lookupMedia nm media of
|
|
||||||
Nothing -> fetchItem sourceUrl nm
|
|
||||||
Just (mime, bs) -> return (Right (B.concat $ BL.toChunks bs, Just mime))
|
|
||||||
|
|
||||||
glob s = do
|
glob s = do
|
||||||
fontFiles <- getsPureState stFontFiles
|
fontFiles <- getsPureState stFontFiles
|
||||||
|
@ -379,10 +439,10 @@ instance PandocMonad m => PandocMonad (ParserT s st m) where
|
||||||
getCurrentTimeZone = lift getCurrentTimeZone
|
getCurrentTimeZone = lift getCurrentTimeZone
|
||||||
newStdGen = lift newStdGen
|
newStdGen = lift newStdGen
|
||||||
newUniqueHash = lift newUniqueHash
|
newUniqueHash = lift newUniqueHash
|
||||||
|
openURL = lift . openURL
|
||||||
readFileLazy = lift . readFileLazy
|
readFileLazy = lift . readFileLazy
|
||||||
|
readFileStrict = lift . readFileStrict
|
||||||
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
||||||
fetchItem media = lift . fetchItem media
|
|
||||||
fetchItem' media sourceUrl = lift . fetchItem' media sourceUrl
|
|
||||||
glob = lift . glob
|
glob = lift . glob
|
||||||
getModificationTime = lift . getModificationTime
|
getModificationTime = lift . getModificationTime
|
||||||
getCommonState = lift getCommonState
|
getCommonState = lift getCommonState
|
||||||
|
@ -394,10 +454,10 @@ instance PandocMonad m => PandocMonad (ReaderT r m) where
|
||||||
getCurrentTimeZone = lift getCurrentTimeZone
|
getCurrentTimeZone = lift getCurrentTimeZone
|
||||||
newStdGen = lift newStdGen
|
newStdGen = lift newStdGen
|
||||||
newUniqueHash = lift newUniqueHash
|
newUniqueHash = lift newUniqueHash
|
||||||
|
openURL = lift . openURL
|
||||||
readFileLazy = lift . readFileLazy
|
readFileLazy = lift . readFileLazy
|
||||||
|
readFileStrict = lift . readFileStrict
|
||||||
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
||||||
fetchItem media = lift . fetchItem media
|
|
||||||
fetchItem' media sourceUrl = lift . fetchItem' media sourceUrl
|
|
||||||
glob = lift . glob
|
glob = lift . glob
|
||||||
getModificationTime = lift . getModificationTime
|
getModificationTime = lift . getModificationTime
|
||||||
getCommonState = lift getCommonState
|
getCommonState = lift getCommonState
|
||||||
|
@ -409,10 +469,10 @@ instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
|
||||||
getCurrentTimeZone = lift getCurrentTimeZone
|
getCurrentTimeZone = lift getCurrentTimeZone
|
||||||
newStdGen = lift newStdGen
|
newStdGen = lift newStdGen
|
||||||
newUniqueHash = lift newUniqueHash
|
newUniqueHash = lift newUniqueHash
|
||||||
|
openURL = lift . openURL
|
||||||
readFileLazy = lift . readFileLazy
|
readFileLazy = lift . readFileLazy
|
||||||
|
readFileStrict = lift . readFileStrict
|
||||||
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
||||||
fetchItem media = lift . fetchItem media
|
|
||||||
fetchItem' media sourceUrl = lift . fetchItem' media sourceUrl
|
|
||||||
glob = lift . glob
|
glob = lift . glob
|
||||||
getModificationTime = lift . getModificationTime
|
getModificationTime = lift . getModificationTime
|
||||||
getCommonState = lift getCommonState
|
getCommonState = lift getCommonState
|
||||||
|
@ -424,10 +484,10 @@ instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
|
||||||
getCurrentTimeZone = lift getCurrentTimeZone
|
getCurrentTimeZone = lift getCurrentTimeZone
|
||||||
newStdGen = lift newStdGen
|
newStdGen = lift newStdGen
|
||||||
newUniqueHash = lift newUniqueHash
|
newUniqueHash = lift newUniqueHash
|
||||||
|
openURL = lift . openURL
|
||||||
readFileLazy = lift . readFileLazy
|
readFileLazy = lift . readFileLazy
|
||||||
|
readFileStrict = lift . readFileStrict
|
||||||
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
||||||
fetchItem media = lift . fetchItem media
|
|
||||||
fetchItem' media sourceUrl = lift . fetchItem' media sourceUrl
|
|
||||||
glob = lift . glob
|
glob = lift . glob
|
||||||
getModificationTime = lift . getModificationTime
|
getModificationTime = lift . getModificationTime
|
||||||
getCommonState = lift getCommonState
|
getCommonState = lift getCommonState
|
||||||
|
@ -439,10 +499,10 @@ instance PandocMonad m => PandocMonad (StateT st m) where
|
||||||
getCurrentTimeZone = lift getCurrentTimeZone
|
getCurrentTimeZone = lift getCurrentTimeZone
|
||||||
newStdGen = lift newStdGen
|
newStdGen = lift newStdGen
|
||||||
newUniqueHash = lift newUniqueHash
|
newUniqueHash = lift newUniqueHash
|
||||||
|
openURL = lift . openURL
|
||||||
readFileLazy = lift . readFileLazy
|
readFileLazy = lift . readFileLazy
|
||||||
|
readFileStrict = lift . readFileStrict
|
||||||
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
readDataFile mbuserdir = lift . readDataFile mbuserdir
|
||||||
fetchItem media = lift . fetchItem media
|
|
||||||
fetchItem' media sourceUrl = lift . fetchItem' media sourceUrl
|
|
||||||
glob = lift . glob
|
glob = lift . glob
|
||||||
getModificationTime = lift . getModificationTime
|
getModificationTime = lift . getModificationTime
|
||||||
getCommonState = lift getCommonState
|
getCommonState = lift getCommonState
|
||||||
|
|
Loading…
Add table
Reference in a new issue