diff --git a/pandoc.cabal b/pandoc.cabal index fde76dee4..481e5d076 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -336,6 +336,7 @@ library http-client >= 0.4.30 && < 0.6, http-client-tls >= 0.2.4 && < 0.4, http-types >= 0.8 && < 0.10, + case-insensitive >= 1.2 && < 1.3, csv-conduit >= 0.6 && < 0.7 if os(windows) cpp-options: -D_WINDOWS diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 451d430ca..65f8f33d0 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -1,10 +1,12 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE PatternGuards #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} #if MIN_VERSION_base(4,8,0) #else {-# LANGUAGE OverlappingInstances #-} @@ -97,9 +99,10 @@ import Prelude hiding (readFile) import System.Random (StdGen, next, mkStdGen) import qualified System.Random as IO (newStdGen) import Codec.Archive.Zip +import qualified Data.CaseInsensitive as CI import Data.Unique (hashUnique) +import Data.List (stripPrefix) import qualified Data.Unique as IO (newUnique) -import qualified Text.Pandoc.Shared as Shared import qualified Text.Pandoc.UTF8 as UTF8 import qualified System.Directory as Directory import Text.Pandoc.Compat.Time (UTCTime) @@ -115,9 +118,21 @@ import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds , posixSecondsToUTCTime , POSIXTime ) import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc) +import Data.ByteString.Base64 (decodeLenient) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) +import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, + Request(port,host,requestHeaders)) +import Network.HTTP.Client (parseRequest) +import Network.HTTP.Client (newManager) +import Network.HTTP.Client.Internal (addProxy) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import System.Environment (getEnv) +import Network.HTTP.Types.Header ( hContentType ) +import Network (withSocketsDo) +import Data.ByteString.Lazy (toChunks) +import qualified Control.Exception as E import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) import Text.Pandoc.Walk (walkM, walk) @@ -456,12 +471,34 @@ instance PandocMonad PandocIO where getCurrentTimeZone = liftIO IO.getCurrentTimeZone newStdGen = liftIO IO.newStdGen newUniqueHash = hashUnique <$> liftIO IO.newUnique - openURL u = do - report $ Fetching u - res <- liftIOError Shared.openURL u - case res of - Right r -> return r - Left e -> throwError $ PandocHttpError u e + + openURL u + | Just u'' <- stripPrefix "data:" u = do + let mime = takeWhile (/=',') u'' + let contents = UTF8.fromString $ + unEscapeString $ drop 1 $ dropWhile (/=',') u'' + return (decodeLenient contents, Just mime) + | otherwise = do + let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v) + customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders + report $ Fetching u + res <- liftIO $ E.try $ withSocketsDo $ do + let parseReq = parseRequest + proxy <- tryIOError (getEnv "http_proxy") + let addProxy' x = case proxy of + Left _ -> return x + Right pr -> parseReq pr >>= \r -> + return (addProxy (host r) (port r) x) + req <- parseReq u >>= addProxy' + let req' = req{requestHeaders = customHeaders ++ requestHeaders req} + resp <- newManager tlsManagerSettings >>= httpLbs req' + return (B.concat $ toChunks $ responseBody resp, + UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) + + case res of + Right r -> return r + Left e -> throwError $ PandocHttpError u e + readFileLazy s = liftIOError BL.readFile s readFileStrict s = liftIOError B.readFile s diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index f0c2f172e..4c5f464d8 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -76,7 +76,6 @@ module Text.Pandoc.Shared ( renderTags', -- * File handling inDirectory, - openURL, collapseFilePath, filteredFilesFromArchive, -- * URI handling @@ -98,19 +97,17 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, stripPrefix, intercalate ) import Data.Maybe (mapMaybe) import Data.Version ( showVersion ) import qualified Data.Map as M -import Network.URI ( URI(uriScheme), escapeURIString, unEscapeString, parseURI ) +import Network.URI ( URI(uriScheme), escapeURIString, parseURI ) import qualified Data.Set as Set import System.Directory import System.FilePath (splitDirectories, isPathSeparator) import qualified System.FilePath.Posix as Posix -import Text.Pandoc.MIME (MimeType) import Data.Generics (Typeable, Data) import qualified Control.Monad.State.Strict as S import qualified Control.Exception as E @@ -118,33 +115,16 @@ import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Compat.Time -import System.IO.Error import System.IO.Temp import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import Data.Monoid ((<>)) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as B8 -import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) import qualified Data.Text as T -import Data.ByteString.Lazy (toChunks) import qualified Data.ByteString.Lazy as BL import Paths_pandoc (version) - import Codec.Archive.Zip -import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, - Request(port,host,requestHeaders), - HttpException) -import Network.HTTP.Client (parseRequest) -import Network.HTTP.Client (newManager) -import Network.HTTP.Client.Internal (addProxy) -import Network.HTTP.Client.TLS (tlsManagerSettings) -import System.Environment (getEnv) -import Network.HTTP.Types.Header ( hContentType, hUserAgent) -import Network (withSocketsDo) - -- | Version number of pandoc library. pandocVersion :: String pandocVersion = showVersion version @@ -606,36 +586,6 @@ inDirectory path action = E.bracket setCurrentDirectory (const $ setCurrentDirectory path >> action) --- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (Either HttpException (BS.ByteString, Maybe MimeType)) -openURL u - | Just u'' <- stripPrefix "data:" u = - let mime = takeWhile (/=',') u'' - contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' - in return $ Right (decodeLenient contents, Just mime) - | otherwise = E.try $ withSocketsDo $ do - let parseReq = parseRequest - (proxy :: Either IOError String) <- - tryIOError $ getEnv "http_proxy" - (useragent :: Either IOError String) <- - tryIOError $ getEnv "USER_AGENT" - req <- parseReq u - req' <- case proxy of - Left _ -> return req - Right pr -> (parseReq pr >>= \r -> - return $ addProxy (host r) (port r) req) - `mplus` return req - req'' <- case useragent of - Left _ -> return req' - Right ua -> do - let headers = requestHeaders req' - let useragentheader = (hUserAgent, B8.pack ua) - let headers' = useragentheader:headers - return $ req' {requestHeaders = headers'} - resp <- newManager tlsManagerSettings >>= httpLbs req'' - return (BS.concat $ toChunks $ responseBody resp, - UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) - -- -- Error reporting --