Remove openURL from Shared (API change).

Now all the guts of openURL have been put into openURL from
Class.  openURL is now sensitive to stRequestHeaders in CommonState
and will add these custom headers when making a request.
It no longer looks at the USER_AGENT environment variable,
since you can now set the `User-Agent` header directly.
This commit is contained in:
John MacFarlane 2017-10-15 22:10:13 -07:00
parent a1f7a4263f
commit 2f66d57616
3 changed files with 46 additions and 58 deletions

View file

@ -336,6 +336,7 @@ library
http-client >= 0.4.30 && < 0.6,
http-client-tls >= 0.2.4 && < 0.4,
http-types >= 0.8 && < 0.10,
case-insensitive >= 1.2 && < 1.3,
csv-conduit >= 0.6 && < 0.7
if os(windows)
cpp-options: -D_WINDOWS

View file

@ -1,10 +1,12 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
#if MIN_VERSION_base(4,8,0)
#else
{-# LANGUAGE OverlappingInstances #-}
@ -97,9 +99,10 @@ import Prelude hiding (readFile)
import System.Random (StdGen, next, mkStdGen)
import qualified System.Random as IO (newStdGen)
import Codec.Archive.Zip
import qualified Data.CaseInsensitive as CI
import Data.Unique (hashUnique)
import Data.List (stripPrefix)
import qualified Data.Unique as IO (newUnique)
import qualified Text.Pandoc.Shared as Shared
import qualified Text.Pandoc.UTF8 as UTF8
import qualified System.Directory as Directory
import Text.Pandoc.Compat.Time (UTCTime)
@ -115,9 +118,21 @@ import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
, posixSecondsToUTCTime
, POSIXTime )
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
import Data.ByteString.Base64 (decodeLenient)
import Network.URI ( escapeURIString, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI,
parseURI, URI(..) )
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
Request(port,host,requestHeaders))
import Network.HTTP.Client (parseRequest)
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType )
import Network (withSocketsDo)
import Data.ByteString.Lazy (toChunks)
import qualified Control.Exception as E
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
import Text.Pandoc.Walk (walkM, walk)
@ -456,12 +471,34 @@ instance PandocMonad PandocIO where
getCurrentTimeZone = liftIO IO.getCurrentTimeZone
newStdGen = liftIO IO.newStdGen
newUniqueHash = hashUnique <$> liftIO IO.newUnique
openURL u = do
report $ Fetching u
res <- liftIOError Shared.openURL u
case res of
Right r -> return r
Left e -> throwError $ PandocHttpError u e
openURL u
| Just u'' <- stripPrefix "data:" u = do
let mime = takeWhile (/=',') u''
let contents = UTF8.fromString $
unEscapeString $ drop 1 $ dropWhile (/=',') u''
return (decodeLenient contents, Just mime)
| otherwise = do
let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v)
customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
report $ Fetching u
res <- liftIO $ E.try $ withSocketsDo $ do
let parseReq = parseRequest
proxy <- tryIOError (getEnv "http_proxy")
let addProxy' x = case proxy of
Left _ -> return x
Right pr -> parseReq pr >>= \r ->
return (addProxy (host r) (port r) x)
req <- parseReq u >>= addProxy'
let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
resp <- newManager tlsManagerSettings >>= httpLbs req'
return (B.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
case res of
Right r -> return r
Left e -> throwError $ PandocHttpError u e
readFileLazy s = liftIOError BL.readFile s
readFileStrict s = liftIOError B.readFile s

View file

@ -76,7 +76,6 @@ module Text.Pandoc.Shared (
renderTags',
-- * File handling
inDirectory,
openURL,
collapseFilePath,
filteredFilesFromArchive,
-- * URI handling
@ -98,19 +97,17 @@ import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, stripPrefix, intercalate )
import Data.Maybe (mapMaybe)
import Data.Version ( showVersion )
import qualified Data.Map as M
import Network.URI ( URI(uriScheme), escapeURIString, unEscapeString, parseURI )
import Network.URI ( URI(uriScheme), escapeURIString, parseURI )
import qualified Data.Set as Set
import System.Directory
import System.FilePath (splitDirectories, isPathSeparator)
import qualified System.FilePath.Posix as Posix
import Text.Pandoc.MIME (MimeType)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State.Strict as S
import qualified Control.Exception as E
@ -118,33 +115,16 @@ import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Compat.Time
import System.IO.Error
import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import Data.Monoid ((<>))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T
import Data.ByteString.Lazy (toChunks)
import qualified Data.ByteString.Lazy as BL
import Paths_pandoc (version)
import Codec.Archive.Zip
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
Request(port,host,requestHeaders),
HttpException)
import Network.HTTP.Client (parseRequest)
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType, hUserAgent)
import Network (withSocketsDo)
-- | Version number of pandoc library.
pandocVersion :: String
pandocVersion = showVersion version
@ -606,36 +586,6 @@ inDirectory path action = E.bracket
setCurrentDirectory
(const $ setCurrentDirectory path >> action)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either HttpException (BS.ByteString, Maybe MimeType))
openURL u
| Just u'' <- stripPrefix "data:" u =
let mime = takeWhile (/=',') u''
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
in return $ Right (decodeLenient contents, Just mime)
| otherwise = E.try $ withSocketsDo $ do
let parseReq = parseRequest
(proxy :: Either IOError String) <-
tryIOError $ getEnv "http_proxy"
(useragent :: Either IOError String) <-
tryIOError $ getEnv "USER_AGENT"
req <- parseReq u
req' <- case proxy of
Left _ -> return req
Right pr -> (parseReq pr >>= \r ->
return $ addProxy (host r) (port r) req)
`mplus` return req
req'' <- case useragent of
Left _ -> return req'
Right ua -> do
let headers = requestHeaders req'
let useragentheader = (hUserAgent, B8.pack ua)
let headers' = useragentheader:headers
return $ req' {requestHeaders = headers'}
resp <- newManager tlsManagerSettings >>= httpLbs req''
return (BS.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
--
-- Error reporting
--