Allow user to specify User-Agent (#3421)
This commit enables users to specify the User-Agent header that pandoc sends when it requests a document from a URL. This is done by setting the USER_AGENT environment variable. For instance, one can do:

    USER_AGENT="..." ./pandoc -f html -t markdown http://example.com

Signed-off-by: Thenaesh Elango <thenaeshelango@gmail.com>
This commit is contained in:
parent
2c3eeca8df
commit
942189056d
2 changed files with 16 additions and 3 deletions
|
@ -120,6 +120,11 @@ pandoc will fetch the content using HTTP:
|
||||||
|
|
||||||
pandoc -f html -t markdown http://www.fsf.org
|
pandoc -f html -t markdown http://www.fsf.org
|
||||||
|
|
||||||
|
It is possible to supply a custom User-Agent string when requesting a
|
||||||
|
document from a URL, by setting the `USER_AGENT` environment variable:
|
||||||
|
|
||||||
|
USER_AGENT="Mozilla/5.0" pandoc -f html -t markdown http://www.fsf.org
|
||||||
|
|
||||||
If multiple input files are given, `pandoc` will concatenate them all (with
|
If multiple input files are given, `pandoc` will concatenate them all (with
|
||||||
blank lines between them) before parsing. This feature is disabled for
|
blank lines between them) before parsing. This feature is disabled for
|
||||||
binary input formats such as `EPUB`, `odt`, and `docx`.
|
binary input formats such as `EPUB`, `odt`, and `docx`.
|
||||||
|
|
|
@ -146,13 +146,13 @@ import Paths_pandoc (getDataFileName)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HTTP_CLIENT
|
#ifdef HTTP_CLIENT
|
||||||
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
|
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
|
||||||
Request(port,host))
|
Request(port,host,requestHeaders))
|
||||||
import Network.HTTP.Client (parseRequest)
|
import Network.HTTP.Client (parseRequest)
|
||||||
import Network.HTTP.Client (newManager)
|
import Network.HTTP.Client (newManager)
|
||||||
import Network.HTTP.Client.Internal (addProxy)
|
import Network.HTTP.Client.Internal (addProxy)
|
||||||
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
||||||
import System.Environment (getEnv)
|
import System.Environment (getEnv)
|
||||||
import Network.HTTP.Types.Header ( hContentType)
|
import Network.HTTP.Types.Header ( hContentType, hUserAgent)
|
||||||
import Network (withSocketsDo)
|
import Network (withSocketsDo)
|
||||||
#else
|
#else
|
||||||
import Network.URI (parseURI)
|
import Network.URI (parseURI)
|
||||||
|
@ -742,13 +742,21 @@ openURL u
|
||||||
| otherwise = withSocketsDo $ E.try $ do
|
| otherwise = withSocketsDo $ E.try $ do
|
||||||
let parseReq = parseRequest
|
let parseReq = parseRequest
|
||||||
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
|
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
|
||||||
|
(useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT"
|
||||||
req <- parseReq u
|
req <- parseReq u
|
||||||
req' <- case proxy of
|
req' <- case proxy of
|
||||||
Left _ -> return req
|
Left _ -> return req
|
||||||
Right pr -> (parseReq pr >>= \r ->
|
Right pr -> (parseReq pr >>= \r ->
|
||||||
return $ addProxy (host r) (port r) req)
|
return $ addProxy (host r) (port r) req)
|
||||||
`mplus` return req
|
`mplus` return req
|
||||||
resp <- newManager tlsManagerSettings >>= httpLbs req'
|
req'' <- case useragent of
|
||||||
|
Left _ -> return req'
|
||||||
|
Right ua -> do
|
||||||
|
let headers = requestHeaders req'
|
||||||
|
let useragentheader = (hUserAgent, B8.pack ua)
|
||||||
|
let headers' = useragentheader:headers
|
||||||
|
return $ req' {requestHeaders = headers'}
|
||||||
|
resp <- newManager tlsManagerSettings >>= httpLbs req''
|
||||||
return (BS.concat $ toChunks $ responseBody resp,
|
return (BS.concat $ toChunks $ responseBody resp,
|
||||||
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
|
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Add table
Reference in a new issue