Allow user to specify User-Agent (#3421)

This commit enables users to specify the User-Agent
header used when pandoc requests a document from
a URL. This is done by setting the USER_AGENT environment variable.
For instance, one can do:
USER_AGENT="..." ./pandoc -f html -t markdown http://example.com

Signed-off-by: Thenaesh Elango <thenaeshelango@gmail.com>
This commit is contained in:
Thenaesh Elango 2017-02-05 18:28:39 +08:00 committed by John MacFarlane
parent 2c3eeca8df
commit 942189056d
2 changed files with 16 additions and 3 deletions

View file

@ -120,6 +120,11 @@ pandoc will fetch the content using HTTP:
pandoc -f html -t markdown http://www.fsf.org
It is possible to supply a custom User-Agent string when requesting a
document from a URL by setting the `USER_AGENT` environment variable:
USER_AGENT="Mozilla/5.0" pandoc -f html -t markdown http://www.fsf.org
If multiple input files are given, `pandoc` will concatenate them all (with
blank lines between them) before parsing. This feature is disabled for
binary input formats such as `EPUB`, `odt`, and `docx`.

View file

@ -146,13 +146,13 @@ import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CLIENT
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
Request(port,host))
Request(port,host,requestHeaders))
import Network.HTTP.Client (parseRequest)
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType)
import Network.HTTP.Types.Header ( hContentType, hUserAgent)
import Network (withSocketsDo)
#else
import Network.URI (parseURI)
@ -742,13 +742,21 @@ openURL u
| otherwise = withSocketsDo $ E.try $ do
let parseReq = parseRequest
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
(useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT"
req <- parseReq u
req' <- case proxy of
Left _ -> return req
Right pr -> (parseReq pr >>= \r ->
return $ addProxy (host r) (port r) req)
`mplus` return req
resp <- newManager tlsManagerSettings >>= httpLbs req'
req'' <- case useragent of
Left _ -> return req'
Right ua -> do
let headers = requestHeaders req'
let useragentheader = (hUserAgent, B8.pack ua)
let headers' = useragentheader:headers
return $ req' {requestHeaders = headers'}
resp <- newManager tlsManagerSettings >>= httpLbs req''
return (BS.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else