Allow user to specify User-Agent (#3421)
This commit enables users to specify the User-Agent header that pandoc sends when it requests a document from a URL. This is done by setting the `USER_AGENT` environment variable. For instance, one can run: `USER_AGENT="..." ./pandoc -f html -t markdown http://example.com`. Signed-off-by: Thenaesh Elango <thenaeshelango@gmail.com>
This commit is contained in:
parent
2c3eeca8df
commit
942189056d
2 changed files with 16 additions and 3 deletions
|
@@ -120,6 +120,11 @@ pandoc will fetch the content using HTTP:
|
|||
|
||||
pandoc -f html -t markdown http://www.fsf.org
|
||||
|
||||
It is possible to supply a custom User-Agent string when requesting a
|
||||
document from a URL, by setting the `USER_AGENT` environment variable:
|
||||
|
||||
USER_AGENT="Mozilla/5.0" pandoc -f html -t markdown http://www.fsf.org
|
||||
|
||||
If multiple input files are given, `pandoc` will concatenate them all (with
|
||||
blank lines between them) before parsing. This feature is disabled for
|
||||
binary input formats such as `EPUB`, `odt`, and `docx`.
|
||||
|
|
|
@@ -146,13 +146,13 @@ import Paths_pandoc (getDataFileName)
|
|||
#endif
|
||||
#ifdef HTTP_CLIENT
|
||||
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
|
||||
Request(port,host))
|
||||
Request(port,host,requestHeaders))
|
||||
import Network.HTTP.Client (parseRequest)
|
||||
import Network.HTTP.Client (newManager)
|
||||
import Network.HTTP.Client.Internal (addProxy)
|
||||
import Network.HTTP.Client.TLS (tlsManagerSettings)
|
||||
import System.Environment (getEnv)
|
||||
import Network.HTTP.Types.Header ( hContentType)
|
||||
import Network.HTTP.Types.Header ( hContentType, hUserAgent)
|
||||
import Network (withSocketsDo)
|
||||
#else
|
||||
import Network.URI (parseURI)
|
||||
|
@@ -742,13 +742,21 @@ openURL u
|
|||
| otherwise = withSocketsDo $ E.try $ do
|
||||
let parseReq = parseRequest
|
||||
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
|
||||
(useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT"
|
||||
req <- parseReq u
|
||||
req' <- case proxy of
|
||||
Left _ -> return req
|
||||
Right pr -> (parseReq pr >>= \r ->
|
||||
return $ addProxy (host r) (port r) req)
|
||||
`mplus` return req
|
||||
resp <- newManager tlsManagerSettings >>= httpLbs req'
|
||||
req'' <- case useragent of
|
||||
Left _ -> return req'
|
||||
Right ua -> do
|
||||
let headers = requestHeaders req'
|
||||
let useragentheader = (hUserAgent, B8.pack ua)
|
||||
let headers' = useragentheader:headers
|
||||
return $ req' {requestHeaders = headers'}
|
||||
resp <- newManager tlsManagerSettings >>= httpLbs req''
|
||||
return (BS.concat $ toChunks $ responseBody resp,
|
||||
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
|
||||
#else
|
||||
|
|
Loading…
Reference in a new issue