App: added --request-header option.

This commit is contained in:
John MacFarlane 2017-10-15 22:11:43 -07:00
parent 2f66d57616
commit d8804f4747
2 changed files with 28 additions and 11 deletions

View file

@ -130,10 +130,11 @@ pandoc will fetch the content using HTTP:
pandoc -f html -t markdown http://www.fsf.org
It is possible to supply a custom User-Agent string when requesting a
document from a URL, by setting an environment variable:
It is possible to supply a custom User-Agent string or other
header when requesting a document from a URL:
USER_AGENT="Mozilla/5.0" pandoc -f html -t markdown http://www.fsf.org
pandoc -f html -t markdown --request-header User-Agent:"Mozilla/5.0" \
http://www.fsf.org
If multiple input files are given, `pandoc` will concatenate them all (with
blank lines between them) before parsing. This feature is disabled for
@ -728,6 +729,13 @@ General writer options
`--resource-path=.:test` will search the working directory
and the `test` subdirectory, in that order.
`--request-header=`*NAME*`:`*VAL*
: Set the request header *NAME* to the value *VAL* when making
HTTP requests (for example, when a URL is given on the
command line, or when resources used in a document must be
downloaded).
Options affecting specific writers
----------------------------------

View file

@ -75,9 +75,9 @@ import qualified System.IO as IO (Newline (..))
import Text.Pandoc
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog,
setResourcePath, setTrace, report,
setResourcePath, setTrace, report, setRequestHeader,
setUserDataDir, readFileStrict, readDataFile,
readDefaultDataFile, setTranslations,
readDefaultDataFile, setTranslations, openURL,
setInputFiles, setOutputFile)
import Text.Pandoc.Highlighting (highlightingStyles)
import Text.Pandoc.BCP47 (parseBCP47, Lang(..))
@ -86,7 +86,7 @@ import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Process (pipeProcess)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (headerShift, isURI, openURL, ordNub,
import Text.Pandoc.Shared (headerShift, isURI, ordNub,
safeRead, tabFilter, eastAsianLineBreakFilter)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (toEntities)
@ -491,7 +491,10 @@ convertWithOpts opts = do
when (readerName == "markdown_github" ||
writerName == "markdown_github") $
report $ Deprecated "markdown_github" "Use gfm instead."
setResourcePath (optResourcePath opts)
mapM_ (\(n,v) -> setRequestHeader n v) (optRequestHeaders opts)
doc <- sourceToDoc sources >>=
( (if isJust (optExtractMedia opts)
then fillMediaBag
@ -641,6 +644,7 @@ data Opt = Opt
, optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
, optResourcePath :: [FilePath] -- ^ Path to search for images etc
, optRequestHeaders :: [(String, String)] -- ^ Headers for HTTP requests
, optEol :: LineEnding -- ^ Style of line-endings to use
, optStripComments :: Bool -- ^ Skip HTML comments
} deriving (Generic, Show)
@ -716,6 +720,7 @@ defaultOpts = Opt
, optIncludeAfterBody = []
, optIncludeInHeader = []
, optResourcePath = ["."]
, optRequestHeaders = []
, optEol = Native
, optStripComments = False
}
@ -863,11 +868,7 @@ readSource src = case parseURI src of
BS.readFile src
readURI :: FilePath -> PandocIO Text
readURI src = do
res <- liftIO $ openURL src
case res of
Left e -> throwError $ PandocHttpError src e
Right (contents, _) -> return $ UTF8.toText contents
readURI src = UTF8.toText . fst <$> openURL src
readFile' :: MonadIO m => FilePath -> m B.ByteString
readFile' "-" = liftIO B.getContents
@ -1161,6 +1162,14 @@ options =
"SEARCHPATH")
"" -- "Paths to search for images and other resources"
, Option "" ["request-header"]
(ReqArg
(\arg opt -> do
let (key, val) = splitField arg
return opt{ optRequestHeaders =
(key, val) : optRequestHeaders opt })
"NAME:VALUE")
""
, Option "" ["self-contained"]
(NoArg