From 47a16065c44eb5028ca9b9e86993fe880ef2a37d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 6 Feb 2017 23:33:23 +0100
Subject: [PATCH] Removed --parse-raw and readerParseRaw.

These were confusing.

Now we rely on the +raw_tex or +raw_html extension with latex
or html input.

Thus, instead of

    --parse-raw -f latex

we use

    -f latex+raw_tex

and instead of

     --parse-raw -f html

we use

    -f html+raw_html
---
 MANUAL.txt                             | 11 ---------
 src/Text/Pandoc.hs                     |  1 +
 src/Text/Pandoc/App.hs                 | 33 +++++++++++---------------
 src/Text/Pandoc/Options.hs             |  2 --
 src/Text/Pandoc/Readers/EPUB.hs        |  3 ++-
 src/Text/Pandoc/Readers/HTML.hs        | 14 +++++------
 src/Text/Pandoc/Readers/LaTeX.hs       | 19 +++++++++------
 src/Text/Pandoc/Readers/Markdown.hs    |  4 ----
 src/Text/Pandoc/Readers/Org/Inlines.hs |  3 ++-
 src/Text/Pandoc/Readers/Textile.hs     |  6 -----
 test/Tests/Old.hs                      |  2 +-
 test/command/parse-raw.md              | 27 +++++++++++++++++++++
 12 files changed, 66 insertions(+), 59 deletions(-)
 create mode 100644 test/command/parse-raw.md

diff --git a/MANUAL.txt b/MANUAL.txt
index 33aa51abe..36bc8e5ac 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -391,17 +391,6 @@ General options
 Reader options
 --------------
 
-`-R`, `--parse-raw`
-
-:   Parse untranslatable HTML codes and LaTeX environments as raw HTML
-    or LaTeX, instead of ignoring them.  Affects only HTML and LaTeX
-    input. Raw HTML can be printed in Markdown, reStructuredText, Emacs Org
-    mode, HTML, Slidy, Slideous, DZSlides, reveal.js, and S5 output; raw LaTeX
-    can be printed in Markdown, reStructuredText, Emacs Org mode, LaTeX, and
-    ConTeXt output.  The default is for the readers to omit untranslatable
-    HTML codes and LaTeX environments.  (The LaTeX reader does pass through
-    untranslatable LaTeX *commands*, even if `-R` is not specified.)
-
 `--base-header-level=`*NUMBER*
 
 :   Specify the base level for headers (defaults to 1).
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 920bc9655..1a0bbc4ab 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -336,6 +336,7 @@ getDefaultExtensions "context"         = extensionsFromList
 getDefaultExtensions "textile"         = extensionsFromList
                                           [Ext_old_dashes,
                                            Ext_smart,
+                                           Ext_raw_html,
                                            Ext_auto_identifiers]
 getDefaultExtensions _                 = extensionsFromList
                                           [Ext_auto_identifiers]
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 637d3fe49..e99767f0b 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -90,7 +90,7 @@ parseOptions options' defaults = do
   let (actions, args, unrecognizedOpts, errors) =
            getOpt' Permute options' rawArgs
 
-  let unknownOptionErrors = foldr handelUnrecognizedOption [] unrecognizedOpts
+  let unknownOptionErrors = foldr handleUnrecognizedOption [] unrecognizedOpts
 
   unless (null errors && null unknownOptionErrors) $
      err 2 $ concat errors ++ unlines unknownOptionErrors ++
@@ -273,7 +273,6 @@ convertWithOpts opts = do
                                 _ -> Nothing
 
   let readerOpts = def{ readerStandalone = standalone
-                      , readerParseRaw = optParseRaw opts
                       , readerColumns = optColumns opts
                       , readerTabStop = optTabStop opts
                       , readerIndentedCodeClasses = optIndentedCodeClasses opts
@@ -463,7 +462,6 @@ data Opt = Opt
     , optStandalone        :: Bool    -- ^ Include header, footer
     , optReader            :: Maybe String  -- ^ Reader format
     , optWriter            :: Maybe String  -- ^ Writer format
-    , optParseRaw          :: Bool    -- ^ Parse unconvertable HTML and TeX
     , optTableOfContents   :: Bool    -- ^ Include table of contents
     , optBaseHeaderLevel   :: Int     -- ^ Base header level
     , optTemplate          :: Maybe FilePath  -- ^ Custom template
@@ -529,7 +527,6 @@ defaultOpts = Opt
     , optStandalone            = False
     , optReader                = Nothing
     , optWriter                = Nothing
-    , optParseRaw              = False
     , optTableOfContents       = False
     , optBaseHeaderLevel       = 1
     , optTemplate              = Nothing
@@ -777,11 +774,6 @@ options =
                  "DIRECTORY") -- "Directory containing pandoc data files."
                 ""
 
-    , Option "R" ["parse-raw"]
-                 (NoArg
-                  (\opt -> return opt { optParseRaw = True }))
-                 "" -- "Parse untranslatable HTML codes and LaTeX environments as raw"
-
     , Option "" ["base-header-level"]
                  (ReqArg
                   (\arg opt ->
@@ -1403,22 +1395,25 @@ compileInfo =
   "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++
   VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting
 
-handelUnrecognizedOption :: String -> [String] -> [String]
-handelUnrecognizedOption "--smart" =
-  (("--smart has been removed.  Use +smart or -smart extension instead.\n" ++
+handleUnrecognizedOption :: String -> [String] -> [String]
+handleUnrecognizedOption "--smart" =
+  (("--smart/-S has been removed.  Use +smart or -smart extension instead.\n" ++
     "For example: pandoc -f markdown+smart -t markdown-smart.") :)
-handelUnrecognizedOption "-S" = handelUnrecognizedOption "--smart"
-handelUnrecognizedOption "--old-dashes" =
+handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart"
+handleUnrecognizedOption "--old-dashes" =
   ("--old-dashes has been removed.  Use +old_dashes extension instead." :)
-handelUnrecognizedOption "--no-wrap" =
+handleUnrecognizedOption "--no-wrap" =
   ("--no-wrap has been removed.  Use --wrap=none instead." :)
-handelUnrecognizedOption "--chapters" =
+handleUnrecognizedOption "--chapters" =
   ("--chapters has been removed. Use --top-level-division=chapter instead." :)
-handelUnrecognizedOption "--reference-docx" =
+handleUnrecognizedOption "--reference-docx" =
   ("--reference-docx has been removed. Use --reference-doc instead." :)
-handelUnrecognizedOption "--reference-odt" =
+handleUnrecognizedOption "--reference-odt" =
   ("--reference-odt has been removed. Use --reference-doc instead." :)
-handelUnrecognizedOption x =
+handleUnrecognizedOption "--parse-raw" =
+  (("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n") :)
+handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
+handleUnrecognizedOption x =
   (("Unknown option " ++ x ++ ".") :)
 
 uppercaseFirstLetter :: String -> String
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 38c083dfd..2e11a64d0 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -55,7 +55,6 @@ import GHC.Generics (Generic)
 data ReaderOptions = ReaderOptions{
          readerExtensions      :: Extensions  -- ^ Syntax extensions
        , readerStandalone      :: Bool -- ^ Standalone document with header
-       , readerParseRaw        :: Bool -- ^ Parse raw HTML, LaTeX
        , readerColumns         :: Int  -- ^ Number of columns in terminal
        , readerTabStop         :: Int  -- ^ Tab stop
        , readerApplyMacros     :: Bool -- ^ Apply macros to TeX math
@@ -69,7 +68,6 @@ instance Default ReaderOptions
   where def = ReaderOptions{
                  readerExtensions            = emptyExtensions
                , readerStandalone            = False
-               , readerParseRaw              = False
                , readerColumns               = 80
                , readerTabStop               = 4
                , readerApplyMacros           = True
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index f24adb5b1..851d4771f 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -13,6 +13,7 @@ import Text.Pandoc.Definition hiding (Attr)
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Walk (walk, query)
 import Text.Pandoc.Options ( ReaderOptions(..), Verbosity(..))
+import Text.Pandoc.Extensions (enableExtension, Extension(Ext_raw_html))
 import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
 import Network.URI (unEscapeString)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -66,7 +67,7 @@ archiveToEPUB os archive = do
   P.setMediaBag $ fetchImages (M.elems items) root archive ast
   return ast
   where
-    os' = os {readerParseRaw = True}
+    os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)}
     parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
     parseSpineElem (normalise -> r) (normalise -> path, mime) = do
       report DEBUG ("parseSpineElem called with path " ++ show path)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0bb837ba9..219ee81b6 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,9 +45,9 @@ import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
 import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
                           , escapeURI, safeRead )
-import Text.Pandoc.Options (ReaderOptions(readerParseRaw),
-                            Verbosity(..), Extension (Ext_epub_html_exts,
-                               Ext_native_divs, Ext_native_spans))
+import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
+                               Verbosity(..), Extension (Ext_epub_html_exts,
+                               Ext_raw_html, Ext_native_divs, Ext_native_spans))
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Walk
 import qualified Data.Map as M
@@ -367,8 +367,8 @@ pDiv = try $ do
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
   raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
-  parseRaw <- getOption readerParseRaw
-  if parseRaw && not (null raw)
+  exts <- getOption readerExtensions
+  if extensionEnabled Ext_raw_html exts && not (null raw)
      then return $ B.rawBlock "html" raw
      else return mempty
 
@@ -690,8 +690,8 @@ pRawHtmlInline = do
             <|> if inplain
                    then pSatisfy (not . isBlockTag)
                    else pSatisfy isInlineTag
-  parseRaw <- getOption readerParseRaw
-  if parseRaw
+  exts <- getOption readerExtensions
+  if extensionEnabled Ext_raw_html exts
      then return $ B.rawInline "html" $ renderTags' [result]
      else return mempty
 
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 86ff2b83a..ea284efef 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -303,14 +303,18 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
   where optargs = skipopts *> skipMany (try $ optional sp *> braced)
         contseq = '\\':name
         doraw = (rawInline "latex" . (contseq ++) . snd) <$>
-                 (getOption readerParseRaw >>= guard >> withRaw optargs)
+                 (do exts <- getOption readerExtensions
+                     guard $ extensionEnabled Ext_raw_tex exts
+                     withRaw optargs)
 
 ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
 ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
   where optargs = skipopts *> skipMany (try $ optional sp *> braced)
         contseq = '\\':name
         doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
-                 (getOption readerParseRaw >>= guard >> withRaw optargs)
+                 (do exts <- getOption readerExtensions
+                     guard $ extensionEnabled Ext_raw_tex exts
+                     withRaw optargs)
 
 blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
 blockCommands = M.fromList $
@@ -423,7 +427,7 @@ inlineCommand = try $ do
   name <- anyControlSeq
   guard $ name /= "begin" && name /= "end"
   guard $ not $ isBlockCommand name
-  parseRaw <- getOption readerParseRaw
+  exts <- getOption readerExtensions
   star <- option "" (string "*")
   let name' = name ++ star
   let raw = do
@@ -433,7 +437,7 @@ inlineCommand = try $ do
         transformed <- applyMacros' rawcommand
         if transformed /= rawcommand
            then parseFromString inlines transformed
-           else if parseRaw
+           else if extensionEnabled Ext_raw_tex exts
                    then return $ rawInline "latex" rawcommand
                    else return mempty
   (lookupListDefault mzero [name',name] inlineCommands <*
@@ -441,7 +445,8 @@ inlineCommand = try $ do
     <|> raw
 
 unlessParseRaw :: PandocMonad m => LP m ()
-unlessParseRaw = getOption readerParseRaw >>= guard . not
+unlessParseRaw = getOption readerExtensions >>=
+                 guard . not . extensionEnabled Ext_raw_tex
 
 isBlockCommand :: String -> Bool
 isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
@@ -923,10 +928,10 @@ inlineEnvironment = try $ do
 
 rawEnv :: PandocMonad m => String -> LP m Blocks
 rawEnv name = do
-  parseRaw <- getOption readerParseRaw
+  exts <- getOption readerExtensions
   rawOptions <- mconcat <$> many rawopt
   let addBegin x = "\\begin{" ++ name ++ "}" ++ rawOptions ++ x
-  if parseRaw
+  if extensionEnabled Ext_raw_tex exts
      then (rawBlock "latex" . addBegin) <$>
             (withRaw (env name blocks) >>= applyMacros' . snd)
      else env name blocks
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1d8f7c78e..0c10889d4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -359,10 +359,6 @@ kvPair allowEmpty = try $ do
 
 parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
 parseMarkdown = do
-  -- markdown allows raw HTML
-  updateState $ \state -> state { stateOptions =
-                let oldOpts = stateOptions state in
-                    oldOpts{ readerParseRaw = True } }
   optional titleBlock
   blocks <- parseBlocks
   st <- getState
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index bcf8f6df9..f3671641a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -806,7 +806,8 @@ inlineLaTeX = try $ do
     where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
 
    state :: ParserState
-   state = def{ stateOptions = def{ readerParseRaw = True }}
+   state = def{ stateOptions = def{ readerExtensions =
+                    enableExtension Ext_raw_tex (readerExtensions def) } }
 
    texMathToPandoc :: String -> Maybe [Inline]
    texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 804ee39aa..07fb65b20 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -85,12 +85,6 @@ readTextile opts s = do
 -- | Generate a Pandoc ADT from a textile document
 parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
 parseTextile = do
-  -- textile allows raw HTML and does smart punctuation by default,
-  -- but we do not enable smart punctuation unless it is explicitly
-  -- asked for, for better conversion to other light markup formats
-  oldOpts <- stateOptions `fmap` getState
-  updateState $ \state -> state{ stateOptions =
-                                   oldOpts{ readerParseRaw = True } }
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index ba8ea8240..d9877c8a0 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -46,7 +46,7 @@ tests = [ testGroup "markdown"
         , testGroup "latex"
           [ testGroup "writer" (writerTests "latex" ++ lhsWriterTests "latex")
           , testGroup "reader"
-            [ test "basic" ["-r", "latex", "-w", "native", "-s", "-R"]
+            [ test "basic" ["-r", "latex+raw_tex", "-w", "native", "-s"]
               "latex-reader.latex" "latex-reader.native"
             , lhsReaderTest "latex+lhs"
             ]
diff --git a/test/command/parse-raw.md b/test/command/parse-raw.md
new file mode 100644
index 000000000..f4e493c69
--- /dev/null
+++ b/test/command/parse-raw.md
@@ -0,0 +1,27 @@
+```
+% pandoc -f latex+raw_tex -t markdown
+\emph{Hi \foo{there}}
+^D
+*Hi \foo{there}*
+```
+
+```
+% pandoc -f latex -t markdown
+\emph{Hi \foo{there}}
+^D
+*Hi*
+```
+
+```
+% pandoc -f html+raw_html -t markdown
+<em>Hi <blink>there</blink></em>
+^D
+*Hi <blink>there</blink>*
+```
+
+```
+% pandoc -f html -t markdown
+<em>Hi <blink>there</blink></em>
+^D
+*Hi there*
+```