From 2b34337a9cf8b025914e8219498b4c0258772be0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 22 Jun 2017 23:38:42 +0200
Subject: [PATCH] Text.Pandoc.Extensions: Added `Ext_raw_attribute`.

Documented in MANUAL.txt.

This is enabled by default in pandoc markdown and multimarkdown.
---
 MANUAL.txt                          | 27 ++++++++++++++++---
 src/Text/Pandoc/Extensions.hs       |  4 +++
 src/Text/Pandoc/Readers/Markdown.hs | 42 ++++++++++++++++++++++-------
 test/command/3537.md                | 28 +++++++++++++++++++
 4 files changed, 89 insertions(+), 12 deletions(-)
 create mode 100644 test/command/3537.md

diff --git a/MANUAL.txt b/MANUAL.txt
index a75c6fd2a..a4bc7a410 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3033,9 +3033,6 @@ For the most part this should give the same output as `raw_html`,
 but it makes it easier to write pandoc filters to manipulate groups
 of inlines.
 
-Raw TeX
--------
-
 #### Extension: `raw_tex` ####
 
 In addition to raw HTML, pandoc allows raw LaTeX, TeX, and ConTeXt to be
@@ -3060,6 +3057,30 @@ LaTeX, not as Markdown.
 Inline LaTeX is ignored in output formats other than Markdown, LaTeX,
 Emacs Org mode, and ConTeXt.
 
+### Generic raw attribute ###
+
+#### Extension: `raw_attribute` ####
+
+Inline spans and fenced code blocks with a special
+kind of attribute will be parsed as raw content with the
+designated format.  For example, the following produces a raw
+groff `ms` block:
+
+    ```{=ms}
+    .MYMACRO
+    blah blah
+    ```
+And the following produces a raw `html` inline element:
+
+    This is `<a>html</a>`{=html}
+
+This extension presupposes that the relevant kind of
+inline code or fenced code block is enabled.  Thus, for
+example, to use a raw attribute with a backtick code block,
+`backtick_code_blocks` must be enabled.
+
+The raw attribute cannot be combined with regular attributes.
+
 LaTeX macros
 ------------
 
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 58e8c414d..398944d47 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -94,6 +94,7 @@ data Extension =
     | Ext_fenced_code_attributes  -- ^ Allow attributes on fenced code blocks
     | Ext_backtick_code_blocks    -- ^ GitHub style ``` code blocks
     | Ext_inline_code_attributes  -- ^ Allow attributes on inline code
+    | Ext_raw_attribute           -- ^ Allow explicit raw blocks/inlines
     | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
     | Ext_native_divs             -- ^ Use Div blocks for contents of <div> tags
     | Ext_native_spans            -- ^ Use Span inlines for contents of <span>
@@ -162,6 +163,7 @@ pandocExtensions = extensionsFromList
   , Ext_fenced_code_attributes
   , Ext_backtick_code_blocks
   , Ext_inline_code_attributes
+  , Ext_raw_attribute
   , Ext_markdown_in_html_blocks
   , Ext_native_divs
   , Ext_native_spans
@@ -275,6 +277,8 @@ multimarkdownExtensions = extensionsFromList
   , Ext_subscript
   , Ext_backtick_code_blocks
   , Ext_spaced_reference_links
+  -- So far only in dev version of mmd:
+  , Ext_raw_attribute
   ]
 
 -- | Language extensions to be used with strict markdown.
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 793ee0996..b91efcd8c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -681,19 +681,36 @@ specialAttr = do
   char '-'
   return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
 
+rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute = do
+  char '{'
+  skipMany spaceChar
+  char '='
+  format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+  skipMany spaceChar
+  char '}'
+  return format
+
 codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
 codeBlockFenced = try $ do
   c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
      <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
   size <- blockDelimiter (== c) Nothing
   skipMany spaceChar
-  attr <- option ([],[],[]) $
-            try (guardEnabled Ext_fenced_code_attributes >> attributes)
-           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
+  rawattr <-
+     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+    <|>
+     (Right <$> option ("",[],[])
+         (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+          <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
   blankline
-  contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+  contents <- intercalate "\n" <$>
+                 manyTill anyLine (blockDelimiter (== c) (Just size))
   blanklines
-  return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+  return $ return $
+    case rawattr of
+          Left syn   -> B.rawBlock syn contents
+          Right attr -> B.codeBlockWith attr contents
 
 -- correctly handle github language identifiers
 toLanguageId :: String -> String
@@ -1516,13 +1533,20 @@ code :: PandocMonad m => MarkdownParser m (F Inlines)
 code = try $ do
   starts <- many1 (char '`')
   skipSpaces
-  result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+  result <- (trim . concat) <$>
+            many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
                        (char '\n' >> notFollowedBy' blankline >> return " "))
                       (try (skipSpaces >> count (length starts) (char '`') >>
                       notFollowedBy (char '`')))
-  attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
-                                   >> attributes)
-  return $ return $ B.codeWith attr $ trim $ concat result
+  rawattr <-
+     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+    <|>
+     (Right <$> option ("",[],[])
+         (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+  return $ return $
+    case rawattr of
+         Left syn   -> B.rawInline syn result
+         Right attr -> B.codeWith attr result
 
 math :: PandocMonad m => MarkdownParser m (F Inlines)
 math =  (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
diff --git a/test/command/3537.md b/test/command/3537.md
new file mode 100644
index 000000000..df4eeba7d
--- /dev/null
+++ b/test/command/3537.md
@@ -0,0 +1,28 @@
+Generalized raw attributes.
+
+````
+% pandoc -t native
+```{=ms}
+.MACRO
+foo bar
+```
+^D
+[RawBlock (Format "ms") ".MACRO\nfoo bar"]
+````
+
+````
+% pandoc -t native
+Hi `there`{=ms}.
+^D
+[Para [Str "Hi",Space,RawInline (Format "ms") "there",Str "."]]
+````
+
+````
+% pandoc -t native
+~~~ {=ms}
+.MACRO
+foo bar
+~~~
+^D
+[RawBlock (Format "ms") ".MACRO\nfoo bar"]
+````