From d247e9f72e9c9a86cb0053cffc607b5f84f8b3a4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 22 Sep 2019 10:57:48 -0700
Subject: [PATCH] Make `plain` output plainer.

Previously we used the following Project Gutenberg conventions
for plain output:

- extra space before and after level 1 and 2 headings
- all-caps for strong emphasis `LIKE THIS`
- underscores surrounding regular emphasis `_like this_`

This commit makes `plain` output plainer. Strong and Emph
inlines are rendered without special formatting.  Headings
are also rendered without special formatting, and with only
one blank line following.

To restore the former behavior, use `-t plain+gutenberg`.

API change: Add `Ext_gutenberg` constructor to `Extension`.

See #5741.
---
 MANUAL.txt                          |   8 ++
 src/Text/Pandoc/Extensions.hs       |   1 +
 src/Text/Pandoc/Writers/Markdown.hs |  21 +++--
 test/Tests/Writers/Plain.hs         |   4 +-
 test/command/4529.md                |   2 +-
 test/writer.plain                   | 124 ++++++++--------------------
 6 files changed, 64 insertions(+), 96 deletions(-)

diff --git a/MANUAL.txt b/MANUAL.txt
index 7e01a5002..c9ff7bea5 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -4544,6 +4544,14 @@ in several respects:
     we must either disallow lazy wrapping or require a blank line between
     list items.
 
+#### Extension: `gutenberg` ####
+
+Use [Project Gutenberg] conventions for `plain` output:
+all-caps for strong emphasis, underscores surrounding
+regular emphasis, and extra blank space around headings.
+
+  [Project Gutenberg]: https://www.gutenberg.org
+
 Markdown variants
 -----------------
 
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 6cb87eef6..121b60e7b 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -155,6 +155,7 @@ data Extension =
     | Ext_tex_math_double_backslash  -- ^ TeX math btw \\(..\\) \\[..\\]
     | Ext_tex_math_single_backslash  -- ^ TeX math btw \(..\) \[..\]
     | Ext_yaml_metadata_block -- ^ YAML metadata block
+    | Ext_gutenberg           -- ^ Use Project Gutenberg conventions for plain
     deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
 
 -- | Extensions to be used with pandoc-flavored markdown.
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index e298fafe9..fc25cde44 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -523,16 +523,22 @@ blockToMarkdown' opts (Header level attr inlines) = do
   contents <- inlineListToMarkdown opts $
                  -- ensure no newlines; see #3736
                  walk lineBreakToSpace $
-                 if level == 1 && plain
+                 if level == 1 && plain && isEnabled Ext_gutenberg opts
                     then capitalize inlines
                     else inlines
   let setext = writerSetextHeaders opts
       hdr = nowrap $ case level of
-            1 | plain -> blanklines 3 <> contents <> blanklines 2
+            1 | plain ->
+                if isEnabled Ext_gutenberg opts
+                   then blanklines 3 <> contents <> blanklines 2
+                   else contents <> blankline
               | setext ->
                   contents <> attr' <> cr <> text (replicate (offset contents) '=') <>
                   blankline
-            2 | plain -> blanklines 2 <> contents <> blankline
+            2 | plain ->
+                if isEnabled Ext_gutenberg opts
+                   then blanklines 2 <> contents <> blankline
+                   else contents <> blankline
               | setext ->
                   contents <> attr' <> cr <> text (replicate (offset contents) '-') <>
                   blankline
@@ -1036,13 +1042,18 @@ inlineToMarkdown opts (Emph lst) = do
   plain <- asks envPlain
   contents <- inlineListToMarkdown opts lst
   return $ if plain
-              then "_" <> contents <> "_"
+              then if isEnabled Ext_gutenberg opts
+                      then "_" <> contents <> "_"
+                      else contents
               else "*" <> contents <> "*"
 inlineToMarkdown _ (Strong []) = return empty
 inlineToMarkdown opts (Strong lst) = do
   plain <- asks envPlain
   if plain
-     then inlineListToMarkdown opts $ capitalize lst
+     then inlineListToMarkdown opts $
+          if isEnabled Ext_gutenberg opts
+             then capitalize lst
+             else lst
      else do
        contents <- inlineListToMarkdown opts lst
        return $ "**" <> contents <> "**"
diff --git a/test/Tests/Writers/Plain.hs b/test/Tests/Writers/Plain.hs
index 2a2eb4226..b8d1f6693 100644
--- a/test/Tests/Writers/Plain.hs
+++ b/test/Tests/Writers/Plain.hs
@@ -13,7 +13,9 @@ import Text.Pandoc.Builder
 infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
      => String -> (a, String) -> TestTree
-(=:) = test (purely (writePlain def) . toPandoc)
+(=:) = test (purely (writePlain def{ writerExtensions =
+                          enableExtension Ext_gutenberg plainExtensions }) .
+                      toPandoc)
 
 
 tests :: [TestTree]
diff --git a/test/command/4529.md b/test/command/4529.md
index 4a2125b9c..4242a65c2 100644
--- a/test/command/4529.md
+++ b/test/command/4529.md
@@ -1,5 +1,5 @@
 ```
-% pandoc -f latex -t plain
+% pandoc -f latex -t plain+gutenberg
 \chapter{First chapter}\label{sec:chp1}
 The next chapter is Chapter~\ref{sec:chp2}.
 \section{First section}\label{sec:chp1sec1}
diff --git a/test/writer.plain b/test/writer.plain
index 79921fa27..b8767957f 100644
--- a/test/writer.plain
+++ b/test/writer.plain
@@ -7,41 +7,31 @@ markdown test suite.
 
 ------------------------------------------------------------------------------
 
-
-
-HEADERS
-
+Headers
 
 Level 2 with an embedded link
 
-Level 3 with _emphasis_
+Level 3 with emphasis
 
 Level 4
 
 Level 5
 
+Level 1
 
-
-LEVEL 1
-
-
-Level 2 with _emphasis_
+Level 2 with emphasis
 
 Level 3
 
 with no blank line
 
-
 Level 2
 
 with no blank line
 
 ------------------------------------------------------------------------------
 
-
-
-PARAGRAPHS
-
+Paragraphs
 
 Here’s a regular paragraph.
 
@@ -56,10 +46,7 @@ here.
 
 ------------------------------------------------------------------------------
 
-
-
-BLOCK QUOTES
-
+Block Quotes
 
 E-mail style:
 
@@ -88,10 +75,7 @@ And a following paragraph.
 
 ------------------------------------------------------------------------------
 
-
-
-CODE BLOCKS
-
+Code Blocks
 
 Code:
 
@@ -111,10 +95,7 @@ And:
 
 ------------------------------------------------------------------------------
 
-
-
-LISTS
-
+Lists
 
 Unordered
 
@@ -160,7 +141,6 @@ Minuses loose:
 
 -   Minus 3
 
-
 Ordered
 
 Tight:
@@ -201,7 +181,6 @@ Multiple paragraphs:
 
 3.  Item 3.
 
-
 Nested
 
 -   Tab
@@ -229,7 +208,6 @@ Same thing but with paragraphs:
 
 3.  Third
 
-
 Tabs and spaces
 
 -   this is a list item indented with tabs
@@ -240,7 +218,6 @@ Tabs and spaces
 
     -   this is an example list item indented with spaces
 
-
 Fancy list markers
 
 (2) begins with 2
@@ -275,10 +252,7 @@ B. Williams
 
 ------------------------------------------------------------------------------
 
-
-
-DEFINITION LISTS
-
+Definition Lists
 
 Tight using spaces:
 
@@ -318,13 +292,13 @@ banana
 
 Multiple blocks with italics:
 
-_apple_
+apple
 
     red fruit
 
     contains seeds, crisp, pleasant to taste
 
-_orange_
+orange
 
     orange fruit
 
@@ -371,10 +345,7 @@ orange
     1.  sublist
     2.  sublist
 
-
-
-HTML BLOCKS
-
+HTML Blocks
 
 Simple block on one line:
 
@@ -388,8 +359,8 @@ bar
 
 Interpreted markdown in a table:
 
-This is _emphasized_
-And this is STRONG
+This is emphasized
+And this is strong
 Here’s a simple block:
 
 foo
@@ -426,30 +397,27 @@ Hr’s:
 
 ------------------------------------------------------------------------------
 
+Inline Markup
 
+This is emphasized, and so is this.
 
-INLINE MARKUP
+This is strong, and so is this.
 
+An emphasized link.
 
-This is _emphasized_, and so _is this_.
+This is strong and em.
 
-This is STRONG, and so IS THIS.
+So is this word.
 
-An _emphasized link_.
+This is strong and em.
 
-_THIS IS STRONG AND EM._
-
-So is _THIS_ word.
-
-_THIS IS STRONG AND EM._
-
-So is _THIS_ word.
+So is this word.
 
 This is code: >, $, \, \$, <html>.
 
-~~This is _strikeout_.~~
+~~This is strikeout.~~
 
-Superscripts: a^(bc)d a^(_hello_) a^(hello there).
+Superscripts: a^(bc)d a^(hello) a^(hello there).
 
 Subscripts: H₂O, H₂₃O, H_(many of them)O.
 
@@ -458,10 +426,7 @@ spaces: a^b c^d, a~b c~d.
 
 ------------------------------------------------------------------------------
 
-
-
-SMART QUOTES, ELLIPSES, DASHES
-
+Smart quotes, ellipses, dashes
 
 “Hello,” said the spider. “‘Shelob’ is my name.”
 
@@ -481,10 +446,7 @@ Ellipses…and…and….
 
 ------------------------------------------------------------------------------
 
-
-
-LATEX
-
+LaTeX
 
 -   
 -   2 + 2 = 4
@@ -499,19 +461,16 @@ LATEX
 These shouldn’t be math:
 
 -   To get the famous equation, write $e = mc^2$.
--   $22,000 is a _lot_ of money. So is $34,000. (It worked if “lot” is
+-   $22,000 is a lot of money. So is $34,000. (It worked if “lot” is
     emphasized.)
 -   Shoes ($20) and socks ($5).
--   Escaped $: $73 _this should be emphasized_ 23$.
+-   Escaped $: $73 this should be emphasized 23$.
 
 Here’s a LaTeX table:
 
 ------------------------------------------------------------------------------
 
-
-
-SPECIAL CHARACTERS
-
+Special Characters
 
 Here is some unicode:
 
@@ -565,10 +524,7 @@ Minus: -
 
 ------------------------------------------------------------------------------
 
-
-
-LINKS
-
+Links
 
 Explicit
 
@@ -590,7 +546,6 @@ Email link
 
 Empty.
 
-
 Reference
 
 Foo bar.
@@ -613,7 +568,6 @@ Foo bar.
 
 Foo biz.
 
-
 With ampersands
 
 Here’s a link with an ampersand in the URL.
@@ -624,7 +578,6 @@ Here’s an inline link.
 
 Here’s an inline link in pointy braces.
 
-
 Autolinks
 
 With an ampersand: http://example.com/?foo=1&bar=2
@@ -643,10 +596,7 @@ Auto-links should not occur here: <http://example.com/>
 
 ------------------------------------------------------------------------------
 
-
-
-IMAGES
-
+Images
 
 From “Voyage dans la Lune” by Georges Melies (1902):
 
@@ -656,14 +606,10 @@ Here is a movie [movie] icon.
 
 ------------------------------------------------------------------------------
 
+Footnotes
 
-
-FOOTNOTES
-
-
-Here is a footnote reference,[1] and another.[2] This should _not_ be a
-footnote reference, because it contains a space.[^my note] Here is an inline
-note.[3]
+Here is a footnote reference,[1] and another.[2] This should not be a footnote
+reference, because it contains a space.[^my note] Here is an inline note.[3]
 
   Notes can go in quotes.[4]
 
@@ -684,7 +630,7 @@ with list items).
 If you want, you can indent every line, but you can also be lazy and just
 indent the first line of each block.
 
-[3] This is _easier_ to type. Inline notes may contain links and ] verbatim
+[3] This is easier to type. Inline notes may contain links and ] verbatim
 characters, as well as [bracketed text].
 
 [4] In quote.