From 7b8c2b6691e3816ba52ee07ee7f63573d4ae7253 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 2 Dec 2017 15:21:59 -0800
Subject: [PATCH] Add --strip-empty-paragraphs option.

This works for any input format.
---
 MANUAL.txt                |  6 ++++++
 src/Text/Pandoc/App.hs    | 20 +++++++++++++++-----
 src/Text/Pandoc/Shared.hs |  9 +++++++++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/MANUAL.txt b/MANUAL.txt
index e8fdf9375..5adeebe58 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -427,6 +427,12 @@ Reader options
 
 :   Specify the base level for headers (defaults to 1).
 
+`--strip-empty-paragraphs`
+
+:   Ignore paragraphs with no content.  This option is useful
+    for converting word processing documents where users have
+    used empty paragraphs to create inter-paragraph space.
+
 `--indented-code-classes=`*CLASSES*
 
 :   Specify classes to use for indented code blocks--for example,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index f1c21c69a..3fdbf1949 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -86,8 +86,8 @@ import Text.Pandoc.Lua (LuaException (..), runLuaFilter)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.Process (pipeProcess)
 import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
-import Text.Pandoc.Shared (eastAsianLineBreakFilter, headerShift, isURI, ordNub,
-                           safeRead, tabFilter)
+import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
+         headerShift, isURI, ordNub, safeRead, tabFilter)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL)
 import Text.Pandoc.XML (toEntities)
@@ -461,14 +461,17 @@ convertWithOpts opts = do
 
     let transforms = (case optBaseHeaderLevel opts of
                           x | x > 1     -> (headerShift (x - 1) :)
-                            | otherwise -> id) $
+                            | otherwise -> id) .
+                     (if optStripEmptyParagraphs opts
+                         then (stripEmptyParagraphs :)
+                         else id) .
                      (if extensionEnabled Ext_east_asian_line_breaks
                             readerExts &&
                          not (extensionEnabled Ext_east_asian_line_breaks
                               writerExts &&
                               writerWrapText writerOptions == WrapPreserve)
                          then (eastAsianLineBreakFilter :)
-                         else id)
+                         else id) $
                      []
 
     let sourceToDoc :: [FilePath] -> PandocIO Pandoc
@@ -622,6 +625,7 @@ data Opt = Opt
     , optLuaFilters            :: [FilePath] -- ^ Lua filters to apply
     , optEmailObfuscation      :: ObfuscationMethod
     , optIdentifierPrefix      :: String
+    , optStripEmptyParagraphs  :: Bool -- ^ Strip empty paragraphs
     , optIndentedCodeClasses   :: [String] -- ^ Default classes for indented code blocks
     , optDataDir               :: Maybe FilePath
     , optCiteMethod            :: CiteMethod -- ^ Method to output cites
@@ -694,6 +698,7 @@ defaultOpts = Opt
     , optLuaFilters            = []
     , optEmailObfuscation      = NoObfuscation
     , optIdentifierPrefix      = ""
+    , optStripEmptyParagraphs  = False
     , optIndentedCodeClasses   = []
     , optDataDir               = Nothing
     , optCiteMethod            = Citeproc
@@ -940,7 +945,12 @@ options =
                   "NUMBER")
                  "" -- "Headers base level"
 
-     , Option "" ["indented-code-classes"]
+    , Option "" ["strip-empty-paragraphs"]
+                 (NoArg
+                  (\opt -> return opt{ optStripEmptyParagraphs = True }))
+                 "" -- "Strip empty paragraphs"
+
+    , Option "" ["indented-code-classes"]
                   (ReqArg
                    (\arg opt -> return opt { optIndentedCodeClasses = words $
                                              map (\c -> if c == ',' then ' ' else c) arg })
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5c13e0acb..1c3a25cc7 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -72,6 +72,7 @@ module Text.Pandoc.Shared (
                      inlineListToIdentifier,
                      isHeaderBlock,
                      headerShift,
+                     stripEmptyParagraphs,
                      isTightList,
                      addMetaField,
                      makeMeta,
@@ -529,6 +530,14 @@ headerShift n = walk shift
         shift (Header level attr inner) = Header (level + n) attr inner
         shift x                         = x
 
+-- | Drop 'Para' blocks with no inlines from each block list 'walk' visits.
+stripEmptyParagraphs :: Pandoc -> Pandoc
+stripEmptyParagraphs = walk keepNonEmpty
+  where keepNonEmpty :: [Block] -> [Block]
+        keepNonEmpty blocks = [b | b <- blocks, hasContent b]
+        hasContent (Para []) = False
+        hasContent _         = True
+
 -- | Detect if a list is tight.
 isTightList :: [[Block]] -> Bool
 isTightList = all firstIsPlain