Add --strip-empty-paragraphs option.

This works for any input format.
This commit is contained in:
John MacFarlane 2017-12-02 15:21:59 -08:00
parent e09e6a6ffa
commit 7b8c2b6691
3 changed files with 30 additions and 5 deletions

View file

@ -427,6 +427,12 @@ Reader options
: Specify the base level for headers (defaults to 1).
`--strip-empty-paragraphs`
: Ignore paragraphs with non content. This option is useful
for converting word processing documents where users have
used empty paragraphs to create inter-paragraph space.
`--indented-code-classes=`*CLASSES*
: Specify classes to use for indented code blocks--for example,

View file

@ -86,8 +86,8 @@ import Text.Pandoc.Lua (LuaException (..), runLuaFilter)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Process (pipeProcess)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, headerShift, isURI, ordNub,
safeRead, tabFilter)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
headerShift, isURI, ordNub, safeRead, tabFilter)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL)
import Text.Pandoc.XML (toEntities)
@ -461,14 +461,17 @@ convertWithOpts opts = do
let transforms = (case optBaseHeaderLevel opts of
x | x > 1 -> (headerShift (x - 1) :)
| otherwise -> id) $
| otherwise -> id) .
(if optStripEmptyParagraphs opts
then (stripEmptyParagraphs :)
else id) .
(if extensionEnabled Ext_east_asian_line_breaks
readerExts &&
not (extensionEnabled Ext_east_asian_line_breaks
writerExts &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
else id)
else id) $
[]
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
@ -622,6 +625,7 @@ data Opt = Opt
, optLuaFilters :: [FilePath] -- ^ Lua filters to apply
, optEmailObfuscation :: ObfuscationMethod
, optIdentifierPrefix :: String
, optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs
, optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
, optDataDir :: Maybe FilePath
, optCiteMethod :: CiteMethod -- ^ Method to output cites
@ -694,6 +698,7 @@ defaultOpts = Opt
, optLuaFilters = []
, optEmailObfuscation = NoObfuscation
, optIdentifierPrefix = ""
, optStripEmptyParagraphs = False
, optIndentedCodeClasses = []
, optDataDir = Nothing
, optCiteMethod = Citeproc
@ -940,7 +945,12 @@ options =
"NUMBER")
"" -- "Headers base level"
, Option "" ["indented-code-classes"]
, Option "" ["strip-empty-paragraphs"]
(NoArg
(\opt -> return opt{ optStripEmptyParagraphs = True }))
"" -- "Strip empty paragraphs"
, Option "" ["indented-code-classes"]
(ReqArg
(\arg opt -> return opt { optIndentedCodeClasses = words $
map (\c -> if c == ',' then ' ' else c) arg })

View file

@ -72,6 +72,7 @@ module Text.Pandoc.Shared (
inlineListToIdentifier,
isHeaderBlock,
headerShift,
stripEmptyParagraphs,
isTightList,
addMetaField,
makeMeta,
@ -529,6 +530,14 @@ headerShift n = walk shift
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
-- | Remove empty paragraphs.
stripEmptyParagraphs :: Pandoc -> Pandoc
stripEmptyParagraphs = walk go
where go :: [Block] -> [Block]
go = filter (not . isEmptyParagraph)
isEmptyParagraph (Para []) = True
isEmptyParagraph _ = False
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain