diff --git a/MANUAL.txt b/MANUAL.txt index 5d5e9ccdb..b6bf94e96 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -429,7 +429,8 @@ Reader options `--strip-empty-paragraphs` -: Ignore paragraphs with non content. This option is useful +: *Deprecated. Use the `+empty_paragraphs` extension instead.* + Ignore paragraphs with no content. This option is useful for converting word processing documents where users have used empty paragraphs to create inter-paragraph space. @@ -3817,6 +3818,12 @@ in several respects: we must either disallow lazy wrapping or require a blank line between list items. +#### Extension: `empty_paragraphs` #### + +Allows empty paragraphs. By default empty paragraphs are +omitted. This affects the `docx` reader and writer, the +`opendocument` and `odt` writer, and all HTML-based readers and writers. + Markdown variants ----------------- diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 3fdbf1949..7d7d630ea 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -947,7 +947,10 @@ options = , Option "" ["strip-empty-paragraphs"] (NoArg - (\opt -> return opt{ optStripEmptyParagraphs = True })) + (\opt -> do + deprecatedOption "--stripEmptyParagraphs" + "Use +empty_paragraphs extension." + return opt{ optStripEmptyParagraphs = True })) "" -- "Strip empty paragraphs" , Option "" ["indented-code-classes"] @@ -1472,7 +1475,7 @@ options = , Option "m" ["latexmathml", "asciimathml"] (OptArg (\arg opt -> do - deprecatedOption "--latexmathml, --asciimathml, -m" + deprecatedOption "--latexmathml, --asciimathml, -m" "" return opt { optHTMLMathMethod = LaTeXMathML arg }) "URL") "" -- "Use LaTeXMathML script in html output" @@ -1480,7 +1483,7 @@ options = , Option "" ["mimetex"] (OptArg (\arg opt -> do - deprecatedOption "--mimetex" + deprecatedOption "--mimetex" "" let url' = case arg of Just u -> u ++ "?" Nothing -> "/cgi-bin/mimetex.cgi?" @@ -1491,7 +1494,7 @@ options = , Option "" ["jsmath"] (OptArg (\arg opt -> do - deprecatedOption "--jsmath" + deprecatedOption "--jsmath" "" return opt { optHTMLMathMethod = JsMath arg}) "URL") "" -- "Use jsMath for HTML math" @@ -1499,7 +1502,7 @@ options = , Option "" ["gladtex"] (NoArg (\opt -> do - deprecatedOption "--gladtex" + deprecatedOption "--gladtex" "" return opt { optHTMLMathMethod = GladTeX })) "" -- "Use gladtex for HTML math" @@ -1699,9 +1702,9 @@ splitField s = baseWriterName :: String -> String baseWriterName = takeWhile (\c -> c /= '+' && c /= '-') -deprecatedOption :: String -> IO () -deprecatedOption o = - runIO (report $ Deprecated o "") >>= +deprecatedOption :: String -> String -> IO () +deprecatedOption o msg = + runIO (report $ Deprecated o msg) >>= \r -> case r of Right () -> return () Left e -> E.throwIO e diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 67ad2ad04..771898d70 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -152,6 +152,7 @@ data Extension = | Ext_old_dashes -- ^ -- = em, - before number = en | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup + | Ext_empty_paragraphs -- ^ Allow empty paragraphs deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) -- | Extensions to be used with pandoc-flavored markdown. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 1fac98b14..651d46753 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -534,7 +534,10 @@ bodyPartToBlocks (Paragraph pPr parparts) then do modify $ \s -> s { docxDropCap = ils' } return mempty else do modify $ \s -> s { docxDropCap = mempty } - return $ parStyleToTransform pPr $ para ils' + opts <- asks docxOptions + if isNull ils' && not (isEnabled Ext_empty_paragraphs opts) + then return mempty + else return $ parStyleToTransform pPr $ para ils' bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do let kvs = case levelInfo of diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 24935fcd7..b0f5d38f9 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -68,9 +68,11 @@ import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging -import Text.Pandoc.Options (Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans, Ext_raw_html), - ReaderOptions (readerExtensions, readerStripComments), - extensionEnabled) +import Text.Pandoc.Options ( + Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, + Ext_native_spans, Ext_raw_html), + ReaderOptions (readerExtensions, readerStripComments), + extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces, safeRead, underlineSpan) @@ -575,7 +577,10 @@ pPlain = do pPara :: PandocMonad m => TagParser m Blocks pPara = do contents <- trimInlines <$> pInTags "p" inline - return $ B.para contents + (do guardDisabled Ext_empty_paragraphs + guard (B.isNull contents) + return mempty) + <|> return (B.para contents) pFigure :: PandocMonad m => TagParser m Blocks pFigure = try $ do diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index f80c2b59a..c9eaaf838 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -922,19 +922,22 @@ blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -blockToOpenXML' opts (Para lst) = do - isFirstPara <- gets stFirstPara - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - bodyTextStyle <- pStyleM "Body Text" - let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [bodyTextStyle]] - ps -> ps - modify $ \s -> s { stFirstPara = False } - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] +blockToOpenXML' opts (Para lst) + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] + | otherwise = do + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] + [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 756bc3fd8..f25bbadfb 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -56,7 +56,7 @@ import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import Network.URI (URI (..), parseURIReference, unEscapeString) import Numeric (showHex) -import Text.Blaze.Internal (customLeaf) +import Text.Blaze.Internal (customLeaf, MarkupM(Empty)) import Text.Blaze.Html hiding (contents) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight, @@ -658,6 +658,7 @@ blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = figure opts attr txt (s,tit) blockToHtml opts (Para lst) | isEmptyRaw lst = return mempty + | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty | otherwise = do contents <- inlineListToHtml opts lst return $ H.p contents @@ -902,8 +903,7 @@ tableItemToHtml opts tag' align' item = do let tag'' = if null alignStr then tag' else tag' ! attribs - return $ ( - tag'' contents) >> nl opts + return $ tag'' contents >> nl opts toListItems :: WriterOptions -> [Html] -> [Html] toListItems opts items = map (toListItem opts) items ++ [nl opts] @@ -911,9 +911,13 @@ toListItems opts items = map (toListItem opts) items ++ [nl opts] toListItem :: WriterOptions -> Html -> Html toListItem opts item = nl opts >> H.li item -blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html +blockListToHtml :: PandocMonad m + => WriterOptions -> [Block] -> StateT WriterState m Html blockListToHtml opts lst = - (mconcat . intersperse (nl opts)) <$> mapM (blockToHtml opts) lst + (mconcat . intersperse (nl opts) . filter nonempty) + <$> mapM (blockToHtml opts) lst + where nonempty (Empty _) = False + nonempty _ = True -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 702349636..8aa19dbb5 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -130,7 +130,6 @@ setFirstPara :: PandocMonad m => OD m () setFirstPara = modify $ \s -> s { stFirstPara = True } inParagraphTags :: PandocMonad m => Doc -> OD m Doc -inParagraphTags d | isEmpty d = return empty inParagraphTags d = do b <- gets stFirstPara a <- if b @@ -323,7 +322,8 @@ blockToOpenDocument o bs else inParagraphTags =<< inlinesToOpenDocument o b | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs = figure attr c s t - | Para b <- bs = if null b + | Para b <- bs = if null b && + not (isEnabled Ext_empty_paragraphs o) then return empty else inParagraphTags =<< inlinesToOpenDocument o b | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index b5d7aa430..421acaa8b 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -10,7 +10,6 @@ import Test.Tasty import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc -import Text.Pandoc.Shared (stripEmptyParagraphs) import qualified Text.Pandoc.Class as P import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) import Text.Pandoc.UTF8 as UTF8 @@ -38,23 +37,20 @@ instance ToString NoNormPandoc where instance ToPandoc NoNormPandoc where toPandoc = unNoNorm -compareOutput :: Bool - -> ReaderOptions - -> FilePath - -> FilePath - -> IO (NoNormPandoc, NoNormPandoc) -compareOutput strip opts docxFile nativeFile = do +compareOutput :: ReaderOptions + -> FilePath + -> FilePath + -> IO (NoNormPandoc, NoNormPandoc) +compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- UTF8.toText <$> BS.readFile nativeFile p <- runIOorExplode $ readDocx opts df df' <- runIOorExplode $ readNative def nf - return $ (noNorm (if strip - then stripEmptyParagraphs p - else p), noNorm df') + return $ (noNorm p, noNorm df') testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO TestTree testCompareWithOptsIO opts name docxFile nativeFile = do - (dp, np) <- compareOutput True opts docxFile nativeFile + (dp, np) <- compareOutput opts docxFile nativeFile return $ test id name (dp, np) testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> TestTree @@ -75,11 +71,6 @@ testForWarningsWithOpts :: ReaderOptions -> String -> FilePath -> [String] -> Te testForWarningsWithOpts opts name docxFile expected = unsafePerformIO $ testForWarningsWithOptsIO opts name docxFile expected -testCompareNoStrip :: String -> FilePath -> FilePath -> TestTree -testCompareNoStrip name docxFile nativeFile = unsafePerformIO $ do - (dp, np) <- compareOutput False defopts docxFile nativeFile - return $ test id name (dp, np) - -- testForWarnings :: String -> FilePath -> [String] -> TestTree -- testForWarnings = testForWarningsWithOpts defopts @@ -266,10 +257,6 @@ tests = [ testGroup "inlines" "dropcap paragraphs" "docx/drop_cap.docx" "docx/drop_cap.native" - , testCompareNoStrip - "empty paragraphs without stripping" - "docx/drop_cap.docx" - "docx/drop_cap_nostrip.native" ] , testGroup "track changes" [ testCompare diff --git a/test/command/2649.md b/test/command/2649.md index af84693c4..8f594cfe1 100644 --- a/test/command/2649.md +++ b/test/command/2649.md @@ -88,20 +88,17 @@

1

- -

Sébastien Loeb

+

Sébastien Loeb

78

2

- -

Sébastien Ogier

+

Sébastien Ogier

38

10

- -

Hannu Mikkola

+

Hannu Mikkola

18

diff --git a/test/command/3494.md b/test/command/3494.md index 534041246..249973fb3 100644 --- a/test/command/3494.md +++ b/test/command/3494.md @@ -25,8 +25,7 @@ thank you - -

blah

+

blah

blah blah diff --git a/test/command/empty_paragraphs.md b/test/command/empty_paragraphs.md new file mode 100644 index 000000000..3064d3f7d --- /dev/null +++ b/test/command/empty_paragraphs.md @@ -0,0 +1,95 @@ +``` +% pandoc -f native -t docx | pandoc -f docx -t native +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +[Para [Str "hi"] +,Para [Str "lo"]] +``` + +``` +% pandoc -f native -t docx+empty_paragraphs | pandoc -f docx -t native +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +[Para [Str "hi"] +,Para [Str "lo"]] +``` + +``` +% pandoc -f native -t docx | pandoc -f docx+empty_paragraphs -t native +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +[Para [Str "hi"] +,Para [Str "lo"]] +``` + +``` +% pandoc -f native -t docx+empty_paragraphs | pandoc -f docx+empty_paragraphs -t native +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +[Para [Str "hi"] +,Para [] +,Para [] +,Para [Str "lo"]] +``` + +``` +% pandoc -f native -t html5 +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +

hi

+ + +

lo

+``` + +``` +% pandoc -f native -t html5+empty_paragraphs +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +

hi

+

+

+

lo

+``` + +``` +% pandoc -f html+empty_paragraphs -t native +

hi

+

+

+

lo

+^D +[Para [Str "hi"] +,Para [] +,Para [] +,Para [Str "lo"]] +``` + +``` +% pandoc -f html -t native +

hi

+

+

+

lo

+^D +[Para [Str "hi"] +,Para [Str "lo"]] +``` + +``` +% pandoc -f native -t opendocument+empty_paragraphs +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +hi + + +lo +``` + +``` +% pandoc -f native -t opendocument +[Para [Str "hi"], Para [], Para [], Para [Str "lo"]] +^D +hi +lo +``` diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 973f4cfe5..804ad8732 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -1,15 +1,15 @@ [Table [] [AlignDefault] [0.0] [[]] - [[[Plain []]] + [[[]] ,[[Plain [Str "User\8217s",Space,Str "Guide"]]] - ,[[Plain []]] - ,[[Plain []]] - ,[[Plain []]] + ,[[]] + ,[[]] + ,[[]] ,[[Plain [Str "11",Space,Str "August",Space,Str "2017"]]] - ,[[Plain []]] - ,[[Plain []]] - ,[[Plain []]] - ,[[Plain []]]] + ,[[]] + ,[[]] + ,[[]] + ,[[]]] ,Para [Str "CONTENTS"] ,Para [Strong [Str "Section",Space,Str "Page"]] ,Para [Str "FIGURES",Space,Str "iv"] diff --git a/test/docx/comments.native b/test/docx/comments.native index 8587b54dd..3357bc257 100644 --- a/test/docx/comments.native +++ b/test/docx/comments.native @@ -1,4 +1,4 @@ [Para [Str "I",Space,Str "want",Space,Span ("",["comment-start"],[("id","0"),("author","Jesse Rosenthal"),("date","2016-05-09T16:13:00Z")]) [Str "I",Space,Str "left",Space,Str "a",Space,Str "comment."],Str "some",Space,Str "text",Space,Str "to",Space,Str "have",Space,Str "a",Space,Str "comment",Space,Span ("",["comment-end"],[("id","0")]) [],Str "on",Space,Str "it."] ,Para [Str "This",Space,Str "is",Space,Span ("",["comment-start"],[("id","1"),("author","Jesse Rosenthal"),("date","2016-05-09T16:13:00Z")]) [Str "A",Space,Str "comment",Space,Str "across",Space,Str "paragraphs."],Str "a",Space,Str "new",Space,Str "paragraph."] ,Para [Str "And",Space,Str "so",Span ("",["comment-end"],[("id","1")]) [],Space,Str "is",Space,Str "this."] -,Para [Str "One",Space,Span ("",["comment-start"],[("id","2"),("author","Jesse Rosenthal"),("date","2016-05-09T16:14:00Z")]) [Str "This",Space,Str "one",Space,Str "has",Space,Str "multiple",Space,Str "paragraphs.",Space,Str "\182",Space,Str "\182",Space,Str "See?"],Str "more",Span ("",["comment-end"],[("id","2")]) [],Str ".",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "one",Space,Str "with",Space,Str "a",Space,Span ("",["comment-start"],[("id","3"),("author","Jesse Rosenthal"),("date","2016-06-22T14:35:00Z")]) [Str "Do",Space,Str "something."],Span ("",["comment-start"],[("id","4"),("author","Jesse Rosenthal"),("date","2016-06-22T14:36:00Z")]) [Str "Do",Space,Str "something",Space,Str "else."],Str "comment",Space,Str "in",Space,Str "a",Space,Str "comment",Span ("",["comment-end"],[("id","3")]) [Span ("",["comment-end"],[("id","4")]) []],Str "."]] +,Para [Str "One",Space,Span ("",["comment-start"],[("id","2"),("author","Jesse Rosenthal"),("date","2016-05-09T16:14:00Z")]) [Str "This",Space,Str "one",Space,Str "has",Space,Str "multiple",Space,Str "paragraphs.",Space,Str "\182",Space,Str "See?"],Str "more",Span ("",["comment-end"],[("id","2")]) [],Str ".",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "one",Space,Str "with",Space,Str "a",Space,Span ("",["comment-start"],[("id","3"),("author","Jesse Rosenthal"),("date","2016-06-22T14:35:00Z")]) [Str "Do",Space,Str "something."],Span ("",["comment-start"],[("id","4"),("author","Jesse Rosenthal"),("date","2016-06-22T14:36:00Z")]) [Str "Do",Space,Str "something",Space,Str "else."],Str "comment",Space,Str "in",Space,Str "a",Space,Str "comment",Span ("",["comment-end"],[("id","3")]) [Span ("",["comment-end"],[("id","4")]) []],Str "."]] diff --git a/test/docx/drop_cap_nostrip.native b/test/docx/drop_cap_nostrip.native deleted file mode 100644 index ea6a244b5..000000000 --- a/test/docx/drop_cap_nostrip.native +++ /dev/null @@ -1,9 +0,0 @@ -[Para [Str "Drop",Space,Str "cap."] -,Para [] -,Para [Str "Next",Space,Str "paragraph."] -,Para [] -,Para [Str "Drop",Space,Str "cap",Space,Str "in",Space,Str "margin."] -,Para [] -,Para [] -,Para [] -,Para [Str "Drop",Space,Str "cap",Space,Str "(not",Space,Str "really)."]]