From d07ed83d705df491bba7b295bd5e80629d971685 Mon Sep 17 00:00:00 2001
From: Mauro Bieg <mb21@users.noreply.github.com>
Date: Mon, 27 May 2019 19:53:19 +0200
Subject: [PATCH] consolidate simple-table detection (#5524)

add `onlySimpleTableCells` to `Text.Pandoc.Shared`

[API change]

This fixes an inconsistency in the HTML reader, which did not treat tables with `<p>` inside cells as simple.
---
 src/Text/Pandoc/Readers/HTML.hs       |  9 ++-------
 src/Text/Pandoc/Shared.hs             | 15 +++++++++++++++
 src/Text/Pandoc/Writers/CommonMark.hs | 18 ++++--------------
 src/Text/Pandoc/Writers/Markdown.hs   | 10 +---------
 src/Text/Pandoc/Writers/Muse.hs       | 12 +-----------
 test/html-reader.native               |  2 +-
 6 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c1478ebc4..474bda3de 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -61,7 +61,7 @@ import Text.Pandoc.Options (
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
-                           extractSpaces, safeRead, underlineSpan)
+    extractSpaces, onlySimpleTableCells, safeRead, underlineSpan)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
@@ -488,14 +488,9 @@ pTable = try $ do
   TagClose _ <- pSatisfy (matchTagClose "table")
   let rows'' = concat rowsLs <> topfoot <> bottomfoot
   let rows''' = map (map snd) rows''
-  -- let rows''' = map (map snd) rows''
   -- fail on empty table
   guard $ not $ null head' && null rows'''
-  let isSinglePlain x = case B.toList x of
-                             []        -> True
-                             [Plain _] -> True
-                             _         -> False
-  let isSimple = all isSinglePlain $ concat (head':rows''')
+  let isSimple = onlySimpleTableCells $ fmap B.toList <$> head':rows'''
   let cols = if null head'
                 then maximum (map length rows''')
                 else length head'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 8aaa362cd..2fb9cbac3 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -59,6 +59,7 @@ module Text.Pandoc.Shared (
                      isHeaderBlock,
                      headerShift,
                      stripEmptyParagraphs,
+                     onlySimpleTableCells,
                      isTightList,
                      taskListItemFromAscii,
                      taskListItemToAscii,
@@ -108,6 +109,7 @@ import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
 import Data.Ord (comparing)
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe)
+import Data.Monoid (Any (..))
 import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -571,6 +573,19 @@ stripEmptyParagraphs = walk go
         isEmptyParagraph (Para []) = True
         isEmptyParagraph _         = False
 
+-- | Detect if table rows contain only cells consisting of a single
+-- paragraph that has no @LineBreak@.
+onlySimpleTableCells :: [[TableCell]] -> Bool
+onlySimpleTableCells = all isSimpleCell . concat
+  where
+    isSimpleCell [Plain ils] = not (hasLineBreak ils)
+    isSimpleCell [Para ils ] = not (hasLineBreak ils)
+    isSimpleCell []          = True
+    isSimpleCell _           = False
+    hasLineBreak = getAny . query isLineBreak
+    isLineBreak LineBreak = Any True
+    isLineBreak _         = Any False
+
 -- | Detect if a list is tight.
 isTightList :: [[Block]] -> Bool
 isTightList = all firstIsPlain
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index e7d215707..6a763913a 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -21,17 +21,16 @@ import Control.Monad.State.Strict (State, get, modify, runState)
 import Data.Char (isAscii)
 import Data.Foldable (foldrM)
 import Data.List (transpose)
-import Data.Monoid (Any (..))
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.HTTP (urlEncode)
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (isTightList, taskListItemToAscii, linesToPara,
-                           substitute, capitalize, isHeaderBlock)
+import Text.Pandoc.Shared (capitalize, isHeaderBlock, isTightList,
+    linesToPara, onlySimpleTableCells, substitute, taskListItemToAscii)
 import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Walk (query, walk, walkM)
+import Text.Pandoc.Walk (walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
 import Text.Pandoc.Writers.Shared
 import Text.Pandoc.XML (toHtml5Entities)
@@ -159,16 +158,7 @@ blockToNodes opts (DefinitionList items) ns =
         dlToBullet (term, xs) =
           Para term : concat xs
 blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do
-  let allcells = concat (headers:rows)
-  let isLineBreak LineBreak = Any True
-      isLineBreak _         = Any False
-  let isPlainOrPara [Para _]  = True
-      isPlainOrPara [Plain _] = True
-      isPlainOrPara []        = True
-      isPlainOrPara _         = False
-  let isSimple = all isPlainOrPara allcells &&
-                 not ( getAny (query isLineBreak allcells) )
-  if isEnabled Ext_pipe_tables opts && isSimple
+  if isEnabled Ext_pipe_tables opts && onlySimpleTableCells (headers:rows)
      then do
        -- We construct a table manually as a CUSTOM_BLOCK, for
        -- two reasons:  (1) cmark-gfm currently doesn't support
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index b542d4193..85d9e85e1 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -27,7 +27,6 @@ import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose,
                   isPrefixOf)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
-import Data.Monoid (Any (..))
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import qualified Data.Scientific as Scientific
@@ -574,14 +573,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) =  do
   let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
                      then blankline
                      else blankline $$ (": " <> caption') $$ blankline
-  let isLineBreak LineBreak = Any True
-      isLineBreak _         = Any False
-  let hasLineBreak = getAny . query isLineBreak
-  let isSimpleCell [Plain ils] = not (hasLineBreak ils)
-      isSimpleCell [Para ils ] = not (hasLineBreak ils)
-      isSimpleCell []          = True
-      isSimpleCell _           = False
-  let hasSimpleCells = all isSimpleCell (concat (headers:rows))
+  let hasSimpleCells = onlySimpleTableCells $ headers:rows
   let isSimple = hasSimpleCells && all (==0) widths
   let isPlainBlock (Plain _) = True
       isPlainBlock _         = False
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index c71708e23..c03fd0c1a 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -31,7 +31,6 @@ import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace)
 import Data.Default
 import Data.List (intersperse, isInfixOf, transpose)
-import Data.Monoid (Any (..))
 import qualified Data.Set as Set
 import Data.Text (Text)
 import System.FilePath (takeExtension)
@@ -44,7 +43,6 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Templates (renderTemplate')
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Walk
 
 type Notes = [[Block]]
 
@@ -269,15 +267,7 @@ blockToMuse (Table caption aligns widths headers rows) =
     blocksToDoc opts blocks =
       local (\env -> env { envOptions = opts }) $ blockListToMuse blocks
     numcols = maximum (length aligns : length widths : map length (headers:rows))
-    hasSimpleCells = all isSimpleCell (concat (headers:rows))
-    isLineBreak LineBreak = Any True
-    isLineBreak _         = Any False
-    hasLineBreak = getAny . query isLineBreak
-    isSimple = hasSimpleCells && all (== 0) widths
-    isSimpleCell [Plain ils] = not (hasLineBreak ils)
-    isSimpleCell [Para ils ] = not (hasLineBreak ils)
-    isSimpleCell []          = True
-    isSimpleCell _           = False
+    isSimple = onlySimpleTableCells (headers:rows) && all (== 0) widths
 blockToMuse (Div _ bs) = flatBlockListToMuse bs
 blockToMuse Null = return empty
 
diff --git a/test/html-reader.native b/test/html-reader.native
index 4203aa3a8..acd0087ef 100644
--- a/test/html-reader.native
+++ b/test/html-reader.native
@@ -415,7 +415,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
   ,[Plain [Str "5"]]
   ,[Plain [Str "6"]]]]
 ,HorizontalRule
-,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.3333333333333333,0.3333333333333333,0.3333333333333333]
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
  [[Plain [Str "X"]]
  ,[Plain [Str "Y"]]
  ,[Plain [Str "Z"]]]