consolidate simple-table detection (#5524)

add `onlySimpleTableCells` to `Text.Pandoc.Shared`

[API change]

This fixes an inconsistency in the HTML reader, which did not treat tables with `<p>` inside cells as simple.
This commit is contained in:
Mauro Bieg 2019-05-27 19:53:19 +02:00 committed by John MacFarlane
parent f807f5b383
commit d07ed83d70
6 changed files with 24 additions and 42 deletions

View file

@ -61,7 +61,7 @@ import Text.Pandoc.Options (
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
extractSpaces, safeRead, underlineSpan)
extractSpaces, onlySimpleTableCells, safeRead, underlineSpan)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
@ -488,14 +488,9 @@ pTable = try $ do
TagClose _ <- pSatisfy (matchTagClose "table")
let rows'' = concat rowsLs <> topfoot <> bottomfoot
let rows''' = map (map snd) rows''
-- let rows''' = map (map snd) rows''
-- fail on empty table
guard $ not $ null head' && null rows'''
let isSinglePlain x = case B.toList x of
[] -> True
[Plain _] -> True
_ -> False
let isSimple = all isSinglePlain $ concat (head':rows''')
let isSimple = onlySimpleTableCells $ fmap B.toList <$> head':rows'''
let cols = if null head'
then maximum (map length rows''')
else length head'

View file

@ -59,6 +59,7 @@ module Text.Pandoc.Shared (
isHeaderBlock,
headerShift,
stripEmptyParagraphs,
onlySimpleTableCells,
isTightList,
taskListItemFromAscii,
taskListItemToAscii,
@ -108,6 +109,7 @@ import Data.List (find, intercalate, intersperse, stripPrefix, sortBy)
import Data.Ord (comparing)
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Data.Monoid (Any (..))
import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
import qualified Data.Set as Set
import qualified Data.Text as T
@ -571,6 +573,19 @@ stripEmptyParagraphs = walk go
isEmptyParagraph (Para []) = True
isEmptyParagraph _ = False
-- | Detect if table rows contain only cells consisting of a single
-- paragraph that has no @LineBreak@.
onlySimpleTableCells :: [[TableCell]] -> Bool
onlySimpleTableCells = all isSimpleCell . concat
where
isSimpleCell [Plain ils] = not (hasLineBreak ils)
isSimpleCell [Para ils ] = not (hasLineBreak ils)
isSimpleCell [] = True
isSimpleCell _ = False
hasLineBreak = getAny . query isLineBreak
isLineBreak LineBreak = Any True
isLineBreak _ = Any False
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain

View file

@ -21,17 +21,16 @@ import Control.Monad.State.Strict (State, get, modify, runState)
import Data.Char (isAscii)
import Data.Foldable (foldrM)
import Data.List (transpose)
import Data.Monoid (Any (..))
import Data.Text (Text)
import qualified Data.Text as T
import Network.HTTP (urlEncode)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Shared (isTightList, taskListItemToAscii, linesToPara,
substitute, capitalize, isHeaderBlock)
import Text.Pandoc.Shared (capitalize, isHeaderBlock, isTightList,
linesToPara, onlySimpleTableCells, substitute, taskListItemToAscii)
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (query, walk, walkM)
import Text.Pandoc.Walk (walk, walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML (toHtml5Entities)
@ -159,16 +158,7 @@ blockToNodes opts (DefinitionList items) ns =
dlToBullet (term, xs) =
Para term : concat xs
blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do
let allcells = concat (headers:rows)
let isLineBreak LineBreak = Any True
isLineBreak _ = Any False
let isPlainOrPara [Para _] = True
isPlainOrPara [Plain _] = True
isPlainOrPara [] = True
isPlainOrPara _ = False
let isSimple = all isPlainOrPara allcells &&
not ( getAny (query isLineBreak allcells) )
if isEnabled Ext_pipe_tables opts && isSimple
if isEnabled Ext_pipe_tables opts && onlySimpleTableCells (headers:rows)
then do
-- We construct a table manually as a CUSTOM_BLOCK, for
-- two reasons: (1) cmark-gfm currently doesn't support

View file

@ -27,7 +27,6 @@ import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose,
isPrefixOf)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import Data.Monoid (Any (..))
import Data.Ord (comparing)
import qualified Data.Set as Set
import qualified Data.Scientific as Scientific
@ -574,14 +573,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do
let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
then blankline
else blankline $$ (": " <> caption') $$ blankline
let isLineBreak LineBreak = Any True
isLineBreak _ = Any False
let hasLineBreak = getAny . query isLineBreak
let isSimpleCell [Plain ils] = not (hasLineBreak ils)
isSimpleCell [Para ils ] = not (hasLineBreak ils)
isSimpleCell [] = True
isSimpleCell _ = False
let hasSimpleCells = all isSimpleCell (concat (headers:rows))
let hasSimpleCells = onlySimpleTableCells $ headers:rows
let isSimple = hasSimpleCells && all (==0) widths
let isPlainBlock (Plain _) = True
isPlainBlock _ = False

View file

@ -31,7 +31,6 @@ import Control.Monad.State.Strict
import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace)
import Data.Default
import Data.List (intersperse, isInfixOf, transpose)
import Data.Monoid (Any (..))
import qualified Data.Set as Set
import Data.Text (Text)
import System.FilePath (takeExtension)
@ -44,7 +43,6 @@ import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Walk
type Notes = [[Block]]
@ -269,15 +267,7 @@ blockToMuse (Table caption aligns widths headers rows) =
blocksToDoc opts blocks =
local (\env -> env { envOptions = opts }) $ blockListToMuse blocks
numcols = maximum (length aligns : length widths : map length (headers:rows))
hasSimpleCells = all isSimpleCell (concat (headers:rows))
isLineBreak LineBreak = Any True
isLineBreak _ = Any False
hasLineBreak = getAny . query isLineBreak
isSimple = hasSimpleCells && all (== 0) widths
isSimpleCell [Plain ils] = not (hasLineBreak ils)
isSimpleCell [Para ils ] = not (hasLineBreak ils)
isSimpleCell [] = True
isSimpleCell _ = False
isSimple = onlySimpleTableCells (headers:rows) && all (== 0) widths
blockToMuse (Div _ bs) = flatBlockListToMuse bs
blockToMuse Null = return empty

View file

@ -415,7 +415,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.3333333333333333,0.3333333333333333,0.3333333333333333]
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]