Improve detection of pipe table line widths.

Fixed calculation of maximum column widths in pipe tables.
It is now based on the length of the markdown line, rather
than a "stringified" version of the parsed line.  This should
be more predictable for users. In addition, we take into account
double-wide characters such as emojis.

Closes #7713.
This commit is contained in:
John MacFarlane 2021-11-23 10:50:35 -08:00
parent b72ba3ed6d
commit 79e6f8db13
3 changed files with 52 additions and 20 deletions

View file

@ -4055,12 +4055,12 @@ legal (though ugly) pipe table:
orange|3.09 orange|3.09
The cells of pipe tables cannot contain block elements like paragraphs The cells of pipe tables cannot contain block elements like paragraphs
and lists, and cannot span multiple lines. If a pipe table contains a and lists, and cannot span multiple lines. If any line of the
row whose Markdown content is wider than the column width (see markdown source is longer than the column width (see `--columns`),
`--columns`), then the table will take up the full text width and then the table will take up the full text width and the cell
the cell contents will wrap, with the relative cell widths determined contents will wrap, with the relative cell widths determined by
by the number of dashes in the line separating the table header from the number of dashes in the line separating the table header
the table body. (For example `---|-` would make the first column 3/4 from the table body. (For example `---|-` would make the first column 3/4
and the second column 1/4 of the full text width.) and the second column 1/4 of the full text width.)
On the other hand, if no lines are wider than column width, then On the other hand, if no lines are wider than column width, then
cell contents will not be wrapped, and the cells will be sized cell contents will not be wrapped, and the cells will be sized

View file

@ -1,4 +1,5 @@
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns #-}
@ -21,8 +22,8 @@ module Text.Pandoc.Readers.Markdown (
import Control.Monad import Control.Monad
import Control.Monad.Except (throwError) import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isPunctuation, isSpace) import Data.Char (isAlphaNum, isPunctuation, isSpace)
import Text.DocLayout (realLength)
import Data.List (transpose, elemIndex, sortOn, foldl') import Data.List (transpose, elemIndex, sortOn, foldl')
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.Map as M import qualified Data.Map as M
import Data.Maybe import Data.Maybe
import qualified Data.Set as Set import qualified Data.Set as Set
@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report)
import Text.Pandoc.Definition as Pandoc import Text.Pandoc.Definition as Pandoc
import Text.Pandoc.Emoji (emojiToInline) import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error import Text.Pandoc.Error
import Safe.Foldable (maximumBounded)
import Text.Pandoc.Logging import Text.Pandoc.Logging
import Text.Pandoc.Options import Text.Pandoc.Options
import Text.Pandoc.Walk (walk) import Text.Pandoc.Walk (walk)
@ -1351,26 +1353,30 @@ pipeTable = try $ do
nonindentSpaces nonindentSpaces
lookAhead nonspaceChar lookAhead nonspaceChar
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
let heads' = take (length aligns) <$> heads let cellContents = parseFromString' pipeTableCell . trim
let numcols = length aligns
let heads' = take numcols heads
lines' <- many pipeTableRow lines' <- many pipeTableRow
let lines'' = map (take (length aligns) <$>) lines' let lines'' = map (take numcols) lines'
let maxlength = maximum $ let lineWidths = map (sum . map realLength) (heads' : lines'')
fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'') columns <- getOption readerColumns
numColumns <- getOption readerColumns -- add numcols + 1 for the pipes themselves
let widths = if maxlength > numColumns let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns
then map (\len -> then map (\len ->
fromIntegral len / fromIntegral (sum seplengths)) fromIntegral len / fromIntegral (sum seplengths))
seplengths seplengths
else replicate (length aligns) 0.0 else replicate (length aligns) 0.0
return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'') (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
(rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows)
sepPipe :: PandocMonad m => MarkdownParser m () sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do sepPipe = try $ do
char '|' <|> char '+' char '|' <|> char '+'
notFollowedBy blankline notFollowedBy blankline
-- parse a row, also returning probable alignments for org-table cells -- parse a row, returning raw cell contents
pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) pipeTableRow :: PandocMonad m => MarkdownParser m [Text]
pipeTableRow = try $ do pipeTableRow = try $ do
scanForPipe scanForPipe
skipMany spaceChar skipMany spaceChar
@ -1378,13 +1384,11 @@ pipeTableRow = try $ do
-- split into cells -- split into cells
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r") <|> void (noneOf "|\n\r")
let cellContents = withRaw (many chunk) >>= cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|'
parseFromString' pipeTableCell . trim . snd
cells <- cellContents `sepEndBy1` char '|'
-- surrounding pipes needed for a one-column table: -- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe) guard $ not (length cells == 1 && not openPipe)
blankline blankline
return $ sequence cells return cells
pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
pipeTableCell = pipeTableCell =

28
test/command/7713.md Normal file
View file

@ -0,0 +1,28 @@
```
% pandoc
| aaaaaaaaaaaa | bbbbb | ccccccccccc |
| --- | --- | --- |
| | | cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc |
^D
<table>
<colgroup>
<col style="width: 33%" />
<col style="width: 33%" />
<col style="width: 33%" />
</colgroup>
<thead>
<tr class="header">
<th>aaaaaaaaaaaa</th>
<th>bbbbb</th>
<th>ccccccccccc</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td></td>
<td></td>
<td>cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc cccccccccc</td>
</tr>
</tbody>
</table>
```