Org reader: fix handling of empty table cells, rows

This fixes Org mode parsing of some corner cases regarding empty cells
and rows.  Empty cells weren't parsed correctly, e.g. `|||` should be
two empty cells, but would be parsed as a single cell containing a pipe
character.  Empty rows where parsed as alignment rows and dropped from
the output.

This fixes #2616.
This commit is contained in:
Albert Krewinkel 2016-05-04 15:33:18 +02:00
parent a51e4e8215
commit 2d825603c6
2 changed files with 29 additions and 14 deletions

View file

@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
trimInlines )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@ -57,8 +58,6 @@ import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
import Text.Pandoc.Error
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
@ -807,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
tableContentCell :: OrgParser (F Blocks)
tableContentCell = try $
fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
endOfCell :: OrgParser Char
endOfCell = try $ char '|' <|> lookAhead newline
fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
tableAlignRow :: OrgParser OrgTableRow
tableAlignRow = try $
OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
tableAlignRow = try $ do
tableStart
cells <- many1Till tableAlignCell newline
-- Empty rows are regular (i.e. content) rows, not alignment rows.
guard $ any (/= AlignDefault) cells
return $ OrgAlignRow cells
tableAlignCell :: OrgParser Alignment
tableAlignCell =
@ -833,15 +833,19 @@ tableAlignCell =
where emptyCell = try $ skipSpaces *> endOfCell
tableAlignFromChar :: OrgParser Alignment
tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
, char 'c' *> return AlignCenter
, char 'r' *> return AlignRight
]
tableAlignFromChar = try $
choice [ char 'l' *> return AlignLeft
, char 'c' *> return AlignCenter
, char 'r' *> return AlignRight
]
tableHline :: OrgParser OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
endOfCell :: OrgParser Char
endOfCell = try $ char '|' <|> lookAhead newline
rowsToTable :: [OrgTableRow]
-> F OrgTable
rowsToTable = foldM rowToContent emptyTable

View file

@ -941,7 +941,7 @@ tests =
, "Empty table" =:
"||" =?>
simpleTable' 1 mempty mempty
simpleTable' 1 mempty [[mempty]]
, "Glider Table" =:
unlines [ "| 1 | 0 | 0 |"
@ -996,6 +996,17 @@ tests =
, [ plain "dynamic", plain "Lisp" ]
]
, "Table with empty cells" =:
"|||c|" =?>
simpleTable' 3 mempty [[mempty, mempty, plain "c"]]
, "Table with empty rows" =:
unlines [ "| first |"
, "| |"
, "| third |"
] =?>
simpleTable' 1 mempty [[plain "first"], [mempty], [plain "third"]]
, "Table with alignment row" =:
unlines [ "| Numbers | Text | More |"
, "| <c> | <r> | |"