HTML reader: support row or column-spanning table cells

This commit is contained in:
Albert Krewinkel 2020-11-24 13:48:43 +01:00
parent 446ef27a3f
commit c9f98e2bf5
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
3 changed files with 36 additions and 38 deletions

View file

@ -476,18 +476,10 @@ pHrule = do
pTable :: PandocMonad m => TagParser m Blocks
pTable = pTable' block pCell
noColOrRowSpans :: Tag Text -> Bool
noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan"
where isNullOrOne x = case fromAttrib x t of
"" -> True
"1" -> True
_ -> False
pCell :: PandocMonad m => Text -> TagParser m [(Alignment, Blocks)]
pCell :: PandocMonad m => Text -> TagParser m [Cell]
pCell celltype = try $ do
skipMany pBlank
tag <- lookAhead $
pSatisfy (\t -> t ~== TagOpen celltype [] && noColOrRowSpans t)
tag <- lookAhead $ pSatisfy (\t -> t ~== TagOpen celltype [])
let extractAlign' [] = ""
extractAlign' ("text-align":x:_) = x
extractAlign' (_:xs) = extractAlign' xs
@ -498,9 +490,13 @@ pCell celltype = try $ do
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
res <- pInTags' celltype noColOrRowSpans block
let rowspan = RowSpan . fromMaybe 1 $
safeRead =<< maybeFromAttrib "rowspan" tag
let colspan = ColSpan . fromMaybe 1 $
safeRead =<< maybeFromAttrib "colspan" tag
res <- pInTags celltype block
skipMany pBlank
return [(align, res)]
return [B.cell align rowspan colspan res]
pBlockQuote :: PandocMonad m => TagParser m Blocks
pBlockQuote = do

View file

@ -59,8 +59,8 @@ pColgroup = try $ do
-- | Parses a simple HTML table
pTable' :: PandocMonad m
=> TagParser m Blocks -- ^ Caption parser
-> (Text -> TagParser m [(Alignment, Blocks)]) -- ^ Table cell parser
=> TagParser m Blocks -- ^ Caption parser
-> (Text -> TagParser m [Cell]) -- ^ Table cell parser
-> TagParser m Blocks
pTable' block pCell = try $ do
TagOpen _ attribs' <- pSatisfy (matchTagOpen "table" [])
@ -73,35 +73,31 @@ pTable' block pCell = try $ do
pInTags "tr" (pCell "td" <|> pCell "th")
pTBody = pInTag True "tbody" $ many1 pTr
head'' <- pInTag False "thead" (option [] pTr) <|> pInTag True "thead" pTh
head' <- map snd <$>
pInTag True "tbody"
head' <- pInTag True "tbody"
(if null head'' then pTh else return head'')
topfoot <- option [] $ pInTag False "tfoot" $ many pTr
rowsLs <- many pTBody
bottomfoot <- option [] $ pInTag False "tfoot" $ many pTr
TagClose _ <- pSatisfy (matchTagClose "table")
let rows'' = concat rowsLs <> topfoot <> bottomfoot
let rows''' = map (map snd) rows''
let rows = concat rowsLs <> topfoot <> bottomfoot
rows''' = map (map cellContents) rows
-- fail on empty table
guard $ not $ null head' && null rows'''
let isSimple = onlySimpleTableCells $ fmap B.toList <$> head':rows'''
let isSimple = onlySimpleTableCells $
map cellContents head' : rows'''
let cols = if null head'
then maximum (map length rows''')
else length head'
-- add empty cells to short rows
let addEmpties r = case cols - length r of
n | n > 0 -> r <> replicate n mempty
| otherwise -> r
let rows = map addEmpties rows'''
let aligns = case rows'' of
(cs:_) -> take cols $ map fst cs ++ repeat AlignDefault
_ -> replicate cols AlignDefault
let aligns = case rows of
(cs:_) -> take cols $
concatMap cellAligns cs ++ repeat AlignDefault
_ -> replicate cols AlignDefault
let widths = if null widths'
then if isSimple
then replicate cols ColWidthDefault
else replicate cols (ColWidth (1.0 / fromIntegral cols))
else widths'
let toRow = Row nullAttr . map B.simpleCell
let toRow = Row nullAttr
toHeaderRow l = [toRow l | not (null l)]
return $ B.tableWith attribs
(B.simpleCaption caption)
@ -109,3 +105,9 @@ pTable' block pCell = try $ do
(TableHead nullAttr $ toHeaderRow head')
[TableBody nullAttr 0 [] $ map toRow rows]
(TableFoot nullAttr [])
cellContents :: Cell -> [Block]
cellContents (Cell _ _ _ _ bs) = bs
cellAligns :: Cell -> [Alignment]
cellAligns (Cell _ align _ (ColSpan cs) _) = replicate cs align

View file

@ -28,22 +28,22 @@
,(AlignDefault,ColWidthDefault)]
(TableHead ("",[],[])
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "Right"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignLeft (RowSpan 1) (ColSpan 1)
[Plain [Str "Left"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignCenter (RowSpan 1) (ColSpan 1)
[Plain [Str "Center"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "Default"]]]])
[(TableBody ("",[],[]) (RowHeadColumns 0)
[]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignLeft (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignCenter (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]]])]
@ -73,13 +73,13 @@
[(TableBody ("",[],[]) (RowHeadColumns 0)
[]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignLeft (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignCenter (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
,Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]]])]
(TableFoot ("",[],[])
[])]