Merge pull request #1885 from mb21/html-reader-tables

fixes HTML Reader: tables
This commit is contained in:
John MacFarlane 2015-01-25 10:46:47 -08:00
commit 33d1c8cc01
3 changed files with 397 additions and 12 deletions

View file

@ -374,12 +374,20 @@ pTable = try $ do
caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
skipMany pBlank
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
let pTh = option [] $ pInTags "tr" (pCell "th")
pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
pTBody = do pOptInTag "tbody" $ many1 pTr
head'' <- pOptInTag "thead" pTh
head' <- pOptInTag "tbody" $ do
if null head''
then pTh
else return head''
rowsLs <- many pTBody
rows' <- pOptInTag "tfoot" $ many pTr
TagClose _ <- pSatisfy (~== TagClose "table")
let rows = (concat rowsLs) ++ rows'
-- fail on empty table
guard $ not $ null head' && null rows
let isSinglePlain x = case B.toList x of
[Plain _] -> True
_ -> False
@ -624,14 +632,17 @@ pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
pOptInTag :: String -> TagParser a
-> TagParser a
pOptInTag tagtype parser = try $ do
open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True)
-- parses p, preceded by an optional opening tag
-- and followed by an optional closing tag
pOptInTag :: String -> TagParser a -> TagParser a
pOptInTag tagtype p = try $ do
skipMany pBlank
x <- parser
optional $ pSatisfy (~== TagOpen tagtype [])
skipMany pBlank
x <- p
skipMany pBlank
optional $ pSatisfy (~== TagClose tagtype)
skipMany pBlank
when open $ pCloses tagtype
return x
pCloses :: String -> TagParser ()

View file

@ -433,6 +433,7 @@ An e-mail address: nobody [at] nowhere.net<blockquote>
<p>text<em> Leading spaces</em></p>
<p><em>Trailing spaces </em>text</p>
<h1>Tables</h1>
<h2>Tables with Headers</h2>
<table>
<tr>
<th>X</th>
@ -450,5 +451,251 @@ An e-mail address: nobody [at] nowhere.net<blockquote>
<td>6</td>
</tr>
</table>
<hr />
<table>
<thead>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<thead>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>2</td>
<td>3</td>
</tr>
<tr>
<th>4</th>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<thead>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>2</td>
<td>3</td>
</tr>
</tbody>
<tfoot>
<tr>
<th>4</th>
<td>5</td>
<td>6</td>
</tr>
</tfoot>
</table>
<hr />
<table>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
<tr>
<th>1</th>
<th>2</th>
<th>3</th>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</table>
<hr />
<table>
<tbody>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<thead>
</thead>
<tbody>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<thead>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
</tbody>
<tbody>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<thead>
<tr>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td><p>2</p></td>
<td>3</td>
</tr>
</tbody>
<tbody>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<h2>Tables without Headers</h2>
<table>
<tbody>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</table>
<hr />
<table>
<thead>
</thead>
<tbody>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tbody>
</table>
<hr />
<table>
<tbody>
<tr>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
</tbody>
<tfoot>
<tr>
<td>4</td>
<td>5</td>
<td>6</td>
</tr>
</tfoot>
</table>
<h2>Empty Tables</h2>
<p>This section should be empty.</p>
<table>
<tbody>
</tbody>
</table>
<table>
</table>
</body>
</html>

View file

@ -311,6 +311,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]]
,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]
,Header 1 ("",[],[]) [Str "Tables"]
,Header 2 ("",[],[]) [Str "Tables",Space,Str "with",Space,Str "Headers"]
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
@ -320,4 +321,130 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.3333333333333333,0.3333333333333333,0.3333333333333333]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
,[Plain [Str "Z"]]]
[[[Plain [Str "1"]]
,[Para [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,Header 2 ("",[],[]) [Str "Tables",Space,Str "without",Space,Str "Headers"]
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,HorizontalRule
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[]
[[[Plain [Str "1"]]
,[Plain [Str "2"]]
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
,[Plain [Str "6"]]]]
,Header 2 ("",[],[]) [Str "Empty",Space,Str "Tables"]
,Para [Str "This",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "empty."]]