HTML reader: treat comments as blank when parsing.

This modifies pBlank.  Previously comments could sometimes
flummox the parser.

Closes #7482.
This commit is contained in:
John MacFarlane 2021-08-10 12:50:23 -07:00
parent 7ca4233793
commit 3a924d8f96
2 changed files with 54 additions and 5 deletions

View file

@ -30,11 +30,11 @@ module Text.Pandoc.Readers.HTML.Parsing
)
where
import Control.Monad (guard, void, mzero)
import Control.Monad (void, mzero)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) )
( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) )
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition (Attr)
import Text.Pandoc.Parsing
@ -118,9 +118,11 @@ pCloses tagtype = try $ do
_ -> mzero
pBlank :: PandocMonad m => TagParser m ()
pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ T.all isSpace str
pBlank = void $ pSatisfy isBlank
where
isBlank (TagText t) = T.all isSpace t
isBlank (TagComment _) = True
isBlank _ = False
pLocation :: PandocMonad m => TagParser m ()
pLocation = do

47
test/command/7482.md Normal file
View file

@ -0,0 +1,47 @@
```
% pandoc -f html -t org
<table border=0 cellpadding=0 cellspacing=0 width=206 style='border-collapse:
collapse;width:154pt'>
<!--StartFragment-->
<col width=99 style='mso-width-source:userset;mso-width-alt:3157;width:74pt'>
<col width=107 style='mso-width-source:userset;mso-width-alt:3413;width:80pt'>
<tr height=23 style='height:17.0pt'>
<td height=23 align=left width=99 style='height:17.0pt;width:74pt'>Last N
credits</td>
<td align=left width=107 style='width:80pt'>Average grade</td>
</tr>
<tr height=21 style='height:16.0pt'>
<td height=21 align=right style='height:16.0pt'>140</td>
<td align=right>17.06571429</td>
</tr>
<tr height=21 style='height:16.0pt'>
<td height=21 class=xl63 align=right style='height:16.0pt'>84</td>
<td class=xl63 align=right>17.95595238</td>
</tr>
<tr height=21 style='height:16.0pt'>
<td height=21 class=xl63 align=right style='height:16.0pt'>64</td>
<td class=xl63 align=right>18.9734375</td>
</tr>
<tr height=21 style='height:16.0pt'>
<td height=21 align=right style='height:16.0pt'>36</td>
<td align=right>19.12777778</td>
</tr>
<tr height=27 style='mso-height-source:userset;height:20.0pt'>
<td height=27 align=right style='height:20.0pt'>29</td>
<td align=right>19.18275862</td>
</tr>
<tr height=21 style='height:16.0pt'>
<td height=21 align=right style='height:16.0pt'>19</td>
<td align=right>19.00526316</td>
</tr>
<!--EndFragment-->
</table>
^D
| Last N credits | Average grade |
| 140 | 17.06571429 |
| 84 | 17.95595238 |
| 64 | 18.9734375 |
| 36 | 19.12777778 |
| 29 | 19.18275862 |
| 19 | 19.00526316 |
```