HTML reader: allow sublists that are not marked as items.

The HTML standard requires all list items to be marked with a `<li>`
tag, but some tools fail to do so for sublists. The reader now accepts
these unwrapped lists as sublists.

Closes: #8150
This commit is contained in:
Albert Krewinkel 2022-08-03 13:04:45 +02:00
parent b306f2e1fd
commit 0d7f80c87f
No known key found for this signature in database
GPG key ID: 388DC0B21F631124
2 changed files with 73 additions and 5 deletions

View file

@ -327,11 +327,11 @@ pBulletList = try $ do
-- note: if they have an <ol> or <ul> not in scope of a <li>, -- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml... -- treat it as a list item, though it's not valid xhtml...
skipMany nonItem skipMany nonItem
items <- manyTill (pListItem nonItem) (pCloses "ul") items <- manyTill (pListItem' nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items return $ B.bulletList $ map (fixPlains True) items
pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks pListItem :: PandocMonad m => TagParser m Blocks
pListItem nonItem = do pListItem = do
TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" [])
let attr = toStringAttr attr' let attr = toStringAttr attr'
let addId ident bs = case B.toList bs of let addId ident bs = case B.toList bs of
@ -339,7 +339,13 @@ pListItem nonItem = do
[Span (ident, [], []) ils] : xs) [Span (ident, [], []) ils] : xs)
_ -> B.divWith (ident, [], []) bs _ -> B.divWith (ident, [], []) bs
maybe id addId (lookup "id" attr) <$> maybe id addId (lookup "id" attr) <$>
pInTags "li" block <* skipMany nonItem pInTags "li" block
-- | Lenient variant of 'pListItem' for use inside @ul@ and @ol@ parsers.
--
-- A regular @li@ element is tried first. If that does not match, a
-- bullet or ordered list that appears directly in the parent list (i.e.,
-- without an enclosing @li@) is accepted and becomes the content of an
-- item of its own. This is invalid HTML, but some tools produce it.
--
-- Any run of input matched by @nonItem@ directly after the item is
-- skipped; the results of @nonItem@ are discarded.
pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks
pListItem' nonItem = do
  item <- pListItem <|> pBulletList <|> pOrderedList
  skipMany nonItem
  return item
parseListStyleType :: Text -> ListNumberStyle parseListStyleType :: Text -> ListNumberStyle
parseListStyleType "lower-roman" = LowerRoman parseListStyleType "lower-roman" = LowerRoman
@ -381,7 +387,7 @@ pOrderedList = try $ do
_ <- manyTill (eFootnote <|> pBlank) (pCloses "ol") _ <- manyTill (eFootnote <|> pBlank) (pCloses "ol")
return mempty return mempty
else do else do
items <- manyTill (pListItem nonItem) (pCloses "ol") items <- manyTill (pListItem' nonItem) (pCloses "ol")
return $ B.orderedListWith (start, style, DefaultDelim) $ return $ B.orderedListWith (start, style, DefaultDelim) $
map (fixPlains True) items map (fixPlains True) items

62
test/command/8150.md Normal file
View file

@ -0,0 +1,62 @@
Nested bullet lists
```
% pandoc -f html -t markdown
<ul>
<li>L1</li>
<li>L2</li>
<ul>
<li>L3.1</li>
<li>L3.2</li>
</ul>
<li>L4</li>
</ul>
^D
- L1
- L2
- - L3.1
- L3.2
- L4
```
Nested ordered lists
```
% pandoc -f html -t markdown
<ol>
<li>L1</li>
<li>L2</li>
<ol>
<li>L3.1</li>
<li>L3.2</li>
</ol>
</ol>
^D
1. L1
2. L2
3. 1. L3.1
2. L3.2
```
Ordered list nested below an unordered list
```
% pandoc -f html -t markdown
<ul>
<li>L1</li>
<li>L2</li>
<ol>
<li>L3.1</li>
<li>L3.2</li>
</ol>
</ul>
^D
- L1
- L2
- 1. L3.1
2. L3.2
```