From 0d7f80c87ff3948669356c7963118d90533cd519 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 3 Aug 2022 13:04:45 +0200 Subject: [PATCH] HTML reader: allow sublists that are not marked as items. The HTML standard requires all list items to be marked with a `<li>`
tag, but some tools fail to do so for sublists. The reader now accepts these unwrapped lists as sublists. Closes: #8150 --- src/Text/Pandoc/Readers/HTML.hs | 16 ++++++--- test/command/8150.md | 62 +++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 test/command/8150.md diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index dd0e54c27..711457312 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -327,11 +327,11 @@ pBulletList = try $ do -- note: if they have an
<ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... skipMany nonItem - items <- manyTill (pListItem nonItem) (pCloses "ul") + items <- manyTill (pListItem' nonItem) (pCloses "ul") return $ B.bulletList $ map (fixPlains True) items -pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks -pListItem nonItem = do +pListItem :: PandocMonad m => TagParser m Blocks +pListItem = do TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" []) let attr = toStringAttr attr' let addId ident bs = case B.toList bs of @@ -339,7 +339,13 @@ pListItem nonItem = do [Span (ident, [], []) ils] : xs) _ -> B.divWith (ident, [], []) bs maybe id addId (lookup "id" attr) <$> - pInTags "li" block <* skipMany nonItem + pInTags "li" block + +-- | Parses a list item just like 'pListItem', but allows sublists outside of +-- @li@ tags to be treated as items. +pListItem' :: PandocMonad m => TagParser m a -> TagParser m Blocks +pListItem' nonItem = (pListItem <|> pBulletList <|> pOrderedList) + <* skipMany nonItem parseListStyleType :: Text -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman @@ -381,7 +387,7 @@ pOrderedList = try $ do _ <- manyTill (eFootnote <|> pBlank) (pCloses "ol") return mempty else do - items <- manyTill (pListItem nonItem) (pCloses "ol") + items <- manyTill (pListItem' nonItem) (pCloses "ol") return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items diff --git a/test/command/8150.md b/test/command/8150.md new file mode 100644 index 000000000..2feded961 --- /dev/null +++ b/test/command/8150.md @@ -0,0 +1,62 @@ +Nested bullet lists +``` +% pandoc -f html -t markdown +
<ul>
+  <li>L1</li>
+  <li>L2</li>
+  <ul>
+    <li>L3.1</li>
+    <li>L3.2</li>
+  </ul>
+  <li>L4</li>
+</ul>
        +^D +- L1 + +- L2 + +- - L3.1 + - L3.2 + +- L4 +``` + +Nested ordered lists +``` +% pandoc -f html -t markdown +
<ol>
+  <li>L1</li>
+  <li>L2</li>
+  <ol>
+    <li>L3.1</li>
+    <li>L3.2</li>
+  </ol>
+</ol>
        +^D +1. L1 + +2. L2 + +3. 1. L3.1 + 2. L3.2 +``` + +Ordered list nested below an unordered list +``` +% pandoc -f html -t markdown +
<ul>
+  <li>L1</li>
+  <li>L2</li>
+  <ol>
+    <li>L3.1</li>
+    <li>L3.2</li>
+  </ol>
+</ul>
        +^D +- L1 + +- L2 + +- 1. L3.1 + 2. L3.2 +```