Org reader: support alphabetical (fancy) lists

This adds support for alphabetical lists in org by enabling the
extension Ext_fancy_lists, mimicking the behaviour of Org Mode when
org-list-allow-alphabetical is enabled.

Enabling Ext_fancy_lists will also make Pandoc differentiate between the
delimiters of ordered lists (periods or closing parentheses). Org does
this differentiation by default when exporting to some formats (e.g.
plain text) but does not in others (e.g. html and latex), so I decided
to copy Pandoc's markdown reader behaviour.
This commit is contained in:
Lucas Viana 2022-01-06 16:30:20 -03:00 committed by John MacFarlane
parent 66636c89b0
commit fb91a91615
5 changed files with 89 additions and 11 deletions

View file

@ -3162,6 +3162,15 @@ output format.
Some aspects of [Pandoc's Markdown citation syntax](#citations) Some aspects of [Pandoc's Markdown citation syntax](#citations)
are also accepted in `org` input. are also accepted in `org` input.
#### Extension: `fancy_lists` {#org-fancy-lists}
Some aspects of [Pandoc's Markdown fancy lists](#extension-fancy_lists) are also
accepted in `org` input, mimicking the option `org-list-allow-alphabetical` in
Emacs. As in Org Mode, enabling this extension allows lowercase and uppercase
alphabetical markers for ordered lists to be parsed in addition to arabic ones.
Note that for Org, this does not include roman numerals or the `#` placeholder
that are enabled by the extension in Pandoc's Markdown.
#### Extension: `element_citations` #### #### Extension: `element_citations` ####
In the `jats` output formats, this causes reference items to In the `jats` output formats, this causes reference items to

View file

@ -536,6 +536,7 @@ getAllExtensions f = universalExtensions <> getAll f
extensionsFromList extensionsFromList
[ Ext_citations [ Ext_citations
, Ext_smart , Ext_smart
, Ext_fancy_lists
, Ext_task_lists , Ext_task_lists
] ]
getAll "html" = autoIdExtensions <> getAll "html" = autoIdExtensions <>

View file

@ -28,7 +28,8 @@ import Data.Text (Text)
import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Definition as Pandoc import Text.Pandoc.Definition as Pandoc
import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Shared (safeRead)
import Data.Char (toLower) import Text.Pandoc.Parsing (lowerAlpha, upperAlpha)
import Data.Functor (($>))
-- | Horizontal Line (five -- dashes or more) -- | Horizontal Line (five -- dashes or more)
hline :: Monad m => OrgParser m () hline :: Monad m => OrgParser m ()
@ -65,13 +66,12 @@ latexEnvStart = try $
listCounterCookie :: Monad m => OrgParser m Int listCounterCookie :: Monad m => OrgParser m Int
listCounterCookie = try $ listCounterCookie = try $
string "[@" string "[@"
*> ((many1Char digit >>= safeRead) <|> numFromAlph <$> oneOf asciiAlph) *> parseNum
<* char ']' <* char ']'
<* (skipSpaces <|> lookAhead eol) <* (skipSpaces <|> lookAhead eol)
where where parseNum = (safeRead =<< many1Char digit)
asciiAlph = ['a'..'z'] ++ ['A'..'Z'] <|> snd <$> lowerAlpha
-- The number makes sense because char is always in asciiAlph <|> snd <$> upperAlpha
numFromAlph c = fromEnum (toLower c) - fromEnum 'a' + 1
bulletListStart :: Monad m => OrgParser m Int bulletListStart :: Monad m => OrgParser m Int
bulletListStart = try $ do bulletListStart = try $ do
@ -88,12 +88,20 @@ eol = void (char '\n')
orderedListStart :: Monad m => OrgParser m (Int, ListAttributes) orderedListStart :: Monad m => OrgParser m (Int, ListAttributes)
orderedListStart = try $ do orderedListStart = try $ do
ind <- length <$> many spaceChar ind <- length <$> many spaceChar
orderedListMarker style <- choice styles
delim <- choice delims
skipSpaces1 <|> lookAhead eol skipSpaces1 <|> lookAhead eol
start <- option 1 listCounterCookie start <- option 1 listCounterCookie
return (ind + 1, (start, DefaultStyle, DefaultDelim)) return (ind + 1, (start, style, delim))
-- Ordered list markers allowed in org-mode -- Ordered list markers allowed in org-mode
where orderedListMarker = many1Char digit *> oneOf ".)" where
styles = [ many1Char digit $> Decimal
, fst <$> lowerAlpha
, fst <$> upperAlpha
]
delims = [ char '.' $> Period
, char ')' $> OneParen
]
drawerStart :: Monad m => OrgParser m Text drawerStart :: Monad m => OrgParser m Text
drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline

View file

@ -835,8 +835,11 @@ indented indentedMarker minIndent = try $ do
orderedList :: PandocMonad m => OrgParser m (F Blocks) orderedList :: PandocMonad m => OrgParser m (F Blocks)
orderedList = try $ do orderedList = try $ do
(indent, attr) <- lookAhead orderedListStart (indent, attr) <- lookAhead orderedListStart
fmap (B.orderedListWith attr . compactify) . sequence attr' <- option (fst3 attr, DefaultStyle, DefaultDelim) $
guardEnabled Ext_fancy_lists $> attr
fmap (B.orderedListWith attr' . compactify) . sequence
<$> many1 (listItem ((fst <$> orderedListStart) `indented` indent)) <$> many1 (listItem ((fst <$> orderedListStart) `indented` indent))
where fst3 (x,_,_) = x
definitionListItem :: PandocMonad m definitionListItem :: PandocMonad m
=> OrgParser m Int => OrgParser m Int

View file

@ -12,12 +12,21 @@ Test parsing of org lists.
-} -}
module Tests.Readers.Org.Block.List (tests) where module Tests.Readers.Org.Block.List (tests) where
import Data.Text (Text)
import Test.Tasty (TestTree) import Test.Tasty (TestTree)
import Tests.Helpers ((=?>)) import Tests.Helpers ((=?>), purely, test)
import Tests.Readers.Org.Shared ((=:), spcSep) import Tests.Readers.Org.Shared ((=:), spcSep)
import Text.Pandoc (ReaderOptions (readerExtensions),
Extension (Ext_fancy_lists), def, enableExtension,
getDefaultExtensions, readOrg)
import Text.Pandoc.Builder import Text.Pandoc.Builder
import qualified Data.Text as T import qualified Data.Text as T
orgFancyLists :: Text -> Pandoc
orgFancyLists = purely $
let extensionsFancy = enableExtension Ext_fancy_lists (getDefaultExtensions "org")
in readOrg def{ readerExtensions = extensionsFancy }
tests :: [TestTree] tests :: [TestTree]
tests = tests =
[ "Simple Bullet Lists" =: [ "Simple Bullet Lists" =:
@ -153,6 +162,21 @@ tests =
] ]
] ]
, test orgFancyLists "Task with alphabetical markers and counter cookie" $
T.unlines [ "- [ ] nope"
, "- [@9] [X] yup"
, "- [@a][-] started"
, " a) [@D][X] sure"
, " b) [@8] [ ] nuh-uh"
] =?>
bulletList [ plain "☐ nope", plain "☒ yup"
, mconcat [ plain "☐ started"
, orderedListWith
(4, LowerAlpha, OneParen)
[plain "☒ sure", plain "☐ nuh-uh"]
]
]
, "Simple Ordered List" =: , "Simple Ordered List" =:
("1. Item1\n" <> ("1. Item1\n" <>
"2. Item2\n") =?> "2. Item2\n") =?>
@ -162,6 +186,33 @@ tests =
] ]
in orderedListWith listStyle listStructure in orderedListWith listStyle listStructure
, test orgFancyLists "Simple Ordered List with fancy lists extension" $
("1. Item1\n" <>
"2. Item2\n") =?>
let listStyle = (1, Decimal, Period)
listStructure = [ plain "Item1"
, plain "Item2"
]
in orderedListWith listStyle listStructure
, test orgFancyLists "Simple Ordered List with lower alpha marker" $
("a) Item1\n" <>
"b) Item2\n") =?>
let listStyle = (1, LowerAlpha, OneParen)
listStructure = [ plain "Item1"
, plain "Item2"
]
in orderedListWith listStyle listStructure
, test orgFancyLists "Simple Ordered List with upper and lower alpha markers" $
("A. Item1\n" <>
"b) Item2\n") =?>
let listStyle = (1, UpperAlpha, Period)
listStructure = [ plain "Item1"
, plain "Item2"
]
in orderedListWith listStyle listStructure
, "Simple Ordered List with Counter Cookie" =: , "Simple Ordered List with Counter Cookie" =:
("1. [@1234] Item1\n" <> ("1. [@1234] Item1\n" <>
"2. Item2\n") =?> "2. Item2\n") =?>
@ -213,6 +264,12 @@ tests =
] =?> ] =?>
orderedList [ plain "", plain "" ] orderedList [ plain "", plain "" ]
, test orgFancyLists "Empty ordered list item with fancy lists extension" $
T.unlines [ "a."
, "2. "
] =?>
orderedListWith (1, LowerAlpha, Period) [ plain "", plain "" ]
, "Empty ordered list item with counter cookie" =: , "Empty ordered list item with counter cookie" =:
T.unlines [ "1. [@5]" T.unlines [ "1. [@5]"
, "3. [@e] " , "3. [@e] "