Parsing: partition module into (internal) submodules (#7962)

2022-03-11 18:21:59 +01:00 · 2022-03-11 18:21:59 +01:00 · 517bceeba8
commit 517bceeba8
parent 168529f0a4
11 changed files with 1885 additions and 1472 deletions
--- a/pandoc.cabal
+++ b/pandoc.cabal
@ -649,6 +649,15 @@ library
                   Text.Pandoc.Filter.JSON,
                   Text.Pandoc.Filter.Lua,
                   Text.Pandoc.Filter.Path,
+                   Text.Pandoc.Parsing.Capabilities,
+                   Text.Pandoc.Parsing.Citations,
+                   Text.Pandoc.Parsing.Combinators,
+                   Text.Pandoc.Parsing.GridTable,
+                   Text.Pandoc.Parsing.Lists,
+                   Text.Pandoc.Parsing.Math,
+                   Text.Pandoc.Parsing.Smart,
+                   Text.Pandoc.Parsing.State,
+                   Text.Pandoc.Parsing.Types,
                   Text.Pandoc.Readers.Docx.Lists,
                   Text.Pandoc.Readers.Docx.Combine,
                   Text.Pandoc.Readers.Docx.Parse,
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
--- a/src/Text/Pandoc/Parsing/Capabilities.hs
+++ b/src/Text/Pandoc/Parsing/Capabilities.hs
@ -0,0 +1,137 @@
+{-# LANGUAGE MultiParamTypeClasses      #-}
+{- |
+   Module      : Text.Pandoc.Parsing
+   Copyright   : © 2006-2022 John MacFarlane
+   License     : GPL-2.0-or-later
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Parser state capabilities.
+-}
+module Text.Pandoc.Parsing.Capabilities
+  ( -- * Capabilities
+
+    -- ** Element identifiers
+    HasIdentifierList (..)
+
+    -- ** Include files
+  , HasIncludeFiles (..)
+
+    -- ** String/Word boundaries
+  , HasLastStrPosition (..)
+  , updateLastStrPos
+  , notAfterString
+
+    -- ** Logging
+  , HasLogMessages (..)
+  , logMessage
+  , reportLogMessages
+
+    -- ** Macros
+  , HasMacros (..)
+
+    -- ** Quote context
+  , QuoteContext (..)
+  , HasQuoteContext (..)
+  , failIfInQuoteContext
+
+    -- ** Reader options
+  , HasReaderOptions (..)
+  , guardEnabled
+  , guardDisabled
+  )
+where
+
+import Control.Monad (guard, when)
+import Data.Text (Text)
+import Text.Parsec (ParsecT, SourcePos, Stream, getPosition, getState, updateState)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
+import Text.Pandoc.Logging (LogMessage)
+import Text.Pandoc.Options
+  ( Extension
+  , ReaderOptions(readerExtensions)
+  , extensionEnabled
+  )
+import Text.Pandoc.Parsing.Types
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
+
+import qualified Data.Map as M
+import qualified Data.Set as Set
+
+class HasReaderOptions st where
+  extractReaderOptions :: st -> ReaderOptions
+  getOption            :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
+  -- default
+  getOption  f         = f . extractReaderOptions <$> getState
+
+class HasQuoteContext st m where
+  getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
+  withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
+
+failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
+                     => QuoteContext
+                     -> ParserT s st m ()
+failIfInQuoteContext context = do
+  context' <- getQuoteContext
+  when (context' == context) $ Prelude.fail "already inside quotes"
+
+class HasIdentifierList st where
+  extractIdentifierList  :: st -> Set.Set Text
+  updateIdentifierList   :: (Set.Set Text -> Set.Set Text) -> st -> st
+
+class HasMacros st where
+  extractMacros          :: st -> M.Map Text Macro
+  updateMacros           :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st
+
+class HasLastStrPosition st where
+  setLastStrPos  :: Maybe SourcePos -> st -> st
+  getLastStrPos  :: st -> Maybe SourcePos
+
+class HasLogMessages st where
+  addLogMessage :: LogMessage -> st -> st
+  getLogMessages :: st -> [LogMessage]
+
+class HasIncludeFiles st where
+  getIncludeFiles :: st -> [Text]
+  addIncludeFile :: Text -> st -> st
+  dropLatestIncludeFile :: st -> st
+
+-- | Add a log message.
+logMessage :: (Stream s m a, HasLogMessages st)
+           => LogMessage -> ParserT s st m ()
+logMessage msg = updateState (addLogMessage msg)
+
+-- | Report all the accumulated log messages, according to verbosity level.
+reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
+reportLogMessages = do
+  msgs <- getLogMessages <$> getState
+  mapM_ report msgs
+
+-- | Succeed only if the extension is enabled.
+guardEnabled :: (Stream s m a,  HasReaderOptions st)
+             => Extension -> ParserT s st m ()
+guardEnabled ext =
+  getOption readerExtensions >>= guard . extensionEnabled ext
+
+-- | Succeed only if the extension is disabled.
+guardDisabled :: (Stream s m a, HasReaderOptions st)
+              => Extension -> ParserT s st m ()
+guardDisabled ext =
+  getOption readerExtensions >>= guard . not . extensionEnabled ext
+
+-- | Update the position on which the last string ended.
+updateLastStrPos :: (Stream s m a, HasLastStrPosition st)
+                 => ParserT s st m ()
+updateLastStrPos = getPosition >>= updateState . setLastStrPos . Just
+
+-- | Whether we are right after the end of a string.
+notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
+notAfterString = do
+  pos <- getPosition
+  st  <- getState
+  return $ getLastStrPos st /= Just pos
+
+data QuoteContext
+    = InSingleQuote   -- ^ Used when parsing inside single quotes
+    | InDoubleQuote   -- ^ Used when parsing inside double quotes
+    | NoQuote         -- ^ Used when not parsing inside quotes
+    deriving (Eq, Show)
--- a/src/Text/Pandoc/Parsing/Citations.hs
+++ b/src/Text/Pandoc/Parsing/Citations.hs
@ -0,0 +1,57 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+Module      : Text.Pandoc.Parsing.Citations
+Copyright   : © 2006-2022 John MacFarlane
+License     : GPL-2.0-or-later
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Citation parsing.
+-}
+
+module Text.Pandoc.Parsing.Citations
+  ( citeKey
+  )
+where
+
+import Control.Monad (guard, MonadPlus(mzero))
+import Data.Char (isAlphaNum , isSpace)
+import Data.Text (Text)
+import Text.Pandoc.Sources
+import Text.Parsec
+  ( (<|>)
+  , Stream(..)
+  , lookAhead
+  , many
+  , option
+  , try
+  )
+import Text.Pandoc.Parsing.Capabilities (HasLastStrPosition, notAfterString)
+import Text.Pandoc.Parsing.Combinators
+import Text.Pandoc.Parsing.Types (ParserT)
+
+import qualified Data.Text as T
+
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
+        => Bool -- ^ If True, allow expanded @{..} syntax.
+        -> ParserT s st m (Bool, Text)
+citeKey allowBraced = try $ do
+  guard =<< notAfterString
+  suppress_author <- option False (True <$ char '-')
+  char '@'
+  key <- simpleCiteIdentifier
+        <|> if allowBraced
+               then charsInBalanced '{' '}' (satisfy (not . isSpace))
+               else mzero
+  return (suppress_author, key)
+
+simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char)
+                      => ParserT s st m Text
+simpleCiteIdentifier = do
+  firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
+  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+  let internal p = try $ p <* lookAhead regchar
+  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
+                 try (oneOf ":/" <* lookAhead (char '/'))
+  return $ T.pack $ firstChar:rest
+
--- a/src/Text/Pandoc/Parsing/Combinators.hs
+++ b/src/Text/Pandoc/Parsing/Combinators.hs
@ -0,0 +1,751 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+Module      : Text.Pandoc.Parsing.Combinators
+Copyright   : © 2006-2022 John MacFarlane
+License     : GPL-2.0-or-later
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Parser combinators for pandoc format readers.
+-}
+
+module Text.Pandoc.Parsing.Combinators
+  ( (<+?>)
+  , anyLine
+  , anyLineNewline
+  , blankline
+  , blanklines
+  , charRef
+  , characterReference
+  , charsInBalanced
+  , countChar
+  , emailAddress
+  , enclosed
+  , escaped
+  , extractIdClass
+  , gobbleAtMostSpaces
+  , gobbleSpaces
+  , indentWith
+  , insertIncludedFile
+  , isSpaceChar          -- not re-exported from T.P.Parsing
+  , lineBlockLines
+  , lineClump
+  , many1Char
+  , many1Till
+  , many1TillChar
+  , manyChar
+  , manyTillChar
+  , manyUntil
+  , manyUntilChar
+  , nested
+  , nonspaceChar
+  , notFollowedBy'
+  , oneOfStrings
+  , oneOfStringsCI
+  , parseFromString
+  , parseFromString'
+  , readWith
+  , readWithM
+  , registerHeader
+  , sepBy1'
+  , skipSpaces
+  , spaceChar
+  , stringAnyCase
+  , testStringWith
+  , textStr
+  , token
+  , trimInlinesF
+  , uri
+  , withHorizDisplacement
+  , withRaw
+  )
+where
+
+import Control.Monad
+  ( guard
+  , join
+  , liftM
+  , unless
+  , void
+  , when
+  )
+import Control.Monad.Except ( MonadError(throwError) )
+import Control.Monad.Identity ( Identity(..), MonadPlus(mzero) )
+import Data.Char
+  ( chr
+  , isAlphaNum
+  , isAscii
+  , isAsciiUpper
+  , isSpace
+  , ord
+  , toLower
+  , toUpper
+  )
+import Data.Functor (($>))
+import Data.List (intercalate)
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Asciify (toAsciiText)
+import Text.Pandoc.Builder (Attr, Inline(Str), Inlines, trimInlines)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
+import Text.Pandoc.Logging
+  ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
+import Text.Pandoc.Options
+  ( extensionEnabled
+  , Extension(Ext_auto_identifiers, Ext_ascii_identifiers)
+  , ReaderOptions(readerTabStop, readerExtensions) )
+import Text.Pandoc.Shared (escapeURI, mapLeft, schemes, tshow, uniqueIdent)
+import Text.Pandoc.Sources
+import Text.Pandoc.XML (fromEntities)
+import Text.Parsec
+  ( (<|>)
+  , ParsecT
+  , SourcePos
+  , Stream(..)
+  , between
+  , choice
+  , count
+  , getInput
+  , getPosition
+  , getState
+  , lookAhead
+  , many
+  , many1
+  , manyTill
+  , notFollowedBy
+  , option
+  , runParserT
+  , setInput
+  , setPosition
+  , skipMany
+  , sourceColumn
+  , sourceName
+  , tokenPrim
+  , try
+  , unexpected
+  , updateState
+  )
+import Text.Parsec.Pos (initialPos, newPos)
+import Text.Pandoc.Error
+  ( PandocError(PandocParseError, PandocParsecError) )
+import Text.Pandoc.Parsing.Capabilities
+import Text.Pandoc.Parsing.State
+import Text.Pandoc.Parsing.Types ( Parser, ParserT, Future (..))
+
+import qualified Data.Set as Set
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
+
+-- | Remove whitespace from start and end; just like @'trimInlines'@,
+-- but lifted into the 'Future' type.
+trimInlinesF :: Future s Inlines -> Future s Inlines
+trimInlinesF = liftM trimInlines
+
+-- | Like @count@, but packs its result
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
+          => Int
+          -> ParsecT s st m Char
+          -> ParsecT s st m Text
+countChar n = fmap T.pack . count n
+
+-- | Like @string@, but uses @Text@.
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
+textStr t = string (T.unpack t) $> t
+
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
+anyLine = do
+  -- This is much faster than:
+  -- manyTill anyChar newline
+  inp <- getInput
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- we assume that lines don't span different input files
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
+
+-- | Parse any line, include the final newline in the output
+anyLineNewline :: Monad m => ParserT Sources st m Text
+anyLineNewline = (<> "\n") <$> anyLine
+
+-- | Parse indent by specified number of spaces (or equiv. tabs)
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
+           => HasReaderOptions st
+           => Int -> ParserT s st m Text
+indentWith num = do
+  tabStop <- getOption readerTabStop
+  if num < tabStop
+     then countChar num (char ' ')
+     else choice [ try (countChar num (char ' '))
+                 , try (char '\t' >> indentWith (num - tabStop)) ]
+
+-- | Like @many@, but packs its result.
+manyChar :: Stream s m t
+         => ParserT s st m Char
+         -> ParserT s st m Text
+manyChar = fmap T.pack . many
+
+-- | Like @many1@, but packs its result.
+many1Char :: Stream s m t
+          => ParserT s st m Char
+          -> ParserT s st m Text
+many1Char = fmap T.pack . many1
+
+-- | Like @manyTill@, but packs its result.
+manyTillChar :: Stream s m t
+             => ParserT s st m Char
+             -> ParserT s st m a
+             -> ParserT s st m Text
+manyTillChar p = fmap T.pack . manyTill p
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: (Show end, Stream s m t)
+          => ParserT s st m a
+          -> ParserT s st m end
+          -> ParserT s st m [a]
+many1Till p end = do
+         notFollowedBy' end
+         first <- p
+         rest <- manyTill p end
+         return (first:rest)
+
+-- | Like @many1Till@, but packs its result
+many1TillChar :: (Show end, Stream s m t)
+              => ParserT s st m Char
+              -> ParserT s st m end
+              -> ParserT s st m Text
+many1TillChar p = fmap T.pack . many1Till p
+
+-- | Like @manyTill@, but also returns the result of end parser.
+manyUntil :: ParserT s u m a
+          -> ParserT s u m b
+          -> ParserT s u m ([a], b)
+manyUntil p end = scan
+  where scan =
+          (do e <- end
+              return ([], e)
+          ) <|>
+          (do x <- p
+              (xs, e) <- scan
+              return (x:xs, e))
+
+-- | Like @manyUntil@, but also packs its result.
+manyUntilChar :: ParserT s u m Char
+              -> ParserT s u m b
+              -> ParserT s u m (Text, b)
+manyUntilChar p = fmap go . manyUntil p
+  where
+    go (x, y) = (T.pack x, y)
+
+-- | Like @sepBy1@ from Parsec,
+-- but does not fail if it @sep@ succeeds and @p@ fails.
+sepBy1' :: ParsecT s u m a
+        -> ParsecT s u m sep
+        -> ParsecT s u m [a]
+sepBy1' p sep = (:) <$> p <*> many (try $ sep >> p)
+
+-- | A more general form of @notFollowedBy@.  This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
+notFollowedBy' p  = try $ join $  do  a <- try p
+                                      return (unexpected (show a))
+                                  <|>
+                                  return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
+
+-- TODO: This should be re-implemented in a Text-aware way
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' _ []   = Prelude.fail "no strings"
+oneOfStrings'' matches strs = try $ do
+  c <- anyChar
+  let strs' = [xs | (x:xs) <- strs, x `matches` c]
+  case strs' of
+       []  -> Prelude.fail "not found"
+       _   -> (c:) <$> oneOfStrings'' matches strs'
+               <|> if "" `elem` strs'
+                      then return [c]
+                      else Prelude.fail "not found"
+
+-- | Parses one of a list of strings.  If the list contains
+-- two strings one of which is a prefix of the other, the longer
+-- string will be matched if possible.
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
+oneOfStrings = oneOfStrings' (==)
+
+-- | Parses one of a list of strings (tried in order), case insensitive.
+
+-- TODO: This will not be accurate with general Unicode (neither
+-- Text.toLower nor Text.toCaseFold can be implemented with a map)
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
+oneOfStringsCI = oneOfStrings' ciMatch
+  where ciMatch x y = toLower' x == toLower' y
+        -- this optimizes toLower by checking common ASCII case
+        -- first, before calling the expensive unicode-aware
+        -- function:
+        toLower' c | isAsciiUpper c = chr (ord c + 32)
+                   | isAscii c = c
+                   | otherwise = toLower c
+
+-- | Parses a space or tab.
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
+spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
+
+-- | Parses a nonspace, nonnewline character.
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
+nonspaceChar = satisfy (not . isSpaceChar)
+
+isSpaceChar :: Char -> Bool
+isSpaceChar ' '  = True
+isSpaceChar '\t' = True
+isSpaceChar '\n' = True
+isSpaceChar '\r' = True
+isSpaceChar _    = False
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
+blanklines = T.pack <$> many1 blankline
+
+-- | Gobble n spaces; if tabs are encountered, expand them
+-- and gobble some or all of their spaces, leaving the rest.
+gobbleSpaces :: (HasReaderOptions st, Monad m)
+             => Int -> ParserT Sources st m ()
+gobbleSpaces 0 = return ()
+gobbleSpaces n
+  | n < 0     = error "gobbleSpaces called with negative number"
+  | otherwise = try $ do
+      char ' ' <|> eatOneSpaceOfTab
+      gobbleSpaces (n - 1)
+
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
+eatOneSpaceOfTab = do
+  lookAhead (char '\t')
+  pos <- getPosition
+  tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
+  inp <- getInput
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
+
+-- | Gobble up to n spaces; if tabs are encountered, expand them
+-- and gobble some or all of their spaces, leaving the rest.
+gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
+                   => Int -> ParserT Sources st m Int
+gobbleAtMostSpaces 0 = return 0
+gobbleAtMostSpaces n
+  | n < 0     = error "gobbleAtMostSpaces called with negative number"
+  | otherwise = option 0 $ do
+      char ' ' <|> eatOneSpaceOfTab
+      (+ 1) <$> gobbleAtMostSpaces (n - 1)
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
+         -> ParserT s st m end  -- ^ end parser
+         -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
+         -> ParserT s st m [a]
+enclosed start end parser = try $
+  start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
+stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
+
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
+stringAnyCase' [] = string ""
+stringAnyCase' (x:xs) = do
+  firstChar <- char (toUpper x) <|> char (toLower x)
+  rest <- stringAnyCase' xs
+  return (firstChar:rest)
+
+-- TODO rewrite by just adding to Sources stream?
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: Monad m
+                => ParserT Sources st m r
+                -> Text
+                -> ParserT Sources st m r
+parseFromString parser str = do
+  oldPos <- getPosition
+  oldInput <- getInput
+  setInput $ toSources str
+  setPosition $ initialPos $ sourceName oldPos <> "_chunk"
+  result <- parser
+  spaces
+  setInput oldInput
+  setPosition oldPos
+  return result
+
+-- | Like 'parseFromString' but specialized for 'ParserState'.
+-- This resets 'stateLastStrPos', which is almost always what we want.
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
+                 -> Text
+                 -> ParserT Sources u m a
+parseFromString' parser str = do
+  oldLastStrPos <- getLastStrPos <$> getState
+  updateState $ setLastStrPos Nothing
+  res <- parseFromString parser str
+  updateState $ setLastStrPos oldLastStrPos
+  return res
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: Monad m => ParserT Sources st m Text
+lineClump = blanklines
+          <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
+-- and return "hello (there)".
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
+                -> ParserT s st m Text
+charsInBalanced open close parser = try $ do
+  char open
+  let isDelim c = c == open || c == close
+  raw <- many $  T.pack <$> many1 (notFollowedBy (satisfy isDelim) >> parser)
+             <|> (do res <- charsInBalanced open close parser
+                     return $ T.singleton open <> res <> T.singleton close)
+  char close
+  return $ T.concat raw
+
+-- old charsInBalanced would be:
+-- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline)
+-- old charsInBalanced' would be:
+-- charsInBalanced open close anyChar
+
+-- Parsers for email addresses and URIs
+
+-- | Parses an email address; returns original and corresponding
+-- escaped mailto: URI.
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
+emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
+ where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
+                           in  (full, escapeURI $ "mailto:" <> full)
+       mailbox           = intercalate "." <$> (emailWord `sepBy1'` dot)
+       domain            = intercalate "." <$> (subdomain `sepBy1'` dot)
+       dot               = char '.'
+       subdomain         = many1 $ alphaNum <|> innerPunct (=='-')
+       -- this excludes some valid email addresses, since an
+       -- email could contain e.g. '__', but gives better results
+       -- for our purposes, when combined with markdown parsing:
+       innerPunct f      = try (satisfy f
+                                 <* notFollowedBy (satisfy (not . isAlphaNum)))
+       -- technically an email address could begin with a symbol,
+       -- but allowing this creates too many problems.
+       -- See e.g. https://github.com/jgm/pandoc/issues/2940
+       emailWord         = do x <- satisfy isAlphaNum
+                              xs <- many (satisfy isEmailChar)
+                              return (x:xs)
+       isEmailChar c     = isAlphaNum c || isEmailPunct c
+       isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
+
+
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
+uriScheme = oneOfStringsCI (Set.toList schemes)
+
+-- | Parses a URI. Returns pair of original and URI-escaped version.
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
+uri = try $ do
+  scheme <- uriScheme
+  char ':'
+  -- Avoid parsing e.g. "**Notes:**" as a raw URI:
+  notFollowedBy $ satisfy (\c -> c == '*' || c == '_' || c == ']')
+  -- We allow sentence punctuation except at the end, since
+  -- we don't want the trailing '.' in 'http://google.com.' We want to allow
+  -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
+  -- as a URL, while NOT picking up the closing paren in
+  -- (http://wikipedia.org). So we include balanced parens in the URL.
+  str <- T.concat <$> many1 (uriChunkBetween '(' ')'
+                        <|> uriChunkBetween '{' '}'
+                        <|> uriChunkBetween '[' ']'
+                        <|> T.pack <$> uriChunk)
+  str' <- option str $ char '/' >> return (str <> "/")
+  let uri' = scheme <> ":" <> fromEntities str'
+  return (uri', escapeURI uri')
+  where
+    isWordChar '#' = True
+    isWordChar '$' = True
+    isWordChar '%' = True
+    isWordChar '+' = True
+    isWordChar '/' = True
+    isWordChar '@' = True
+    isWordChar '\\' = True
+    isWordChar '_' = True
+    isWordChar '-' = True
+    isWordChar '&' = True
+    isWordChar '=' = True
+    isWordChar c   = isAlphaNum c
+
+    wordChar = satisfy isWordChar
+    percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit
+    entity = try $ pure <$> characterReference
+    punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
+    uriChunk = many1 wordChar
+           <|> percentEscaped
+           <|> entity
+           <|> try (punct <* lookAhead (void wordChar <|> void percentEscaped))
+    uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
+                                   return (T.pack $ [l] ++ chunk ++ [r])
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
+                      => ParserT s st m a  -- ^ Parser to apply
+                      -> ParserT s st m (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+  pos1 <- getPosition
+  result <- parser
+  pos2 <- getPosition
+  return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Applies a parser and returns the raw string that was parsed,
+-- along with the value produced by the parser.
+withRaw :: Monad m
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
+withRaw parser = do
+  inps1 <- getInput
+  result <- parser
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
+
+-- | Parses backslash, then applies character parser.
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
+        => ParserT s st m Char  -- ^ Parser for character to escape
+        -> ParserT s st m Char
+escaped parser = try $ char '\\' >> parser
+
+-- | Parse character entity.
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
+characterReference = try $ do
+  char '&'
+  ent <- many1Till nonspaceChar (char ';')
+  let ent' = case ent of
+                  '#':'X':xs -> '#':'x':xs  -- workaround tagsoup bug
+                  '#':_      -> ent
+                  _          -> ent ++ ";"
+  case lookupEntity ent' of
+       Just (c : _) -> return c
+       _            -> Prelude.fail "entity not found"
+
+-- | Parses a character reference and returns a Str element.
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
+charRef = Str . T.singleton <$> characterReference
+
+lineBlockLine :: Monad m => ParserT Sources st m Text
+lineBlockLine = try $ do
+  char '|'
+  char ' '
+  white <- T.pack <$> many (spaceChar >> return '\160')
+  notFollowedBy newline
+  line <- anyLine
+  continuations <- many (try $ char ' ' >> anyLine)
+  return $ white <> T.unwords (line : continuations)
+
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
+blankLineBlockLine = try (char '|' >> blankline)
+
+-- | Parses an RST-style line block and returns a list of strings.
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
+lineBlockLines = try $ do
+  lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
+  skipMany blankline
+  return lines'
+
+
+-- | Removes the ParsecT layer from the monad transformer stack
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
+          -> m (Either PandocError a)
+readWithM parser state input =
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
+
+-- | Parse a string with a given parser and state
+readWith :: ToSources t
+         => Parser Sources st a
+         -> st
+         -> t
+         -> Either PandocError a
+readWith p t inp = runIdentity $ readWithM p t inp
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: Show a
+               => ParserT Sources ParserState Identity a
+               -> Text
+               -> IO ()
+testStringWith parser str = UTF8.putStrLn $ tshow $
+                            readWith parser defaultParserState (toSources str)
+
+-- | Add header to the list of headers in state, together
+--  with its associated identifier.  If the identifier is null
+--  and the auto_identifiers extension is set, generate a new
+--  unique identifier, and update the list of identifiers
+--  in state.  Issue a warning if an explicit identifier
+--  is encountered that duplicates an earlier identifier
+--  (explicit or automatically generated).
+registerHeader :: (Stream s m a, HasReaderOptions st,
+                   HasLogMessages st, HasIdentifierList st)
+               => Attr -> Inlines -> ParserT s st m Attr
+registerHeader (ident,classes,kvs) header' = do
+  ids <- extractIdentifierList <$> getState
+  exts <- getOption readerExtensions
+  if T.null ident && Ext_auto_identifiers `extensionEnabled` exts
+     then do
+       let id' = uniqueIdent exts (B.toList header') ids
+       let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
+                     then toAsciiText id'
+                     else id'
+       updateState $ updateIdentifierList $ Set.insert id'
+       updateState $ updateIdentifierList $ Set.insert id''
+       return (id'',classes,kvs)
+     else do
+        unless (T.null ident) $ do
+          when (ident `Set.member` ids) $ do
+            pos <- getPosition
+            logMessage $ DuplicateIdentifier ident pos
+          updateState $ updateIdentifierList $ Set.insert ident
+        return (ident,classes,kvs)
+
+-- This is used to prevent exponential blowups for things like:
+-- a**a*a**a*a**a*a**a*a**a*a**a*a**
+nested :: Stream s m a
+       => ParserT s ParserState m a
+       -> ParserT s ParserState m a
+nested p = do
+  nestlevel <- stateMaxNestingLevel <$>  getState
+  guard $ nestlevel > 0
+  updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+  res <- p
+  updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+  return res
+
+token :: (Stream s m t)
+      => (t -> Text)
+      -> (t -> SourcePos)
+      -> (t -> Maybe a)
+      -> ParsecT s st m a
+token pp pos match = tokenPrim (T.unpack . pp) (\_ t _ -> pos t) match
+
+infixr 5 <+?>
+(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
+a <+?> b = a >>= flip fmap (try b <|> return mempty) . mappend
+
+extractIdClass :: Attr -> Attr
+extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
+  where
+    ident' = fromMaybe ident (lookup "id" kvs)
+    cls'   = maybe cls T.words $ lookup "class" kvs
+    kvs'   = filter (\(k,_) -> k /= "id" || k /= "class") kvs
+
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+                   => ParserT a st m b -- ^ parser to apply
+                   -> (Text -> a) -- ^ convert Text to stream type
+                   -> [FilePath]  -- ^ search path (directories)
+                   -> FilePath    -- ^ path of file to include
+                   -> Maybe Int   -- ^ start line (negative counts from end)
+                   -> Maybe Int   -- ^ end line (negative counts from end)
+                   -> ParserT a st m b
+insertIncludedFile parser toStream dirs f mbstartline mbendline = do
+  oldPos <- getPosition
+  oldInput <- getInput
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show oldPos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromDirs dirs f
+  contents <- case mbcontents of
+                   Just s -> return $ exciseLines mbstartline mbendline s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) oldPos
+                     return ""
+  setInput $ toStream contents
+  setPosition $ newPos f (fromMaybe 1 mbstartline) 1
+  result <- parser
+  setInput oldInput
+  setPosition oldPos
+  updateState dropLatestIncludeFile
+  return result
+
+exciseLines :: Maybe Int -> Maybe Int -> Text -> Text
+exciseLines Nothing Nothing t = t
+exciseLines mbstartline mbendline t =
+  T.unlines $ take (endline' - (startline' - 1))
+            $ drop (startline' - 1) contentLines
+ where
+  contentLines = T.lines t
+  numLines = length contentLines
+  startline' = case mbstartline of
+                 Nothing -> 1
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
+  endline' = case mbendline of
+                 Nothing -> numLines
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
--- a/src/Text/Pandoc/Parsing/GridTable.hs
+++ b/src/Text/Pandoc/Parsing/GridTable.hs
@ -0,0 +1,217 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+   Module      : Text.Pandoc.Parsing.GridTable
+   Copyright   : Copyright (C) 2006-2022 John MacFarlane
+   License     : GPL-2.0-or-later
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Shared parsers for plaintext tables.
+-}
+module Text.Pandoc.Parsing.GridTable
+  ( gridTableWith
+  , gridTableWith'
+  , tableWith
+  , tableWith'
+  , widthsFromIndices
+  )
+where
+
+import Control.Monad (guard)
+import Data.List (transpose)
+import Data.Text (Text)
+import Text.Pandoc.Options (ReaderOptions (readerColumns))
+import Text.Pandoc.Builder (Blocks)
+import Text.Pandoc.Definition
+import Text.Pandoc.Parsing.Capabilities
+import Text.Pandoc.Parsing.Combinators
+import Text.Pandoc.Parsing.Types
+import Text.Pandoc.Shared (compactify, splitTextByIndices, trim, trimr)
+import Text.Pandoc.Sources
+import Text.Parsec
+  ( Stream (..), many1, notFollowedBy, option, optional, sepEndBy1, try )
+
+import qualified Data.Text as T
+import qualified Text.Pandoc.Builder as B
+
+-- | Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows, which may be grid, separated by
+-- blank lines, and ending with a footer (dashed line followed by blank
+-- line).
+gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
+              -> Bool                        -- ^ Headerless table
+              -> ParserT Sources st m (mf Blocks)
+gridTableWith blocks headless =
+  tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
+            (gridTableSep '-') gridTableFooter
+
+gridTableWith' :: (Monad m, Monad mf,
+                   HasReaderOptions st, HasLastStrPosition st)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
+               -> Bool                        -- ^ Headerless table
+               -> ParserT Sources st m (TableComponents mf)
+gridTableWith' blocks headless =
+  tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
+             (gridTableSep '-') gridTableFooter
+
+gridTableSplitLine :: [Int] -> Text -> [Text]
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+  splitTextByIndices (init indices) $ trimr line
+
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
+gridPart ch = do
+  leftColon <- option False (True <$ char ':')
+  dashes <- many1 (char ch)
+  rightColon <- option False (True <$ char ':')
+  char '+'
+  let lengthDashes = length dashes + (if leftColon then 1 else 0) +
+                       (if rightColon then 1 else 0)
+  let alignment = case (leftColon, rightColon) of
+                       (True, True)   -> AlignCenter
+                       (True, False)  -> AlignLeft
+                       (False, True)  -> AlignRight
+                       (False, False) -> AlignDefault
+  return ((lengthDashes, lengthDashes + 1), alignment)
+
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
+
+removeFinalBar :: Text -> Text
+removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
+  where
+    go c = T.any (== c) " \t"
+
+-- | Separator between rows of grid table.
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
+                => Bool -- ^ Headerless table
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
+gridTableHeader True _ = do
+  optional blanklines
+  dashes <- gridDashedLines '-'
+  let aligns = map snd dashes
+  let lines'   = map (snd . fst) dashes
+  let indices  = scanl (+) 0 lines'
+  return (return [], aligns, indices)
+gridTableHeader False blocks = try $ do
+  optional blanklines
+  dashes <- gridDashedLines '-'
+  rawContent  <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >>
+                           T.pack <$> many1Till anyChar newline)
+  underDashes <- gridDashedLines '='
+  guard $ length dashes == length underDashes
+  let lines'   = map (snd . fst) underDashes
+  let indices  = scanl (+) 0 lines'
+  let aligns   = map snd underDashes
+  let rawHeads = map (T.unlines . map trim) $ transpose
+                       $ map (gridTableSplitLine indices) rawContent
+  heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
+  return (heads, aligns, indices)
+
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
+gridTableRawLine indices = do
+  char '|'
+  line <- many1Till anyChar newline
+  return (gridTableSplitLine indices $ T.pack line)
+
+-- | Parse row of grid table.
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
+             -> [Int]
+             -> ParserT Sources st m (mf [Blocks])
+gridTableRow blocks indices = do
+  colLines <- many1 (gridTableRawLine indices)
+  let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
+               transpose colLines
+      compactifyCell bs = case compactify [bs] of
+                            []  -> mempty
+                            x:_ -> x
+  cells <- sequence <$> mapM (parseFromString' blocks) cols
+  return $ fmap (map compactifyCell) cells
+
+removeOneLeadingSpace :: [Text] -> [Text]
+removeOneLeadingSpace xs =
+  if all startsWithSpace xs
+     then map (T.drop 1) xs
+     else xs
+   where startsWithSpace t = case T.uncons t of
+           Nothing     -> True
+           Just (c, _) -> c == ' '
+
+-- | Parse footer for a grid table.
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
+gridTableFooter = optional blanklines
+
+---
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'.
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
+          => ParserT s st m (mf [Blocks], [Alignment], [Int])
+          -> ([Int] -> ParserT s st m (mf [Blocks]))
+          -> ParserT s st m sep
+          -> ParserT s st m end
+          -> ParserT s st m (mf Blocks)
+tableWith headerParser rowParser lineParser footerParser = try $ do
+  (aligns, widths, heads, rows) <- tableWith' headerParser rowParser
+                                                lineParser footerParser
+  let th = TableHead nullAttr <$> heads
+      tb = (:[]) . TableBody nullAttr 0 [] <$> rows
+      tf = pure $ TableFoot nullAttr []
+  return $ B.table B.emptyCaption (zip aligns (map fromWidth widths)) <$> th <*> tb <*> tf
+  where
+    fromWidth n
+      | n > 0     = ColWidth n
+      | otherwise = ColWidthDefault
+
+type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
+
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
+           => ParserT s st m (mf [Blocks], [Alignment], [Int])
+           -> ([Int] -> ParserT s st m (mf [Blocks]))
+           -> ParserT s st m sep
+           -> ParserT s st m end
+           -> ParserT s st m (TableComponents mf)
+tableWith' headerParser rowParser lineParser footerParser = try $ do
+    (heads, aligns, indices) <- headerParser
+    lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
+    footerParser
+    numColumns <- getOption readerColumns
+    let widths = if null indices
+                    then replicate (length aligns) 0.0
+                    else widthsFromIndices numColumns indices
+    let toRow =  Row nullAttr . map B.simpleCell
+        toHeaderRow l = [toRow l | not (null l)]
+    return (aligns, widths, toHeaderRow <$> heads, map toRow <$> lines')
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int      -- Number of columns on terminal
+                  -> [Int]    -- Indices
+                  -> [Double] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns' indices =
+  let numColumns = max numColumns' (if null indices then 0 else last indices)
+      lengths' = zipWith (-) indices (0:indices)
+      lengths  = reverse $
+                 case reverse lengths' of
+                      []       -> []
+                      [x]      -> [x]
+                      -- compensate for the fact that intercolumn
+                      -- spaces are counted in widths of all columns
+                      -- but the last...
+                      (x:y:zs) -> if x < y && y - x <= 2
+                                     then y:y:zs
+                                     else x:y:zs
+      totLength = sum lengths
+      quotient = if totLength > numColumns
+                   then fromIntegral totLength
+                   else fromIntegral numColumns
+      fracs = map (\l -> fromIntegral l / quotient) lengths in
+  tail fracs
--- a/src/Text/Pandoc/Parsing/Lists.hs
+++ b/src/Text/Pandoc/Parsing/Lists.hs
@ -0,0 +1,215 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+Module      : Text.Pandoc.Parsing.Lists
+Copyright   : © 2006-2022 John MacFarlane
+License     : GPL-2.0-or-later
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Parsers for list markers.
+-}
+
+module Text.Pandoc.Parsing.Lists
+  ( anyOrderedListMarker
+  , decimal
+  , indentWith
+  , lowerAlpha
+  , lowerRoman
+  , orderedListMarker
+  , romanNumeral
+  , upperAlpha
+  , upperRoman
+  )
+where
+
+import Data.Char
+  ( isAsciiUpper
+  , isAsciiLower
+  , ord
+  , toLower
+  )
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Definition
+  ( ListNumberDelim(..)
+  , ListAttributes
+  , ListNumberStyle(..)
+  )
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Sources
+import Text.Parsec
+  ( (<|>)
+  , Stream(..)
+  , choice
+  , getState
+  , lookAhead
+  , many
+  , many1
+  , option
+  , try
+  , updateState
+  )
+import Text.Pandoc.Parsing.Combinators
+import Text.Pandoc.Parsing.State
+import Text.Pandoc.Parsing.Types (ParserT)
+
+import qualified Data.Map as M
+import qualified Data.Text as T
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
+             -> ParserT s st m Int
+romanNumeral upperCase = do
+    let rchar uc = char $ if upperCase then uc else toLower uc
+    let one         = rchar 'I'
+    let five        = rchar 'V'
+    let ten         = rchar 'X'
+    let fifty       = rchar 'L'
+    let hundred     = rchar 'C'
+    let fivehundred = rchar 'D'
+    let thousand    = rchar 'M'
+    lookAhead $ choice [one, five, ten, fifty, hundred, fivehundred, thousand]
+    thousands <- (1000 *) . length <$> many thousand
+    ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+    fivehundreds <- option 0 $ 500 <$ fivehundred
+    fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+    hundreds <- (100 *) . length <$> many hundred
+    nineties <- option 0 $ try $ ten >> hundred >> return 90
+    fifties <- option 0 (50 <$ fifty)
+    forties <- option 0 $ try $ ten >> fifty >> return 40
+    tens <- (10 *) . length <$> many ten
+    nines <- option 0 $ try $ one >> ten >> return 9
+    fives <- option 0 (5 <$ five)
+    fours <- option 0 $ try $ one >> five >> return 4
+    ones <- length <$> many one
+    let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+                hundreds + nineties + fifties + forties + tens + nines +
+                fives + fours + ones
+    if total == 0
+       then Prelude.fail "not a roman numeral"
+       else return total
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+upperRoman = do
+  num <- romanNumeral True
+  return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+lowerRoman = do
+  num <- romanNumeral False
+  return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+decimal = do
+  num <- many1 digit
+  return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
+
+-- | Parses a '@' and optional label and
+-- returns (DefaultStyle, [next example number]).  The next
+-- example number is incremented in parser state, and the label
+-- (if present) is added to the label table.
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s ParserState m (ListNumberStyle, Int)
+exampleNum = do
+  char '@'
+  lab <- mconcat . map T.pack <$>
+                    many (many1 alphaNum <|>
+                          try (do c <- char '_' <|> char '-'
+                                  cs <- many1 alphaNum
+                                  return (c:cs)))
+  st <- getState
+  let num = stateNextExample st
+  let newlabels = if T.null lab
+                     then stateExamples st
+                     else M.insert lab num $ stateExamples st
+  updateState $ \s -> s{ stateNextExample = num + 1
+                       , stateExamples    = newlabels }
+  return (Example, num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+defaultNum = do
+  char '#'
+  return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha = do
+  ch <- satisfy isAsciiLower
+  return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+upperAlpha = do
+  ch <- satisfy isAsciiUpper
+  return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+           (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
+anyOrderedListMarker = choice
+  [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+                           numParser <- [decimal, exampleNum, defaultNum, romanOne,
+                           lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m (ListNumberStyle, Int)
+         -> ParserT s st m ListAttributes
+inPeriod num = try $ do
+  (style, start) <- num
+  char '.'
+  let delim = if style == DefaultStyle
+                 then DefaultDelim
+                 else Period
+  return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m (ListNumberStyle, Int)
+           -> ParserT s st m ListAttributes
+inOneParen num = try $ do
+  (style, start) <- num
+  char ')'
+  return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
+            => ParserT s st m (ListNumberStyle, Int)
+            -> ParserT s st m ListAttributes
+inTwoParens num = try $ do
+  char '('
+  (style, start) <- num
+  char ')'
+  return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
+                  => ListNumberStyle
+                  -> ListNumberDelim
+                  -> ParserT s ParserState m Int
+orderedListMarker style delim = do
+  let num = defaultNum <|>  -- # can continue any kind of list
+            case style of
+               DefaultStyle -> decimal
+               Example      -> exampleNum
+               Decimal      -> decimal
+               UpperRoman   -> upperRoman
+               LowerRoman   -> lowerRoman
+               UpperAlpha   -> upperAlpha
+               LowerAlpha   -> lowerAlpha
+  let context = case delim of
+               DefaultDelim -> inPeriod
+               Period       -> inPeriod
+               OneParen     -> inOneParen
+               TwoParens    -> inTwoParens
+  (start, _, _) <- context num
+  return start
+
--- a/src/Text/Pandoc/Parsing/Math.hs
+++ b/src/Text/Pandoc/Parsing/Math.hs
@ -0,0 +1,96 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+Module      : Text.Pandoc.Parsing.Math
+Copyright   : © 2006-2022 John MacFarlane
+License     : GPL-2.0-or-later
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Parsing of LaTeX math.
+-}
+
+module Text.Pandoc.Parsing.Math
+  ( mathDisplay
+  , mathInline
+  )
+where
+
+import Control.Monad (mzero, when)
+import Data.Text (Text)
+import Text.Parsec ((<|>), Stream(..), notFollowedBy, skipMany, try)
+import Text.Pandoc.Options
+  ( Extension(Ext_tex_math_dollars, Ext_tex_math_single_backslash,
+              Ext_tex_math_double_backslash) )
+import Text.Pandoc.Parsing.Capabilities (HasReaderOptions, guardEnabled)
+import Text.Pandoc.Parsing.Combinators
+import Text.Pandoc.Parsing.Types (ParserT)
+import Text.Pandoc.Shared (trimMath)
+import Text.Pandoc.Sources
+  (UpdateSourcePos, anyChar, char, digit, newline, satisfy, space, string)
+
+import qualified Data.Text as T
+
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
+mathInlineWith op cl = try $ do
+  textStr op
+  when (op == "$") $ notFollowedBy space
+  words' <- many1Till (
+                       (T.singleton <$>
+                          satisfy (\c -> not (isSpaceChar c || c == '\\')))
+                   <|> (char '\\' >>
+                           -- This next clause is needed because \text{..} can
+                           -- contain $, \(\), etc.
+                           (try (string "text" >>
+                                 (("\\text" <>) <$> inBalancedBraces 0 ""))
+                            <|>  (\c -> T.pack ['\\',c]) <$> anyChar))
+                   <|> do (blankline <* notFollowedBy' blankline) <|>
+                             (spaceChar <* skipMany spaceChar)
+                          notFollowedBy (char '$')
+                          return " "
+                    ) (try $ textStr cl)
+  notFollowedBy digit  -- to prevent capture of $5
+  return $ trimMath $ T.concat words'
+ where
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
+  inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
+
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
+  inBalancedBraces' 0 "" = do
+    c <- anyChar
+    if c == '{'
+       then inBalancedBraces' 1 "{"
+       else mzero
+  inBalancedBraces' 0 s = return $ reverse s
+  inBalancedBraces' numOpen ('\\':xs) = do
+    c <- anyChar
+    inBalancedBraces' numOpen (c:'\\':xs)
+  inBalancedBraces' numOpen xs = do
+    c <- anyChar
+    case c of
+         '}' -> inBalancedBraces' (numOpen - 1) (c:xs)
+         '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
+         _   -> inBalancedBraces' numOpen (c:xs)
+
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
+mathDisplayWith op cl = try $ fmap T.pack $ do
+  textStr op
+  many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
+            (try $ textStr cl)
+
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
+            => ParserT s st m Text
+mathDisplay =
+      (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
+  <|> (guardEnabled Ext_tex_math_single_backslash >>
+       mathDisplayWith "\\[" "\\]")
+  <|> (guardEnabled Ext_tex_math_double_backslash >>
+       mathDisplayWith "\\\\[" "\\\\]")
+
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
+mathInline =
+      (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
+  <|> (guardEnabled Ext_tex_math_single_backslash >>
+       mathInlineWith "\\(" "\\)")
+  <|> (guardEnabled Ext_tex_math_double_backslash >>
+       mathInlineWith "\\\\(" "\\\\)")
--- a/src/Text/Pandoc/Parsing/Smart.hs
+++ b/src/Text/Pandoc/Parsing/Smart.hs
@ -0,0 +1,145 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+Module      : Text.Pandoc.Parsing.Combinators
+Copyright   : © 2006-2022 John MacFarlane
+License     : GPL-2.0-or-later
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Smart parsing of quotes, dashes, and other character combinations.
+-}
+
+module Text.Pandoc.Parsing.Smart
+  ( apostrophe
+  , dash
+  , doubleCloseQuote
+  , doubleQuoteEnd
+  , doubleQuoteStart
+  , doubleQuoted
+  , ellipses
+  , singleQuoteEnd
+  , singleQuoteStart
+  , singleQuoted
+  , smartPunctuation
+  )
+where
+
+import Control.Monad (guard , void)
+import Text.Pandoc.Builder (Inlines)
+import Text.Pandoc.Options
+  ( extensionEnabled
+  , Extension(Ext_old_dashes, Ext_smart)
+  , ReaderOptions(readerExtensions) )
+import Text.Pandoc.Sources
+import Text.Pandoc.Parsing.Capabilities
+import Text.Pandoc.Parsing.Combinators
+import Text.Pandoc.Parsing.Types (ParserT)
+import Text.Parsec
+  ( (<|>)
+  , Stream(..)
+  , choice
+  , lookAhead
+  , manyTill
+  , notFollowedBy
+  , try
+  )
+
+import qualified Text.Pandoc.Builder as B
+
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
+                 => ParserT s st m Inlines
+                 -> ParserT s st m Inlines
+smartPunctuation inlineParser = do
+  guardEnabled Ext_smart
+  choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
+
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
+       => ParserT s st m Inlines
+       -> ParserT s st m Inlines
+quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
+
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Inlines
+             -> ParserT s st m Inlines
+singleQuoted inlineParser = do
+  singleQuoteStart
+  (B.singleQuoted . mconcat <$>
+    try
+     (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
+   <|> pure "\8217"
+
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Inlines
+             -> ParserT s st m Inlines
+doubleQuoted inlineParser = do
+  doubleQuoteStart
+  (B.doubleQuoted . mconcat <$>
+    try
+     (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)))
+   <|> pure (B.str "\8220")
+
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
+charOrRef cs =
+  oneOf cs <|> try (do c <- characterReference
+                       guard (c `elem` cs)
+                       return c)
+
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
+                 => ParserT s st m ()
+singleQuoteStart = do
+  failIfInQuoteContext InSingleQuote
+  -- single quote start can't be right after str
+  guard =<< notAfterString
+  try $ do
+    charOrRef "'\8216\145"
+    void $ lookAhead (satisfy (not . isSpaceChar))
+
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
+               => ParserT s st m ()
+singleQuoteEnd = try $ do
+  charOrRef "'\8217\146"
+  notFollowedBy alphaNum
+
+doubleQuoteStart :: (HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
+                 => ParserT s st m ()
+doubleQuoteStart = do
+  failIfInQuoteContext InDoubleQuote
+  guard =<< notAfterString
+  try $ do charOrRef "\"\8220\147"
+           void $ lookAhead (satisfy (not . isSpaceChar))
+
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
+               => ParserT s st m ()
+doubleQuoteEnd = void (charOrRef "\"\8221\148")
+
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
+
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+doubleCloseQuote = B.str "\8221" <$ char '"'
+
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m Inlines
+ellipses = try (string "..." >> return (B.str "\8230"))
+
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
+     => ParserT s st m Inlines
+dash = try $ do
+  oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
+  if oldDashes
+     then do
+       char '-'
+       (char '-' >> return (B.str "\8212"))
+         <|> (lookAhead digit >> return (B.str "\8211"))
+     else do
+       string "--"
+       (char '-' >> return (B.str "\8212"))
+         <|> return (B.str "\8211")
--- a/src/Text/Pandoc/Parsing/State.hs
+++ b/src/Text/Pandoc/Parsing/State.hs
@ -0,0 +1,191 @@
+{-# LANGUAGE FlexibleInstances          #-}
+{-# LANGUAGE MultiParamTypeClasses      #-}
+{-# LANGUAGE OverloadedStrings          #-}
+{- |
+   Module      : Text.Pandoc.Parsing
+   Copyright   : Copyright (C) 2006-2022 John MacFarlane
+   License     : GPL-2.0-or-later
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+A default parser state with commonly used properties.
+-}
+
+module Text.Pandoc.Parsing.State
+  ( ParserState (..)
+  , ParserContext (..)
+  , HeaderType (..)
+  , NoteTable
+  , NoteTable'
+  , Key (..)
+  , KeyTable
+  , SubstTable
+  , defaultParserState
+  , toKey
+  )
+where
+
+import Data.Default (Default (def))
+import Data.Text (Text)
+import Text.Parsec (SourcePos, getState, setState)
+import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines)
+import Text.Pandoc.Definition (Attr, Meta, Target, nullMeta)
+import Text.Pandoc.Logging (LogMessage)
+import Text.Pandoc.Options (ReaderOptions)
+import Text.Pandoc.Parsing.Capabilities
+import Text.Pandoc.Parsing.Types
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
+
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import qualified Data.Text as T
+
+-- | Parsing options.
+data ParserState = ParserState
+  { stateOptions         :: ReaderOptions -- ^ User options
+  , stateParserContext   :: ParserContext -- ^ Inside list?
+  , stateQuoteContext    :: QuoteContext  -- ^ Inside quoted environment?
+  , stateAllowLinks      :: Bool          -- ^ Allow parsing of links
+  , stateAllowLineBreaks :: Bool          -- ^ Allow parsing of line breaks
+  , stateMaxNestingLevel :: Int           -- ^ Max # of nested Strong/Emph
+  , stateLastStrPos      :: Maybe SourcePos -- ^ Position after last str parsed
+  , stateKeys            :: KeyTable      -- ^ List of reference keys
+  , stateHeaderKeys      :: KeyTable      -- ^ List of implicit header ref keys
+  , stateSubstitutions   :: SubstTable    -- ^ List of substitution references
+  , stateNotes           :: NoteTable     -- ^ List of notes (raw bodies)
+  , stateNotes'          :: NoteTable'    -- ^ List of notes (parsed bodies)
+  , stateNoteRefs        :: Set.Set Text  -- ^ List of note references used
+  , stateInNote          :: Bool          -- ^ True if parsing note contents
+  , stateNoteNumber      :: Int           -- ^ Last note number for citations
+  , stateMeta            :: Meta          -- ^ Document metadata
+  , stateMeta'           :: Future ParserState Meta -- ^ Document metadata
+  , stateCitations       :: M.Map Text Text -- ^ RST-style citations
+  , stateHeaderTable     :: [HeaderType]  -- ^ Ordered list of header types used
+  , stateIdentifiers     :: Set.Set Text  -- ^ Header identifiers used
+  , stateNextExample     :: Int           -- ^ Number of next example
+  , stateExamples        :: M.Map Text Int -- ^ Map from example labels to numbers
+  , stateMacros          :: M.Map Text Macro -- ^ Table of macros defined so far
+  , stateRstDefaultRole  :: Text          -- ^ Current rST default
+                                           -- interpreted text role
+  , stateRstHighlight    :: Maybe Text    -- ^ Current rST literal block
+                                           -- language
+  , stateRstCustomRoles  :: M.Map Text (Text, Maybe Text, Attr)
+    -- ^ Current rST cust text roles;
+    -- Triple represents:) Base role 2) Optional format (only for :raw:
+    -- roles) 3) Addition classes (rest of Attr is unused)).
+  , stateCaption         :: Maybe Inlines -- ^ Caption in current environment
+  , stateInHtmlBlock     :: Maybe Text    -- ^ Tag type of HTML block being parsed
+  , stateFencedDivLevel  :: Int           -- ^ Depth of fenced div
+  , stateContainers      :: [Text]        -- ^ parent include files
+  , stateLogMessages     :: [LogMessage]  -- ^ log messages
+  , stateMarkdownAttribute :: Bool        -- ^ True if in markdown=1 context
+  }
+
+instance Default ParserState where
+  def = defaultParserState
+
+instance HasMeta ParserState where
+  setMeta field val st =
+    st{ stateMeta = setMeta field val $ stateMeta st }
+  deleteMeta field st =
+    st{ stateMeta = deleteMeta field $ stateMeta st }
+
+instance HasReaderOptions ParserState where
+  extractReaderOptions = stateOptions
+
+instance Monad m => HasQuoteContext ParserState m where
+  getQuoteContext = stateQuoteContext <$> getState
+  withQuoteContext context parser = do
+    oldState <- getState
+    let oldQuoteContext = stateQuoteContext oldState
+    setState oldState { stateQuoteContext = context }
+    result <- parser
+    newState <- getState
+    setState newState { stateQuoteContext = oldQuoteContext }
+    return result
+
+instance HasIdentifierList ParserState where
+  extractIdentifierList     = stateIdentifiers
+  updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
+
+instance HasMacros ParserState where
+  extractMacros        = stateMacros
+  updateMacros f st    = st{ stateMacros = f $ stateMacros st }
+
+instance HasLastStrPosition ParserState where
+  setLastStrPos pos st = st{ stateLastStrPos = pos }
+  getLastStrPos st     = stateLastStrPos st
+
+instance HasLogMessages ParserState where
+  addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
+  getLogMessages st = reverse $ stateLogMessages st
+
+instance HasIncludeFiles ParserState where
+  getIncludeFiles = stateContainers
+  addIncludeFile f s = s{ stateContainers = f : stateContainers s }
+  dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s }
+
+data ParserContext
+    = ListItemState   -- ^ Used when running parser on list item contents
+    | NullState       -- ^ Default state
+    deriving (Eq, Show)
+
+data HeaderType
+    = SingleHeader Char  -- ^ Single line of characters underneath
+    | DoubleHeader Char  -- ^ Lines of characters above and below
+    deriving (Eq, Show)
+
+defaultParserState :: ParserState
+defaultParserState = ParserState
+  { stateOptions         = def
+  , stateParserContext   = NullState
+  , stateQuoteContext    = NoQuote
+  , stateAllowLinks      = True
+  , stateAllowLineBreaks = True
+  , stateMaxNestingLevel = 6
+  , stateLastStrPos      = Nothing
+  , stateKeys            = M.empty
+  , stateHeaderKeys      = M.empty
+  , stateSubstitutions   = M.empty
+  , stateNotes           = []
+  , stateNotes'          = M.empty
+  , stateNoteRefs        = Set.empty
+  , stateInNote          = False
+  , stateNoteNumber      = 0
+  , stateMeta            = nullMeta
+  , stateMeta'           = return nullMeta
+  , stateCitations       = M.empty
+  , stateHeaderTable     = []
+  , stateIdentifiers     = Set.empty
+  , stateNextExample     = 1
+  , stateExamples        = M.empty
+  , stateMacros          = M.empty
+  , stateRstDefaultRole  = "title-reference"
+  , stateRstHighlight    = Nothing
+  , stateRstCustomRoles  = M.empty
+  , stateCaption         = Nothing
+  , stateInHtmlBlock     = Nothing
+  , stateFencedDivLevel  = 0
+  , stateContainers      = []
+  , stateLogMessages     = []
+  , stateMarkdownAttribute = False
+  }
+
+type NoteTable = [(Text, Text)]
+
+type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks)
+-- used in markdown reader
+
+newtype Key = Key Text deriving (Show, Read, Eq, Ord)
+
+toKey :: Text -> Key
+toKey = Key . T.toLower . T.unwords . T.words . unbracket
+  where unbracket t
+          | Just ('[', t') <- T.uncons t
+          , Just (t'', ']') <- T.unsnoc t'
+          = t''
+          | otherwise
+          = t
+
+type KeyTable = M.Map Key (Target, Attr)
+
+type SubstTable = M.Map Key Inlines
--- a/src/Text/Pandoc/Parsing/Types.hs
+++ b/src/Text/Pandoc/Parsing/Types.hs
@ -0,0 +1,57 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{- |
+   Module      : Text.Pandoc.Parsing
+   Copyright   : Copyright (C) 2006-2022 John MacFarlane
+   License     : GPL-2.0-or-later
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Types and type-related functions for parsers.
+-}
+
+module Text.Pandoc.Parsing.Types
+  ( Parser
+  , ParserT
+  , Future (..)
+  , runF
+  , askF
+  , asksF
+  , returnF
+  )
+where
+
+import Control.Applicative (liftA2)
+import Control.Monad.Reader
+  ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
+import Text.Parsec ( Parsec , ParsecT )
+
+-- | Generic parser type used by many pandoc readers.
+type Parser t s = Parsec t s
+
+-- | Generic parser transformer used by many pandoc readers.
+type ParserT = ParsecT
+
+-- | Reader monad wrapping the parser state. This is used to possibly
+-- delay evaluation until all relevant information has been parsed and
+-- made available in the parser state.
+newtype Future s a = Future { runDelayed :: Reader s a }
+  deriving (Monad, Applicative, Functor)
+
+instance Semigroup a => Semigroup (Future s a) where
+  (<>) = liftA2 (<>)
+
+instance (Semigroup a, Monoid a) => Monoid (Future s a) where
+  mempty = return mempty
+  mappend = (<>)
+
+-- | Run a delayed action with the given state.
+runF :: Future s a -> s -> a
+runF = runReader . runDelayed
+
+askF :: Future s s
+askF = Future ask
+
+asksF :: (s -> a) -> Future s a
+asksF f = Future $ asks f
+
+returnF :: Monad m => a -> m (Future s a)
+returnF = return . return