Added Text.Pandoc.Readers.LaTeX.Parsing (unexported).

This collects some of the general-purpose code from the LaTeX
reader, with the aim of making the module smaller. (We've been
having out-of-memory issues compiling this module on CI.)
John MacFarlane 2018-09-28 10:33:32 -07:00
parent 4f9ab7e032
commit 9dac993835
3 changed files with 667 additions and 557 deletions
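
For orientation, a minimal sketch (not part of this commit) of how reader code can use the combinators that now live in the new module. The helper name labelArg is hypothetical; LP, controlSeq, braced, and untokenize are taken from the export list of Text.Pandoc.Readers.LaTeX.Parsing shown in the diff below.

{-# LANGUAGE OverloadedStrings #-}
module Example where

import Data.Text (Text)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Readers.LaTeX.Parsing

-- Hypothetical helper: parse \label{...} at the token level and return the
-- label text, using only combinators exported by the relocated module.
labelArg :: PandocMonad m => LP m Text
labelArg = do
  _ <- controlSeq "label"   -- match the \label control sequence
  untokenize <$> braced     -- read the braced group and render it as Text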

pandoc.cabal

@@ -508,6 +508,7 @@ library
Text.Pandoc.Readers.Docx.Util,
Text.Pandoc.Readers.Docx.StyleMap,
Text.Pandoc.Readers.Docx.Fields,
+Text.Pandoc.Readers.LaTeX.Parsing,
Text.Pandoc.Readers.Odt.Base,
Text.Pandoc.Readers.Odt.Namespaces,
Text.Pandoc.Readers.Odt.StyleReader,

src/Text/Pandoc/Readers/LaTeX.hs

@@ -47,8 +47,7 @@ import Prelude
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Control.Monad.Trans (lift)
-import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper)
+import Data.Char (isDigit, isLetter, toLower, toUpper)
import Data.Default
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
@@ -63,7 +62,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
readFileFromDirs, report, setResourcePath,
setTranslations, translateTerm, trace)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError))
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
@@ -72,10 +71,10 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Parsing
import Text.Pandoc.Shared
import qualified Text.Pandoc.Translations as Translations
import Text.Pandoc.Walk
-import Text.Parsec.Pos
import qualified Text.Pandoc.Builder as B
-- for debugging:
@@ -137,151 +136,6 @@ resolveRefs _ x = x
-- Left e -> error (show e)
-- Right r -> return r
newtype DottedNum = DottedNum [Int]
deriving (Show)
renderDottedNum :: DottedNum -> String
renderDottedNum (DottedNum xs) =
intercalate "." (map show xs)
incrementDottedNum :: Int -> DottedNum -> DottedNum
incrementDottedNum level (DottedNum ns) = DottedNum $
case reverse (take level (ns ++ repeat 0)) of
(x:xs) -> reverse (x+1 : xs)
[] -> [] -- shouldn't happen
data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sMeta :: Meta
, sQuoteContext :: QuoteContext
, sMacros :: M.Map Text Macro
, sContainers :: [String]
, sHeaders :: M.Map Inlines String
, sLogMessages :: [LogMessage]
, sIdentifiers :: Set.Set String
, sVerbatimMode :: Bool
, sCaption :: (Maybe Inlines, Maybe String)
, sInListItem :: Bool
, sInTableCell :: Bool
, sLastHeaderNum :: DottedNum
, sLastFigureNum :: DottedNum
, sLabels :: M.Map String [Inline]
, sHasChapters :: Bool
, sToggles :: M.Map String Bool
}
deriving Show
defaultLaTeXState :: LaTeXState
defaultLaTeXState = LaTeXState{ sOptions = def
, sMeta = nullMeta
, sQuoteContext = NoQuote
, sMacros = M.empty
, sContainers = []
, sHeaders = M.empty
, sLogMessages = []
, sIdentifiers = Set.empty
, sVerbatimMode = False
, sCaption = (Nothing, Nothing)
, sInListItem = False
, sInTableCell = False
, sLastHeaderNum = DottedNum []
, sLastFigureNum = DottedNum []
, sLabels = M.empty
, sHasChapters = False
, sToggles = M.empty
}
instance PandocMonad m => HasQuoteContext LaTeXState m where
getQuoteContext = sQuoteContext <$> getState
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = sQuoteContext oldState
setState oldState { sQuoteContext = context }
result <- parser
newState <- getState
setState newState { sQuoteContext = oldQuoteContext }
return result
instance HasLogMessages LaTeXState where
addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
getLogMessages st = reverse $ sLogMessages st
instance HasIdentifierList LaTeXState where
extractIdentifierList = sIdentifiers
updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
instance HasIncludeFiles LaTeXState where
getIncludeFiles = sContainers
addIncludeFile f s = s{ sContainers = f : sContainers s }
dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
instance HasHeaderMap LaTeXState where
extractHeaderMap = sHeaders
updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
instance HasMacros LaTeXState where
extractMacros st = sMacros st
updateMacros f st = st{ sMacros = f (sMacros st) }
instance HasReaderOptions LaTeXState where
extractReaderOptions = sOptions
instance HasMeta LaTeXState where
setMeta field val st =
st{ sMeta = setMeta field val $ sMeta st }
deleteMeta field st =
st{ sMeta = deleteMeta field $ sMeta st }
instance Default LaTeXState where
def = defaultLaTeXState
type LP m = ParserT [Tok] LaTeXState m
withVerbatimMode :: PandocMonad m => LP m a -> LP m a
withVerbatimMode parser = do
updateState $ \st -> st{ sVerbatimMode = True }
result <- parser
updateState $ \st -> st{ sVerbatimMode = False }
return result
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> Bool -> LP m a -> LP m a -> ParserT String s m (a, String)
rawLaTeXParser retokenize parser valParser = do
inp <- getInput
let toks = tokenize "source" $ T.pack inp
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
let lstate' = lstate { sMacros = extractMacros pstate }
let rawparser = (,) <$> withRaw valParser <*> getState
res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
case res' of
Left _ -> mzero
Right toks' -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
doMacros 0
ts <- many (satisfyTok (const True))
setInput ts
rawparser)
lstate' "chunk" toks'
case res of
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
_ <- takeP (T.length (untokenize toks'))
return (val, T.unpack (untokenize raw))
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> String -> ParserT String s m String
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = doMacros 0 *>
(toksToString <$> many (satisfyTok (const True)))
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
case res of
Left e -> fail (show e)
Right s' -> return s'
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT String s m String
@@ -326,358 +180,6 @@ inlineCommand = do
lookAhead (try (char '\\' >> letter))
fst <$> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
totoks :: SourcePos -> Text -> [Tok]
totoks pos t =
case T.uncons t of
Nothing -> []
Just (c, rest)
| c == '\n' ->
Tok pos Newline "\n"
: totoks (setSourceColumn (incSourceLine pos 1) 1) rest
| isSpaceOrTab c ->
let (sps, rest') = T.span isSpaceOrTab t
in Tok pos Spaces sps
: totoks (incSourceColumn pos (T.length sps))
rest'
| isAlphaNum c ->
let (ws, rest') = T.span isAlphaNum t
in Tok pos Word ws
: totoks (incSourceColumn pos (T.length ws)) rest'
| c == '%' ->
let (cs, rest') = T.break (== '\n') rest
in Tok pos Comment ("%" <> cs)
: totoks (incSourceColumn pos (1 + T.length cs)) rest'
| c == '\\' ->
case T.uncons rest of
Nothing -> [Tok pos (CtrlSeq " ") "\\"]
Just (d, rest')
| isLetterOrAt d ->
-- \makeatletter is common in macro defs;
-- ideally we should make tokenization sensitive
-- to \makeatletter and \makeatother, but this is
-- probably best for now
let (ws, rest'') = T.span isLetterOrAt rest
(ss, rest''') = T.span isSpaceOrTab rest''
in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
: totoks (incSourceColumn pos
(1 + T.length ws + T.length ss)) rest'''
| isSpaceOrTab d || d == '\n' ->
let (w1, r1) = T.span isSpaceOrTab rest
(w2, (w3, r3)) = case T.uncons r1 of
Just ('\n', r2)
-> (T.pack "\n",
T.span isSpaceOrTab r2)
_ -> (mempty, (mempty, r1))
ws = "\\" <> w1 <> w2 <> w3
in case T.uncons r3 of
Just ('\n', _) ->
Tok pos (CtrlSeq " ") ("\\" <> w1)
: totoks (incSourceColumn pos (T.length ws))
r1
_ ->
Tok pos (CtrlSeq " ") ws
: totoks (incSourceColumn pos (T.length ws))
r3
| otherwise ->
Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
: totoks (incSourceColumn pos 2) rest'
| c == '#' ->
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
in case safeRead (T.unpack t1) of
Just i ->
Tok pos (Arg i) ("#" <> t1)
: totoks (incSourceColumn pos (1 + T.length t1)) t2
Nothing ->
Tok pos Symbol "#"
: totoks (incSourceColumn pos 1) t2
| c == '^' ->
case T.uncons rest of
Just ('^', rest') ->
case T.uncons rest' of
Just (d, rest'')
| isLowerHex d ->
case T.uncons rest'' of
Just (e, rest''') | isLowerHex e ->
Tok pos Esc2 (T.pack ['^','^',d,e])
: totoks (incSourceColumn pos 4) rest'''
_ ->
Tok pos Esc1 (T.pack ['^','^',d])
: totoks (incSourceColumn pos 3) rest''
| d < '\128' ->
Tok pos Esc1 (T.pack ['^','^',d])
: totoks (incSourceColumn pos 3) rest''
_ -> Tok pos Symbol "^" :
Tok (incSourceColumn pos 1) Symbol "^" :
totoks (incSourceColumn pos 2) rest'
_ -> Tok pos Symbol "^"
: totoks (incSourceColumn pos 1) rest
| otherwise ->
Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
isSpaceOrTab :: Char -> Bool
isSpaceOrTab ' ' = True
isSpaceOrTab '\t' = True
isSpaceOrTab _ = False
isLetterOrAt :: Char -> Bool
isLetterOrAt '@' = True
isLetterOrAt c = isLetter c
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
untokenize :: [Tok] -> Text
untokenize = mconcat . map untoken
untoken :: Tok -> Text
untoken (Tok _ _ t) = t
satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
satisfyTok f =
try $ do
res <- tokenPrim (T.unpack . untoken) updatePos matcher
doMacros 0 -- apply macros on remaining input stream
return res
where matcher t | f t = Just t
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
updatePos _spos _ (Tok pos _ _ : _) = pos
updatePos spos _ [] = incSourceColumn spos 1
doMacros :: PandocMonad m => Int -> LP m ()
doMacros n = do
verbatimMode <- sVerbatimMode <$> getState
unless verbatimMode $ do
inp <- getInput
case inp of
Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
-> handleMacros spos name ts
Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
-> handleMacros spos ("end" <> name) ts
Tok _ (CtrlSeq "expandafter") _ : t : ts
-> do setInput ts
doMacros n
getInput >>= setInput . combineTok t
Tok spos (CtrlSeq name) _ : ts
-> handleMacros spos name ts
_ -> return ()
where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
| T.all isLetterOrAt w =
Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
where (x1, x2) = T.break isSpaceOrTab x
combineTok t ts = t:ts
handleMacros spos name ts = do
macros <- sMacros <$> getState
case M.lookup name macros of
Nothing -> return ()
Just (Macro expansionPoint argspecs optarg newtoks) -> do
setInput ts
let matchTok (Tok _ toktype txt) =
satisfyTok (\(Tok _ toktype' txt') ->
toktype == toktype' &&
txt == txt')
let matchPattern toks = try $ mapM_ matchTok toks
let getargs argmap [] = return argmap
getargs argmap (Pattern toks : rest) = try $ do
matchPattern toks
getargs argmap rest
getargs argmap (ArgNum i : Pattern toks : rest) =
try $ do
x <- mconcat <$> manyTill
(braced <|> ((:[]) <$> anyTok))
(matchPattern toks)
getargs (M.insert i x argmap) rest
getargs argmap (ArgNum i : rest) = do
x <- try $ spaces >> bracedOrToken
getargs (M.insert i x argmap) rest
args <- case optarg of
Nothing -> getargs M.empty argspecs
Just o -> do
x <- option o bracketedToks
getargs (M.singleton 1 x) argspecs
-- first boolean param is true if we're tokenizing
-- an argument (in which case we don't want to
-- expand #1 etc.)
let addTok False (Tok _ (Arg i) _) acc =
case M.lookup i args of
Nothing -> mzero
Just xs -> foldr (addTok True) acc xs
-- see #4007
addTok _ (Tok _ (CtrlSeq x) txt)
acc@(Tok _ Word _ : _)
| not (T.null txt) &&
isLetter (T.last txt) =
Tok spos (CtrlSeq x) (txt <> " ") : acc
addTok _ t acc = setpos spos t : acc
ts' <- getInput
setInput $ foldr (addTok False) ts' newtoks
case expansionPoint of
ExpandWhenUsed ->
if n > 20 -- detect macro expansion loops
then throwError $ PandocMacroLoop (T.unpack name)
else doMacros (n + 1)
ExpandWhenDefined -> return ()
setpos :: SourcePos -> Tok -> Tok
setpos spos (Tok _ tt txt) = Tok spos tt txt
anyControlSeq :: PandocMonad m => LP m Tok
anyControlSeq = satisfyTok isCtrlSeq
isCtrlSeq :: Tok -> Bool
isCtrlSeq (Tok _ (CtrlSeq _) _) = True
isCtrlSeq _ = False
anySymbol :: PandocMonad m => LP m Tok
anySymbol = satisfyTok isSymbolTok
isSymbolTok :: Tok -> Bool
isSymbolTok (Tok _ Symbol _) = True
isSymbolTok _ = False
spaces :: PandocMonad m => LP m ()
spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
spaces1 :: PandocMonad m => LP m ()
spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
tokTypeIn :: [TokType] -> Tok -> Bool
tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
controlSeq :: PandocMonad m => Text -> LP m Tok
controlSeq name = satisfyTok isNamed
where isNamed (Tok _ (CtrlSeq n) _) = n == name
isNamed _ = False
symbol :: PandocMonad m => Char -> LP m Tok
symbol c = satisfyTok isc
where isc (Tok _ Symbol d) = case T.uncons d of
Just (c',_) -> c == c'
_ -> False
isc _ = False
symbolIn :: PandocMonad m => [Char] -> LP m Tok
symbolIn cs = satisfyTok isInCs
where isInCs (Tok _ Symbol d) = case T.uncons d of
Just (c,_) -> c `elem` cs
_ -> False
isInCs _ = False
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
whitespace :: PandocMonad m => LP m ()
whitespace = () <$ satisfyTok isSpaceTok
isSpaceTok :: Tok -> Bool
isSpaceTok (Tok _ Spaces _) = True
isSpaceTok _ = False
newlineTok :: PandocMonad m => LP m ()
newlineTok = () <$ satisfyTok isNewlineTok
isNewlineTok :: Tok -> Bool
isNewlineTok (Tok _ Newline _) = True
isNewlineTok _ = False
comment :: PandocMonad m => LP m ()
comment = () <$ satisfyTok isCommentTok
isCommentTok :: Tok -> Bool
isCommentTok (Tok _ Comment _) = True
isCommentTok _ = False
anyTok :: PandocMonad m => LP m Tok
anyTok = satisfyTok (const True)
endline :: PandocMonad m => LP m ()
endline = try $ do
newlineTok
lookAhead anyTok
notFollowedBy blankline
blankline :: PandocMonad m => LP m ()
blankline = try $ skipMany whitespace *> newlineTok
primEscape :: PandocMonad m => LP m Char
primEscape = do
Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
case toktype of
Esc1 -> case T.uncons (T.drop 2 t) of
Just (c, _)
| c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
| otherwise -> return (chr (ord c + 64))
Nothing -> fail "Empty content of Esc1"
Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
Just x -> return (chr x)
Nothing -> fail $ "Could not read: " ++ T.unpack t
_ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
skipMany sp
symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
egroup :: PandocMonad m => LP m Tok
egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
grouped parser = try $ do
bgroup
-- first we check for an inner 'grouped', because
-- {{a,b}} should be parsed the same as {a,b}
try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
braced' getTok n =
handleEgroup <|> handleBgroup <|> handleOther
where handleEgroup = do
t <- egroup
if n == 1
then return []
else (t:) <$> braced' getTok (n - 1)
handleBgroup = do
t <- bgroup
(t:) <$> braced' getTok (n + 1)
handleOther = do
t <- getTok
(t:) <$> braced' getTok n
braced :: PandocMonad m => LP m [Tok]
braced = bgroup *> braced' anyTok 1
-- URLs require special handling, because they can contain %
-- characters. So we retokenize comments as we go...
bracedUrl :: PandocMonad m => LP m [Tok]
bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
bracketed parser = try $ do
symbol '['
mconcat <$> manyTill parser (symbol ']')
parenWrapped :: PandocMonad m => Monoid a => LP m a -> LP m a
parenWrapped parser = try $ do
symbol '('
mconcat <$> manyTill parser (symbol ')')
dimenarg :: PandocMonad m => LP m Text
dimenarg = try $ do
ch <- option False $ True <$ symbol '='
Tok _ _ s <- satisfyTok isWordTok
guard $ T.take 2 (T.reverse s) `elem`
["pt","pc","in","bp","cm","mm","dd","cc","sp"]
let num = T.take (T.length s - 2) s
guard $ T.length num > 0
guard $ T.all isDigit num
return $ T.pack ['=' | ch] <> s
-- inline elements:
word :: PandocMonad m => LP m Inlines
@@ -689,13 +191,6 @@ regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
isRegularSymbol _ = False
isSpecial c = c `Set.member` specialChars
specialChars :: Set.Set Char
specialChars = Set.fromList "#$%&~_^\\{}"
isWordTok :: Tok -> Bool
isWordTok (Tok _ Word _) = True
isWordTok _ = False
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
@@ -1396,9 +891,6 @@ breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize
mathDisplay :: String -> Inlines
mathDisplay = displayMath . trim
@@ -1562,19 +1054,6 @@ tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
Tok _ _ t <- singleChar
return (str (T.unpack t))
singleChar :: PandocMonad m => LP m Tok
singleChar = try $ do
Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
guard $ not $ toktype == Symbol &&
T.any (`Set.member` specialChars) t
if T.length t > 1
then do
let (t1, t2) = (T.take 1 t, T.drop 1 t)
inp <- getInput
setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
return $ Tok pos toktype t1
else return $ Tok pos toktype t
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline <|> (str . T.unpack <$> rawopt)
@@ -1611,20 +1090,6 @@ overlayTok =
Tok _ Symbol c -> c `elem` ["-","+","@","|",":",","]
_ -> False)
ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
ignore raw = do
pos <- getPosition
report $ SkippedContent raw pos
return mempty
withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
withRaw parser = do
inp <- getInput
result <- parser
nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
let raw = takeWhile (/= nxt) inp
return (result, raw)
inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"
@@ -1634,17 +1099,6 @@ unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
-- For handling URLs, which allow literal % characters...
retokenizeComment :: PandocMonad m => LP m ()
retokenizeComment = (do
Tok pos Comment txt <- satisfyTok isCommentTok
let updPos (Tok pos' toktype' txt') =
Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
(sourceColumn pos)) toktype' txt'
let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt
getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
<|> return ()
mathEnvWith :: PandocMonad m
=> (Inlines -> a) -> Maybe Text -> Text -> LP m a
mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
@@ -2364,9 +1818,6 @@ isArgTok :: Tok -> Bool
isArgTok (Tok _ (Arg _) _) = True
isArgTok _ = False
bracedOrToken :: PandocMonad m => LP m [Tok]
bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
newcommand :: PandocMonad m => LP m (Text, Macro)
newcommand = do
pos <- getPosition
@@ -2417,11 +1868,6 @@ newenvironment = do
return (name, Macro ExpandWhenUsed argspecs optarg startcontents,
Macro ExpandWhenUsed [] Nothing endcontents)
bracketedToks :: PandocMonad m => LP m [Tok]
bracketedToks = do
symbol '['
mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
bracketedNum :: PandocMonad m => LP m Int
bracketedNum = do
ds <- untokenize <$> bracketedToks

src/Text/Pandoc/Readers/LaTeX/Parsing.hs

@@ -0,0 +1,663 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Parsing
Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
General parsing types and functions for LaTeX.
-}
module Text.Pandoc.Readers.LaTeX.Parsing
( DottedNum(..)
, renderDottedNum
, incrementDottedNum
, LaTeXState(..)
, defaultLaTeXState
, LP
, withVerbatimMode
, rawLaTeXParser
, applyMacros
, tokenize
, untokenize
, untoken
, totoks
, toksToString
, satisfyTok
, doMacros
, setpos
, anyControlSeq
, anySymbol
, isWordTok
, isNewlineTok
, spaces
, spaces1
, tokTypeIn
, controlSeq
, symbol
, symbolIn
, sp
, whitespace
, newlineTok
, comment
, anyTok
, singleChar
, specialChars
, endline
, blankline
, primEscape
, bgroup
, egroup
, grouped
, braced
, braced'
, bracedUrl
, bracedOrToken
, bracketed
, bracketedToks
, parenWrapped
, dimenarg
, ignore
, withRaw
) where
import Prelude
import Control.Applicative (many, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
import Control.Monad.Trans (lift)
import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
import Data.Default
import Data.List (intercalate)
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Error (PandocError (PandocMacroLoop))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Shared
import Text.Parsec.Pos
newtype DottedNum = DottedNum [Int]
deriving (Show)
renderDottedNum :: DottedNum -> String
renderDottedNum (DottedNum xs) =
intercalate "." (map show xs)
incrementDottedNum :: Int -> DottedNum -> DottedNum
incrementDottedNum level (DottedNum ns) = DottedNum $
case reverse (take level (ns ++ repeat 0)) of
(x:xs) -> reverse (x+1 : xs)
[] -> [] -- shouldn't happen
data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sMeta :: Meta
, sQuoteContext :: QuoteContext
, sMacros :: M.Map Text Macro
, sContainers :: [String]
, sHeaders :: M.Map Inlines String
, sLogMessages :: [LogMessage]
, sIdentifiers :: Set.Set String
, sVerbatimMode :: Bool
, sCaption :: (Maybe Inlines, Maybe String)
, sInListItem :: Bool
, sInTableCell :: Bool
, sLastHeaderNum :: DottedNum
, sLastFigureNum :: DottedNum
, sLabels :: M.Map String [Inline]
, sHasChapters :: Bool
, sToggles :: M.Map String Bool
}
deriving Show
defaultLaTeXState :: LaTeXState
defaultLaTeXState = LaTeXState{ sOptions = def
, sMeta = nullMeta
, sQuoteContext = NoQuote
, sMacros = M.empty
, sContainers = []
, sHeaders = M.empty
, sLogMessages = []
, sIdentifiers = Set.empty
, sVerbatimMode = False
, sCaption = (Nothing, Nothing)
, sInListItem = False
, sInTableCell = False
, sLastHeaderNum = DottedNum []
, sLastFigureNum = DottedNum []
, sLabels = M.empty
, sHasChapters = False
, sToggles = M.empty
}
instance PandocMonad m => HasQuoteContext LaTeXState m where
getQuoteContext = sQuoteContext <$> getState
withQuoteContext context parser = do
oldState <- getState
let oldQuoteContext = sQuoteContext oldState
setState oldState { sQuoteContext = context }
result <- parser
newState <- getState
setState newState { sQuoteContext = oldQuoteContext }
return result
instance HasLogMessages LaTeXState where
addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
getLogMessages st = reverse $ sLogMessages st
instance HasIdentifierList LaTeXState where
extractIdentifierList = sIdentifiers
updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
instance HasIncludeFiles LaTeXState where
getIncludeFiles = sContainers
addIncludeFile f s = s{ sContainers = f : sContainers s }
dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
instance HasHeaderMap LaTeXState where
extractHeaderMap = sHeaders
updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
instance HasMacros LaTeXState where
extractMacros st = sMacros st
updateMacros f st = st{ sMacros = f (sMacros st) }
instance HasReaderOptions LaTeXState where
extractReaderOptions = sOptions
instance HasMeta LaTeXState where
setMeta field val st =
st{ sMeta = setMeta field val $ sMeta st }
deleteMeta field st =
st{ sMeta = deleteMeta field $ sMeta st }
instance Default LaTeXState where
def = defaultLaTeXState
type LP m = ParserT [Tok] LaTeXState m
withVerbatimMode :: PandocMonad m => LP m a -> LP m a
withVerbatimMode parser = do
updateState $ \st -> st{ sVerbatimMode = True }
result <- parser
updateState $ \st -> st{ sVerbatimMode = False }
return result
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> Bool -> LP m a -> LP m a -> ParserT String s m (a, String)
rawLaTeXParser retokenize parser valParser = do
inp <- getInput
let toks = tokenize "source" $ T.pack inp
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
let lstate' = lstate { sMacros = extractMacros pstate }
let rawparser = (,) <$> withRaw valParser <*> getState
res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
case res' of
Left _ -> mzero
Right toks' -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
doMacros 0
ts <- many (satisfyTok (const True))
setInput ts
rawparser)
lstate' "chunk" toks'
case res of
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
_ <- takeP (T.length (untokenize toks'))
return (val, T.unpack (untokenize raw))
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> String -> ParserT String s m String
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = doMacros 0 *>
(toksToString <$> many (satisfyTok (const True)))
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
case res of
Left e -> fail (show e)
Right s' -> return s'
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
totoks :: SourcePos -> Text -> [Tok]
totoks pos t =
case T.uncons t of
Nothing -> []
Just (c, rest)
| c == '\n' ->
Tok pos Newline "\n"
: totoks (setSourceColumn (incSourceLine pos 1) 1) rest
| isSpaceOrTab c ->
let (sps, rest') = T.span isSpaceOrTab t
in Tok pos Spaces sps
: totoks (incSourceColumn pos (T.length sps))
rest'
| isAlphaNum c ->
let (ws, rest') = T.span isAlphaNum t
in Tok pos Word ws
: totoks (incSourceColumn pos (T.length ws)) rest'
| c == '%' ->
let (cs, rest') = T.break (== '\n') rest
in Tok pos Comment ("%" <> cs)
: totoks (incSourceColumn pos (1 + T.length cs)) rest'
| c == '\\' ->
case T.uncons rest of
Nothing -> [Tok pos (CtrlSeq " ") "\\"]
Just (d, rest')
| isLetterOrAt d ->
-- \makeatletter is common in macro defs;
-- ideally we should make tokenization sensitive
-- to \makeatletter and \makeatother, but this is
-- probably best for now
let (ws, rest'') = T.span isLetterOrAt rest
(ss, rest''') = T.span isSpaceOrTab rest''
in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
: totoks (incSourceColumn pos
(1 + T.length ws + T.length ss)) rest'''
| isSpaceOrTab d || d == '\n' ->
let (w1, r1) = T.span isSpaceOrTab rest
(w2, (w3, r3)) = case T.uncons r1 of
Just ('\n', r2)
-> (T.pack "\n",
T.span isSpaceOrTab r2)
_ -> (mempty, (mempty, r1))
ws = "\\" <> w1 <> w2 <> w3
in case T.uncons r3 of
Just ('\n', _) ->
Tok pos (CtrlSeq " ") ("\\" <> w1)
: totoks (incSourceColumn pos (T.length ws))
r1
_ ->
Tok pos (CtrlSeq " ") ws
: totoks (incSourceColumn pos (T.length ws))
r3
| otherwise ->
Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
: totoks (incSourceColumn pos 2) rest'
| c == '#' ->
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
in case safeRead (T.unpack t1) of
Just i ->
Tok pos (Arg i) ("#" <> t1)
: totoks (incSourceColumn pos (1 + T.length t1)) t2
Nothing ->
Tok pos Symbol "#"
: totoks (incSourceColumn pos 1) t2
| c == '^' ->
case T.uncons rest of
Just ('^', rest') ->
case T.uncons rest' of
Just (d, rest'')
| isLowerHex d ->
case T.uncons rest'' of
Just (e, rest''') | isLowerHex e ->
Tok pos Esc2 (T.pack ['^','^',d,e])
: totoks (incSourceColumn pos 4) rest'''
_ ->
Tok pos Esc1 (T.pack ['^','^',d])
: totoks (incSourceColumn pos 3) rest''
| d < '\128' ->
Tok pos Esc1 (T.pack ['^','^',d])
: totoks (incSourceColumn pos 3) rest''
_ -> Tok pos Symbol "^" :
Tok (incSourceColumn pos 1) Symbol "^" :
totoks (incSourceColumn pos 2) rest'
_ -> Tok pos Symbol "^"
: totoks (incSourceColumn pos 1) rest
| otherwise ->
Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
isSpaceOrTab :: Char -> Bool
isSpaceOrTab ' ' = True
isSpaceOrTab '\t' = True
isSpaceOrTab _ = False
isLetterOrAt :: Char -> Bool
isLetterOrAt '@' = True
isLetterOrAt c = isLetter c
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
untokenize :: [Tok] -> Text
untokenize = mconcat . map untoken
untoken :: Tok -> Text
untoken (Tok _ _ t) = t
toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize
satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
satisfyTok f =
try $ do
res <- tokenPrim (T.unpack . untoken) updatePos matcher
doMacros 0 -- apply macros on remaining input stream
return res
where matcher t | f t = Just t
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
updatePos _spos _ (Tok pos _ _ : _) = pos
updatePos spos _ [] = incSourceColumn spos 1
doMacros :: PandocMonad m => Int -> LP m ()
doMacros n = do
verbatimMode <- sVerbatimMode <$> getState
unless verbatimMode $ do
inp <- getInput
case inp of
Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
-> handleMacros spos name ts
Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
-> handleMacros spos ("end" <> name) ts
Tok _ (CtrlSeq "expandafter") _ : t : ts
-> do setInput ts
doMacros n
getInput >>= setInput . combineTok t
Tok spos (CtrlSeq name) _ : ts
-> handleMacros spos name ts
_ -> return ()
where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
| T.all isLetterOrAt w =
Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
where (x1, x2) = T.break isSpaceOrTab x
combineTok t ts = t:ts
handleMacros spos name ts = do
macros <- sMacros <$> getState
case M.lookup name macros of
Nothing -> return ()
Just (Macro expansionPoint argspecs optarg newtoks) -> do
setInput ts
let matchTok (Tok _ toktype txt) =
satisfyTok (\(Tok _ toktype' txt') ->
toktype == toktype' &&
txt == txt')
let matchPattern toks = try $ mapM_ matchTok toks
let getargs argmap [] = return argmap
getargs argmap (Pattern toks : rest) = try $ do
matchPattern toks
getargs argmap rest
getargs argmap (ArgNum i : Pattern toks : rest) =
try $ do
x <- mconcat <$> manyTill
(braced <|> ((:[]) <$> anyTok))
(matchPattern toks)
getargs (M.insert i x argmap) rest
getargs argmap (ArgNum i : rest) = do
x <- try $ spaces >> bracedOrToken
getargs (M.insert i x argmap) rest
args <- case optarg of
Nothing -> getargs M.empty argspecs
Just o -> do
x <- option o bracketedToks
getargs (M.singleton 1 x) argspecs
-- first boolean param is true if we're tokenizing
-- an argument (in which case we don't want to
-- expand #1 etc.)
let addTok False (Tok _ (Arg i) _) acc =
case M.lookup i args of
Nothing -> mzero
Just xs -> foldr (addTok True) acc xs
-- see #4007
addTok _ (Tok _ (CtrlSeq x) txt)
acc@(Tok _ Word _ : _)
| not (T.null txt) &&
isLetter (T.last txt) =
Tok spos (CtrlSeq x) (txt <> " ") : acc
addTok _ t acc = setpos spos t : acc
ts' <- getInput
setInput $ foldr (addTok False) ts' newtoks
case expansionPoint of
ExpandWhenUsed ->
if n > 20 -- detect macro expansion loops
then throwError $ PandocMacroLoop (T.unpack name)
else doMacros (n + 1)
ExpandWhenDefined -> return ()
setpos :: SourcePos -> Tok -> Tok
setpos spos (Tok _ tt txt) = Tok spos tt txt
anyControlSeq :: PandocMonad m => LP m Tok
anyControlSeq = satisfyTok isCtrlSeq
isCtrlSeq :: Tok -> Bool
isCtrlSeq (Tok _ (CtrlSeq _) _) = True
isCtrlSeq _ = False
anySymbol :: PandocMonad m => LP m Tok
anySymbol = satisfyTok isSymbolTok
isSymbolTok :: Tok -> Bool
isSymbolTok (Tok _ Symbol _) = True
isSymbolTok _ = False
isWordTok :: Tok -> Bool
isWordTok (Tok _ Word _) = True
isWordTok _ = False
spaces :: PandocMonad m => LP m ()
spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
spaces1 :: PandocMonad m => LP m ()
spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
tokTypeIn :: [TokType] -> Tok -> Bool
tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
controlSeq :: PandocMonad m => Text -> LP m Tok
controlSeq name = satisfyTok isNamed
where isNamed (Tok _ (CtrlSeq n) _) = n == name
isNamed _ = False
symbol :: PandocMonad m => Char -> LP m Tok
symbol c = satisfyTok isc
where isc (Tok _ Symbol d) = case T.uncons d of
Just (c',_) -> c == c'
_ -> False
isc _ = False
symbolIn :: PandocMonad m => [Char] -> LP m Tok
symbolIn cs = satisfyTok isInCs
where isInCs (Tok _ Symbol d) = case T.uncons d of
Just (c,_) -> c `elem` cs
_ -> False
isInCs _ = False
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
whitespace :: PandocMonad m => LP m ()
whitespace = () <$ satisfyTok isSpaceTok
isSpaceTok :: Tok -> Bool
isSpaceTok (Tok _ Spaces _) = True
isSpaceTok _ = False
newlineTok :: PandocMonad m => LP m ()
newlineTok = () <$ satisfyTok isNewlineTok
isNewlineTok :: Tok -> Bool
isNewlineTok (Tok _ Newline _) = True
isNewlineTok _ = False
comment :: PandocMonad m => LP m ()
comment = () <$ satisfyTok isCommentTok
isCommentTok :: Tok -> Bool
isCommentTok (Tok _ Comment _) = True
isCommentTok _ = False
anyTok :: PandocMonad m => LP m Tok
anyTok = satisfyTok (const True)
singleChar :: PandocMonad m => LP m Tok
singleChar = try $ do
Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
guard $ not $ toktype == Symbol &&
T.any (`Set.member` specialChars) t
if T.length t > 1
then do
let (t1, t2) = (T.take 1 t, T.drop 1 t)
inp <- getInput
setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
return $ Tok pos toktype t1
else return $ Tok pos toktype t
specialChars :: Set.Set Char
specialChars = Set.fromList "#$%&~_^\\{}"
endline :: PandocMonad m => LP m ()
endline = try $ do
newlineTok
lookAhead anyTok
notFollowedBy blankline
blankline :: PandocMonad m => LP m ()
blankline = try $ skipMany whitespace *> newlineTok
primEscape :: PandocMonad m => LP m Char
primEscape = do
Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
case toktype of
Esc1 -> case T.uncons (T.drop 2 t) of
Just (c, _)
| c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
| otherwise -> return (chr (ord c + 64))
Nothing -> fail "Empty content of Esc1"
Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
Just x -> return (chr x)
Nothing -> fail $ "Could not read: " ++ T.unpack t
_ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
skipMany sp
symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
egroup :: PandocMonad m => LP m Tok
egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
grouped parser = try $ do
bgroup
-- first we check for an inner 'grouped', because
-- {{a,b}} should be parsed the same as {a,b}
try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
braced' getTok n =
handleEgroup <|> handleBgroup <|> handleOther
where handleEgroup = do
t <- egroup
if n == 1
then return []
else (t:) <$> braced' getTok (n - 1)
handleBgroup = do
t <- bgroup
(t:) <$> braced' getTok (n + 1)
handleOther = do
t <- getTok
(t:) <$> braced' getTok n
braced :: PandocMonad m => LP m [Tok]
braced = bgroup *> braced' anyTok 1
-- URLs require special handling, because they can contain %
-- characters. So we retokenize comments as we go...
bracedUrl :: PandocMonad m => LP m [Tok]
bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
-- For handling URLs, which allow literal % characters...
retokenizeComment :: PandocMonad m => LP m ()
retokenizeComment = (do
Tok pos Comment txt <- satisfyTok isCommentTok
let updPos (Tok pos' toktype' txt') =
Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
(sourceColumn pos)) toktype' txt'
let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt
getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
<|> return ()
bracedOrToken :: PandocMonad m => LP m [Tok]
bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
bracketed parser = try $ do
symbol '['
mconcat <$> manyTill parser (symbol ']')
bracketedToks :: PandocMonad m => LP m [Tok]
bracketedToks = do
symbol '['
mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
parenWrapped :: PandocMonad m => Monoid a => LP m a -> LP m a
parenWrapped parser = try $ do
symbol '('
mconcat <$> manyTill parser (symbol ')')
dimenarg :: PandocMonad m => LP m Text
dimenarg = try $ do
ch <- option False $ True <$ symbol '='
Tok _ _ s <- satisfyTok isWordTok
guard $ T.take 2 (T.reverse s) `elem`
["pt","pc","in","bp","cm","mm","dd","cc","sp"]
let num = T.take (T.length s - 2) s
guard $ T.length num > 0
guard $ T.all isDigit num
return $ T.pack ['=' | ch] <> s
ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
ignore raw = do
pos <- getPosition
report $ SkippedContent raw pos
return mempty
withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
withRaw parser = do
inp <- getInput
result <- parser
nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
let raw = takeWhile (/= nxt) inp
return (result, raw)