Small improvements to BibTeX parser.

This commit is contained in:
John MacFarlane 2020-10-08 20:48:19 -07:00
parent 0cfba4e36e
commit dd3c4000ff

View file

@ -38,7 +38,7 @@ import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Map as Map
import Data.Maybe
import Text.Parsec hiding (State, many, (<|>))
import Text.Pandoc.Parsing hiding ((<|>), many)
import Control.Applicative
import Data.List.Split (splitOn, splitWhen, wordsBy)
import Control.Monad.RWS hiding ((<>))
@ -73,7 +73,7 @@ defaultLang = Lang "en" (Just "US")
-- a map of bibtex "string" macros
type StringMap = Map.Map Text Text
type BibParser = Parsec Text (Lang, StringMap)
type BibParser = Parser Text (Lang, StringMap)
data Item = Item{ identifier :: Text
, sourcePos :: SourcePos
@ -571,7 +571,7 @@ bibEntries = do
(bibComment <|> bibPreamble <|> bibString))
bibSkip :: BibParser ()
bibSkip = skipMany1 (satisfy (/='@'))
bibSkip = () <$ take1WhileP (/='@')
bibComment :: BibParser ()
bibComment = do
@ -597,7 +597,7 @@ bibString = do
return ()
inBraces :: BibParser Text
inBraces = try $ do
inBraces = do
char '{'
res <- manyTill
( (T.pack <$> many1 (noneOf "{}\\"))
@ -621,8 +621,9 @@ inQuotes = do
) (char '"')
fieldName :: BibParser Text
fieldName = resolveAlias . T.toLower . T.pack
<$> many1 (letter <|> digit <|> oneOf "-_:+")
fieldName = resolveAlias . T.toLower
<$> take1WhileP (\c ->
isAlphaNum c || c == '-' || c == '_' || c == ':' || c == '+')
isBibtexKeyChar :: Char -> Bool
isBibtexKeyChar c =
@ -632,18 +633,18 @@ bibItem :: BibParser Item
bibItem = do
char '@'
pos <- getPosition
enttype <- map toLower <$> many1 letter
enttype <- T.toLower <$> take1WhileP isLetter
spaces
char '{'
spaces
entid <- many1 (satisfy isBibtexKeyChar)
entid <- take1WhileP isBibtexKeyChar
spaces
char ','
spaces
entfields <- entField `sepEndBy` (char ',' >> spaces)
spaces
char '}'
return $ Item (T.pack entid) pos (T.pack enttype) (Map.fromList entfields)
return $ Item entid pos enttype (Map.fromList entfields)
entField :: BibParser (Text, Text)
entField = do
@ -662,7 +663,7 @@ resolveAlias "primaryclass" = "eprintclass"
resolveAlias s = s
rawWord :: BibParser Text
rawWord = T.pack <$> many1 alphaNum
rawWord = take1WhileP isAlphaNum
expandString :: BibParser Text
expandString = do