2019-09-24 18:36:17 +02:00
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
{-# LANGUAGE ConstraintKinds #-}
|
|
|
|
{-# LANGUAGE UndecidableInstances #-}
|
2019-05-24 10:48:09 +02:00
|
|
|
module PDF.Parser (
|
2020-02-08 08:15:32 +01:00
|
|
|
MonadParser(..)
|
2019-09-24 18:36:17 +02:00
|
|
|
, Parser
|
2019-05-24 10:48:09 +02:00
|
|
|
, (<?>)
|
|
|
|
, octDigit
|
|
|
|
, on
|
|
|
|
, runParser
|
2019-09-24 18:32:23 +02:00
|
|
|
, evalParser
|
2019-05-24 10:48:09 +02:00
|
|
|
) where
|
|
|
|
|
2019-09-24 18:36:17 +02:00
|
|
|
import Control.Applicative (Alternative, (<|>))
|
|
|
|
import Control.Monad (MonadPlus)
|
|
|
|
import Control.Monad.Fail (MonadFail(..))
|
2019-05-24 10:48:09 +02:00
|
|
|
import Control.Monad.State (StateT(..), evalStateT)
|
2019-09-24 18:36:17 +02:00
|
|
|
import Control.Monad.Trans (MonadTrans(..))
|
2019-05-24 10:48:09 +02:00
|
|
|
import qualified Data.Attoparsec.ByteString.Char8 as Atto (
|
2020-02-14 18:02:40 +01:00
|
|
|
Parser, char, decimal, double, endOfInput, parseOnly, peekChar', satisfy, string, take
|
2020-02-12 17:34:27 +01:00
|
|
|
, takeWhile, takeWhile1
|
2019-05-24 10:48:09 +02:00
|
|
|
)
|
|
|
|
import Data.ByteString (ByteString)
|
2019-10-04 18:46:07 +02:00
|
|
|
import Data.ByteString.Char8.Util (B16Int(..))
|
2019-05-24 10:48:09 +02:00
|
|
|
import Data.Char (toLower)
|
|
|
|
import Data.Set (Set)
|
|
|
|
import qualified Data.Set as Set (fromList, member, unions)
|
2019-09-24 18:36:17 +02:00
|
|
|
import Prelude hiding (fail)
|
|
|
|
|
|
|
|
type MonadDeps m = (MonadFail m, MonadPlus m)
|
|
|
|
|
|
|
|
class MonadDeps m => MonadParser m where
|
|
|
|
block :: Int -> m ByteString
|
|
|
|
char :: Char -> m Char
|
2020-02-14 18:02:40 +01:00
|
|
|
decNumber :: m Int
|
|
|
|
floatNumber :: m Double
|
2020-02-08 08:15:32 +01:00
|
|
|
endOfInput :: m ()
|
2019-10-04 18:46:07 +02:00
|
|
|
hexNumber :: m B16Int
|
2019-09-24 18:36:17 +02:00
|
|
|
oneOf :: String -> m Char
|
2020-02-12 17:34:27 +01:00
|
|
|
peek :: m Char
|
2019-09-24 18:36:17 +02:00
|
|
|
string :: ByteString -> m ByteString
|
|
|
|
takeAll :: (Char -> Bool) -> m ByteString
|
|
|
|
takeAll1 :: (Char -> Bool) -> m ByteString
|
|
|
|
|
|
|
|
instance MonadParser Atto.Parser where
|
|
|
|
block = Atto.take
|
|
|
|
char = Atto.char
|
2020-02-08 08:15:32 +01:00
|
|
|
endOfInput = Atto.endOfInput
|
2020-02-14 18:02:40 +01:00
|
|
|
decNumber = Atto.decimal
|
|
|
|
floatNumber = Atto.double
|
2019-10-04 18:46:07 +02:00
|
|
|
hexNumber = B16Int <$> Atto.takeWhile1 (`Set.member` hexDigits)
|
2019-09-24 18:36:17 +02:00
|
|
|
oneOf charSet = Atto.satisfy (`elem` charSet)
|
2020-02-12 17:34:27 +01:00
|
|
|
peek = Atto.peekChar'
|
2019-09-27 12:21:06 +02:00
|
|
|
string s = Atto.string s <?> show s
|
2019-09-24 18:36:17 +02:00
|
|
|
takeAll = Atto.takeWhile
|
|
|
|
takeAll1 = Atto.takeWhile1
|
|
|
|
|
|
|
|
instance (MonadParser m, MonadTrans t, MonadDeps (t m)) => MonadParser (t m) where
|
|
|
|
block = lift . block
|
|
|
|
char = lift . char
|
2020-02-08 08:15:32 +01:00
|
|
|
endOfInput = lift $ endOfInput
|
2019-09-24 18:36:17 +02:00
|
|
|
decNumber = lift $ decNumber
|
2020-02-14 18:02:40 +01:00
|
|
|
floatNumber = lift $ floatNumber
|
2019-09-24 18:36:17 +02:00
|
|
|
hexNumber = lift $ hexNumber
|
|
|
|
oneOf = lift . oneOf
|
2020-02-12 17:34:27 +01:00
|
|
|
peek = lift $ peek
|
2019-09-24 18:36:17 +02:00
|
|
|
string = lift . string
|
|
|
|
takeAll = lift . takeAll
|
|
|
|
takeAll1 = lift . takeAll1
|
2019-05-24 10:48:09 +02:00
|
|
|
|
|
|
|
type Parser s = StateT s Atto.Parser
|
|
|
|
|
2019-09-24 18:36:17 +02:00
|
|
|
(<?>) :: (Alternative m, MonadFail m) => m a -> String -> m a
|
2019-05-24 10:48:09 +02:00
|
|
|
(<?>) parser debugMessage = parser <|> fail debugMessage
|
|
|
|
|
|
|
|
digits :: Set Char
|
|
|
|
digits = Set.fromList ['0'..'9']
|
|
|
|
|
|
|
|
hexDigits :: Set Char
|
|
|
|
hexDigits = Set.unions [digits, Set.fromList af, Set.fromList $ toLower <$> af]
|
|
|
|
where
|
|
|
|
af = ['A'..'F']
|
|
|
|
|
2019-09-24 18:36:17 +02:00
|
|
|
octDigit :: MonadParser m => m Char
|
2019-05-24 10:48:09 +02:00
|
|
|
octDigit = oneOf ['0'..'7']
|
|
|
|
|
|
|
|
on :: Parser s a -> ByteString -> Parser s (Either String a)
|
|
|
|
on (StateT parserF) input = StateT $ \state ->
|
|
|
|
case Atto.parseOnly (parserF state) input of
|
|
|
|
Left errorMsg -> return (Left errorMsg, state)
|
|
|
|
Right (result, newState) -> return (Right result, newState)
|
|
|
|
|
2019-09-24 18:32:23 +02:00
|
|
|
runParser :: Parser s a -> s -> ByteString -> Either String (a, s)
|
|
|
|
runParser parser initState = Atto.parseOnly (runStateT parser initState)
|
|
|
|
|
|
|
|
evalParser :: Parser s a -> s -> ByteString -> Either String a
|
|
|
|
evalParser parser initState = Atto.parseOnly (evalStateT parser initState)
|