24 changed files with 150 additions and 1495 deletions
--- a/Hufflepdf.cabal
+++ b/Hufflepdf.cabal
@ -17,36 +17,19 @@ cabal-version:       >=1.10
 library
  exposed-modules:     PDF
                     , PDF.CMap
                     , PDF.Content
                     , PDF.EOL
                     , PDF.Object
                     , PDF.Object.Navigation
                     , PDF.Output
                     , PDF.Parser
                     , PDF.Pages
                     , PDF.Update
  other-modules:       Data.ByteString.Char8.Util
                     , PDF.Content.Operator
                     , PDF.Content.Operator.Color
                     , PDF.Content.Operator.Common
                     , PDF.Content.Operator.GraphicState
                     , PDF.Content.Operator.Path
                     , PDF.Content.Operator.Text
                     , PDF.Content.Text
                     , PDF.Encoding
                     , PDF.Encoding.MacRoman
                     , PDF.Body
-                     , PDF.Font
+                     , PDF.Parser
  -- other-extensions:    
  build-depends:       attoparsec
                     , base >=4.9 && <4.13
                     , bytestring
                     , containers
                     , mtl
                     , text
                     , utf8-string
                     , zlib
  hs-source-dirs:      src
  ghc-options:         -Wall
  default-language:    Haskell2010
@ -68,15 +51,3 @@ executable             getObj
                     , zlib
  ghc-options:         -Wall
  default-language:    Haskell2010
 executable             getText
  main-is:             examples/getText.hs
  build-depends:       base
                     , bytestring
                     , containers
                     , Hufflepdf
                     , mtl
                     , text
                     , zlib
  ghc-options:         -Wall
  default-language:    Haskell2010
--- a/examples/getText.hs
+++ b/examples/getText.hs
@ -1,38 +0,0 @@
 import qualified Data.ByteString.Char8 as BS (readFile)
 import qualified Data.Map as Map (toList)
 import qualified Data.Text.IO as Text (putStrLn)
 import PDF (Document(..), parseDocument)
 import PDF.Object (Content)
 import PDF.Pages (Page(..), get, getAll)
 import PDF.Update (unify)
 import System.Environment (getArgs)
 import System.Exit (die)
 import System.IO (BufferMode(..), hSetBuffering, stdout)
 onDoc :: FilePath -> (Content -> Either String a) -> IO a
 onDoc inputFile f = do
  content <- fmap (unify . updates) . parseDocument <$> BS.readFile inputFile
  case content >>= f of
    Left someError -> die someError
    Right value -> return value
 displayPage :: Page -> IO ()
 displayPage = mapM_ Text.putStrLn . contents
 wholeDoc :: FilePath -> IO ()
 wholeDoc inputFile = do
  pages <- onDoc inputFile getAll
  mapM_ (displayPage . snd) $ Map.toList pages
 singlePage :: FilePath -> Int -> IO ()
 singlePage inputFile pageNumber =
  onDoc inputFile (`get` pageNumber) >>= displayPage
 main :: IO ()
 main = do
  hSetBuffering stdout LineBuffering
  args <- getArgs
  case args of
    [inputFile] -> wholeDoc inputFile
    [inputFile, pageNumber] -> singlePage inputFile (read pageNumber)
    _ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"
--- a/src/Data/ByteString/Char8/Util.hs
+++ b/src/Data/ByteString/Char8/Util.hs
@ -1,91 +1,16 @@
 module Data.ByteString.Char8.Util (
-      B16Int(..)
+      previous
    , B256Int(..)
    , b8ToInt
    , b16ToBytes
    , b16ToInt
    , b256ToInt
    , intToB256
    , previous
    , subBS
    , toBytes
    , unescape
    , utf16BEToutf8
  ) where
-import Data.ByteString (ByteString, snoc)
+import Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString as BS (empty, foldl, length, pack, singleton, splitAt)
+import qualified Data.ByteString.Char8 as BS (drop, index, take)
 import qualified Data.ByteString.Char8 as Char8 (
    cons, drop, index, splitAt, take, uncons, unpack
  )
 import Data.Text (Text)
 import Data.Text.Encoding (decodeUtf16BE)
 import Prelude hiding (length)
 import Text.Printf (printf)
 newtype B8Int = B8Int ByteString deriving Show
 newtype B16Int = B16Int ByteString deriving Show
 newtype B256Int = B256Int ByteString deriving Show
 previous :: Char -> Int -> ByteString -> Int
 previous char position byteString
-  | Char8.index byteString position == char = position
+  | BS.index byteString position == char = position
  | otherwise = previous char (position - 1) byteString
 subBS :: Int -> Int -> ByteString -> ByteString
-subBS offset length = Char8.take length . Char8.drop offset
+subBS offset length = BS.take length . BS.drop offset
 intToB256 :: Int -> B256Int
 intToB256 n
  | n < 0x100 = B256Int . BS.singleton $ toEnum n
  | otherwise =
    let B256Int begining = intToB256 (n `div` 0x100) in
    B256Int $ begining `snoc` (toEnum (n `mod` 0x100))
 b256ToInt :: B256Int -> Int
 b256ToInt (B256Int n) = BS.foldl (\k w -> 0x100*k + fromEnum w) 0 n
 toBytes :: Int -> Int -> ByteString
 toBytes 0 _ = BS.empty
 toBytes size n =
  (toBytes (size - 1) (n `div` 0x100)) `snoc` (toEnum (n `mod` 0x100))
 b16ToBytes :: B16Int -> ByteString
 b16ToBytes (B16Int n) = BS.pack . fmap b16ToInt $ pairDigits n
  where
    pairDigits s =
      case BS.length s of
        0 -> []
        1 -> [B16Int s]
        _ ->
          let (twoHexDigits, rest) = BS.splitAt 2 s in
          (B16Int $ twoHexDigits):(pairDigits rest)
 fromBase :: (Num a, Read a) => Char -> ByteString -> a
 fromBase b = read . printf "0%c%s" b . Char8.unpack
 b16ToInt :: (Num a, Read a) => B16Int -> a
 b16ToInt (B16Int n) = fromBase 'x' n
 b8ToInt :: (Num a, Read a) => B8Int -> a
 b8ToInt (B8Int n) = fromBase 'o' n
 unescape :: ByteString -> ByteString
 unescape escapedBS =
  case Char8.uncons escapedBS of
    Nothing -> BS.empty
    Just ('\\', s) -> unescapeChar s
    Just (c, s) -> Char8.cons c (unescape s)
  where
    unescapeChar s =
      case Char8.uncons s of
        Nothing -> BS.empty
        Just (c, s')
          | c `elem` "()" -> Char8.cons c (unescape s')
          | c `elem` "nrtbf" -> Char8.cons (read (printf "'\\%c'" c)) (unescape s')
          | c `elem` ['0'..'7'] -> fromOctal (Char8.splitAt 3 s)
          | otherwise -> Char8.cons c (unescape s')
    fromOctal (code, s) = Char8.cons (toEnum $ b8ToInt (B8Int code)) (unescape s)
 utf16BEToutf8 :: ByteString -> Text
 utf16BEToutf8 = decodeUtf16BE
--- a/src/PDF.hs
+++ b/src/PDF.hs
@ -21,7 +21,7 @@ import PDF.Object (
  )
 import qualified PDF.Output as Output (render, line)
 import PDF.Output (Output(..))
-import PDF.Parser (Parser, evalParser, string, takeAll)
+import PDF.Parser (Parser, runParser, string, takeAll)
 import Text.Printf (printf)
 data Document = Document {
@ -83,7 +83,7 @@ findNextSection offset input =
 readStructures :: Int -> ByteString -> Either String [InputStructure]
 readStructures startXref input =
-  evalParser structure () (BS.drop startXref input) >>= stopOrFollow
+  runParser structure () (BS.drop startXref input) >>= stopOrFollow
  where
    stopOrFollow s@(Structure {trailer}) =
      case Map.lookup (Name "Prev") trailer of
@ -96,7 +96,7 @@ readStructures startXref input =
 parseDocument :: ByteString -> Either String Document
 parseDocument input = do
-  (pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
+  (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
  startXref <- readStartXref eolStyle input
  structuresRead <- readStructures startXref input
  let updates = populate input <$> structuresRead
--- a/src/PDF/Body.hs
+++ b/src/PDF/Body.hs
@ -6,7 +6,6 @@ module PDF.Body (
 import Control.Applicative ((<|>))
 import Control.Monad.State (get, gets, modify)
 import Data.Attoparsec.ByteString.Char8 (option)
 import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
 import Data.Map ((!))
@ -19,7 +18,7 @@ import PDF.Object (
    , blank, dictionary, directObject, integer, line
  )
 import PDF.Output (ObjectId(..), Offset(..))
-import PDF.Parser (Parser, block, char, evalParser, on, takeAll)
+import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
 data UserState = UserState {
      input :: ByteString
@ -105,12 +104,12 @@ indirectObjCoordinates = do
 occurrence :: SParser Occurrence
 occurrence =
-  Comment <$> comment <|> Indirect <$> indirectObjCoordinates
+  Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"
 populate :: ByteString -> InputStructure -> Content
 populate input structure =
  let bodyInput = BS.drop (startOffset structure) input in
-  case evalParser recurseOnOccurrences initialState bodyInput of
+  case runParser recurseOnOccurrences initialState bodyInput of
    Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
    Right finalState ->
      let Flow {occurrencesStack, tmpObjects} = flow finalState in
--- a/src/PDF/CMap.hs
+++ b/src/PDF/CMap.hs
@ -1,159 +0,0 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 module PDF.CMap (
      CMap
    , CMappers
    , CRange(..)
    , cMap
    , emptyCMap
    , matches
  ) where
 import Control.Applicative ((<|>), many)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.State (modify)
 import Data.Attoparsec.ByteString.Char8 (count)
 import Data.ByteString (ByteString)
 import qualified Data.ByteString as BS (drop, length, null, take)
 import Data.ByteString.Char8 (unpack)
 import Data.ByteString.Char8.Util (
    B16Int(..), b16ToBytes, b16ToInt, toBytes, utf16BEToutf8
  )
 import Data.Map (Map, union)
 import qualified Data.Map as Map (
    adjust, empty, fromList, insertWith, lookup, toList
  )
 import Data.Text (Text)
 import qualified PDF.EOL as EOL (charset, parser)
 import PDF.Font (Font)
 import PDF.Object (
      DirectObject(..), Name, StringObject(..)
    , blank, directObject, integer, line, stringObject
  )
 import PDF.Parser (MonadParser, Parser, runParser, takeAll)
 import Prelude hiding (fail)
 type CMappers = Map Name CMap
 type Mapping = Map ByteString Text
 data CRange = CRange {
      fromSequence :: ByteString
    , toSequence :: ByteString
    , mapping :: Mapping
  } deriving Show
 type RangeSize = Int
 type CMap = Map RangeSize [CRange]
 toFont :: CMap -> Font
 toFont aCMap input
  | BS.null input = Right ""
  | otherwise = do
    (output, remainingInput) <- trySizes input $ Map.toList aCMap
    mappend output <$> toFont aCMap remainingInput
  where
    trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
    trySizes s ((size, cRanges):others) =
      let prefix = BS.take size s in
      case tryRanges prefix cRanges of
        Nothing -> trySizes s others
        Just outputSequence -> Right (outputSequence, BS.drop size s)
    tryRanges :: ByteString -> [CRange] -> Maybe Text
    tryRanges _ [] = Nothing
    tryRanges prefix ((CRange {mapping}):cRanges) =
      case Map.lookup prefix mapping of
        Nothing -> tryRanges prefix cRanges
        outputSequence -> outputSequence
 emptyCMap :: CMap
 emptyCMap = Map.empty
 matches :: ByteString -> CRange -> Bool
 matches code (CRange {fromSequence, toSequence}) =
  fromSequence <= code && code <= toSequence
 cMap :: MonadFail m => ByteString -> m Font
 cMap = either fail (return . toFont . snd) . runParser
  (many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
  emptyCMap
  where
    ignoredLine =
      takeAll (not . (`elem` EOL.charset)) *> EOL.parser *> return ()
 codeRanges :: Parser CMap ()
 codeRanges = do
  size <- integer <* line "begincodespacerange"
  mapM_ createMapping =<< count size codeRange
  line "endcodespacerange"
  where
    codeRange =
      (,) <$> stringObject <* blank <*> stringObject <* EOL.parser
 createMapping :: (StringObject, StringObject) -> Parser CMap ()
 createMapping (Hexadecimal from, Hexadecimal to) = modify $
  Map.insertWith (++) size [CRange {fromSequence, toSequence, mapping}]
  where
    fromSequence = b16ToBytes from
    size = BS.length fromSequence
    toSequence = b16ToBytes to
    mapping = Map.empty
 createMapping _ = return ()
 cMapRange :: Parser CMap ()
 cMapRange = do
  size <- integer <* line "beginbfrange"
  mapM_ saveMapping =<< count size rangeMapping
  line "endbfrange"
  where
    rangeMapping = (,,) 
      <$> (stringObject <* blank)
      <*> (stringObject <* blank)
      <*> directObject <* EOL.parser
      >>= mapFromTo
 saveMapping :: [(ByteString, Text)] -> Parser CMap ()
 saveMapping [] = return ()
 saveMapping assoc@((code, _):_) = modify $ Map.adjust insertCRange mappingSize
  where
    newMapping = Map.fromList assoc
    mappingSize = BS.length code
    appendMapping cRange =
      cRange {mapping = mapping cRange `union` newMapping}
    insertCRange = fmap (\cRange ->
        if code `matches` cRange then appendMapping cRange else cRange
      )
 cMapChar :: Parser CMap ()
 cMapChar = do
  size <- integer <* line "beginbfchar"
  saveMapping =<< count size charMapping <* line "endbfchar"
  where
    charMapping =
      (,) <$> stringObject <* blank <*> stringObject <* EOL.parser
      >>= pairMapping
 between :: B16Int -> B16Int -> [ByteString]
 between from@(B16Int s) to =
  let size = BS.length s `div` 2 in
  toBytes size <$> [b16ToInt from .. b16ToInt to]
 startFrom :: B16Int -> [ByteString]
 startFrom from@(B16Int s) =
  let size = BS.length s `div` 2 in
  toBytes size <$> [b16ToInt from .. ]
 mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, Text)]
 mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
  return $ zip (between from to) (utf16BEToutf8 <$> startFrom dstFrom)
 mapFromTo (Hexadecimal from, Hexadecimal to, Array dstPoints) =
  zip (between from to) <$> (mapM dstByteString dstPoints)
  where
    dstByteString (StringObject (Hexadecimal dst)) =
      return . utf16BEToutf8 $ b16ToBytes dst
    dstByteString _ = fail "Invalid for a replacement string"
 mapFromTo _ = fail "invalid range mapping found"
 pairMapping :: MonadParser m => (StringObject, StringObject) -> m (ByteString, Text)
 pairMapping (Hexadecimal from, Hexadecimal to) =
  return (b16ToBytes  from, utf16BEToutf8 $ b16ToBytes  to)
 pairMapping _ = fail "invalid pair mapping found"
--- a/src/PDF/Content.hs
+++ b/src/PDF/Content.hs
@ -1,69 +0,0 @@
 {-# LANGUAGE OverloadedStrings #-}
 module PDF.Content (
      Content(..)
    , ContentUnit(..)
    , GraphicContextUnit(..)
    , TextContext
    , content
    , parse
  ) where
 import Control.Applicative ((<|>))
 import Control.Monad.State (evalStateT, modify)
 import Data.Attoparsec.ByteString.Char8 (sepBy)
 import Data.ByteString (ByteString)
 import PDF.Content.Operator (Instruction, operator)
 import PDF.Object (blank, directObject)
 import PDF.Output (Output(..), line)
 import PDF.Parser (MonadParser, evalParser, string)
 data GraphicContextUnit =
    GraphicInstruction Instruction
  | ContentUnit ContentUnit
  deriving Show
 type TextContext = [Instruction]
 data ContentUnit =
    GraphicContext [GraphicContextUnit]
  | TextContext TextContext
  deriving Show
 newtype Content = Content [ContentUnit] deriving Show
 content :: MonadParser m => m Content
 content = Content <$> contentUnit `sepBy` blank
 contentUnit :: MonadParser m => m ContentUnit
 contentUnit =
    (GraphicContext <$> graphicContext)
  <|> (TextContext <$> textContext)
  where
    graphicContext =
      string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
 graphicContextUnit :: MonadParser m => m GraphicContextUnit
 graphicContextUnit =
    (GraphicInstruction <$> instruction)
  <|> (ContentUnit <$> contentUnit)
 instruction :: MonadParser m => m Instruction
 instruction = evalStateT stackParser []
  where
    stackParser = ((directObject <* blank) >>= push) <|> operator
    push arg = modify (arg:) *> stackParser
 parse :: ByteString -> Either String Content
 parse = evalParser content ()
 textContext :: MonadParser m => m TextContext
 textContext =
  string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
 instance Output Content where
  output (Content contentUnits) = output contentUnits
 instance Output ContentUnit where
  output (GraphicContext gc) = line "q" `mappend` output gc `mappend` line "Q"
  output (TextContext tc) = line "BT" `mappend` output tc `mappend` line "ET"
 instance Output GraphicContextUnit where
  output (GraphicInstruction gi) = output gi
  output (ContentUnit cu) = output cu
--- a/src/PDF/Content/Operator.hs
+++ b/src/PDF/Content/Operator.hs
@ -1,76 +0,0 @@
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE FlexibleInstances #-}
 module PDF.Content.Operator (
      Instruction
    , Operator(..)
    , operator
  ) where
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.State (MonadState(..))
 import Data.ByteString.Char8 (ByteString, pack, unpack)
 import Data.Char (toLower)
 import Data.Map (Map, (!?))
 import qualified Data.Map as Map (fromList)
 import qualified PDF.Content.Operator.Color as Color (Operator, signature)
 import PDF.Content.Operator.Common (Signature)
 import qualified PDF.Content.Operator.GraphicState as GraphicState (Operator, signature)
 import qualified PDF.Content.Operator.Path as Path (Operator, signature)
 import qualified PDF.Content.Operator.Text as Text (Operator, signature)
 import PDF.Object (DirectObject, blank, regular)
 import PDF.Output (Output(..), join, line)
 import PDF.Parser (MonadParser, takeAll1)
 import Prelude hiding (fail)
 import Text.Printf (printf)
 data Operator =
    GraphicState GraphicState.Operator
  | Path Path.Operator
  | Color Color.Operator
  | Text Text.Operator 
 instance Show Operator where
  show (GraphicState gcOp) = code gcOp
  show (Path pOp) = code pOp
  show (Color cOp) = code cOp
  show (Text tOp) = code tOp
 type Instruction = (Operator, [DirectObject])
 instance Output Instruction where
  output (op, args) = join " " ((output <$> args) ++ [line (show op)])
 operatorsTable :: Map ByteString (Signature Operator)
 operatorsTable = Map.fromList (
      (prepare GraphicState <$> GraphicState.signature)
    ++ (prepare Path <$> Path.signature)
    ++ (prepare Color <$> Color.signature)
    ++ (prepare Text <$> Text.signature)
  )
  where
    prepare constructor (op, sig) = (pack $ code op, (constructor op, sig))
 code :: Show a => a -> String
 code = expand . show
  where
    expand "" = ""
    expand (c:'_':s) = toLower c : expand s
    expand ('s':'t':'a':'r':s) = '*' : expand s
    expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s
    expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s
    expand (c:s) = c : expand s
 type StackParser m = (MonadState [DirectObject] m, MonadParser m)
 operator :: StackParser m => m Instruction
 operator = do
  chunk <- takeAll1 regular <* blank
  args <- reverse <$> get
  case operatorsTable !? chunk of
    Just (op, sig)
      | sig args -> return (op, args)
      | otherwise -> 
        get >>= fail . printf "Operator %s with stack %s" (show op) . show
    _ -> fail ("Unknown chunk " ++ unpack chunk)
--- a/src/PDF/Content/Operator/Color.hs
+++ b/src/PDF/Content/Operator/Color.hs
@ -1,27 +0,0 @@
 module PDF.Content.Operator.Color (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 import PDF.Object (DirectObject(..))
 data Operator =
    CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (CS, \l -> case l of [NameObject _] -> True ; _ -> False)
    , (C_s, \l -> case l of [NameObject _] -> True ; _ -> False)
    , (SC, \_ -> True)
    , (SCN, \_ -> True)
    , (S_c, \_ -> True)
    , (S_cn, \_ -> True)
    , (G, \l -> case l of [_] -> True ; _ -> False)
    , (G_, \l -> case l of [_] -> True ; _ -> False)
    , (RG, \l -> case l of [_, _, _] -> True ; _ -> False)
    , (R_g, \l -> case l of [_, _, _] -> True ; _ -> False)
    , (K, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (K_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Operator/Common.hs
+++ b/src/PDF/Content/Operator/Common.hs
@ -1,7 +0,0 @@
 module PDF.Content.Operator.Common (
    Signature
  ) where
 import PDF.Object (DirectObject)
 type Signature a = (a, [DirectObject] -> Bool)
--- a/src/PDF/Content/Operator/GraphicState.hs
+++ b/src/PDF/Content/Operator/GraphicState.hs
@ -1,24 +0,0 @@
 module PDF.Content.Operator.GraphicState (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 import PDF.Object (DirectObject(..))
 data Operator =
  C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (C_m, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
    , (W_, \l -> case l of [_] -> True ; _ -> False)
    , (J, \l -> case l of [_] -> True ; _ -> False)
    , (J_, \l -> case l of [_] -> True ; _ -> False)
    , (M, \l -> case l of [_] -> True ; _ -> False)
    , (D_, \l -> case l of [_, _] -> True ; _ -> False)
    , (R_i, \l -> case l of [_] -> True ; _ -> False)
    , (I_, \l -> case l of [_] -> True ; _ -> False)
    , (G_s, \l -> case l of [NameObject _] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Operator/Path.hs
+++ b/src/PDF/Content/Operator/Path.hs
@ -1,35 +0,0 @@
 module PDF.Content.Operator.Path (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 data Operator =
    M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction
  | S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting
  | W | Wstar -- clipping path
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (M_, \l -> case l of [_, _] -> True ; _ -> False)
    , (L_, \l -> case l of [_, _] -> True ; _ -> False)
    , (C_, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
    , (V_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (Y_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (H_, \l -> case l of [] -> True ; _ -> False)
    , (R_e, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (S, \l -> case l of [] -> True ; _ -> False)
    , (S_, \l -> case l of [] -> True ; _ -> False)
    , (F_, \l -> case l of [] -> True ; _ -> False)
    , (F, \l -> case l of [] -> True ; _ -> False)
    , (Fstar, \l -> case l of [] -> True ; _ -> False)
    , (B, \l -> case l of [] -> True ; _ -> False)
    , (Bstar, \l -> case l of [] -> True ; _ -> False)
    , (B_, \l -> case l of [] -> True ; _ -> False)
    , (B_star, \l -> case l of [] -> True ; _ -> False)
    , (N_, \l -> case l of [] -> True ; _ -> False)
    , (W, \l -> case l of [] -> True ; _ -> False)
    , (Wstar, \l -> case l of [] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Operator/Text.hs
+++ b/src/PDF/Content/Operator/Text.hs
@ -1,32 +0,0 @@
 module PDF.Content.Operator.Text (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 import PDF.Object (DirectObject(..))
 data Operator =
    Td | TD | Tm | Tstar -- text positioning
  | TJ | Tj | Quote | DQuote -- text showing
  | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (Td, \l -> case l of [_, _] -> True ; _ -> False)
    , (TD, \l -> case l of [_, _] -> True ; _ -> False)
    , (Tm, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
    , (Tstar, \l -> case l of [] -> True ; _ -> False)
    , (TJ, \l -> case l of [Array _] -> True ; _ -> False)
    , (Tj, \l -> case l of [StringObject _] -> True ; _ -> False)
    , (Quote, \l -> case l of [StringObject _] -> True ; _ -> False)
    , (DQuote, \l -> case l of [StringObject _] -> True ; _ -> False)
    , (Tc, \l -> case l of [_] -> True ; _ -> False)
    , (Tw, \l -> case l of [_] -> True ; _ -> False)
    , (Tz, \l -> case l of [_] -> True ; _ -> False)
    , (TL, \l -> case l of [_] -> True ; _ -> False)
    , (Tf, \l -> case l of [NameObject _, _] -> True ; _ -> False)
    , (Tr, \l -> case l of [_] -> True ; _ -> False)
    , (Ts, \l -> case l of [_] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Text.hs
+++ b/src/PDF/Content/Text.hs
@ -1,75 +0,0 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.Content.Text (
      format
    , renderText
  ) where
 import Control.Monad (foldM)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.Reader (MonadReader(..), asks, runReaderT)
 import Control.Monad.State (MonadState(..), evalStateT)
 import Data.ByteString.Char8 (pack)
 import Data.Map ((!))
 import Data.Text (Text)
 import PDF.Content (Content(..), ContentUnit(..), GraphicContextUnit(..))
 import PDF.Content.Operator (Instruction, Operator(..))
 import PDF.Content.Operator.Text (Operator(..))
 import PDF.Font (Font, FontSet, emptyFont)
 import PDF.Object (DirectObject(..), StringObject(..), toByteString)
 import Prelude hiding (fail)
 type TextContent m = (MonadReader FontSet m, MonadFail m)
 type FontContext m = (MonadState Font m, TextContent m)
 decodeString :: FontContext m => StringObject -> m Text
 decodeString input = do
  font <- get
  either fail return . font $ toByteString input
 renderText :: MonadFail m => FontSet -> Content -> m [Text]
 renderText fontSet (Content contentUnits) =
  runReaderT (concat <$> mapM renderContentUnit contentUnits) fontSet
 renderContentUnit :: TextContent m => ContentUnit -> m [Text]
 renderContentUnit (GraphicContext graphicContextUnits) =
  concat <$> mapM renderGraphicContextUnit graphicContextUnits
 renderContentUnit (TextContext instructions) =
  evalStateT (concat <$> mapM renderInstruction instructions) emptyFont
 renderGraphicContextUnit :: TextContent m => GraphicContextUnit -> m [Text]
 renderGraphicContextUnit (GraphicInstruction _) = return []
 renderGraphicContextUnit (ContentUnit contentUnit) =
  renderContentUnit contentUnit
 renderInstruction :: FontContext m => Instruction -> m [Text]
 renderInstruction (Text Tf, [NameObject fontName, _]) =
  asks (! fontName) >>= put >> return []
 renderInstruction (Text Tstar, []) = return ["\n"]
 renderInstruction (Text TJ, [Array arrayObject]) =
  replicate 1 <$> foldM appendText "" arrayObject
  where
    appendText t (StringObject outputString) =
      mappend t <$> decodeString outputString
    appendText t _ = return t
 renderInstruction (Text Tj, [StringObject outputString]) =
  replicate 1 <$> decodeString outputString
 renderInstruction (Text Quote, [StringObject outputString]) =
  (\t -> ["\n", t]) <$> decodeString outputString
 renderInstruction (Text DQuote, [StringObject outputString]) =
  (\t -> ["\n", t]) <$> decodeString outputString
 renderInstruction _ = return []
 format :: String -> [Instruction]
 format s =
  case break (== '\n') s of
    ("", "") -> []
    ("", left) -> (Text Tstar, []) : format (drop 1 left)
    (line, left) -> (Text Tj, [StringObject . Literal $ pack line]) : format left
--- a/src/PDF/EOL.hs
+++ b/src/PDF/EOL.hs
@ -6,14 +6,14 @@ module PDF.EOL (
  ) where
 import Control.Applicative ((<|>))
-import PDF.Parser (MonadParser, string)
+import PDF.Parser (Parser, string)
 data Style = CR | LF | CRLF deriving Show
 charset :: String
 charset = "\r\n"
-parser :: MonadParser m => m Style
+parser :: Parser s Style
 parser =
    (string "\r\n" >> return CRLF)
  <|> (string "\r" >> return CR)
--- a/src/PDF/Encoding.hs
+++ b/src/PDF/Encoding.hs
@ -1,12 +0,0 @@
 module PDF.Encoding (
    encoding
  ) where
 import Control.Monad.Fail (MonadFail(..))
 import PDF.Encoding.MacRoman (macRomanEncoding)
 import PDF.Font (Font)
 import Prelude hiding (fail)
 encoding :: MonadFail m => String -> m Font
 encoding "MacRomanEncoding" = return macRomanEncoding
 encoding s = fail $ "Unknown encoding " ++ s
--- a/src/PDF/Encoding/MacRoman.hs
+++ b/src/PDF/Encoding/MacRoman.hs
@ -1,141 +0,0 @@
 module PDF.Encoding.MacRoman (
    macRomanEncoding
  ) where
 import Data.ByteString.Char8 (unpack)
 import Data.Text (pack)
 import PDF.Font (Font)
 macRomanEncoding :: Font
 macRomanEncoding = Right . pack . fmap decode . unpack
 decode :: Char -> Char
 decode '\x80' = '\x00C4' -- LATIN CAPITAL LETTER A WITH DIAERESIS
 decode '\x81' = '\x00C5' -- LATIN CAPITAL LETTER A WITH RING ABOVE
 decode '\x82' = '\x00C7' -- LATIN CAPITAL LETTER C WITH CEDILLA
 decode '\x83' = '\x00C9' -- LATIN CAPITAL LETTER E WITH ACUTE
 decode '\x84' = '\x00D1' -- LATIN CAPITAL LETTER N WITH TILDE
 decode '\x85' = '\x00D6' -- LATIN CAPITAL LETTER O WITH DIAERESIS
 decode '\x86' = '\x00DC' -- LATIN CAPITAL LETTER U WITH DIAERESIS
 decode '\x87' = '\x00E1' -- LATIN SMALL LETTER A WITH ACUTE
 decode '\x88' = '\x00E0' -- LATIN SMALL LETTER A WITH GRAVE
 decode '\x89' = '\x00E2' -- LATIN SMALL LETTER A WITH CIRCUMFLEX
 decode '\x8A' = '\x00E4' -- LATIN SMALL LETTER A WITH DIAERESIS
 decode '\x8B' = '\x00E3' -- LATIN SMALL LETTER A WITH TILDE
 decode '\x8C' = '\x00E5' -- LATIN SMALL LETTER A WITH RING ABOVE
 decode '\x8D' = '\x00E7' -- LATIN SMALL LETTER C WITH CEDILLA
 decode '\x8E' = '\x00E9' -- LATIN SMALL LETTER E WITH ACUTE
 decode '\x8F' = '\x00E8' -- LATIN SMALL LETTER E WITH GRAVE
 decode '\x90' = '\x00EA' -- LATIN SMALL LETTER E WITH CIRCUMFLEX
 decode '\x91' = '\x00EB' -- LATIN SMALL LETTER E WITH DIAERESIS
 decode '\x92' = '\x00ED' -- LATIN SMALL LETTER I WITH ACUTE
 decode '\x93' = '\x00EC' -- LATIN SMALL LETTER I WITH GRAVE
 decode '\x94' = '\x00EE' -- LATIN SMALL LETTER I WITH CIRCUMFLEX
 decode '\x95' = '\x00EF' -- LATIN SMALL LETTER I WITH DIAERESIS
 decode '\x96' = '\x00F1' -- LATIN SMALL LETTER N WITH TILDE
 decode '\x97' = '\x00F3' -- LATIN SMALL LETTER O WITH ACUTE
 decode '\x98' = '\x00F2' -- LATIN SMALL LETTER O WITH GRAVE
 decode '\x99' = '\x00F4' -- LATIN SMALL LETTER O WITH CIRCUMFLEX
 decode '\x9A' = '\x00F6' -- LATIN SMALL LETTER O WITH DIAERESIS
 decode '\x9B' = '\x00F5' -- LATIN SMALL LETTER O WITH TILDE
 decode '\x9C' = '\x00FA' -- LATIN SMALL LETTER U WITH ACUTE
 decode '\x9D' = '\x00F9' -- LATIN SMALL LETTER U WITH GRAVE
 decode '\x9E' = '\x00FB' -- LATIN SMALL LETTER U WITH CIRCUMFLEX
 decode '\x9F' = '\x00FC' -- LATIN SMALL LETTER U WITH DIAERESIS
 decode '\xA0' = '\x2020' -- DAGGER
 decode '\xA1' = '\x00B0' -- DEGREE SIGN
 decode '\xA2' = '\x00A2' -- CENT SIGN
 decode '\xA3' = '\x00A3' -- POUND SIGN
 decode '\xA4' = '\x00A7' -- SECTION SIGN
 decode '\xA5' = '\x2022' -- BULLET
 decode '\xA6' = '\x00B6' -- PILCROW SIGN
 decode '\xA7' = '\x00DF' -- LATIN SMALL LETTER SHARP S
 decode '\xA8' = '\x00AE' -- REGISTERED SIGN
 decode '\xA9' = '\x00A9' -- COPYRIGHT SIGN
 decode '\xAA' = '\x2122' -- TRADE MARK SIGN
 decode '\xAB' = '\x00B4' -- ACUTE ACCENT
 decode '\xAC' = '\x00A8' -- DIAERESIS
 decode '\xAD' = '\x2260' -- NOT EQUAL TO
 decode '\xAE' = '\x00C6' -- LATIN CAPITAL LETTER AE
 decode '\xAF' = '\x00D8' -- LATIN CAPITAL LETTER O WITH STROKE
 decode '\xB0' = '\x221E' -- INFINITY
 decode '\xB1' = '\x00B1' -- PLUS-MINUS SIGN
 decode '\xB2' = '\x2264' -- LESS-THAN OR EQUAL TO
 decode '\xB3' = '\x2265' -- GREATER-THAN OR EQUAL TO
 decode '\xB4' = '\x00A5' -- YEN SIGN
 decode '\xB5' = '\x00B5' -- MICRO SIGN
 decode '\xB6' = '\x2202' -- PARTIAL DIFFERENTIAL
 decode '\xB7' = '\x2211' -- N-ARY SUMMATION
 decode '\xB8' = '\x220F' -- N-ARY PRODUCT
 decode '\xB9' = '\x03C0' -- GREEK SMALL LETTER PI
 decode '\xBA' = '\x222B' -- INTEGRAL
 decode '\xBB' = '\x00AA' -- FEMININE ORDINAL INDICATOR
 decode '\xBC' = '\x00BA' -- MASCULINE ORDINAL INDICATOR
 decode '\xBD' = '\x03A9' -- GREEK CAPITAL LETTER OMEGA
 decode '\xBE' = '\x00E6' -- LATIN SMALL LETTER AE
 decode '\xBF' = '\x00F8' -- LATIN SMALL LETTER O WITH STROKE
 decode '\xC0' = '\x00BF' -- INVERTED QUESTION MARK
 decode '\xC1' = '\x00A1' -- INVERTED EXCLAMATION MARK
 decode '\xC2' = '\x00AC' -- NOT SIGN
 decode '\xC3' = '\x221A' -- SQUARE ROOT
 decode '\xC4' = '\x0192' -- LATIN SMALL LETTER F WITH HOOK
 decode '\xC5' = '\x2248' -- ALMOST EQUAL TO
 decode '\xC6' = '\x2206' -- INCREMENT
 decode '\xC7' = '\x00AB' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
 decode '\xC8' = '\x00BB' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 decode '\xC9' = '\x2026' -- HORIZONTAL ELLIPSIS
 decode '\xCA' = '\x00A0' -- NO-BREAK SPACE
 decode '\xCB' = '\x00C0' -- LATIN CAPITAL LETTER A WITH GRAVE
 decode '\xCC' = '\x00C3' -- LATIN CAPITAL LETTER A WITH TILDE
 decode '\xCD' = '\x00D5' -- LATIN CAPITAL LETTER O WITH TILDE
 decode '\xCE' = '\x0152' -- LATIN CAPITAL LIGATURE OE
 decode '\xCF' = '\x0153' -- LATIN SMALL LIGATURE OE
 decode '\xD0' = '\x2013' -- EN DASH
 decode '\xD1' = '\x2014' -- EM DASH
 decode '\xD2' = '\x201C' -- LEFT DOUBLE QUOTATION MARK
 decode '\xD3' = '\x201D' -- RIGHT DOUBLE QUOTATION MARK
 decode '\xD4' = '\x2018' -- LEFT SINGLE QUOTATION MARK
 decode '\xD5' = '\x2019' -- RIGHT SINGLE QUOTATION MARK
 decode '\xD6' = '\x00F7' -- DIVISION SIGN
 decode '\xD7' = '\x25CA' -- LOZENGE
 decode '\xD8' = '\x00FF' -- LATIN SMALL LETTER Y WITH DIAERESIS
 decode '\xD9' = '\x0178' -- LATIN CAPITAL LETTER Y WITH DIAERESIS
 decode '\xDA' = '\x2044' -- FRACTION SLASH
 decode '\xDB' = '\x20AC' -- EURO SIGN
 decode '\xDC' = '\x2039' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
 decode '\xDD' = '\x203A' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
 decode '\xDE' = '\xFB01' -- LATIN SMALL LIGATURE FI
 decode '\xDF' = '\xFB02' -- LATIN SMALL LIGATURE FL
 decode '\xE0' = '\x2021' -- DOUBLE DAGGER
 decode '\xE1' = '\x00B7' -- MIDDLE DOT
 decode '\xE2' = '\x201A' -- SINGLE LOW-9 QUOTATION MARK
 decode '\xE3' = '\x201E' -- DOUBLE LOW-9 QUOTATION MARK
 decode '\xE4' = '\x2030' -- PER MILLE SIGN
 decode '\xE5' = '\x00C2' -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
 decode '\xE6' = '\x00CA' -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
 decode '\xE7' = '\x00C1' -- LATIN CAPITAL LETTER A WITH ACUTE
 decode '\xE8' = '\x00CB' -- LATIN CAPITAL LETTER E WITH DIAERESIS
 decode '\xE9' = '\x00C8' -- LATIN CAPITAL LETTER E WITH GRAVE
 decode '\xEA' = '\x00CD' -- LATIN CAPITAL LETTER I WITH ACUTE
 decode '\xEB' = '\x00CE' -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
 decode '\xEC' = '\x00CF' -- LATIN CAPITAL LETTER I WITH DIAERESIS
 decode '\xED' = '\x00CC' -- LATIN CAPITAL LETTER I WITH GRAVE
 decode '\xEE' = '\x00D3' -- LATIN CAPITAL LETTER O WITH ACUTE
 decode '\xEF' = '\x00D4' -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
 decode '\xF0' = '\xF8FF' -- Apple logo
 decode '\xF1' = '\x00D2' -- LATIN CAPITAL LETTER O WITH GRAVE
 decode '\xF2' = '\x00DA' -- LATIN CAPITAL LETTER U WITH ACUTE
 decode '\xF3' = '\x00DB' -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
 decode '\xF4' = '\x00D9' -- LATIN CAPITAL LETTER U WITH GRAVE
 decode '\xF5' = '\x0131' -- LATIN SMALL LETTER DOTLESS I
 decode '\xF6' = '\x02C6' -- MODIFIER LETTER CIRCUMFLEX ACCENT
 decode '\xF7' = '\x02DC' -- SMALL TILDE
 decode '\xF8' = '\x00AF' -- MACRON
 decode '\xF9' = '\x02D8' -- BREVE
 decode '\xFA' = '\x02D9' -- DOT ABOVE
 decode '\xFB' = '\x02DA' -- RING ABOVE
 decode '\xFC' = '\x00B8' -- CEDILLA
 decode '\xFD' = '\x02DD' -- DOUBLE ACUTE ACCENT
 decode '\xFE' = '\x02DB' -- OGONEK
 decode '\xFF' = '\x02C7' -- CARON
 decode c = c             -- The rest is ASCII
--- a/src/PDF/Font.hs
+++ b/src/PDF/Font.hs
@ -1,16 +0,0 @@
 module PDF.Font (
      Font
    , FontSet
    , emptyFont
  ) where
 import Data.ByteString (ByteString)
 import Data.Map (Map)
 import Data.Text (Text)
 import PDF.Object (Name)
 type Font = ByteString -> Either String Text
 type FontSet = Map Name Font
 emptyFont :: Font
 emptyFont _ = Left "No fond loaded"
--- a/src/PDF/Object.hs
+++ b/src/PDF/Object.hs
@ -3,7 +3,6 @@
 {-# LANGUAGE FlexibleInstances #-}
 module PDF.Object (
      Content(..)
    , Dictionary
    , DirectObject(..)
    , Flow(..)
    , IndexedObjects
@ -13,11 +12,9 @@ module PDF.Object (
    , Number(..)
    , Object(..)
    , Occurrence(..)
    , StringObject(..)
    , Structure(..)
    , XRefEntry(..)
    , XRefSection
    , array
    , blank
    , dictionary
    , directObject
@ -25,36 +22,34 @@ module PDF.Object (
    , integer
    , line
    , magicNumber
    , name
    , number
    , regular
    , stringObject
    , structure
    , toByteString
  ) where
-import Control.Applicative ((<|>), many)
+import Control.Applicative ((<|>))
-import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
+import Data.ByteString.Char8 (ByteString)
-import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BS (
-import qualified Data.ByteString as BS (concat)
+    concat, cons, pack, singleton, unpack
-import qualified Data.ByteString.Char8 as Char8 (cons, pack, singleton, unpack)
+  )
 import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
 import Data.Map (Map, (!), mapWithKey)
 import qualified Data.Map as Map (
    delete, empty, fromList, lookup, minViewWithKey, toList, union
  )
 import qualified PDF.EOL as EOL (charset, parser)
-import qualified PDF.Output as Output (line, string)
+import qualified PDF.Output as Output (concat, line, string)
 import PDF.Output (
      OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
    , byteString, getObjectId, getOffset, getOffsets, join, newLine
    , saveOffset
  )
-import PDF.Parser (MonadParser(..), Parser, octDigit, oneOf)
+import PDF.Parser (
      Parser, (<?>)
    , char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option
    , sepBy, string, takeAll, takeAll1
  )
 import Text.Printf (printf)
-line :: MonadParser m => String -> m ()
+line :: String -> Parser u ()
-line l = (string (Char8.pack l) *> blank *> return ())
+line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
 magicNumber :: ByteString
 magicNumber = "%PDF-"
@ -65,8 +60,8 @@ eofMarker = "%%EOF"
 whiteSpaceCharset :: String
 whiteSpaceCharset = "\0\t\12 "
-blank :: MonadParser m => m ()
+blank :: Parser u ()
-blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure ()
+blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return ()
 delimiterCharset :: String
 delimiterCharset = "()<>[]{}/%"
@ -74,8 +69,8 @@ delimiterCharset = "()<>[]{}/%"
 regular :: Char -> Bool
 regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
-integer :: (Read a, Num a, MonadParser m) => m a
+integer :: (Read a, Num a) => Parser u a
-integer = read . Char8.unpack <$> decNumber <* blank
+integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
 -------------------------------------
 --          OBJECTS
@ -86,9 +81,9 @@ type IndexedObjects = Map ObjectId Object
 --
 -- Boolean
 --
-boolean :: MonadParser m => m Bool
+boolean :: Parser u Bool
 boolean =
-  (string "true" *> return True) <|> (string "false" *> return False)
+  (string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
 --
 -- Number
@ -101,40 +96,38 @@ instance Output Number where
      (n, 0) -> printf "%d" (n :: Int)
      _ -> printf "%f" f
-number :: MonadParser m => m Number
+number :: Parser u Number
-number = Number . read . Char8.unpack <$>
+number = Number . read . BS.unpack <$>
-  (mappend <$> sign <*> (integerPart <|> Char8.cons '0' <$> floatPart))
+  (mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart))
  <?> "number"
  where
    sign = string "-" <|> option "" (char '+' >> return "")
    integerPart = mappend <$> decNumber <*> option "" floatPart
-    floatPart = Char8.cons <$> char '.' <*> (option "0" $ decNumber)
+    floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber)
 --
 -- StringObject
 --
-data StringObject = Literal ByteString | Hexadecimal B16Int deriving Show
+data StringObject = Literal String | Hexadecimal String deriving Show
 instance Output StringObject where
-  output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s))
+  output (Literal s) = Output.string (printf "(%s)" s)
-  output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n))
+  output (Hexadecimal s) = Output.string (printf "<%s>" s)
-stringObject :: MonadParser m => m StringObject
+stringObject :: Parser u StringObject
 stringObject =
-    Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
+    Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
-  <|> Hexadecimal <$> (char '<' *> hexNumber <* char '>')
+  <|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>')
  <?> "string object (literal or hexadecimal)"
  where
    literalString = many literalStringBlock
    literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
    normalChar = not . (`elem` ("\\()" :: String))
    matchingParenthesis =
-      mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")"
+      mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")"
    escapedChar =
-      Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode)
+      BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode)
-    octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3]
+    octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3]
 toByteString :: StringObject -> ByteString
 toByteString (Hexadecimal h) = b16ToBytes h
 toByteString (Literal s) = unescape s
 --
 -- Name
@ -144,15 +137,15 @@ newtype Name = Name String deriving (Eq, Ord, Show)
 instance Output Name where
  output (Name n) = Output.string ('/':n)
-name :: MonadParser m => m Name
+name :: Parser u Name
-name = Name . Char8.unpack <$> (char '/' *> takeAll regular)
+name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
 --
 -- Array
 --
-array :: MonadParser m => m [DirectObject]
+array :: Parser u [DirectObject]
 array =
-  char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']'
+  char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
 --
 -- Dictionary
@ -160,16 +153,16 @@ array =
 type Dictionary = Map Name DirectObject
 instance Output Dictionary where
-  output aDictionary =
+  output dict =
    "<<" `mappend` keyValues `mappend` ">>"
    where
-      keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary
+      keyValues = join " " $ outputKeyVal <$> Map.toList dict
      outputKeyVal :: (Name, DirectObject) -> OBuilder
-      outputKeyVal (key, val) = mconcat [output key, " ", output val]
+      outputKeyVal (key, val) = Output.concat [output key, " ", output val]
-dictionary :: MonadParser m => m Dictionary
+dictionary :: Parser u Dictionary
 dictionary =
-  string "<<" *> blank *> keyValPairs <* string ">>"
+  string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
  where
    keyVal = (,) <$> name <* blank <*> directObject
    keyValPairs = Map.fromList <$> keyVal `sepBy` blank <* blank
@ -177,8 +170,8 @@ dictionary =
 --
 -- Null
 --
-nullObject :: MonadParser m => m ()
+nullObject :: Parser u ()
-nullObject = string "null" *> return ()
+nullObject = string "null" *> return () <?> "null object"
 --
 -- Reference
@ -188,9 +181,9 @@ data IndirectObjCoordinates = IndirectObjCoordinates {
    , versionNumber :: Int
  } deriving Show
-reference :: MonadParser m => m IndirectObjCoordinates
+reference :: Parser u IndirectObjCoordinates
 reference = IndirectObjCoordinates
-  <$> (fmap ObjectId integer) <*> integer <* char 'R'
+  <$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
 --
 -- DirectObject
@ -211,25 +204,23 @@ instance Output DirectObject where
  output (NumberObject n) = output n
  output (StringObject s) = output s
  output (NameObject n) = output n
-  output (Array a) = mconcat ["[", join " " a, "]"]
+  output (Array a) = Output.concat ["[", join " " a, "]"]
  output (Dictionary d) = output d
  output (Null) = "null"
  output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
    Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
-directObject :: MonadParser m => m DirectObject
+directObject :: Parser u DirectObject
-directObject = peek >>= dispatch
+directObject =
-  where
+    Boolean <$> boolean
-    dispatch 't' = Boolean <$> boolean
+  <|> Reference <$> reference {- defined before Number because Number is a prefix of it -}
-    dispatch 'f' = Boolean <$> boolean
+  <|> NumberObject <$> number
-    dispatch '(' = StringObject <$> stringObject
+  <|> StringObject <$> stringObject
-    dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary
+  <|> NameObject <$> name
-    dispatch '/' = NameObject <$> name
+  <|> Array <$> array
-    dispatch '[' = Array <$> array
+  <|> Dictionary <$> dictionary
-    dispatch 'n' = nullObject *> return Null
+  <|> const Null <$> nullObject
-    dispatch _ =
+  <?> "direct object"
        Reference <$> reference {- defined before Number because Number is a prefix of it -}
      <|> NumberObject <$> number
 --
 -- Object
@ -244,7 +235,7 @@ data Object =
 instance Output Object where
  output (Direct d) = output d
-  output (Stream {header, streamContent}) = mconcat [
+  output (Stream {header, streamContent}) = Output.concat [
        output header, newLine
      , Output.line "stream"
      , byteString streamContent
@ -259,7 +250,7 @@ data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
 outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
 outputOccurrence _ (Comment c) = Output.line c
 outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
-  saveOffset (Object objectId) >> mconcat [
+  saveOffset (Object objectId) >> Output.concat [
        Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
      , output (objects ! objectId), newLine
      , Output.line "endobj"
@ -289,7 +280,7 @@ instance Output XRefEntry where
 entry :: Parser u XRefEntry
 entry = do
  (big, small) <- (,) <$> integer <*> integer
-  (inUse big small <|> free big small) <* blank
+  (inUse big small <|> free big small <?> "XRef entry") <* blank
  where
    inUse :: Int -> Int -> Parser u XRefEntry
    inUse big generation =
@ -313,7 +304,7 @@ instance Output XRefSubSection where
 xRefSubSection :: Parser u XRefSubSection
 xRefSubSection = do
-  (firstId, entriesNumber) <- (,) <$> integer <*> integer
+  (firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
  entries <- count entriesNumber entry
  return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
@ -392,7 +383,7 @@ outputBody (occurrences, objects) =
 instance Output Content where
  output (Content {occurrences, objects, docStructure}) =
    fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
-    >>= \(body, (xref, startXRef)) -> mconcat [
+    >>= \(body, (xref, startXRef)) -> Output.concat [
          body
        , Output.line "xref"
        , output xref
--- a/src/PDF/Object/Navigation.hs
+++ b/src/PDF/Object/Navigation.hs
@ -1,82 +0,0 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.Object.Navigation (
      (//)
    , dictionaryById
    , getDictionary
    , getField
    , follow
    , objectById
    , openStream
    , origin
  ) where
 import Codec.Compression.Zlib (decompress)
 import Control.Monad.Reader (MonadReader(..))
 import Control.Monad.Fail (MonadFail(..))
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Lazy as Lazy (fromStrict, toStrict)
 import Data.Map ((!))
 import qualified Data.Map as Map (lookup)
 import PDF.Object (
      Content(..), Dictionary, DirectObject(..), IndirectObjCoordinates(..)
    , Name(..), Object(..), Structure(..)
  )
 import PDF.Output (ObjectId)
 import Prelude hiding (fail)
 import Text.Printf (printf)
 type PDFContent m = (MonadReader Content m, MonadFail m)
 castDictionary :: MonadFail m => Object -> m Dictionary
 castDictionary (Direct (Dictionary aDict)) = return aDict
 castDictionary obj = expected "dictionary : " obj
 castObjectId :: MonadFail m => DirectObject -> m ObjectId
 castObjectId (Reference (IndirectObjCoordinates {objectId})) = return objectId
 castObjectId directObject = expected "reference" directObject
 dictionaryById :: PDFContent m => ObjectId -> m Dictionary
 dictionaryById objectId = objectById objectId >>= castDictionary
 expected :: (MonadFail m, Show a) => String -> a -> m b
 expected name = fail . printf "Not a %s: %s" name . show
 getField :: MonadFail m => String -> Dictionary -> m DirectObject
 getField key aDictionary =
  maybe (fail errorMessage) return (Map.lookup (Name key) aDictionary)
  where
    errorMessage =
      printf "Key %s not found in dictionary %s" key (show aDictionary)
 follow :: PDFContent m => DirectObject -> m Object
 follow directObject = castObjectId directObject >>= objectById
 objectById :: PDFContent m => ObjectId -> m Object
 objectById objectId = do
  content <- ask
  return (objects content ! objectId)
 getDictionary :: PDFContent m => DirectObject -> m Dictionary
 getDictionary (Dictionary aDictionary) = return aDictionary
 getDictionary (Reference (IndirectObjCoordinates {objectId})) =
  objectById objectId >>= castDictionary
 getDictionary aDirectObject =
  expected "resource (dictionary or reference)" aDirectObject
 (//) :: PDFContent m => Dictionary -> [String] -> m DirectObject
 (//) aDict [] = return $ Dictionary aDict
 (//) aDict [key] = getField key aDict
 (//) aDict (key:keys) = getField key aDict >>= getDictionary >>= (// keys)
 origin :: PDFContent m => m Dictionary
 origin = trailer . docStructure <$> ask
 openStream :: MonadFail m => Object -> m ByteString
 openStream (Stream {header, streamContent}) = return $
  case Map.lookup (Name "Filter") header of
    Just (NameObject (Name "FlateDecode")) ->
      Lazy.toStrict . decompress . Lazy.fromStrict $ streamContent
    _ -> streamContent
 openStream obj = expected "stream" obj
--- a/src/PDF/Output.hs
+++ b/src/PDF/Output.hs
@ -12,6 +12,7 @@ module PDF.Output (
    , Resource(..)
    , byteString
    , char
    , concat
    , getOffsets
    , join
    , line
@ -31,6 +32,7 @@ import qualified Data.Map as Map (singleton)
 import Data.String (IsString(..))
 import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
 import qualified PDF.EOL as EOL (Style(..))
 import Prelude hiding (concat)
 newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
 newtype Offset = Offset {getOffset :: Int} deriving (Show)
@ -64,6 +66,9 @@ getOffsets (OContext builder) =
 append :: OBuilder -> OBuilder -> OBuilder
 append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
 concat :: [OBuilder] -> OBuilder
 concat = foldl mappend mempty
 #if MIN_VERSION_base(4,11,0)
 instance Semigroup OBuilder where
  (<>) = append
@ -89,7 +94,7 @@ instance Output Bool where
  output True = string "true"
 instance Output a => Output [a] where
-  output = mconcat . fmap output
+  output = concat . fmap output
 join :: Output a => String -> [a] -> OBuilder
 join _ [] = mempty
@ -111,7 +116,7 @@ char :: Char -> OBuilder
 char c = lift char8 c <* offset (+1)
 string :: String -> OBuilder
-string s = lift string8 s <* offset (+ length s)
+string s = lift string8 s <* offset (+ toEnum (length s))
 line :: String -> OBuilder
 line l = string l `mappend` newLine
--- a/src/PDF/Pages.hs
+++ b/src/PDF/Pages.hs
@ -1,133 +0,0 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 module PDF.Pages (
      Page(..)
    , get
    , getAll
  ) where
 import Control.Applicative (Alternative(..), (<|>))
 import Control.Monad (MonadPlus(..), foldM)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.RWS (RWST(..), evalRWST, modify)
 import qualified Control.Monad.RWS as RWS (get)
 import Data.Map (Map)
 import qualified Data.Map as Map (empty, fromList, insert, lookup, toList)
 import Data.Text (Text)
 import PDF.CMap (cMap)
 import qualified PDF.Content as Content (parse)
 import PDF.Content.Text (renderText)
 import PDF.Encoding (encoding)
 import PDF.Font (Font, FontSet)
 import PDF.Object (
      Content(..), Dictionary, DirectObject(..), IndirectObjCoordinates(..)
    , Name(..)
  ,)
 import PDF.Object.Navigation (
    (//), dictionaryById, getDictionary, getField, follow, openStream, origin
  )
 import PDF.Output (ObjectId(..))
 import Prelude hiding (fail)
 import Text.Printf (printf)
 type CachedFonts = Map ObjectId Font
 newtype Error a = Error {
    runError :: Either String a
  } deriving (Alternative, Functor, Applicative, Monad, MonadPlus)
 type T = RWST Content () CachedFonts Error
 data Page = Page {
      contents :: [Text]
    , source :: ObjectId
  }
 instance MonadFail Error where
  fail = Error . Left
 getFontDictionary :: Dictionary -> T Dictionary
 getFontDictionary pageDict =
    ((pageDict // ["Resources", "Font"]) >>= getDictionary)
  <|> return Map.empty
 cache :: (ObjectId -> T Font) -> ObjectId -> T Font
 cache loader objectId =
  (maybe load return . Map.lookup objectId) =<< RWS.get
  where
    load = do
      value <- loader objectId
      modify $ Map.insert objectId value
      return value
 loadFont :: ObjectId -> T Font
 loadFont objectId = dictionaryById objectId >>= tryMappings
  where
    tryMappings dictionary =
        loadCMap dictionary
      <|> (getField "Encoding" dictionary >>= loadEncoding)
      <|> (fail $ unknownFormat (show objectId) (show dictionary))
    unknownFormat = printf "Unknown font format for object #%s : %s"
    loadCMap :: Dictionary -> T Font
    loadCMap dictionary =
      getField "ToUnicode" dictionary >>= follow >>= openStream >>= cMap
    loadEncoding :: DirectObject -> T Font
    loadEncoding (NameObject (Name name)) = encoding name
    loadEncoding directObject =
      fail $ printf "Encoding must be a name, not that : %s" $ show directObject
 loadFonts :: Dictionary -> T FontSet
 loadFonts = foldM addFont Map.empty . Map.toList
  where
    addFont :: FontSet -> (Name, DirectObject) -> T FontSet
    addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
      flip (Map.insert name) output <$> cache loadFont objectId
    addFont output _ = return output
 several :: DirectObject -> [DirectObject]
 several (Array l) =  l
 several directObject = [directObject]
 pagesList :: T [ObjectId]
 pagesList = do
  pages <- (origin :: T Dictionary) >>= (// ["Root", "Pages"]) >>= getDictionary
  case Map.lookup (Name "Kids") pages of
    Just (Array kids) -> return $ getReferences kids
    _ -> return []
 getReferences :: [DirectObject] -> [ObjectId]
 getReferences objects = do
  object <- objects
  case object of
    Reference (IndirectObjCoordinates {objectId}) -> [objectId]
    _ -> []
 extractText :: Dictionary -> T [Text]
 extractText pageDict = do
  fonts <- loadFonts =<< getFontDictionary pageDict
  objects <- several <$> getField "Contents" pageDict
  concat <$> mapM (loadContent fonts) objects
  where
    loadContent :: FontSet -> DirectObject -> T [Text]
    loadContent fonts directObject =
        follow directObject
      >>= openStream
      >>= (either fail return . Content.parse)
      >>= renderText fonts
 loadPage :: ObjectId -> T Page
 loadPage source = do
  contents <- extractText =<< dictionaryById source
  return $ Page {contents, source}
 getAll :: Content -> Either String (Map Int Page)
 getAll content = runError $ fst <$> evalRWST getPages content Map.empty
  where
    numbered = Map.fromList . zip [1..]
    getPages = numbered <$> (mapM loadPage =<< pagesList)
 get :: Content -> Int -> Either String Page
 get content pageNumber
  | pageNumber < 1 = Left "Pages start at 1"
  | otherwise = runError $ fst <$> evalRWST getPage content Map.empty
  where
    firstPage [] = fail "Page is out of bounds"
    firstPage (p:_) = loadPage p
    getPage = drop (pageNumber - 1) <$> pagesList >>= firstPage
--- a/src/PDF/Parser.hs
+++ b/src/PDF/Parser.hs
@ -1,70 +1,56 @@
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE UndecidableInstances #-}
 module PDF.Parser (
-      MonadParser(..)
+      Parser
-    , Parser
+    , (<?>)
    , block
    , char
    , choice
    , count
    , decNumber
    , hexNumber
    , many
    , octDigit
    , on
    , oneOf
    , option
    , runParser
-    , evalParser
+    , sepBy
    , string
    , takeAll
    , takeAll1
  ) where
-import Control.Monad (MonadPlus)
+import Control.Applicative ((<|>), empty)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.State (StateT(..), evalStateT)
-import Control.Monad.Trans (MonadTrans(..))
+import Control.Monad.Trans (lift)
 import qualified Data.Attoparsec.ByteString.Char8 as Atto (
-      Parser, char, endOfInput, parseOnly, peekChar', satisfy, string, take
+    Parser, char, parseOnly, satisfy, string, take, takeWhile, takeWhile1
    , takeWhile, takeWhile1
  )
 import Data.ByteString (ByteString)
 import Data.ByteString.Char8.Util (B16Int(..))
 import Data.Char (toLower)
 import Data.Set (Set)
 import qualified Data.Set as Set (fromList, member, unions)
 import Prelude hiding (fail)
 type MonadDeps m = (MonadFail m, MonadPlus m)
 class MonadDeps m => MonadParser m where
  block :: Int -> m ByteString
  char :: Char -> m Char
  decNumber :: m ByteString
  endOfInput :: m ()
  hexNumber :: m B16Int
  oneOf :: String -> m Char
  peek :: m Char
  string :: ByteString -> m ByteString
  takeAll :: (Char -> Bool) -> m ByteString
  takeAll1 :: (Char -> Bool) -> m ByteString
 instance MonadParser Atto.Parser where
  block = Atto.take
  char = Atto.char
  endOfInput = Atto.endOfInput
  decNumber = Atto.takeWhile1 (`Set.member` digits)
  hexNumber = B16Int <$> Atto.takeWhile1 (`Set.member` hexDigits)
  oneOf charSet = Atto.satisfy (`elem` charSet)
  peek = Atto.peekChar'
  string s = Atto.string s
  takeAll = Atto.takeWhile
  takeAll1 = Atto.takeWhile1
 instance (MonadParser m, MonadTrans t, MonadDeps (t m)) => MonadParser (t m) where
  block = lift . block
  char = lift . char
  endOfInput = lift $ endOfInput
  decNumber = lift $ decNumber
  hexNumber = lift $ hexNumber
  oneOf = lift . oneOf
  peek = lift $ peek
  string = lift . string
  takeAll = lift . takeAll
  takeAll1 = lift . takeAll1
 type Parser s = StateT s Atto.Parser
 (<?>) :: Parser s a -> String -> Parser s a
 (<?>) parser debugMessage = parser <|> fail debugMessage
 block :: Int -> Parser s ByteString
 block = lift . Atto.take
 char :: Char -> Parser s Char
 char = lift . Atto.char
 choice :: [Parser s a] -> Parser s a
 choice = foldr (<|>) empty
 count :: Int -> Parser s a -> Parser s [a]
 count 0 _ = return []
 count n p = (:) <$> p <*> count (n-1) p
 decNumber :: Parser s ByteString
 decNumber = lift $ Atto.takeWhile1 (`Set.member` digits)
 digits :: Set Char
 digits = Set.fromList ['0'..'9']
@ -73,7 +59,13 @@ hexDigits = Set.unions [digits, Set.fromList af, Set.fromList $ toLower <$> af]
  where
    af = ['A'..'F']
-octDigit :: MonadParser m => m Char
+hexNumber :: Parser s ByteString
 hexNumber = lift $ Atto.takeWhile1 (`Set.member` hexDigits)
 many :: Parser s a -> Parser s [a]
 many parser = (:) <$> parser <*> many parser <|> return []
 octDigit :: Parser s Char
 octDigit = oneOf ['0'..'7']
 on :: Parser s a -> ByteString -> Parser s (Either String a)
@ -82,8 +74,25 @@ on (StateT parserF) input = StateT $ \state ->
    Left errorMsg -> return (Left errorMsg, state)
    Right (result, newState) -> return (Right result, newState)
-runParser :: Parser s a -> s -> ByteString -> Either String (a, s)
+oneOf :: String -> Parser s Char
-runParser parser initState = Atto.parseOnly (runStateT parser initState)
+oneOf charSet = lift $ Atto.satisfy (`elem` charSet)
-evalParser :: Parser s a -> s -> ByteString -> Either String a
+option :: a -> Parser s a -> Parser s a
-evalParser parser initState = Atto.parseOnly (evalStateT parser initState)
+option defaultValue p = p <|> pure defaultValue
 runParser :: Parser s a -> s -> ByteString -> Either String a
 runParser parser initState =
  Atto.parseOnly (evalStateT parser initState)
 sepBy :: Parser s a -> Parser s b -> Parser s [a]
 sepBy parser separator =
  option [] $ (:) <$> parser <*> many (separator *> parser)
 string :: ByteString -> Parser s ByteString
 string = lift . Atto.string
 takeAll :: (Char -> Bool) -> Parser s ByteString
 takeAll = lift . Atto.takeWhile
 takeAll1 :: (Char -> Bool) -> Parser s ByteString
 takeAll1 = lift . Atto.takeWhile1
--- a/src/PDF/Text.hs
+++ b/src/PDF/Text.hs
@ -1,319 +0,0 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.Text {-(
    pageContents
  )-} where
 import Control.Applicative ((<|>))
 import Control.Monad (foldM)
 import Control.Monad.Fail (MonadFail)
 import Control.Monad.Reader (ReaderT, runReaderT, asks)
 import Control.Monad.State (MonadState, evalStateT, get, modify, put)
 import Data.Attoparsec.ByteString.Char8 (choice, sepBy)
 import Data.ByteString (ByteString)
 import Data.ByteString.Char8 (pack, unpack)
 import Data.Char (toLower)
 import Data.Map ((!), (!?), Map)
 import qualified Data.Map as Map (fromList)
 import Data.Text (Text)
 import PDF.Font (Font, FontSet, emptyFont)
 import PDF.Object (
      DirectObject(..), StringObject(..)
    , array, blank, name, regular, stringObject, toByteString
  )
 import PDF.Parser (MonadParser(..), (<?>), Parser, evalParser)
 data StateOperator =
  C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state
  deriving (Bounded, Enum, Show)
 data PathOperator =
    M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction
  | S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting
  | W | Wstar -- clipping path
  deriving (Bounded, Enum, Show)
 data ColorOperator =
    CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_
  deriving (Bounded, Enum, Show)
 data TextOperator =
    Td | TD | Tm | Tstar -- text positioning
  | TJ | Tj | Quote | DQuote -- text showing
  | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
  deriving (Bounded, Enum, Show)
 data Argument = Raw ByteString | Typed DirectObject deriving Show
 type Call a = (a, [Argument])
 type Operator a = (Bounded a, Enum a, Show a)
 code :: Operator a => a -> ByteString
 code = pack . expand . show
  where
    expand "" = ""
    expand (c:'_':s) = toLower c : expand s
    expand ('s':'t':'a':'r':s) = '*' : expand s
    expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s
    expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s
    expand (c:s) = c : expand s
 {-
 instance Show StateOperator where
  show Cm = "cm"
  show W_ = "w"
  show J = "J"
  show J_ = "j"
  show M = "M"
  show D = "d"
  show Ri = "ri"
  show I = "i"
  show Gs = "gs"
 instance Show PathOperator where
  show M_ = "m"
  show L_ = "l"
  show C_ = "c"
  show V_ = "v"
  show Y_ = "y"
  show H_
      ("m", (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False))
    , ("l", (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False))
    , ("c", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
    , ("v", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
    , ("y", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
    , ("h", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("re", (L_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False))
    , ("S", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("s", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("f", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("F", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("F*", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("B", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("B*", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("b", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("b*", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("n", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("W", (L_, \l -> case l of [] -> True ; _ -> False))
    , ("W*", (L_, \l -> case l of [] -> True ; _ -> False))
 instance Show ColorOperator where
 instance Show TextOperator where
  show Td = "Td"
  show TD = "TD"
  show Tm = "Tm"
  show Tstar = "T*"
  show TJ = "TJ"
  show Tj = "Tj"
  show Quote = "'"
  show DQuote = "\""
  show Tc = "Tc"
  show Tw = "Tw"
  show Tz = "Tz"
  show TL = "TL"
  show Tf = "Tf"
  show Tr = "Tr"
  show Ts = "Ts"
 -}
 stateOperator :: OperatorTable StateOperator
 stateOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
      (C_m, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (W_, \l -> case l of [Raw _] -> True ; _ -> False)
    , (J, \l -> case l of [Raw _] -> True ; _ -> False)
    , (J_, \l -> case l of [Raw _] -> True ; _ -> False)
    , (M, \l -> case l of [Raw _] -> True ; _ -> False)
    , (D_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
    , (R_i, \l -> case l of [Raw _] -> True ; _ -> False)
    , (I_, \l -> case l of [Raw _] -> True ; _ -> False)
    , (G_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)
  ]
 pathOperator :: OperatorTable PathOperator
 pathOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
      (M_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
    , (L_, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
    , (C_, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (V_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (Y_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (H_, \l -> case l of [] -> True ; _ -> False)
    , (R_e, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (S, \l -> case l of [] -> True ; _ -> False)
    , (S_, \l -> case l of [] -> True ; _ -> False)
    , (F_, \l -> case l of [] -> True ; _ -> False)
    , (F, \l -> case l of [] -> True ; _ -> False)
    , (Fstar, \l -> case l of [] -> True ; _ -> False)
    , (B, \l -> case l of [] -> True ; _ -> False)
    , (Bstar, \l -> case l of [] -> True ; _ -> False)
    , (B_, \l -> case l of [] -> True ; _ -> False)
    , (B_star, \l -> case l of [] -> True ; _ -> False)
    , (N_, \l -> case l of [] -> True ; _ -> False)
    , (W, \l -> case l of [] -> True ; _ -> False)
    , (Wstar, \l -> case l of [] -> True ; _ -> False)
  ]
 colorOperator :: OperatorTable ColorOperator
 colorOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
      (CS, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)
    , (C_s, \l -> case l of [Typed (NameObject _)] -> True ; _ -> False)
    , (SC, \_ -> True)
    , (SCN, \_ -> True)
    , (S_c, \_ -> True)
    , (S_cn, \_ -> True)
    , (G, \l -> case l of [Raw _] -> True ; _ -> False)
    , (G_, \l -> case l of [Raw _] -> True ; _ -> False)
    , (RG, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (R_g, \l -> case l of [Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (K, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (K_, \l -> case l of [Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
  ]
 {-
 stateOperator (Cm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
 stateOperator (W, [Raw _]) = True
 stateOperator (J, [Raw _]) = True
 stateOperator (J_, [Raw _]) = True
 stateOperator (M, [Raw _]) = True
 stateOperator (D, [Raw _, Raw _]) = True
 stateOperator (Ri, [Raw _]) = True
 stateOperator (I, [Raw _]) = True
 stateOperator (Gs, [Typed (NameObject _)]) = True
 stateOperator _ = False
 -}
 textOperator :: OperatorTable TextOperator
 textOperator = Map.fromList $ (\(op, checker) -> (code op, (op, checker))) <$> [
      (Td, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
    , (TD, \l -> case l of [Raw _, Raw _] -> True ; _ -> False)
    , (Tm, \l -> case l of [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _] -> True ; _ -> False)
    , (Tstar, \l -> case l of [] -> True ; _ -> False)
    , (TJ, \l -> case l of [Typed (Array _)] -> True ; _ -> False)
    , (Tj, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False)
    , (Quote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False)
    , (DQuote, \l -> case l of [Typed (StringObject _)] -> True ; _ -> False)
    , (Tc, \l -> case l of [Raw _] -> True ; _ -> False)
    , (Tw, \l -> case l of [Raw _] -> True ; _ -> False)
    , (Tz, \l -> case l of [Raw _] -> True ; _ -> False)
    , (TL, \l -> case l of [Raw _] -> True ; _ -> False)
    , (Tf, \l -> case l of [Typed (NameObject _), Raw _] -> True ; _ -> False)
    , (Tr, \l -> case l of [Raw _] -> True ; _ -> False)
    , (Ts, \l -> case l of [Raw _] -> True ; _ -> False)
  ]
  {-
 textOperator (Td, [Raw _, Raw _]) = True
 textOperator (TD, [Raw _, Raw _]) = True
 textOperator (Tm, [Raw _, Raw _, Raw _, Raw _, Raw _, Raw _]) = True
 textOperator (Tstar, []) = True
 textOperator (TJ, [Typed (Array _)]) = True
 textOperator (Tj, [Typed (StringObject _)]) = True
 textOperator (Quote, [Typed (StringObject _)]) = True
 textOperator (DQuote, [Typed (StringObject _)]) = True
 textOperator (Tc, [Raw _]) = True
 textOperator (Tw, [Raw _]) = True
 textOperator (Tz, [Raw _]) = True
 textOperator (TL, [Raw _]) = True
 textOperator (Tf, [Typed (NameObject _), Raw _]) = True
 textOperator (Tr, [Raw _]) = True
 textOperator (Ts, [Raw _]) = True
 textOperator _ = False
 -}
 type ArgumentStackParser m = (MonadState [Argument] m, MonadParser m)
 --type Operator a = (Bounded a, Enum a, Show a)
 type OperatorTable a = Map ByteString (TypeChecker a)
 type TypeChecker a = (a, [Argument] -> Bool)
 parseShowable :: (Show a, MonadParser m) => a -> m a
 parseShowable textOp = string (pack $ show textOp) *> return textOp
 callChunk :: MonadParser m => OperatorTable a -> m (Either (TypeChecker a) Argument)
 callChunk table =
    (Right <$> choice [stringArg, nameArg, arrayArg])
  <|> operatorOrRawArg
  <?> "call chunk"
  where
    operatorOrRawArg = do
      chunk <- takeAll1 regular <* blank
      case table !? chunk of
        Nothing -> return . Right $ Raw chunk
        Just typeChecker -> return $ Left typeChecker
 stackParser :: (ArgumentStackParser m, Operator a) => OperatorTable a -> m (Call a)
 stackParser table = either popCall push =<< (callChunk table)
  where
    push arg = modify (arg:) >> stackParser table
    popCall (operator, predicate) = do
      arguments <- reverse <$> get
      let call = (operator, arguments)
      if predicate arguments then return call else fail (unpack $ code operator)
 a :: (Operator a, MonadParser m) => OperatorTable a -> m (Call a)
 a table = evalStateT (stackParser table) []
 argument :: MonadParser m => m Argument
 argument = Raw <$> takeAll1 regular <* blank
 arrayArg :: MonadParser m => m Argument
 arrayArg = Typed . Array <$> array <* blank
 nameArg :: MonadParser m => m Argument
 nameArg = Typed . NameObject <$> name <* blank
 stringArg :: MonadParser m => m Argument
 stringArg = Typed . StringObject <$> stringObject <* blank
 type ParserWithFont = ReaderT FontSet (Parser Font)
 pageContents :: FontSet -> ByteString -> Either String [Text]
 pageContents fontSet input =
  evalParser (runReaderT (page) fontSet) emptyFont input
 several :: MonadParser m => m [a] -> m [a]
 several p = concat <$> (p `sepBy` blank)
 page :: ParserWithFont [Text]
 page = several (graphicState <|> text) <* blank <* endOfInput <?> "Text page contents"
 graphicState :: ParserWithFont [Text]
 graphicState =
  string "q" *> blank *> insideQ <* blank <* string "Q" <?> "Graphic state"
  where
    insideQ = several (command <|> graphicState <|> text)
    ignore x = a x *> return []
    command =
      ignore stateOperator <|> ignore pathOperator <|> ignore colorOperator
 text :: ParserWithFont [Text]
 text =
  string "BT" *> blank *> commands <* blank <* string "ET" <?> "Text operators"
  where
    commands = several (a textOperator >>= runOperator)
 runOperator :: Call TextOperator -> ParserWithFont [Text]
 runOperator (Tf, [Typed (NameObject fontName), _]) =
  asks (! fontName) >>= put >> return []
 runOperator (Tstar, []) = return ["\n"]
 runOperator (TJ, [Typed (Array arrayObject)]) =
  replicate 1 <$> foldM appendText "" arrayObject
  where
    appendText bs (StringObject outputString) =
      mappend bs <$> decodeString outputString
    appendText bs _ = return bs
 runOperator (Tj, [Typed (StringObject outputString)]) =
  replicate 1 <$> decodeString outputString
 runOperator (Quote, [Typed (StringObject outputString)]) =
  (\bs -> ["\n", bs]) <$> decodeString outputString
 runOperator (DQuote, [Typed (StringObject outputString)]) =
  (\bs -> ["\n", bs]) <$> decodeString outputString
 runOperator _ = return []
 decodeString :: (MonadFail m, MonadState Font m) => StringObject -> m Text
 decodeString input = do
  font <- get
  either fail return . font $ toByteString input