Expose a toList function similar to Data.Map in module Id for IdMap and allow mapping over keys and values in a monadic construct like mapM

Make a couple improvements in performance + add an example script to extract pages from a PDF
Once again something that should never have been committed
2020-06-03 15:15:07 +02:00 · 2020-05-28 18:54:15 +02:00 · 2020-03-20 09:34:53 +01:00 · 2020-03-19 12:53:35 +01:00 · 2020-03-19 10:27:29 +01:00 · 2020-03-19 10:27:29 +01:00
34 changed files with 2224 additions and 334 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 dist*/
 .ghc.environment.*
 cabal.project.local
--- a/Hufflepdf.cabal
+++ b/Hufflepdf.cabal
@ -16,20 +16,40 @@ extra-source-files:  ChangeLog.md
 cabal-version:       >=1.10
 library
-  exposed-modules:     PDF
+  exposed-modules:     Data.OrderedMap
                     , Data.Id
                     , PDF
                     , PDF.Box
                     , PDF.CMap
                     , PDF.Content
                     , PDF.Content.Operator
                     , PDF.Content.Operator.Color
                     , PDF.Content.Operator.Common
                     , PDF.Content.Operator.GraphicState
                     , PDF.Content.Operator.Path
                     , PDF.Content.Operator.Text
                     , PDF.Content.Text
                     , PDF.EOL
                     , PDF.Layer
                     , PDF.Object
                     , PDF.Object.Navigation
                     , PDF.Output
-                     , PDF.Update
+                     , PDF.Parser
                     , PDF.Pages
  other-modules:       Data.ByteString.Char8.Util
                     , PDF.Body
-                     , PDF.Parser
+                     , PDF.Encoding
                     , PDF.Encoding.MacRoman
                     , PDF.Font
  -- other-extensions:    
  build-depends:       attoparsec
                     , base >=4.9 && <4.13
                     , bytestring
                     , containers
                     , mtl
                     , text
                     , utf8-string
                     , zlib
  hs-source-dirs:      src
  ghc-options:         -Wall
  default-language:    Haskell2010
@ -39,7 +59,7 @@ executable             equivalent
  build-depends:       base
                     , bytestring
                     , Hufflepdf
-  ghc-options:         -Wall
+  ghc-options:         -Wall -rtsopts
  default-language:    Haskell2010
 executable             getObj
@ -48,6 +68,63 @@ executable             getObj
                     , bytestring
                     , containers
                     , Hufflepdf
-                     , zlib
+                     , mtl
-  ghc-options:         -Wall
+  ghc-options:         -Wall -rtsopts
  default-language:    Haskell2010
 executable             getText
  main-is:             examples/getText.hs
  build-depends:       base
                     , bytestring
                     , containers
                     , ExceptIOH
                     , Hufflepdf
                     , mtl
                     , text
  ghc-options:         -Wall -rtsopts
  default-language:    Haskell2010
 executable             pdfCut
  main-is:             examples/pdfCut.hs
  build-depends:       base
                     , bytestring
                     , containers
                     , ExceptIOH
                     , filepath
                     , Hufflepdf
                     , mtl
                     , text
  ghc-options:         -Wall -rtsopts
  default-language:    Haskell2010
 executable             fixMermoz
  main-is:             examples/fixMermoz.hs
  build-depends:       base
                     , bytestring
                     , ExceptIOH
                     , Hufflepdf
                     , mtl
  ghc-options:         -Wall -rtsopts
  default-language:    Haskell2010
 Test-Suite unitTests
  type:               exitcode-stdio-1.0
  main-is:            Main.hs
  other-modules:      Object
                    , Data.ByteString.Char8.Util
                    , Data.Id
                    , PDF.EOL
                    , PDF.Parser
                    , PDF.Object
                    , PDF.Output
  hs-source-dirs:     test
                    , src
  build-depends:      attoparsec
                    , base >=4.9 && <4.13
                    , bytestring
                    , containers
                    , Hufflepdf
                    , HUnit
                    , mtl
                    , text
  default-language:    Haskell2010
--- a/examples/getContent.hs
+++ b/examples/getContent.hs
@ -0,0 +1,11 @@
 main :: IO ()
 main = do
  hSetBuffering stdout LineBuffering
  args <- getArgs
  case args of
    --[inputFile] -> wholeDoc inputFile
    [inputFile, pageNumber] -> do
      content <- fmap (unify . updates) . parseDocument <$> BS.readFile inputFile
      get content (read pageNumber)
      singlePage inputFile (read pageNumber)
    _ -> die "Syntax: getContent INPUT_FILE PAGE_NUMBER"
--- a/examples/getObj.hs
+++ b/examples/getObj.hs
@ -1,53 +1,52 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleContexts #-}
-import Codec.Compression.Zlib (decompress)
+import Control.Monad.Reader (ReaderT, runReaderT)
 import Data.ByteString.Lazy.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (readFile)
-import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
+import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
-import Data.Map ((!?))
+import Data.Id (Id(..))
 import qualified Data.Map as Map (keys, lookup)
 import PDF (Document(..), parseDocument)
-import qualified PDF.EOL as EOL (Style)
+import PDF.Box (Box(..))
-import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
+import PDF.Layer (Layer(..), unify)
-import PDF.Output (ObjectId(..))
+import PDF.Object (Object(..))
 import PDF.Object.Navigation (
    Nav(..), PPath(..), StreamContent(..), (//), objectById, catalog
  )
 import PDF.Output (Output)
 import qualified PDF.Output as Output (render)
-import PDF.Update (unify)
+import Prelude hiding (fail)
 import System.Environment (getArgs, getProgName)
 import System.Exit (die)
 import Text.Printf (printf)
 import Text.Read (readMaybe)
-display :: EOL.Style -> Object -> ByteString
+decodedStream :: Object -> Object
-display eolStyle d@(Direct _) = Output.render eolStyle d
+decodedStream object =
-display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
+  either (const object) id $ r Clear object >>= flip (w Raw) object
  case Map.lookup (Name "Filter") header of
    Just (NameObject (Name "FlateDecode")) -> Stream {
          header
        , streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
      }
    _ -> s
-extractObject :: ObjectId -> Document -> Either String ByteString
+display :: Functor m => Output a => ReaderT Layer m a -> Document -> m ByteString
-extractObject objectId (Document {eolStyle, updates}) =
+display getter (Document {eolStyle, layers}) =
-  case objects content !? objectId of
+  Output.render eolStyle <$> runReaderT getter (unify layers)
-    Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
+
-    Just o -> Right $ display eolStyle o
+parse :: [String] -> IO (FilePath, Document -> Either String ByteString)
 parse [inputFile] = return (inputFile, display $ value <$> catalog)
 parse [inputFile, key] =
  return (inputFile, clear . maybe (byPath key) byId $ readMaybe key)
  where
-    content = unify updates
+    byId = objectById . Id
-
+    byPath path = (catalog // PPath (explode path))
-listObjectIds :: Document -> Either String [String]
+    explode "" = []
-listObjectIds =
+    explode path =
-  Right . prependTitle . toString . Map.keys . objects . unify . updates
+      case break (== '.') path of
-  where
+        (name, "") -> [name]
-    toString = fmap (show . getObjectId)
+        (name, rest) -> name : explode (drop 1 rest)
-    prependTitle = ("ObjectIds defined in this PDF:":)
+    clear = display . fmap (decodedStream . value)
 parse _ =
  die . printf "Syntax: %s inputFile [OBJECT_ID | PATH_TO_OBJ]\n" =<< getProgName
 main :: IO ()
 main = do
  (inputFile, getData) <- parse =<< getArgs
  input <- BS.readFile inputFile
-  either die id $ (parseDocument input >>= getData)
+  either die Lazy.putStrLn $ (parseDocument input >>= getData)
  where
    parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
    parse [inputFile, objectId] = return
      (inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
    parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
--- a/examples/getText.hs
+++ b/examples/getText.hs
@ -0,0 +1,54 @@
 {-# LANGUAGE FlexibleContexts #-}
 import Control.Monad ((>=>))
 import Control.Monad.Except (ExceptT(..))
 import Control.Monad.Except.IOH (handle)
 import Control.Monad.IO.Class (liftIO)
 import qualified Data.ByteString.Char8 as BS (readFile)
 import Data.Id (Id(..), mapWithKey)
 import qualified Data.Map as Map (mapWithKey)
 import Data.OrderedMap (mapi)
 import qualified Data.Text as Text (unpack)
 import PDF (UnifiedLayers(..), parseDocument)
 import PDF.Box (Box(..))
 import PDF.Content.Text (Chunks(..))
 import PDF.Layer (Layer, LayerReader)
 import PDF.Pages (
      Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), withFonts
    , withResources
  )
 import System.Environment (getArgs)
 import System.Exit (die)
 import System.IO (BufferMode(..), hSetBuffering, stdout)
 import Text.Printf (printf)
 displayPage :: Int -> Page -> FontCache (LayerReader (ExceptT String IO)) ()
 displayPage n = withResources (
      r Contents
    >=> sequence_ . mapi (\objectId ->
            r Chunks >=> sequence_ . mapWithKey (display objectId)
          )
  )
  where
    display a b v =
      liftIO . putStrLn $
        printf "p#%d obj#%d instr#%d: %s" n (getId a) (getId b) (Text.unpack v)
 getAll :: Layer -> ExceptT String IO ()
 getAll = withFonts $ r Pages >=> sequence_ . Map.mapWithKey displayPage
 get :: Int -> Layer -> ExceptT String IO ()
 get n = withFonts $ r (P n) >=> displayPage n
 onDoc :: FilePath -> (Layer -> ExceptT String IO ()) -> ExceptT String IO ()
 onDoc inputFile f =
  ExceptT (parseDocument <$> BS.readFile inputFile) >>= r UnifiedLayers >>= f
 main :: IO ()
 main = do
  hSetBuffering stdout LineBuffering
  args <- getArgs
  case args of
    [inputFile] -> onDoc inputFile getAll `handle` die
    [inputFile, pageNumber] ->
      onDoc inputFile (get $ read pageNumber) `handle` die
    _ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"
--- a/examples/pdfCut.hs
+++ b/examples/pdfCut.hs
@ -0,0 +1,41 @@
 import Control.Monad.State (MonadState(..), evalStateT)
 import Control.Monad.Except.IOH (handle)
 import Control.Monad.Trans (lift)
 import qualified Data.ByteString.Char8 as BS (readFile)
 import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
 import Data.Map ((!), fromSet)
 import qualified Data.Set as Set (fromList)
 import PDF (Document, UnifiedLayers(..), parseDocument, render)
 import PDF.Box (at)
 import PDF.Pages (Pages(..))
 import System.Environment (getArgs)
 import System.Exit (die)
 import System.FilePath (replaceBaseName, takeBaseName)
 import Text.Read (readEither)
 parseRange :: String -> Either String (Int, Int)
 parseRange = evalStateT $ do
  from <- lift . readEither =<< state (break (== '-'))
  get >>= makeRange from
  where
    makeRange from "" = return (from, from)
    makeRange from (_:to) = (,) from <$> lift (readEither to)
 cut :: (Int, Int) -> Document -> IO Document
 cut (p1, p2) doc = ((
      at UnifiedLayers .at Pages $ \pages ->
      return $ fromSet (pages!) $ Set.fromList [pMin .. pMax]
    ) doc) `handle` die
  where
    (pMin, pMax) = (min p1 p2, max p1 p2)
 main :: IO ()
 main = do
  [inputPath, inputRange] <- getArgs
  range <- catchLeft $ parseRange inputRange
  doc <- catchLeft =<< parseDocument <$> BS.readFile inputPath
  Lazy.writeFile (outputPath inputPath inputRange) . render =<< cut range doc
  where
    catchLeft = either die return
    outputPath fileName range =
      replaceBaseName fileName (takeBaseName fileName ++ "_p" ++ range)
--- a/src/Data/ByteString/Char8/Util.hs
+++ b/src/Data/ByteString/Char8/Util.hs
@ -1,16 +1,91 @@
 module Data.ByteString.Char8.Util (
-      previous
+      B16Int(..)
    , B256Int(..)
    , b8ToInt
    , b16ToBytes
    , b16ToInt
    , b256ToInt
    , intToB256
    , previous
    , subBS
    , toBytes
    , unescape
    , utf16BEToutf8
  ) where
-import Data.ByteString.Char8 (ByteString)
+import Data.ByteString (ByteString, snoc)
-import qualified Data.ByteString.Char8 as BS (drop, index, take)
+import qualified Data.ByteString as BS (empty, foldl', length, pack, singleton, splitAt)
 import qualified Data.ByteString.Char8 as Char8 (
    cons, drop, index, splitAt, take, uncons, unpack
  )
 import Data.Text (Text)
 import Data.Text.Encoding (decodeUtf16BE)
 import Prelude hiding (length)
 import Text.Printf (printf)
 newtype B8Int = B8Int ByteString deriving (Eq, Show)
 newtype B16Int = B16Int ByteString deriving (Eq, Show)
 newtype B256Int = B256Int ByteString deriving (Eq, Show)
 previous :: Char -> Int -> ByteString -> Int
 previous char position byteString
-  | BS.index byteString position == char = position
+  | Char8.index byteString position == char = position
  | otherwise = previous char (position - 1) byteString
 subBS :: Int -> Int -> ByteString -> ByteString
-subBS offset length = BS.take length . BS.drop offset
+subBS offset length = Char8.take length . Char8.drop offset
 intToB256 :: Int -> B256Int
 intToB256 n
  | n < 0x100 = B256Int . BS.singleton $ toEnum n
  | otherwise =
    let B256Int begining = intToB256 (n `div` 0x100) in
    B256Int $ begining `snoc` (toEnum (n `mod` 0x100))
 b256ToInt :: B256Int -> Int
 b256ToInt (B256Int n) = BS.foldl' (\k w -> 0x100*k + fromEnum w) 0 n
 toBytes :: Int -> Int -> ByteString
 toBytes 0 _ = BS.empty
 toBytes size n =
  (toBytes (size - 1) (n `div` 0x100)) `snoc` (toEnum (n `mod` 0x100))
 b16ToBytes :: B16Int -> ByteString
 b16ToBytes (B16Int n) = BS.pack . fmap b16ToInt $ pairDigits n
  where
    pairDigits s =
      case BS.length s of
        0 -> []
        1 -> [B16Int s]
        _ ->
          let (twoHexDigits, rest) = BS.splitAt 2 s in
          (B16Int $ twoHexDigits):(pairDigits rest)
 fromBase :: (Num a, Read a) => Char -> ByteString -> a
 fromBase b = read . printf "0%c%s" b . Char8.unpack
 b16ToInt :: (Num a, Read a) => B16Int -> a
 b16ToInt (B16Int n) = fromBase 'x' n
 b8ToInt :: (Num a, Read a) => B8Int -> a
 b8ToInt (B8Int n) = fromBase 'o' n
 unescape :: ByteString -> ByteString
 unescape escapedBS =
  case Char8.uncons escapedBS of
    Nothing -> BS.empty
    Just ('\\', s) -> unescapeChar s
    Just (c, s) -> Char8.cons c (unescape s)
  where
    unescapeChar s =
      case Char8.uncons s of
        Nothing -> BS.empty
        Just (c, s')
          | c `elem` "()" -> Char8.cons c (unescape s')
          | c `elem` "nrtbf" -> Char8.cons (read (printf "'\\%c'" c)) (unescape s')
          | c `elem` ['0'..'7'] -> fromOctal (Char8.splitAt 3 s)
          | otherwise -> Char8.cons c (unescape s')
    fromOctal (code, s) = Char8.cons (toEnum $ b8ToInt (B8Int code)) (unescape s)
 utf16BEToutf8 :: ByteString -> Text
 utf16BEToutf8 = decodeUtf16BE
--- a/src/Data/Id.hs
+++ b/src/Data/Id.hs
@ -0,0 +1,95 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE DeriveTraversable #-}
 {-# LANGUAGE FlexibleContexts #-}
 module Data.Id (
      Id(..)
    , IdMap
    , Indexed
    , at
    , delete
    , empty
    , filterWithKey
    , fromList
    , insert
    , keysSet
    , lookup
    , mapWithKey
    , member
    , minViewWithKey
    , register
    , singleton
    , size
    , toList
    , union
  ) where
 import Control.Monad.State.Strict (MonadState, modify, gets)
 import Data.IntMap (IntMap, (!))
 import qualified Data.IntMap as IntMap (
      delete, empty, filterWithKey, fromList, keysSet, insert, lookup, mapWithKey
    , maxViewWithKey, member, minViewWithKey, size, toList, union
  )
 import Data.IntSet (IntSet)
 import Prelude hiding (lookup)
 newtype Id a = Id {
    getId :: Int
  } deriving (Eq, Enum, Ord, Show)
 newtype IdMap a b = IdMap {
    intMap :: IntMap b
  } deriving (Show, Functor, Semigroup, Monoid, Foldable, Traversable)
 type Indexed a = IdMap a a
 at :: IdMap a b -> Id a -> b
 at (IdMap idMap) = (idMap !) . getId
 lookup :: Id a -> IdMap a b -> Maybe b
 lookup (Id a) (IdMap idMap) = IntMap.lookup a idMap
 size :: IdMap a b -> Int
 size = IntMap.size . intMap
 member :: Id a -> IdMap a b -> Bool
 member (Id a) (IdMap idMap) = IntMap.member a idMap
 empty :: IdMap a b
 empty = IdMap {intMap = IntMap.empty}
 singleton :: Id a -> b -> IdMap a b
 singleton a b = fromList [(a, b)]
 insert :: Id a -> b -> IdMap a b -> IdMap a b
 insert (Id a) b (IdMap idMap) = IdMap {intMap = IntMap.insert a b idMap}
 delete :: Id a -> IdMap a b -> IdMap a b
 delete (Id a) (IdMap idMap) = IdMap {intMap = IntMap.delete a idMap}
 minViewWithKey :: IdMap a b -> Maybe ((Id a, b), IdMap a b)
 minViewWithKey = fmap wrap . IntMap.minViewWithKey . intMap
  where
    wrap ((key, b), idMap) = ((Id key, b), IdMap idMap)
 union :: IdMap a b -> IdMap a b -> IdMap a b
 union (IdMap intMap1) (IdMap intMap2) =
  IdMap {intMap = IntMap.union intMap1 intMap2}
 mapWithKey :: (Id a -> b -> c) -> IdMap a b -> IdMap a c
 mapWithKey f (IdMap idMap) = IdMap {intMap = IntMap.mapWithKey (f . Id) idMap}
 filterWithKey :: (Id a -> b -> Bool) -> IdMap a b -> IdMap a b
 filterWithKey f = IdMap . IntMap.filterWithKey (f . Id) . intMap
 fromList :: [(Id a, b)] -> IdMap a b
 fromList = IdMap . IntMap.fromList . fmap (\(key, b) -> (getId key, b))
 toList :: IdMap a b -> [(Id a, b)]
 toList = fmap (\(key, b) -> (Id key, b)) . IntMap.toList . intMap
 keysSet :: IdMap a b -> IntSet
 keysSet = IntMap.keysSet . intMap
 register :: MonadState (IdMap a b) m => b -> m (Id a)
 register b = do
  newId <- gets (Id . maybe 0 ((+1) . fst . fst) . IntMap.maxViewWithKey . intMap)
  modify (insert newId b)
  return newId
--- a/src/Data/OrderedMap.hs
+++ b/src/Data/OrderedMap.hs
@ -0,0 +1,79 @@
 {-# LANGUAGE NamedFieldPuns #-}
 module Data.OrderedMap (
      OrderedMap
    , build
    , elems
    , fromList
    , get
    , keys
    , lookup
    , mapi
    , set
    , toList
  ) where
 import Data.Map (Map, (!), mapWithKey)
 import qualified Data.Map as Map (fromList, insert, lookup, member)
 import Prelude hiding (lookup)
 data OrderedMap k a = OrderedMap {
      assoc :: Map k a
    , keys :: [k]
  } 
 instance (Ord k, Show k, Show a) => Show (OrderedMap k a) where
  show = show . toList
 instance Functor (OrderedMap k) where
  fmap f orderedMap = orderedMap {assoc = fmap f (assoc orderedMap)}
 instance Ord k => Foldable (OrderedMap k) where
  foldMap f (OrderedMap {assoc, keys}) = foldMap f $ (assoc !) <$> keys
 instance Ord k => Traversable (OrderedMap k) where
  sequenceA (OrderedMap {assoc, keys}) =
    (flip OrderedMap keys) <$> sequenceA assoc 
 elems :: Ord k => OrderedMap k a -> [a]
 elems (OrderedMap {assoc, keys}) = (assoc !) <$> keys
 toList :: Ord k => OrderedMap k a -> [(k, a)]
 toList (OrderedMap {assoc, keys}) = (\k -> (k, assoc ! k)) <$> keys
 fromList :: Ord k => [(k, a)] -> OrderedMap k a
 fromList keyValueList = OrderedMap {
      assoc = Map.fromList keyValueList
    , keys = fst <$> keyValueList
  }
 build :: Ord k => (k -> a) -> [k] -> OrderedMap k a
 build f keys = OrderedMap {
      assoc = Map.fromList $ (\k -> (k, f k)) <$> keys
    , keys
  }
 get :: Ord k => k -> OrderedMap k a -> a
 get k = (! k) . assoc
 lookup :: Ord k => k -> OrderedMap k a -> Maybe a
 lookup k = (Map.lookup k) . assoc
 set :: Ord k => k -> a -> OrderedMap k a -> OrderedMap k a
 set k v orderedMap@(OrderedMap {assoc})
  | Map.member k assoc = orderedMap {assoc = Map.insert k v assoc}
  | otherwise = orderedMap
 mapi :: Ord k => (k -> a -> b) -> OrderedMap k a -> OrderedMap k b
 mapi f orderedMap = orderedMap {
    assoc = mapWithKey f $ assoc orderedMap
  }
 {-
 cons :: Ord k => k -> a -> OrderedMap k a -> OrderedMap k a
 cons k a orderedMap =
 snoc :: Ord k => k -> a -> OrderedMap k a -> OrderedMap k a
 alter :: Ord k => (Maybe a -> Maybe a) -> k -> OrderedMap k a -> OrderedMap k a
 alter 
 -}
--- a/src/PDF.hs
+++ b/src/PDF.hs
@ -1,6 +1,11 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 module PDF (
      Document(..)
    , EOLStyle(..)
    , Layers(..)
    , UnifiedLayers(..)
    , parseDocument
    , render
  ) where
@ -13,27 +18,45 @@ import Data.ByteString.Char8.Util (previous, subBS)
 import qualified Data.ByteString.Lazy as Lazy (ByteString)
 import qualified Data.Map as Map (lookup)
 import PDF.Body (populate)
 import PDF.Box (Box(..))
 import qualified PDF.EOL as EOL (Style(..), charset, parser)
 import PDF.Layer (Layer, unify)
 import PDF.Object (
-      Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
+      DirectObject(..), InputStructure(..), Name(..), Number(..)
    , Structure(..)
    , eofMarker, magicNumber, structure
  )
 import qualified PDF.Output as Output (render, line)
 import PDF.Output (Output(..))
-import PDF.Parser (Parser, runParser, string, takeAll)
+import PDF.Parser (Parser, evalParser, string, takeAll)
 import Text.Printf (printf)
 data Document = Document {
      pdfVersion :: String
    , eolStyle :: EOL.Style
-    , updates :: [Content]
+    , layers :: [Layer]
  } deriving Show
 instance Output Document where
-  output (Document {pdfVersion, updates}) =
+  output (Document {pdfVersion, layers}) =
      Output.line (printf "%%PDF-%s" pdfVersion)
-    `mappend` output updates
+    `mappend` output layers
 data EOLStyle = EOLStyle
 data Layers = Layers
 data UnifiedLayers = UnifiedLayers
 instance Monad m => Box m EOLStyle Document EOL.Style where
  r EOLStyle = return . eolStyle
  w EOLStyle eolStyle document = return $ document {eolStyle}
 instance Monad m => Box m UnifiedLayers Document Layer where
  r UnifiedLayers = return . unify . layers
  w UnifiedLayers layer = w Layers [layer]
 instance Monad m => Box m Layers Document [Layer] where
  r Layers = return . layers
  w Layers layers document = return $ document {layers = layers}
 render :: Document -> Lazy.ByteString
 render document@(Document {eolStyle}) = Output.render eolStyle document
@ -83,7 +106,7 @@ findNextSection offset input =
 readStructures :: Int -> ByteString -> Either String [InputStructure]
 readStructures startXref input =
-  runParser structure () (BS.drop startXref input) >>= stopOrFollow
+  evalParser structure () (BS.drop startXref input) >>= stopOrFollow
  where
    stopOrFollow s@(Structure {trailer}) =
      case Map.lookup (Name "Prev") trailer of
@ -96,8 +119,8 @@ readStructures startXref input =
 parseDocument :: ByteString -> Either String Document
 parseDocument input = do
-  (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
+  (pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
  startXref <- readStartXref eolStyle input
  structuresRead <- readStructures startXref input
-  let updates = populate input <$> structuresRead
+  let layers = populate input <$> structuresRead
-  return $ Document {pdfVersion, eolStyle, updates}
+  return $ Document {pdfVersion, eolStyle, layers}
--- a/src/PDF/Body.hs
+++ b/src/PDF/Body.hs
@ -6,19 +6,22 @@ module PDF.Body (
 import Control.Applicative ((<|>))
 import Control.Monad.State (get, gets, modify)
 import Data.Attoparsec.ByteString.Char8 (option)
 import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
-import Data.Map ((!))
+import Data.Id (Id(..), at, empty)
-import qualified Data.Map as Map (empty, insert, lookup)
+import qualified Data.Id as Id (insert, lookup)
 import qualified Data.Map as Map (lookup)
 import qualified PDF.EOL as EOL (charset, parser)
 import PDF.Layer (Layer(..))
 import PDF.Object (
-      Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
+      DirectObject(..), Flow(..), IndirectObjCoordinates(..)
    , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
    , Structure(..), XRefEntry(..), XRefSection
    , blank, dictionary, directObject, integer, line
  )
-import PDF.Output (ObjectId(..), Offset(..))
+import PDF.Output (Offset(..))
-import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
+import PDF.Parser (MonadParser(..), Parser, (<?>), evalParser, on)
 data UserState = UserState {
      input :: ByteString
@ -31,9 +34,9 @@ type SParser = Parser UserState
 modifyFlow :: (Flow -> Flow) -> SParser ()
 modifyFlow f = modify $ \state -> state {flow = f $ flow state}
-addObject :: ObjectId -> Object -> SParser ()
+addObject :: (Id Object) -> Object -> SParser ()
 addObject objectId newObject = modifyFlow $ \flow -> flow {
-    tmpObjects = Map.insert objectId newObject $ tmpObjects flow
+    tmpObjects = Id.insert objectId newObject $ tmpObjects flow
  }
 pushOccurrence :: Occurrence -> SParser ()
@ -46,10 +49,10 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
  where
    afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))
-lookupOffset :: ObjectId -> SParser Offset
+lookupOffset :: (Id Object) -> SParser Offset
 lookupOffset objectId = do
  table <- gets xreferences
-  case Map.lookup objectId table >>= entryOffset of
+  case Id.lookup objectId table >>= entryOffset of
    Nothing -> fail $
      "obj " ++ show objectId ++ " is referenced but missing in XRef table"
    Just offset -> return offset
@ -57,12 +60,12 @@ lookupOffset objectId = do
    entryOffset (InUse {offset}) = Just offset
    entryOffset _ = Nothing
-loadNumber :: ObjectId -> SParser Double
+loadNumber :: (Id Object) -> SParser Double
 loadNumber objectId = do
  offset <- getOffset <$> lookupOffset objectId
  objectStart <- BS.drop offset <$> gets input
  indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
-  objectValue <- (!objectId) . tmpObjects <$> gets flow
+  objectValue <- (`at` objectId) . tmpObjects <$> gets flow
  case objectValue of
    Direct (NumberObject (Number n)) -> return n
    obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
@ -75,7 +78,7 @@ getSize Nothing = fail "Missing '/Length' key on stream"
 getSize (Just (NumberObject (Number size))) = return size
 getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
  Flow {tmpObjects} <- gets flow
-  case Map.lookup objectId tmpObjects of
+  case Id.lookup objectId tmpObjects of
    Nothing -> loadNumber objectId
    Just (Direct (NumberObject (Number size))) -> return size
    Just v -> fail $
@ -96,7 +99,7 @@ object = streamObject <|> Direct <$> directObject
 indirectObjCoordinates :: SParser IndirectObjCoordinates
 indirectObjCoordinates = do
-  objectId <- ObjectId <$> integer
+  objectId <- Id <$> integer
  coordinates <- IndirectObjCoordinates objectId <$> integer
  objectValue <- line "obj" *> object <* blank <* line "endobj"
  addObject objectId objectValue
@ -106,14 +109,14 @@ occurrence :: SParser Occurrence
 occurrence =
  Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"
-populate :: ByteString -> InputStructure -> Content
+populate :: ByteString -> InputStructure -> Layer
 populate input structure =
  let bodyInput = BS.drop (startOffset structure) input in
-  case runParser recurseOnOccurrences initialState bodyInput of
+  case evalParser recurseOnOccurrences initialState bodyInput of
-    Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
+    Left _ -> Layer {occurrences = [], objects = empty, docStructure}
    Right finalState ->
      let Flow {occurrencesStack, tmpObjects} = flow finalState in
-      Content {
+      Layer {
          occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
        }
  where
@ -121,7 +124,7 @@ populate input structure =
    xreferences = xRef docStructure
    initialState = UserState {
        input, xreferences, flow = Flow {
-            occurrencesStack = [], tmpObjects = Map.empty
+            occurrencesStack = [], tmpObjects = empty
          }
      }
--- a/src/PDF/Box.hs
+++ b/src/PDF/Box.hs
@ -0,0 +1,80 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FunctionalDependencies #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE UndecidableInstances #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 module PDF.Box (
      Box(..)
    , Index(..)
    , Maybe_(..)
    , Either_(..)
    , at
    , atAll
    , edit
    , runRO
  ) where
 import Control.Monad.Except (ExceptT(..), runExceptT)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.Reader (ReaderT, runReaderT)
 import Control.Monad.State (MonadState(..))
 import Data.Id (Id, IdMap)
 import qualified Data.Id as Id (insert, lookup)
 import Data.Map (Map)
 import qualified Data.Map as Map (insert, lookup)
 import Data.OrderedMap (OrderedMap)
 import qualified Data.OrderedMap as OrderedMap (lookup, set)
 import Prelude hiding (fail)
 runRO :: MonadState s m => ReaderT s m a -> m a
 runRO ro = get >>= runReaderT ro
 newtype Index = Index Int
 newtype Maybe_ x = Maybe_ x
 newtype Either_ b x = Either_ x
 class Monad m => Box m i a b | m a i -> b where
  r :: i -> a -> m b
  w :: i -> b -> a -> m a
 at :: Box m i a b => i -> (b -> m b) -> a -> m a
 at i f a = r i a >>= f >>= flip (w i) a
 atAll :: (Traversable t, Monad m, Box m i a (t b)) => i -> (b -> m b) -> a -> m a
 atAll i f = at i $ (mapM f)
 edit :: MonadState a m => (a -> m a) -> m ()
 edit f = get >>= f >>= put
 instance MonadFail m => Box m Index [a] a where
  r (Index i) [] = fail $ "Index out of bounds " ++ show i
  r (Index 0) (x:_) = return x
  r (Index i) (_:xs) = r (Index (i-1)) xs
  w (Index i) _ [] = fail $ "Index out of bounds " ++ show i
  w (Index 0) newX (_:xs) = return (newX:xs)
  w (Index i) newX (x:xs) = (x:) <$> w (Index (i-1)) newX xs
 instance (Ord k, MonadFail m) => Box m k (Map k a) a where
  r k = maybe (fail "Unknown key") return . Map.lookup k
  w k a = return . Map.insert k a
 instance (Ord k, MonadFail m) => Box m k (OrderedMap k a) a where
  r k = maybe (fail "Unknown key") return . OrderedMap.lookup k
  w k a orderedMap = r k orderedMap >> return (OrderedMap.set k a orderedMap)
 instance MonadFail m => Box m (Id k) (IdMap k a) a where
  r k = maybe (fail "Unknown key") return . Id.lookup k
  w k a = return . Id.insert k a
 instance (Monad m, Box Maybe i a b) => Box m (Maybe_ i) a (Maybe b) where
  r (Maybe_ i) = return . r i
  w (Maybe_ i) (Just b) a =
    return . maybe a id $ w i b a
  w _ _ obj = return obj
 instance (Monad m, Box (ExceptT e m) i a b) => Box m (Either_ e i) a (Either e b) where
  r (Either_ i) a = runExceptT (r i a)
  w (Either_ i) (Right b) a =
    either (const a) id <$> runExceptT (w i b a :: ExceptT e m a)
  w _ _ a = return a
--- a/src/PDF/CMap.hs
+++ b/src/PDF/CMap.hs
@ -0,0 +1,196 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.CMap (
      CMap
    , CMappers
    , CRange(..)
    , matches
    , parse
  ) where
 import Control.Applicative ((<|>), many)
 import Control.Monad.Except (MonadError(..))
 import Control.Monad.Fail (fail)
 import Control.Monad.State (modify)
 import Data.Attoparsec.ByteString.Char8 (count)
 import Data.ByteString (ByteString)
 import qualified Data.ByteString as BS (drop, length, null, take)
 import Data.ByteString.Char8 (unpack)
 import Data.ByteString.Char8.Util (
    B16Int(..), b16ToBytes, b16ToInt, toBytes, utf16BEToutf8
  )
 import Data.Foldable (foldl', foldr')
 import Data.Map (Map, mapWithKey, union)
 import qualified Data.Map as Map (
    adjust, empty, fromList, insert, insertWith, lookup, toList
  )
 import Data.Text (Text)
 import qualified Data.Text as Text (length, null, splitAt)
 import qualified PDF.EOL as EOL (charset, parser)
 import PDF.Font (Decoder, Encoder, Font(..))
 import PDF.Object (
      DirectObject(..), Name, StringObject(..)
    , blank, directObject, integer, line, stringObject
  )
 import PDF.Parser (MonadParser, Parser, runParser, takeAll)
 import Prelude hiding (fail)
 type CMappers = Map Name CMap
 type ToUnicode = Map ByteString Text
 type FromUnicode = Map Text ByteString
 data CRange = CRange {
      fromSequence :: ByteString
    , toSequence :: ByteString
    , toUnicode :: ToUnicode
  } deriving Show
 type Size = Int
 data CMap = CMap {
      toUnicodeBySize :: Map Size [CRange]
    , fromUnicodeBySize :: Map Size FromUnicode
  }
 toFont :: CMap -> Font
 toFont (CMap {toUnicodeBySize, fromUnicodeBySize})  =
  Font {decode = decoder toUnicodeBySize, encode = encoder fromUnicodeBySize}
 decoder :: Map Size [CRange] -> Decoder
 decoder rangesBySize input
  | BS.null input = Right ""
  | otherwise = do
    (output, remainingInput) <- trySizes input $ Map.toList rangesBySize
    mappend output <$> decoder rangesBySize remainingInput
  where
    trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
    trySizes s ((size, cRanges):others) =
      let prefix = BS.take size s in
      case tryRanges prefix cRanges of
        Nothing -> trySizes s others
        Just outputSequence -> Right (outputSequence, BS.drop size s)
    tryRanges :: ByteString -> [CRange] -> Maybe Text
    tryRanges _ [] = Nothing
    tryRanges prefix ((CRange {toUnicode}):cRanges) =
      case Map.lookup prefix toUnicode of
        Nothing -> tryRanges prefix cRanges
        outputSequence -> outputSequence
 encoder :: Map Size FromUnicode -> Encoder
 encoder fromUnicodes input
  | Text.null input = Right ""
  | otherwise =
    foldr' (<>) (Left "No encoding found") $ mapWithKey tryOn fromUnicodes
    where
      tryOn size fromUnicode =
        let (prefix, end) = Text.splitAt size input in
        case Map.lookup prefix fromUnicode of
          Nothing -> Left ""
          Just code -> (code <>) <$> encoder fromUnicodes end
 matches :: ByteString -> CRange -> Bool
 matches code (CRange {fromSequence, toSequence}) =
  fromSequence <= code && code <= toSequence
 parse :: MonadError String m => ByteString -> m Font
 parse = either throwError (return . toFont . snd) . runParser
  (many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
  emptyCMap
  where
    ignoredLine =
      takeAll (not . (`elem` EOL.charset)) *> EOL.parser *> return ()
    emptyCMap = CMap {toUnicodeBySize = Map.empty, fromUnicodeBySize = Map.empty}
 codeRanges :: Parser CMap ()
 codeRanges = do
  size <- integer <* line "begincodespacerange"
  mapM_ createMapping =<< count size codeRange
  line "endcodespacerange"
  where
    codeRange =
      (,) <$> stringObject <* blank <*> stringObject <* EOL.parser
 createMapping :: (StringObject, StringObject) -> Parser CMap ()
 createMapping (Hexadecimal from, Hexadecimal to) = modify $ \cmap -> cmap {
      toUnicodeBySize = Map.insertWith (++) size [cRange] (toUnicodeBySize cmap)
    }
  where
    fromSequence = b16ToBytes from
    size = BS.length fromSequence
    toSequence = b16ToBytes to
    cRange = CRange {fromSequence, toSequence, toUnicode = Map.empty}
 createMapping _ = return ()
 cMapRange :: Parser CMap ()
 cMapRange = do
  size <- integer <* line "beginbfrange"
  mapM_ saveMapping =<< count size rangeMapping
  line "endbfrange"
  where
    rangeMapping = do
      from <- (stringObject <* blank)
      to <- (stringObject <* blank)
      mapFromTo from to =<< directObject <* EOL.parser
 saveMapping :: [(ByteString, Text)] -> Parser CMap ()
 saveMapping assoc =
  modify $ \(CMap {toUnicodeBySize, fromUnicodeBySize}) -> CMap {
        toUnicodeBySize = saveToUnicodeBySize assoc toUnicodeBySize
      , fromUnicodeBySize = saveFromUnicodeBySize reversed fromUnicodeBySize
    }
    where
      reversed = (\(a, b) -> (b, a)) <$> assoc
 saveToUnicodeBySize :: [(ByteString, Text)] -> Map Size [CRange] -> Map Size [CRange]
 saveToUnicodeBySize [] = id
 saveToUnicodeBySize assoc@((code, _):_) = Map.adjust insertCRange (BS.length code)
  where
    newMapping = Map.fromList assoc
    appendMapping cRange =
      cRange {toUnicode = toUnicode cRange `union` newMapping}
    insertCRange = fmap (\cRange ->
        if code `matches` cRange then appendMapping cRange else cRange
      )
 saveFromUnicodeBySize :: [(Text, ByteString)] -> Map Size FromUnicode -> Map Size FromUnicode
 saveFromUnicodeBySize = flip (foldl' insertFromUnicode)
  where
    insertFromUnicode :: Map Size FromUnicode -> (Text, ByteString) -> Map Size FromUnicode
    insertFromUnicode tmpFromUnicodeBySize (unicodeSequence, code) =
      let size = Text.length unicodeSequence in
      Map.adjust (Map.insert unicodeSequence code) size tmpFromUnicodeBySize
 cMapChar :: Parser CMap ()
 cMapChar = do
  size <- integer <* line "beginbfchar"
  saveMapping =<< count size charMapping <* line "endbfchar"
  where
    charMapping = do
      from <- stringObject <* blank
      pairMapping from =<< stringObject <* EOL.parser
 between :: B16Int -> B16Int -> [ByteString]
 between from@(B16Int s) to =
  let size = BS.length s `div` 2 in
  toBytes size <$> [b16ToInt from .. b16ToInt to]
 startFrom :: B16Int -> [ByteString]
 startFrom from@(B16Int s) =
  let size = BS.length s `div` 2 in
  toBytes size <$> [b16ToInt from .. ]
 mapFromTo :: MonadParser m => StringObject -> StringObject -> DirectObject -> m [(ByteString, Text)]
 mapFromTo (Hexadecimal from) (Hexadecimal to) (StringObject (Hexadecimal dstFrom)) =
  return $ zip (between from to) (utf16BEToutf8 <$> startFrom dstFrom)
 mapFromTo (Hexadecimal from) (Hexadecimal to) (Array dstPoints) =
  zip (between from to) <$> (mapM dstByteString dstPoints)
  where
    dstByteString (StringObject (Hexadecimal dst)) =
      return . utf16BEToutf8 $ b16ToBytes dst
    dstByteString _ = fail "Invalid for a replacement string"
 mapFromTo _ _ _ = fail "invalid range mapping found"
 pairMapping :: MonadParser m => StringObject -> StringObject -> m (ByteString, Text)
 pairMapping (Hexadecimal from) (Hexadecimal to) =
  return (b16ToBytes  from, utf16BEToutf8 $ b16ToBytes  to)
 pairMapping _ _ = fail "invalid pair mapping found"
--- a/src/PDF/Content.hs
+++ b/src/PDF/Content.hs
@ -0,0 +1,117 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 module PDF.Content (
      Content(..)
    , ContentUnit(..)
    , GraphicContextUnit(..)
    , IdContentUnit
    , IdGraphicContextUnit
    , IdTextContext
    , Instructions(..)
    , TextContext
    , parse
  ) where
 import Control.Applicative ((<|>))
 import Control.Monad.Reader (asks, runReader)
 import Control.Monad.State.Strict (runState, evalStateT, modify)
 import Data.Attoparsec.ByteString.Char8 (sepBy)
 import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser, parseOnly)
 import Data.ByteString (ByteString)
 import Data.Id (Id(..), Indexed, at, empty, register)
 import PDF.Box (Box(..))
 import PDF.Content.Operator (Instruction, operator)
 import PDF.Object (blank, directObject)
 import PDF.Output (Output(..), line)
 import PDF.Parser (string)
 data Instructions = Instructions
 data GraphicContextUnit a =
    GraphicInstruction a
  | ContentUnit (ContentUnit a)
  deriving Show
 type TextContext a = [a]
 data ContentUnit a =
    GraphicContext [GraphicContextUnit a]
  | TextContext (TextContext a)
  deriving Show
 data Content = Content {
      contentUnits :: [IdContentUnit]
    , indexedInstructions :: Indexed Instruction
    , firstError :: Maybe String
  } deriving Show
 type TmpContentUnit = ContentUnit Instruction
 type TmpGraphicContextUnit = GraphicContextUnit Instruction
 type TmpTextContext = TextContext Instruction
 type IdContentUnit = ContentUnit (Id Instruction)
 type IdGraphicContextUnit = GraphicContextUnit (Id Instruction)
 type IdTextContext = TextContext (Id Instruction)
 instance Monad m => Box m Instructions Content (Indexed Instruction) where
  r Instructions = return . indexedInstructions
  w Instructions indexedInstructions someContent =
    return $ someContent {indexedInstructions}
 parse :: ByteString -> Content
 parse input =
  let result = Atto.parseOnly (contentUnit `sepBy` blank) input in
  let (contentUnits, indexedInstructions) = either (const ([], empty)) buildContent result in
  let firstError = either Just (const Nothing) result in
  Content {contentUnits, indexedInstructions, firstError}
 buildContent :: [TmpContentUnit] -> ([IdContentUnit], Indexed Instruction)
 buildContent instructionContentUnits =
  runState (mapM registerContentUnit instructionContentUnits) empty
  where
    registerContentUnit (GraphicContext gc) =
      GraphicContext <$> (mapM registerGraphicContext gc)
    registerContentUnit (TextContext tc) = TextContext <$> (mapM register tc)
    registerGraphicContext (GraphicInstruction gi) =
      GraphicInstruction <$> (register gi)
    registerGraphicContext (ContentUnit cu) =
      ContentUnit <$> (registerContentUnit cu)
 contentUnit :: Atto.Parser TmpContentUnit
 contentUnit =
    (GraphicContext <$> graphicContext)
  <|> (TextContext <$> textContext)
  where
    graphicContext =
      string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
 graphicContextUnit :: Atto.Parser TmpGraphicContextUnit
 graphicContextUnit =
    (GraphicInstruction <$> instruction)
  <|> (ContentUnit <$> contentUnit)
 instruction :: Atto.Parser Instruction
 instruction = evalStateT stackParser []
  where
    stackParser = ((directObject <* blank) >>= push) <|> operator
    push arg = modify (arg:) *> stackParser
 textContext :: Atto.Parser TmpTextContext
 textContext =
  string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
 instance Output Content where
  output (Content {contentUnits, indexedInstructions}) =
    runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
    where
      outputCU (GraphicContext gc) = do
        inside <- mconcat <$> mapM outputGCU gc
        return (line "q" `mappend` inside `mappend` line "Q")
      outputCU (TextContext tc) = do
        inside <- mconcat <$> mapM outputIId tc
        return (line "BT" `mappend` inside `mappend` line "ET")
      outputGCU (GraphicInstruction gi) = outputIId gi
      outputGCU (ContentUnit cu) = outputCU cu
      outputIId instructionId = asks (output . (`at` instructionId))
--- a/src/PDF/Content/Operator.hs
+++ b/src/PDF/Content/Operator.hs
@ -0,0 +1,76 @@
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE FlexibleInstances #-}
 module PDF.Content.Operator (
      Instruction
    , Operator(..)
    , operator
  ) where
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.State (MonadState(..))
 import Data.ByteString.Char8 (ByteString, pack, unpack)
 import Data.Char (toLower)
 import Data.Map (Map)
 import qualified Data.Map as Map (fromList, lookup)
 import qualified PDF.Content.Operator.Color as Color (Operator, signature)
 import PDF.Content.Operator.Common (Signature)
 import qualified PDF.Content.Operator.GraphicState as GraphicState (Operator, signature)
 import qualified PDF.Content.Operator.Path as Path (Operator, signature)
 import qualified PDF.Content.Operator.Text as Text (Operator, signature)
 import PDF.Object (DirectObject, blank, regular)
 import PDF.Output (Output(..), join, line)
 import PDF.Parser (MonadParser, takeAll1)
 import Prelude hiding (fail)
 import Text.Printf (printf)
 data Operator =
    GraphicState GraphicState.Operator
  | Path Path.Operator
  | Color Color.Operator
  | Text Text.Operator 
 instance Show Operator where
  show (GraphicState gcOp) = code gcOp
  show (Path pOp) = code pOp
  show (Color cOp) = code cOp
  show (Text tOp) = code tOp
 type Instruction = (Operator, [DirectObject])
 instance Output Instruction where
  output (op, args) = join " " ((output <$> args) ++ [line (show op)])
 operatorsTable :: Map ByteString (Signature Operator)
 operatorsTable = Map.fromList (
      (prepare GraphicState <$> GraphicState.signature)
    ++ (prepare Path <$> Path.signature)
    ++ (prepare Color <$> Color.signature)
    ++ (prepare Text <$> Text.signature)
  )
  where
    prepare constructor (op, sig) = (pack $ code op, (constructor op, sig))
 code :: Show a => a -> String
 code = expand . show
  where
    expand "" = ""
    expand (c:'_':s) = toLower c : expand s
    expand ('s':'t':'a':'r':s) = '*' : expand s
    expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s
    expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s
    expand (c:s) = c : expand s
 type StackParser m = (MonadState [DirectObject] m, MonadParser m)
 operator :: StackParser m => m Instruction
 operator = do
  chunk <- takeAll1 regular <* blank
  args <- reverse <$> get
  case Map.lookup chunk operatorsTable of
    Just (op, sig)
      | sig args -> return (op, args)
      | otherwise -> 
        get >>= fail . printf "Operator %s with stack %s" (show op) . show
    _ -> fail ("Unknown chunk " ++ unpack chunk)
--- a/src/PDF/Content/Operator/Color.hs
+++ b/src/PDF/Content/Operator/Color.hs
@ -0,0 +1,27 @@
 module PDF.Content.Operator.Color (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 import PDF.Object (DirectObject(..))
 data Operator =
    CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (CS, \l -> case l of [NameObject _] -> True ; _ -> False)
    , (C_s, \l -> case l of [NameObject _] -> True ; _ -> False)
    , (SC, \_ -> True)
    , (SCN, \_ -> True)
    , (S_c, \_ -> True)
    , (S_cn, \_ -> True)
    , (G, \l -> case l of [_] -> True ; _ -> False)
    , (G_, \l -> case l of [_] -> True ; _ -> False)
    , (RG, \l -> case l of [_, _, _] -> True ; _ -> False)
    , (R_g, \l -> case l of [_, _, _] -> True ; _ -> False)
    , (K, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (K_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Operator/Common.hs
+++ b/src/PDF/Content/Operator/Common.hs
@ -0,0 +1,7 @@
 module PDF.Content.Operator.Common (
    Signature
  ) where
 import PDF.Object (DirectObject)
 type Signature a = (a, [DirectObject] -> Bool)
--- a/src/PDF/Content/Operator/GraphicState.hs
+++ b/src/PDF/Content/Operator/GraphicState.hs
@ -0,0 +1,24 @@
 module PDF.Content.Operator.GraphicState (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 import PDF.Object (DirectObject(..))
 data Operator =
  C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (C_m, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
    , (W_, \l -> case l of [_] -> True ; _ -> False)
    , (J, \l -> case l of [_] -> True ; _ -> False)
    , (J_, \l -> case l of [_] -> True ; _ -> False)
    , (M, \l -> case l of [_] -> True ; _ -> False)
    , (D_, \l -> case l of [_, _] -> True ; _ -> False)
    , (R_i, \l -> case l of [_] -> True ; _ -> False)
    , (I_, \l -> case l of [_] -> True ; _ -> False)
    , (G_s, \l -> case l of [NameObject _] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Operator/Path.hs
+++ b/src/PDF/Content/Operator/Path.hs
@ -0,0 +1,35 @@
 module PDF.Content.Operator.Path (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 data Operator =
    M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction
  | S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting
  | W | Wstar -- clipping path
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (M_, \l -> case l of [_, _] -> True ; _ -> False)
    , (L_, \l -> case l of [_, _] -> True ; _ -> False)
    , (C_, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
    , (V_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (Y_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (H_, \l -> case l of [] -> True ; _ -> False)
    , (R_e, \l -> case l of [_, _, _, _] -> True ; _ -> False)
    , (S, \l -> case l of [] -> True ; _ -> False)
    , (S_, \l -> case l of [] -> True ; _ -> False)
    , (F_, \l -> case l of [] -> True ; _ -> False)
    , (F, \l -> case l of [] -> True ; _ -> False)
    , (Fstar, \l -> case l of [] -> True ; _ -> False)
    , (B, \l -> case l of [] -> True ; _ -> False)
    , (Bstar, \l -> case l of [] -> True ; _ -> False)
    , (B_, \l -> case l of [] -> True ; _ -> False)
    , (B_star, \l -> case l of [] -> True ; _ -> False)
    , (N_, \l -> case l of [] -> True ; _ -> False)
    , (W, \l -> case l of [] -> True ; _ -> False)
    , (Wstar, \l -> case l of [] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Operator/Text.hs
+++ b/src/PDF/Content/Operator/Text.hs
@ -0,0 +1,32 @@
 module PDF.Content.Operator.Text (
      Operator(..)
    , signature
  ) where
 import PDF.Content.Operator.Common (Signature)
 import PDF.Object (DirectObject(..))
 data Operator =
    Td | TD | Tm | Tstar -- text positioning
  | TJ | Tj | Quote | DQuote -- text showing
  | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
  deriving (Bounded, Enum, Show)
 signature :: [Signature Operator]
 signature = [
      (Td, \l -> case l of [_, _] -> True ; _ -> False)
    , (TD, \l -> case l of [_, _] -> True ; _ -> False)
    , (Tm, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
    , (Tstar, \l -> case l of [] -> True ; _ -> False)
    , (TJ, \l -> case l of [Array _] -> True ; _ -> False)
    , (Tj, \l -> case l of [StringObject _] -> True ; _ -> False)
    , (Quote, \l -> case l of [StringObject _] -> True ; _ -> False)
    , (DQuote, \l -> case l of [_, _, StringObject _] -> True ; _ -> False)
    , (Tc, \l -> case l of [_] -> True ; _ -> False)
    , (Tw, \l -> case l of [_] -> True ; _ -> False)
    , (Tz, \l -> case l of [_] -> True ; _ -> False)
    , (TL, \l -> case l of [_] -> True ; _ -> False)
    , (Tf, \l -> case l of [NameObject _, _] -> True ; _ -> False)
    , (Tr, \l -> case l of [_] -> True ; _ -> False)
    , (Ts, \l -> case l of [_] -> True ; _ -> False)
  ]
--- a/src/PDF/Content/Text.hs
+++ b/src/PDF/Content/Text.hs
@ -0,0 +1,151 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 module PDF.Content.Text (
      Chunks(..)
    , chunk
    , format
  ) where
 import Control.Monad (foldM)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.Reader (ReaderT, asks, runReaderT)
 import Control.Monad.State (
    MonadState(..), StateT, evalStateT, gets, modify, runStateT
  )
 import Control.Monad.Trans (lift)
 import qualified Data.ByteString.Char8 as BS (concatMap, singleton)
 import Data.Id (Id(..), Indexed, at, empty, singleton)
 import qualified Data.Id as Id (delete, lookup, register)
 import Data.Map ((!))
 import Data.Text (Text, breakOn)
 import qualified Data.Text as Text (drop)
 import PDF.Box (Box(..))
 import PDF.Content (
      Content(..), ContentUnit(..), GraphicContextUnit(..), IdContentUnit
    , IdGraphicContextUnit
  )
 import PDF.Content.Operator (Instruction, Operator(..))
 import PDF.Content.Operator.Text (Operator(..))
 import PDF.Font (Font(..), FontSet, emptyFont)
 import PDF.Object (DirectObject(..), StringObject(..), toByteString)
 import Prelude hiding (fail)
 type TmpFont = StateT Font
 type Renderer m = ReaderT (Indexed Instruction) (ReaderT FontSet m)
 type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m))
 decodeString :: MonadFail m => StringObject -> TmpFont (Renderer m) Text
 decodeString input = do
  Font {decode} <- get
  either fail return . decode $ toByteString input
 data Chunks = Chunks
 chunk :: Int -> Id Text
 chunk = Id
 instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
  r Chunks content =
    runReaderT (mconcat <$> renderer) $ indexedInstructions content
    where
      renderer = mapM renderContentUnit (contentUnits content)
  w Chunks indexedText content = do
    (contentUnits, indexedInstructions) <- runReaderT readerUpdate indexedText
    return $ content {contentUnits, indexedInstructions}
    where
      stateUpdate = mapM updateContentUnit (contentUnits content)
      readerUpdate = runStateT stateUpdate (indexedInstructions content)
 renderContentUnit :: MonadFail m => IdContentUnit -> Renderer m (Indexed Text)
 renderContentUnit (GraphicContext graphicContextUnits) =
  mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
 renderContentUnit (TextContext instructionIds) =
  evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
 updateContentUnit :: MonadFail m => IdContentUnit -> Updater m IdContentUnit
 updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$>
  mapM updateGraphicContextUnit graphicContextUnits
 updateContentUnit (TextContext instructionIds) = TextContext . concat <$>
  evalStateT (mapM updateInstructionId instructionIds) emptyFont
 renderGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Renderer m (Indexed Text)
 renderGraphicContextUnit (GraphicInstruction _) = return empty
 renderGraphicContextUnit (ContentUnit contentUnit) =
  renderContentUnit contentUnit
 updateGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Updater m IdGraphicContextUnit
 updateGraphicContextUnit gI@(GraphicInstruction _) = return gI
 updateGraphicContextUnit (ContentUnit contentUnit) =
  ContentUnit <$> updateContentUnit contentUnit
 renderInstructionId :: MonadFail m => Id Instruction -> TmpFont (Renderer m) (Indexed Text)
 renderInstructionId instructionId@(Id n) = toMap <$>
  (asks ((`at` instructionId)) >>= renderInstruction)
  where
    toMap = maybe empty (singleton (Id n))
 updateInstructionId :: MonadFail m => Id Instruction -> TmpFont (Updater m) [Id Instruction]
 updateInstructionId instructionId =
  lift (gets (`at` instructionId)) >>= updateInstruction instructionId
 renderInstruction :: MonadFail m => Instruction -> TmpFont (Renderer m) (Maybe Text)
 renderInstruction (Text Tf, [NameObject fontName, _]) =
  lift (lift $ asks (! fontName)) >>= put >> return Nothing
 renderInstruction (Text Tstar, []) = return $ Just "\n"
 renderInstruction (Text TJ, [Array arrayObject]) =
  Just <$> foldM appendText "" arrayObject
  where
    appendText t (StringObject outputString) =
      mappend t <$> decodeString outputString
    appendText t _ = return t
 renderInstruction (Text Tj, [StringObject outputString]) =
  Just <$> decodeString outputString
 renderInstruction (Text Quote, [StringObject outputString]) =
  (Just . mappend "\n") <$> decodeString outputString
 renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
  (Just . mappend "\n") <$> decodeString outputString
 renderInstruction _ = return Nothing
 updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TmpFont (Updater m) [Id Instruction]
 updateInstruction instructionId (Text Tf, [NameObject fontName, _]) =
  (lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId]
 updateInstruction instructionId@(Id n) instruction = do
  if emitsText $ fst instruction
  then asks (Id.lookup (Id n)) >>= replaceText
  else return [instructionId]
  where
    emitsText (Text Tstar) = True
    emitsText (Text TJ) = True
    emitsText (Text Tj) = True
    emitsText (Text Quote) = True
    emitsText (Text DQuote) = True
    emitsText _ = False
    replaceText = maybe (return []) $ \text -> do
      lift $ modify (Id.delete instructionId)
      format text >>= mapM (lift . Id.register)
 format :: MonadFail m => Text -> StateT Font m [Instruction]
 format input = do
  case breakOn "\n" input of
    ("", "") -> return []
    ("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
    (line, left) -> (:) <$> tj line <*> format left
  where
    tj t = do
      encoded <- either fail return =<< gets (($t) . encode)
      return (Text Tj, [StringObject . Literal $ BS.concatMap escape encoded])
    escape '\\' = "\\\\"
    escape '(' = "\\("
    escape ')' = "\\)"
    escape c = BS.singleton c
--- a/src/PDF/EOL.hs
+++ b/src/PDF/EOL.hs
@ -6,14 +6,14 @@ module PDF.EOL (
  ) where
 import Control.Applicative ((<|>))
-import PDF.Parser (Parser, string)
+import PDF.Parser (MonadParser, string)
 data Style = CR | LF | CRLF deriving Show
 charset :: String
 charset = "\r\n"
-parser :: Parser s Style
+parser :: MonadParser m => m Style
 parser =
    (string "\r\n" >> return CRLF)
  <|> (string "\r" >> return CR)
--- a/src/PDF/Encoding.hs
+++ b/src/PDF/Encoding.hs
@ -0,0 +1,13 @@
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.Encoding (
    encoding
  ) where
 import Control.Monad.Except (MonadError(..))
 import PDF.Encoding.MacRoman (macRomanEncoding)
 import PDF.Font (Font)
 import Prelude hiding (fail)
 encoding :: MonadError String m => String -> m Font
 encoding "MacRomanEncoding" = return macRomanEncoding
 encoding s = throwError $ "Unknown encoding " ++ s
--- a/src/PDF/Encoding/MacRoman.hs
+++ b/src/PDF/Encoding/MacRoman.hs
@ -0,0 +1,162 @@
 module PDF.Encoding.MacRoman (
    macRomanEncoding
  ) where
 import qualified Data.ByteString.Char8 as BS (pack, unpack)
 import Data.Foldable (foldl')
 import Data.Map (Map)
 import qualified Data.Map as Map (empty, insert, lookup)
 import qualified Data.Text as Text (pack, unpack)
 import PDF.Font (Font(..))
 type Mapper = Map Char Char
 macRomanEncoding :: Font
 macRomanEncoding = Font {
      decode = Right . Text.pack . fmap decodeChar . BS.unpack
    , encode = fmap BS.pack . mapM encodeChar . Text.unpack
  }
  where
    decodeChar k = maybe k id $ Map.lookup k (fst mappers)
    encodeChar k =
      case Map.lookup k (snd mappers) of
        Just v -> Right v
        Nothing
          | k < '\x80' -> Right k
          | otherwise -> Left ("Character '" ++ k :"' unavailable in MacRoman")
 mappers :: (Mapper, Mapper)
 mappers = foldl' generateMapers (Map.empty, Map.empty) [
      ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
    , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
    , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
    , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
    , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
    , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
    , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
    , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
    , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
    , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
    , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
    , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
    , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
    , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
    , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
    , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
    , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
    , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
    , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
    , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
    , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
    , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
    , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
    , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
    , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
    , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
    , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
    , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
    , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
    , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
    , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
    , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
    , ('\xA0', '\x2020') -- DAGGER
    , ('\xA1', '\x00B0') -- DEGREE SIGN
    , ('\xA2', '\x00A2') -- CENT SIGN
    , ('\xA3', '\x00A3') -- POUND SIGN
    , ('\xA4', '\x00A7') -- SECTION SIGN
    , ('\xA5', '\x2022') -- BULLET
    , ('\xA6', '\x00B6') -- PILCROW SIGN
    , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
    , ('\xA8', '\x00AE') -- REGISTERED SIGN
    , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
    , ('\xAA', '\x2122') -- TRADE MARK SIGN
    , ('\xAB', '\x00B4') -- ACUTE ACCENT
    , ('\xAC', '\x00A8') -- DIAERESIS
    , ('\xAD', '\x2260') -- NOT EQUAL TO
    , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
    , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
    , ('\xB0', '\x221E') -- INFINITY
    , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
    , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
    , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
    , ('\xB4', '\x00A5') -- YEN SIGN
    , ('\xB5', '\x00B5') -- MICRO SIGN
    , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
    , ('\xB7', '\x2211') -- N-ARY SUMMATION
    , ('\xB8', '\x220F') -- N-ARY PRODUCT
    , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
    , ('\xBA', '\x222B') -- INTEGRAL
    , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
    , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
    , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
    , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
    , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
    , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
    , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
    , ('\xC2', '\x00AC') -- NOT SIGN
    , ('\xC3', '\x221A') -- SQUARE ROOT
    , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
    , ('\xC5', '\x2248') -- ALMOST EQUAL TO
    , ('\xC6', '\x2206') -- INCREMENT
    , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
    , ('\xCA', '\x00A0') -- NO-BREAK SPACE
    , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
    , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
    , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
    , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
    , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
    , ('\xD0', '\x2013') -- EN DASH
    , ('\xD1', '\x2014') -- EM DASH
    , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
    , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
    , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
    , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
    , ('\xD6', '\x00F7') -- DIVISION SIGN
    , ('\xD7', '\x25CA') -- LOZENGE
    , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
    , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
    , ('\xDA', '\x2044') -- FRACTION SLASH
    , ('\xDB', '\x20AC') -- EURO SIGN
    , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
    , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
    , ('\xE0', '\x2021') -- DOUBLE DAGGER
    , ('\xE1', '\x00B7') -- MIDDLE DOT
    , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
    , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
    , ('\xE4', '\x2030') -- PER MILLE SIGN
    , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
    , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
    , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
    , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
    , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
    , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
    , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
    , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    , ('\xF0', '\xF8FF') -- Apple logo
    , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
    , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
    , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
    , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
    , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
    , ('\xF7', '\x02DC') -- SMALL TILDE
    , ('\xF8', '\x00AF') -- MACRON
    , ('\xF9', '\x02D8') -- BREVE
    , ('\xFA', '\x02D9') -- DOT ABOVE
    , ('\xFB', '\x02DA') -- RING ABOVE
    , ('\xFC', '\x00B8') -- CEDILLA
    , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
    , ('\xFE', '\x02DB') -- OGONEK
    , ('\xFF', '\x02C7') -- CARON
  ]
  where
    generateMapers (tmpDecoder, tmpEncoder) (macChar, utf8Char) = (
          Map.insert macChar utf8Char tmpDecoder
        , Map.insert utf8Char macChar tmpEncoder
      )
--- a/src/PDF/Font.hs
+++ b/src/PDF/Font.hs
@ -0,0 +1,27 @@
 module PDF.Font (
      Decoder
    , Encoder
    , Font(..)
    , FontSet
    , emptyFont
  ) where
 import Data.ByteString (ByteString)
 import Data.Map (Map)
 import Data.Text (Text)
 import PDF.Object (Name)
 type Decoder = ByteString -> Either String Text
 type Encoder = Text -> Either String ByteString
 data Font = Font {
      decode :: Decoder
    , encode :: Encoder
  }
 type FontSet = Map Name Font
 emptyFont :: Font
 emptyFont = Font {
      decode = \_ -> Left "No fond loaded"
    , encode = \_ -> Left "No fond loaded"
  }
--- a/src/PDF/Layer.hs
+++ b/src/PDF/Layer.hs
@ -0,0 +1,129 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.Layer (
      Layer(..)
    , LayerReader
    , Objects(..)
    , unify
  ) where
 import Control.Monad.Except (MonadError(..))
 import Control.Monad.Reader (ReaderT)
 import Data.Foldable (foldl')
 import Data.Id (Id(..), Indexed, keysSet, mapWithKey, member)
 import qualified Data.Id as Id (empty, insert, lookup, union)
 import Data.Map (Map, (!))
 import qualified Data.Map as Map (empty, lookup, union)
 import qualified Data.IntSet as IntSet (delete, toList)
 import PDF.Box (Box(..))
 import PDF.Object (
      IndirectObjCoordinates(..), Object, Occurrence(..), Structure(..)
    , XRefEntry(..), XRefSection, eofMarker, outputBody
  )
 import qualified PDF.Output as Output (line)
 import PDF.Output (
      Offset(..), Output(..), Resource(..), byteString, getOffset
    , getOffsets, newLine
  )
 import Text.Printf (printf)
 data Layer = Layer {
      occurrences :: [Occurrence]
    , objects :: Indexed Object
    , docStructure :: Structure
  } deriving Show
 type LayerReader m = ReaderT Layer m
 updateXRefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
 updateXRefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
  where
    updateEntry objectId e@(InUse {offset}) =
      case Map.lookup (ObjectId $ getId objectId) offsets of
        Nothing -> Free {nextFree = Id $ getOffset offset, generation = 65535}
        Just newOffset -> e {offset = newOffset}
    updateEntry _ e = e
 instance Output Layer where
  output (Layer {occurrences, objects, docStructure}) = do
    (body, savedOffsets) <- getOffsets (outputBody (occurrences, objects))
    let (newXRef, startXRef) = updateXRefs xRef savedOffsets
    mconcat [
          return body
        , Output.line "xref"
        , output newXRef
        , Output.line "trailer"
        , output trailer, newLine
        , Output.line "startxref"
        , Output.line (printf "%d" (getOffset startXRef))
        , byteString eofMarker
      ]
    where
      Structure {xRef, trailer} = docStructure
 data Objects = Objects
 instance Monad m => Box m Objects Layer (Indexed Object) where
  r Objects = return . objects
  w Objects newObjects layer@(Layer {occurrences, docStructure}) =
    return $ layer {
          occurrences = keptOccurrences ++ newOccurrences
        , objects = newObjects
        , docStructure = docStructure {
              xRef = (const $ InUse (Offset 0) 0) <$> newObjects
            }
      }
    where
      filterOccurrences c@(Comment _) (occ, newObjIds) = (c:occ, newObjIds)
      filterOccurrences i@(Indirect (IndirectObjCoordinates {objectId})) (occ, newObjIds)
        | member objectId newObjects = (i:occ, IntSet.delete (getId objectId) newObjIds)
        | otherwise = (occ, newObjIds)
      (keptOccurrences, newObjectIds) =
        foldr filterOccurrences ([], keysSet newObjects) occurrences
      makeOccurrence objectId =
        Indirect (IndirectObjCoordinates {objectId, versionNumber = 0})
      newOccurrences = (makeOccurrence . Id) <$> IntSet.toList newObjectIds
 instance MonadError String m => Box m (Id Object) Layer Object where
  r objectId =
    maybe (throwError "Unknown key") return . Id.lookup objectId . objects
  w objectId a layer@(Layer {objects})
    | member objectId objects = return $
      layer {objects = Id.insert objectId a objects}
    | otherwise = throwError "Unknown key"
 emptyLayer :: Layer
 emptyLayer = Layer {
      docStructure = Structure {xRef = Id.empty, trailer = Map.empty}
    , objects = Id.empty
    , occurrences = []
  }
 unify :: [Layer] -> Layer
 unify = foldl' complete emptyLayer
  where
    complete tmpLayer older =
      let mergedObjects = Id.union (objects tmpLayer) (objects older) in
      Layer {
          docStructure =
            unifyDocStructure (docStructure tmpLayer) (docStructure older)
        , objects = mergedObjects
        , occurrences =
            unifyOccurrences mergedObjects (occurrences tmpLayer) (occurrences older)
      }
 unifyDocStructure :: Structure -> Structure -> Structure
 unifyDocStructure new old = Structure {
      xRef = Id.union (xRef new) (xRef old)
    , trailer = Map.union (trailer new) (trailer old)
  }
 unifyOccurrences :: (Indexed Object) -> [Occurrence] -> [Occurrence] -> [Occurrence]
 unifyOccurrences objects new = foldr addOlder new
  where
    addOlder occurrence@(Comment _) existing = occurrence : existing
    addOlder occurrence@(Indirect indirect) existing =
      if objectId indirect `member` objects
      then occurrence : existing
      else existing
--- a/src/PDF/Object.hs
+++ b/src/PDF/Object.hs
@ -2,19 +2,20 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleInstances #-}
 module PDF.Object (
-      Content(..)
+      Dictionary
    , DirectObject(..)
    , Flow(..)
    , IndexedObjects
    , IndirectObjCoordinates(..)
    , InputStructure(..)
    , Name(..)
    , Number(..)
    , Object(..)
    , Occurrence(..)
    , StringObject(..)
    , Structure(..)
    , XRefEntry(..)
    , XRefSection
    , array
    , blank
    , dictionary
    , directObject
@ -22,34 +23,43 @@ module PDF.Object (
    , integer
    , line
    , magicNumber
    , name
    , number
    , object
    , outputBody
    , regular
    , stringObject
    , structure
    , toByteString
  ) where
-import Control.Applicative ((<|>))
+import Control.Applicative ((<|>), many)
-import Data.ByteString.Char8 (ByteString)
+import Control.Monad.Reader (asks)
-import qualified Data.ByteString.Char8 as BS (
+import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
-    concat, cons, pack, singleton, unpack
+import Data.ByteString (ByteString)
 import qualified Data.ByteString as BS (concat)
 import qualified Data.ByteString.Char8 as Char8 (
    cons, length, pack, singleton, snoc, unpack
  )
-import Data.Map (Map, (!), mapWithKey)
+import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
-import qualified Data.Map as Map (
+import Data.Id (Id(..), IdMap, Indexed)
-    delete, empty, fromList, lookup, minViewWithKey, toList, union
+import qualified Data.Id as Id (
    at, delete, empty, fromList, lookup, minViewWithKey, union
  )
-import qualified PDF.EOL as EOL (charset, parser)
+import Data.Map (Map)
-import qualified PDF.Output as Output (concat, line, string)
+import qualified Data.Map as Map (fromList, toList)
 import qualified Data.Set as Set (fromList, member)
 import qualified PDF.EOL as EOL (Style(..), charset, parser)
 import qualified PDF.Output as Output (line, string)
 import PDF.Output (
-      OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
+      OBuilder, OContext(..), Offset(..), Output(..), Resource(..), byteString
-    , byteString, getObjectId, getOffset, getOffsets, join, newLine
+    , getOffset, join, newLine, saveOffset
    , saveOffset
  )
 import PDF.Parser (
      Parser, (<?>)
    , char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option
    , sepBy, string, takeAll, takeAll1
  )
 import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
 import Text.Printf (printf)
-line :: String -> Parser u ()
+line :: MonadParser m => String -> m ()
-line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
+line l = (string (Char8.pack l) *> blank *> return ()) <?> printf "line «%s»" l
 magicNumber :: ByteString
 magicNumber = "%PDF-"
@ -60,8 +70,8 @@ eofMarker = "%%EOF"
 whiteSpaceCharset :: String
 whiteSpaceCharset = "\0\t\12 "
-blank :: Parser u ()
+blank :: MonadParser m => m ()
-blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return ()
+blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure ()
 delimiterCharset :: String
 delimiterCharset = "()<>[]{}/%"
@ -69,26 +79,24 @@ delimiterCharset = "()<>[]{}/%"
 regular :: Char -> Bool
 regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))
-integer :: (Read a, Num a) => Parser u a
+integer :: MonadParser m => m Int
-integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
+integer = decNumber <* blank <?> "decimal integer"
 -------------------------------------
 --          OBJECTS
 -------------------------------------
 type IndexedObjects = Map ObjectId Object
 --
 -- Boolean
 --
-boolean :: Parser u Bool
+boolean :: MonadParser m => m Bool
 boolean =
  (string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"
 --
 -- Number
 --
-newtype Number = Number Double deriving Show
+newtype Number = Number Double deriving (Eq, Show)
 instance Output Number where
  output (Number f) = Output.string $
@ -96,38 +104,44 @@ instance Output Number where
      (n, 0) -> printf "%d" (n :: Int)
      _ -> printf "%f" f
-number :: Parser u Number
+number :: MonadParser m => m Number
-number = Number . read . BS.unpack <$>
+number = Number <$> (sign <*> value) <?> "number"
  (mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart))
  <?> "number"
  where
-    sign = string "-" <|> option "" (char '+' >> return "")
+    sign = (string "-" *> return negate) <|> option id (char '+' >> return id)
-    integerPart = mappend <$> decNumber <*> option "" floatPart
+    value = floatNumber <|> (char '.' *> afterPoint)
-    floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber)
+    afterPoint = read . ("0." ++) . Char8.unpack <$> takeAll (`Set.member` digits)
    digits = Set.fromList ['0' .. '9']
 --
 -- StringObject
 --
-data StringObject = Literal String | Hexadecimal String deriving Show
+data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show)
 instance Output StringObject where
-  output (Literal s) = Output.string (printf "(%s)" s)
+  output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s))
-  output (Hexadecimal s) = Output.string (printf "<%s>" s)
+  output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n))
-stringObject :: Parser u StringObject
+stringObject :: MonadParser m => m StringObject
 stringObject =
-    Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
+    Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
-  <|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>')
+  <|> Hexadecimal . roundBytes <$> (char '<' *> hexNumber <* char '>')
  <?> "string object (literal or hexadecimal)"
  where
    literalString = many literalStringBlock
    literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
    normalChar = not . (`elem` ("\\()" :: String))
    matchingParenthesis =
-      mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")"
+      mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")"
    escapedChar =
-      BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode)
+      Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode)
-    octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3]
+    octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3]
    roundBytes (B16Int bs)
      | Char8.length bs `mod` 2 == 1 = B16Int (bs `Char8.snoc` '0')
      | otherwise = B16Int bs
 toByteString :: StringObject -> ByteString
 toByteString (Hexadecimal h) = b16ToBytes h
 toByteString (Literal s) = unescape s
 --
 -- Name
@ -137,13 +151,13 @@ newtype Name = Name String deriving (Eq, Ord, Show)
 instance Output Name where
  output (Name n) = Output.string ('/':n)
-name :: Parser u Name
+name :: MonadParser m => m Name
-name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
+name = Name . Char8.unpack <$> (char '/' *> takeAll regular) <?> "name"
 --
 -- Array
 --
-array :: Parser u [DirectObject]
+array :: MonadParser m => m [DirectObject]
 array =
  char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"
@ -153,14 +167,14 @@ array =
 type Dictionary = Map Name DirectObject
 instance Output Dictionary where
-  output dict =
+  output aDictionary =
    "<<" `mappend` keyValues `mappend` ">>"
    where
-      keyValues = join " " $ outputKeyVal <$> Map.toList dict
+      keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary
      outputKeyVal :: (Name, DirectObject) -> OBuilder
-      outputKeyVal (key, val) = Output.concat [output key, " ", output val]
+      outputKeyVal (key, val) = mconcat [output key, " ", output val]
-dictionary :: Parser u Dictionary
+dictionary :: MonadParser m => m Dictionary
 dictionary =
  string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
  where
@ -170,33 +184,33 @@ dictionary =
 --
 -- Null
 --
-nullObject :: Parser u ()
+nullObject :: MonadParser m => m ()
 nullObject = string "null" *> return () <?> "null object"
 --
 -- Reference
 --
 data IndirectObjCoordinates = IndirectObjCoordinates {
-      objectId :: ObjectId
+      objectId :: {-# UNPACK #-} !(Id Object)
-    , versionNumber :: Int
+    , versionNumber :: {-# UNPACK #-} !Int
  } deriving Show
-reference :: Parser u IndirectObjCoordinates
+reference :: MonadParser m => m IndirectObjCoordinates
 reference = IndirectObjCoordinates
-  <$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
+  <$> (fmap Id integer) <*> integer <* char 'R' <?> "reference to an object"
 --
 -- DirectObject
 --
 data DirectObject =
-    Boolean Bool
+    Boolean !Bool
-  | NumberObject Number
+  | NumberObject !Number
-  | StringObject StringObject
+  | StringObject !StringObject
-  | NameObject Name
+  | NameObject !Name
-  | Array [DirectObject]
+  | Array ![DirectObject]
-  | Dictionary Dictionary
+  | Dictionary !Dictionary
  | Null
-  | Reference IndirectObjCoordinates
+  | Reference !IndirectObjCoordinates
  deriving Show
 instance Output DirectObject where
@ -204,23 +218,25 @@ instance Output DirectObject where
  output (NumberObject n) = output n
  output (StringObject s) = output s
  output (NameObject n) = output n
-  output (Array a) = Output.concat ["[", join " " a, "]"]
+  output (Array a) = mconcat ["[", join " " a, "]"]
  output (Dictionary d) = output d
  output (Null) = "null"
  output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
-    Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
+    Output.string (printf "%d %d R" (getId objectId) versionNumber)
-directObject :: Parser u DirectObject
+directObject :: MonadParser m => m DirectObject
-directObject =
+directObject = (peek >>= dispatch) <?> "direct object"
-    Boolean <$> boolean
+  where
-  <|> Reference <$> reference {- defined before Number because Number is a prefix of it -}
+    dispatch 't' = Boolean <$> boolean
    dispatch 'f' = Boolean <$> boolean
    dispatch '(' = StringObject <$> stringObject
    dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary
    dispatch '/' = NameObject <$> name
    dispatch '[' = Array <$> array
    dispatch 'n' = nullObject *> return Null
    dispatch _ =
        Reference <$> reference {- defined before Number because Number is a prefix of it -}
      <|> NumberObject <$> number
  <|> StringObject <$> stringObject
  <|> NameObject <$> name
  <|> Array <$> array
  <|> Dictionary <$> dictionary
  <|> const Null <$> nullObject
  <?> "direct object"
 --
 -- Object
@ -235,27 +251,34 @@ data Object =
 instance Output Object where
  output (Direct d) = output d
-  output (Stream {header, streamContent}) = Output.concat [
+  output (Stream {header, streamContent}) = mconcat [
        output header, newLine
      , Output.line "stream"
      , byteString streamContent
      , "endstream"
    ]
 object :: Int -> Id Object
 object = Id
 --
 -- Occurrence
 --
 data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show
-outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
+outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder
 outputOccurrence _ (Comment c) = Output.line c
 outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
-  saveOffset (Object objectId) >> Output.concat [
+  saveOffset (ObjectId $ getId objectId) >> mconcat [
-        Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
+        Output.line (printf "%d %d obj" (getId objectId) versionNumber)
-      , output (objects ! objectId), newLine
+      , output (objects `Id.at` objectId), newLine
      , Output.line "endobj"
    ]
 outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder
 outputBody (occurrences, objects) =
  output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
 -------------------------------------
 --          XREF TABLE
 -------------------------------------
@ -267,15 +290,20 @@ data XRefEntry = InUse {
      offset :: Offset
    , generation :: Int
  } | Free {
-      nextFree :: ObjectId
+      nextFree :: (Id Object)
    , generation :: Int
  } deriving Show
 instance Output XRefEntry where
-  output (InUse {offset, generation}) =
+  output xRefEntry = Output.string (build xRefEntry) `mappend` endXRefEntryLine
-      Output.line (printf "%010d %05d n " (getOffset offset) generation)
+    where
-  output (Free {nextFree, generation}) =
+      build (InUse {offset, generation}) =
-      Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
+        printf "%010d %05d n" (getOffset offset) generation
      build (Free {nextFree, generation}) =
        printf "%010d %05d f" (getId nextFree) generation
      endXRefEntryLine = OContext (asks padEOLToTwoBytes) >>= Output.line
      padEOLToTwoBytes EOL.CRLF = ("" :: String)
      padEOLToTwoBytes _ = " "
 entry :: Parser u XRefEntry
 entry = do
@ -287,52 +315,51 @@ entry = do
      char 'n' *> return (InUse {offset = Offset big, generation})
    free :: Int -> Int -> Parser u XRefEntry
    free big generation =
-      char 'f' *> return (Free {nextFree = ObjectId big, generation})
+      char 'f' *> return (Free {nextFree = Id big, generation})
 --
 -- XRefSubSection
 --
 data XRefSubSection = XRefSubSection {
-      firstObjectId :: ObjectId
+      firstObjectId :: (Id Object)
    , entries :: [XRefEntry]
  } deriving Show
 instance Output XRefSubSection where
  output (XRefSubSection {firstObjectId, entries}) =
-      Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
+      Output.line (printf "%d %d" (getId firstObjectId) (length entries))
    `mappend` output entries
 xRefSubSection :: Parser u XRefSubSection
 xRefSubSection = do
  (firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
  entries <- count entriesNumber entry
-  return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
+  return $ XRefSubSection {firstObjectId = Id firstId, entries}
-type XRefSection = Map ObjectId XRefEntry
+type XRefSection = IdMap Object XRefEntry
 instance Output XRefSection where
  output = output . sections
    where
      sections tmp =
-        case Map.minViewWithKey tmp of
+        case Id.minViewWithKey tmp of
          Nothing -> []
-          Just ((objectId@(ObjectId value), firstEntry), rest) ->
+          Just ((objectId@(Id value), firstEntry), rest) ->
            let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
            subSection : sections sndRest
      section firstObjectId stack nextValue tmp =
-        let nextId = ObjectId nextValue in
+        let nextId = (Id nextValue :: Id Object) in
-        case Map.lookup nextId tmp of
+        case Id.lookup nextId tmp of
          Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
          Just nextEntry ->
-            section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
+            section firstObjectId (nextEntry:stack) (nextValue + 1) (Id.delete nextId tmp)
 xRefSection :: Parser u XRefSection
-xRefSection = foldr addSubsection Map.empty <$>
+xRefSection = foldr addSubsection Id.empty <$>
  (line "xref" *> xRefSubSection `sepBy` many EOL.parser)
  where
    addSubsection (XRefSubSection {firstObjectId, entries}) =
-      Map.union . Map.fromList $ zip ([firstObjectId..]) entries
+      Id.union . Id.fromList $ zip ([firstObjectId..]) entries
 --
 -- Structure
@ -353,45 +380,10 @@ structure =
  <$> xRefSection
  <*> (string "trailer" *> blank *> dictionary <* EOL.parser)
 updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
 updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
  where
    updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
    updateEntry _ e = e
 --
 -- Flow
 --
 data Flow = Flow {
      occurrencesStack :: [Occurrence]
-    , tmpObjects :: IndexedObjects
+    , tmpObjects :: (Indexed Object)
  } deriving Show
 --
 -- Content
 --
 data Content = Content {
      occurrences :: [Occurrence]
    , objects :: IndexedObjects
    , docStructure :: Structure
  } deriving Show
 outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
 outputBody (occurrences, objects) =
  output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
 instance Output Content where
  output (Content {occurrences, objects, docStructure}) =
    fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
    >>= \(body, (xref, startXRef)) -> Output.concat [
          body
        , Output.line "xref"
        , output xref
        , Output.line "trailer"
        , output trailer, newLine
        , Output.line "startxref"
        , Output.line (printf "%d" (getOffset startXRef))
        , byteString eofMarker
      ]
    where
      Structure {xRef, trailer} = docStructure
--- a/src/PDF/Object/Navigation.hs
+++ b/src/PDF/Object/Navigation.hs
@ -0,0 +1,180 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DeriveFunctor #-}
 module PDF.Object.Navigation (
      Nav(..)
    , PPath(..)
    , ROLayer
    , RWLayer
    , StreamContent(..)
    , (./)
    , (//)
    , (>./)
    , (>//)
    , castObject
    , catalog
    , getDictionary
    , getKey
    , objectById
    , save
  ) where
 import Codec.Compression.Zlib (compress, decompress)
 import Control.Monad.Except (MonadError(..))
 import Control.Monad.Reader (MonadReader(..))
 import Control.Monad.State (MonadState)
 import Data.ByteString (ByteString)
 import qualified Data.ByteString as BS (length)
 import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
 import Data.Id (Id)
 import qualified Data.Id as Id (at)
 import qualified Data.Map as Map (adjust, insert, lookup)
 import PDF.Box (Box(..), at, edit{-, runRO-})
 import PDF.Layer (Layer(..))
 import PDF.Object (
      Dictionary, DirectObject(..), IndirectObjCoordinates(..)
    , Name(..), Number(..), Object(..), Structure(..)
  )
 import Prelude hiding (fail)
 import Text.Printf (printf)
 newtype PPath = PPath [Component]
 data DPath = DPath {
      root :: Id Object
    , offset :: [Component]
  } deriving Show
 push :: Component -> DPath -> DPath
 push component dPath = dPath {offset = (offset dPath) ++ [component]}
 data Nav a = Nav {
      dPath :: DPath
    , value :: a
  } deriving (Functor)
 instance Show a => Show (Nav a) where
  show (Nav {dPath, value}) = "Nav {dPath = " ++ show dPath ++ ", value = " ++ show value ++ "}"
 type ROLayer m = (MonadReader Layer m, MonadError String m)
 type RWLayer m = (MonadState Layer m, MonadError String m)
 type Component = String
 getDictionary :: ROLayer m => Nav Object -> m (Nav Dictionary)
 getDictionary (Nav {dPath, value}) =
  case value of
    (Direct (Dictionary aDict)) -> return $ Nav {dPath, value = aDict}
    (Direct (Reference ref)) -> objectById (objectId ref) >>= getDictionary
    (Stream {header}) -> return $ Nav {dPath, value = header}
    obj -> expected "dictionary : " obj
 expected :: (MonadError String m, Show a) => String -> a -> m b
 expected name = throwError . printf "Not a %s: %s" name . show
 getKey :: ROLayer m => String -> Nav Object -> m (Nav DirectObject)
 getKey key navObject = getDictionary navObject >>= f
  where
    errorMessage =
      printf "Key %s not found in object %s" key (show navObject)
    f (Nav {dPath, value}) =
      case Map.lookup (Name key) value of
        Nothing -> throwError errorMessage
        Just dObj -> return $ Nav {dPath = push key dPath, value = dObj}
 objectById :: ROLayer m => (Id Object) -> m (Nav Object)
 objectById objectId = do
  layer <- ask
  return $ Nav {
        dPath = DPath {root = objectId, offset = []}
      , value = objects layer `Id.at` objectId
    }
 castObject :: ROLayer m => Nav DirectObject -> m (Nav Object)
 castObject (Nav {value = !(Reference (IndirectObjCoordinates {objectId}))}) =
  objectById objectId
 castObject (Nav {dPath, value}) = return $ Nav {dPath, value = Direct value}
 (./) :: ROLayer m => m (Nav Object) -> Component -> m (Nav Object)
 (./) navObject key = (navObject >>= getKey key >>= castObject)
 (//) :: ROLayer m => m (Nav Object) -> PPath -> m (Nav Object)
 (//) navObject (PPath []) = navObject
 (//) navObject (PPath (key:keys)) = navObject ./ key // (PPath keys)
 (>./) :: ROLayer m => Nav Object -> Component -> m (Nav Object)
 (>./) navObject = (return navObject ./)
 (>//) :: ROLayer m => Nav Object -> PPath -> m (Nav Object)
 (>//) navObject = (return navObject //)
 catalog :: ROLayer m => m (Nav Object)
 catalog = do
  value <- Direct . Dictionary . trailer . docStructure <$> ask
  return $ Nav {dPath = undefined, value}
 setAt :: [Component] -> DirectObject -> Dictionary -> Dictionary
 setAt [] _ dict = dict
 setAt [component] directObject dict =
  Map.insert (Name component) directObject dict
 setAt (component:components) directObject dict =
  Map.adjust setDirObj (Name component) dict
  where
    setDirObj (Dictionary subDict) =
      Dictionary $ setAt components directObject subDict
    setDirObj x = x
 save :: RWLayer m => Nav Object -> m ()
 save nav@(Nav {dPath, value = Direct dObj}) =
  edit .at (root dPath) $ return . setObj
  where
    setObj obj@(Stream {header}) =
      obj {header = setAt (offset dPath) dObj header}
    setObj (Direct (Dictionary dict)) =
      Direct . Dictionary $ setAt (offset dPath) dObj dict
    setObj _ = value nav
 save (Nav {dPath = DPath {root, offset = []}, value}) = edit $ w root value
 save _ = throwError "Streams can't be properties of PDF objects"
 data StreamContent = Clear | Raw
 onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
 onLazy f = Lazy.toStrict . f . Lazy.fromStrict
 contains :: String -> DirectObject -> Bool
 contains needle !(NameObject (Name n)) = needle == n
 contains needle !(Array directObjects) = oneOf directObjects (contains needle)
  where
    oneOf [] _ = False
    oneOf (x:xs) p = p x || oneOf xs p
 contains _ _ = False
 instance MonadError String m => Box m StreamContent (Nav Object) ByteString where
  r sc = r sc . value
  w sc newStreamContent nav = setValue <$> w sc newStreamContent (value nav)
    where
      setValue value = nav {value}
 instance MonadError String m => Box m StreamContent Object ByteString where
  r Raw (Stream {streamContent}) = return streamContent
  r Clear (Stream {header, streamContent}) = return $
    case Map.lookup (Name "Filter") header of
      Just directObject
        | contains "FlateDecode" directObject -> onLazy decompress streamContent
      _ -> streamContent
  r _ obj = expected "stream" obj
  w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
  w Clear newStreamContent (Stream {header}) =
    let streamContent = getStreamContent (Map.lookup (Name "Filter") header) in
    return $ Stream {header = fixLength streamContent, streamContent}
    where
      getStreamContent (Just directObject)
        | contains "FlateDecode" directObject = onLazy compress newStreamContent
      getStreamContent _ = newStreamContent
      fixLength sc =
        let newLength = NumberObject . Number . fromIntegral $ BS.length sc in
        Map.insert (Name "Length") newLength header
  w _ _ obj = expected "stream" obj
--- a/src/PDF/Output.hs
+++ b/src/PDF/Output.hs
@ -5,14 +5,12 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 module PDF.Output (
      OBuilder
    , ObjectId(..)
    , OContext(..)
    , Offset(..)
    , Output(..)
    , Resource(..)
    , byteString
    , char
    , concat
    , getOffsets
    , join
    , line
@ -32,12 +30,10 @@ import qualified Data.Map as Map (singleton)
 import Data.String (IsString(..))
 import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
 import qualified PDF.EOL as EOL (Style(..))
 import Prelude hiding (concat)
 newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
 newtype Offset = Offset {getOffset :: Int} deriving (Show)
-data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
+data Resource = StartXRef | ObjectId Int deriving (Eq, Ord)
 newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
 type OBuilder = OContext Builder
@ -59,16 +55,12 @@ saveOffset resource = OContext $
 lift :: (a -> Builder) -> a -> OBuilder
 lift f a = return (f a)
-getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
+getOffsets :: OBuilder -> OContext (Builder, Map Resource Offset)
-getOffsets (OContext builder) =
+getOffsets (OContext builder) = OContext $ listen builder
  OContext (listen builder >>= \(a, w) -> return (return a, w))
 append :: OBuilder -> OBuilder -> OBuilder
 append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)
 concat :: [OBuilder] -> OBuilder
 concat = foldl mappend mempty
 #if MIN_VERSION_base(4,11,0)
 instance Semigroup OBuilder where
  (<>) = append
@ -94,7 +86,7 @@ instance Output Bool where
  output True = string "true"
 instance Output a => Output [a] where
-  output = concat . fmap output
+  output = mconcat . fmap output
 join :: Output a => String -> [a] -> OBuilder
 join _ [] = mempty
@ -116,7 +108,7 @@ char :: Char -> OBuilder
 char c = lift char8 c <* offset (+1)
 string :: String -> OBuilder
-string s = lift string8 s <* offset (+ toEnum (length s))
+string s = lift string8 s <* offset (+ length s)
 line :: String -> OBuilder
 line l = string l `mappend` newLine
--- a/src/PDF/Pages.hs
+++ b/src/PDF/Pages.hs
@ -0,0 +1,181 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 module PDF.Pages (
      Contents(..)
    , FontCache
    , Page(..)
    , PageNumber(..)
    , Pages(..)
    , withFonts
    , withResources
  ) where
 import Control.Applicative (Alternative)
 import Control.Applicative ((<|>))
 import Control.Monad (foldM)
 import Control.Monad.Except (MonadError(..))
 import Control.Monad.Reader (ReaderT, runReaderT)
 import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
 import Control.Monad.Trans (lift)
 import Data.ByteString (ByteString)
 import Data.ByteString.Lazy (toStrict)
 import Data.Id (Id(..), IdMap)
 import qualified Data.Id as Id (empty, insert, lookup)
 import Data.Map (Map)
 import qualified Data.Map as Map (empty, elems, fromList, insert, toList)
 import Data.Maybe (listToMaybe)
 import Data.OrderedMap (OrderedMap, build, mapi)
 import PDF.Box (Box(..), at, edit, runRO)
 import qualified PDF.CMap as CMap (parse)
 import PDF.Content (Content(..))
 import qualified PDF.Content as Content (parse)
 import PDF.Encoding (encoding)
 import PDF.EOL (Style(..))
 import PDF.Font (Font, FontSet)
 import PDF.Layer (Layer(..), LayerReader)
 import PDF.Object (
      Dictionary, DirectObject(..), IndirectObjCoordinates(..)
    , Name(..), Number(..), Object(..)
  ,)
 import PDF.Object.Navigation (
      Nav(..), PPath(..), ROLayer, RWLayer, StreamContent(..), (//), (>./)
    , (>//), catalog, getDictionary, getKey, objectById, save
  )
 import PDF.Output (render)
 import Text.Printf (printf)
 type CachedFonts = IdMap Object Font
 type FontCache m = StateT CachedFonts m
 data Page = Page {
      byteContents :: OrderedMap (Id Object) ByteString
    , resources :: Dictionary
    , source :: (Id Object)
  }
 loadByteContents :: ROLayer m => DirectObject -> m (OrderedMap (Id Object) ByteString)
 loadByteContents directObject = do
  objs <- sequence . build objectById $ objectIds directObject
  mapM (r Clear) objs
  where
    objectIds (Array l) = l >>= getReference
    objectIds dirObj = getReference dirObj
 getFontDictionary :: (Alternative m, ROLayer m) => Nav Object -> m Dictionary
 getFontDictionary pageObj =
    (pageObj >// PPath ["Resources", "Font"] >>= fmap value . getDictionary)
  <|> return Map.empty
 cache :: ROLayer m => (Id Object -> FontCache m Font) -> Id Object -> FontCache m Font
 cache loader objectId =
  gets (Id.lookup objectId) >>= maybe load return
  where
    load = do
      value <- loader objectId
      modify $ Id.insert objectId value
      return value
 loadFont :: (Alternative m, ROLayer m) => Id Object -> FontCache m Font
 loadFont objectId = lift $ objectById objectId >>= tryMappings
  where
    tryMappings object =
        (object >./ "ToUnicode" >>= r Clear >>= CMap.parse)
      <|> (object >./ "Encoding" >>= loadEncoding . value)
      <|> (throwError $ unknownFormat (show objectId) (show object))
    unknownFormat = printf "Unknown font format for object #%s : %s"
    loadEncoding :: MonadError String m => Object -> m Font
    loadEncoding (Direct (NameObject (Name name))) = encoding name
    loadEncoding object =
      throwError $ printf "Encoding must be a name, not that : %s" $ show object
 loadResources :: (Alternative m, ROLayer m) => Dictionary -> FontCache m FontSet
 loadResources = foldM addFont Map.empty . Map.toList
  where
    addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
      flip (Map.insert name) output <$> cache loadFont objectId
    addFont output _ = return output
 getReference :: DirectObject -> [Id Object]
 getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
 getReference _ = []
 loadPage :: (Alternative m, ROLayer m) => Id Object -> m Page
 loadPage source = do
  page <- objectById source
  byteContents <- loadByteContents . value =<< getKey "Contents" page
  resources <- getFontDictionary page
  return $ Page {byteContents, resources, source}
 pagesList :: (Alternative m, ROLayer m) => m [Id Object]
 pagesList =
    (catalog // PPath ["Root", "Pages", "Kids"] >>= getReferences . value)
  <|> return []
  where
    getReferences (Direct (Array kids)) = return $ getReference =<< kids
    getReferences _ = throwError "Not a pages array"
 editPagesList :: RWLayer m => ([DirectObject] -> [DirectObject]) -> m ()
 editPagesList f = do
  pages <- runRO (catalog // PPath ["Root", "Pages"])
  kids <- runRO (pages >./ "Kids")
  count <- runRO (pages >./ "Count")
  (newSize, newKids) <- editKids (value kids)
  save $ kids {value = newKids}
  save $ count {value = Direct $ NumberObject newSize}
  where
    editKids (Direct (Array pageRefs)) =
      let result = f pageRefs in
      return (Number . fromIntegral $ length result, Direct $ Array result)
    editKids _ = throwError "Invalid format for Root.Pages.Kids (not an array)"
 updatePage :: RWLayer m => Page -> m ()
 updatePage (Page {byteContents}) = sequence_ $ mapi updateByteContent byteContents
  where
    updateByteContent source byteContent =
      edit .at source .at Clear $ \_ -> return byteContent
 data Pages = Pages
 newtype PageNumber = P Int
 data Contents = Contents
 instance (Alternative m, MonadError String m) => Box m Pages Layer (Map Int Page) where
  r Pages = runReaderT (numbered <$> pagesList >>= mapM loadPage)
    where
      numbered :: [Id Object] -> Map Int (Id Object)
      numbered = Map.fromList . zip [1..]
  w Pages pages = execStateT $ do
    mapM_ updatePage pages
    setPagesList $ Map.elems (source <$> pages)
    where
      setPagesList =
        editPagesList . const . fmap (Reference . flip IndirectObjCoordinates 0)
 instance (Alternative m, MonadError String m) => Box m PageNumber Layer Page where
  r (P p) layer
    | p < 1 = throwError "Pages start at 1"
    | otherwise = runReaderT (drop (p - 1) <$> pagesList >>= firstPage) layer
      where
        firstPage =
          maybe (throwError "Page is out of bounds") loadPage . listToMaybe
  w (P p) page = execStateT $ do
    updatePage page
    editPagesList $ setPage (Reference $ IndirectObjCoordinates (source page) 0)
    where
      setPage ref l = take (p-1) l ++ ref : drop p l
 instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
  r Contents = return . fmap Content.parse . byteContents
  w Contents contents page = return $ page {byteContents}
    where
      byteContents = toStrict . render LF <$> contents
 withFonts :: MonadError String m => (Layer -> FontCache (LayerReader m) a) -> Layer -> m a
 withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer
 withResources :: (Alternative m, MonadError String m) => (Page -> ReaderT FontSet m b) -> Page -> FontCache (LayerReader m) b
 withResources f p =
  loadResources (resources p) >>= lift . lift . runReaderT (f p)
--- a/src/PDF/Parser.hs
+++ b/src/PDF/Parser.hs
@ -1,56 +1,78 @@
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE UndecidableInstances #-}
 module PDF.Parser (
-      Parser
+      MonadParser(..)
    , Parser
    , (<?>)
    , block
    , char
    , choice
    , count
    , decNumber
    , hexNumber
    , many
    , octDigit
    , on
    , oneOf
    , option
    , runParser
-    , sepBy
+    , evalParser
    , string
    , takeAll
    , takeAll1
  ) where
-import Control.Applicative ((<|>), empty)
+import Control.Applicative (Alternative, (<|>))
 import Control.Monad (MonadPlus)
 import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.State (StateT(..), evalStateT)
-import Control.Monad.Trans (lift)
+import Control.Monad.Trans (MonadTrans(..))
 import qualified Data.Attoparsec.ByteString.Char8 as Atto (
-    Parser, char, parseOnly, satisfy, string, take, takeWhile, takeWhile1
+      Parser, char, decimal, double, endOfInput, parseOnly, peekChar', satisfy, string, take
    , takeWhile, takeWhile1
  )
 import Data.ByteString (ByteString)
 import Data.ByteString.Char8.Util (B16Int(..))
 import Data.Char (toLower)
 import Data.Set (Set)
 import qualified Data.Set as Set (fromList, member, unions)
 import Prelude hiding (fail)
 type MonadDeps m = (MonadFail m, MonadPlus m)
 class MonadDeps m => MonadParser m where
  block :: Int -> m ByteString
  char :: Char -> m Char
  decNumber :: m Int
  floatNumber :: m Double
  endOfInput :: m ()
  hexNumber :: m B16Int
  oneOf :: String -> m Char
  peek :: m Char
  string :: ByteString -> m ByteString
  takeAll :: (Char -> Bool) -> m ByteString
  takeAll1 :: (Char -> Bool) -> m ByteString
 instance MonadParser Atto.Parser where
  block = Atto.take
  char = Atto.char
  endOfInput = Atto.endOfInput
  decNumber = Atto.decimal
  floatNumber = Atto.double
  hexNumber = B16Int <$> Atto.takeWhile1 (`Set.member` hexDigits)
  oneOf charSet = Atto.satisfy (`elem` charSet)
  peek = Atto.peekChar'
  string s = Atto.string s <?> show s
  takeAll = Atto.takeWhile
  takeAll1 = Atto.takeWhile1
 instance (MonadParser m, MonadTrans t, MonadDeps (t m)) => MonadParser (t m) where
  block = lift . block
  char = lift . char
  endOfInput = lift $ endOfInput
  decNumber = lift $ decNumber
  floatNumber = lift $ floatNumber
  hexNumber = lift $ hexNumber
  oneOf = lift . oneOf
  peek = lift $ peek
  string = lift . string
  takeAll = lift . takeAll
  takeAll1 = lift . takeAll1
 type Parser s = StateT s Atto.Parser
-(<?>) :: Parser s a -> String -> Parser s a
+(<?>) :: (Alternative m, MonadFail m) => m a -> String -> m a
 (<?>) parser debugMessage = parser <|> fail debugMessage
 block :: Int -> Parser s ByteString
 block = lift . Atto.take
 char :: Char -> Parser s Char
 char = lift . Atto.char
 choice :: [Parser s a] -> Parser s a
 choice = foldr (<|>) empty
 count :: Int -> Parser s a -> Parser s [a]
 count 0 _ = return []
 count n p = (:) <$> p <*> count (n-1) p
 decNumber :: Parser s ByteString
 decNumber = lift $ Atto.takeWhile1 (`Set.member` digits)
 digits :: Set Char
 digits = Set.fromList ['0'..'9']
@ -59,13 +81,7 @@ hexDigits = Set.unions [digits, Set.fromList af, Set.fromList $ toLower <$> af]
  where
    af = ['A'..'F']
-hexNumber :: Parser s ByteString
+octDigit :: MonadParser m => m Char
 hexNumber = lift $ Atto.takeWhile1 (`Set.member` hexDigits)
 many :: Parser s a -> Parser s [a]
 many parser = (:) <$> parser <*> many parser <|> return []
 octDigit :: Parser s Char
 octDigit = oneOf ['0'..'7']
 on :: Parser s a -> ByteString -> Parser s (Either String a)
@ -74,25 +90,8 @@ on (StateT parserF) input = StateT $ \state ->
    Left errorMsg -> return (Left errorMsg, state)
    Right (result, newState) -> return (Right result, newState)
-oneOf :: String -> Parser s Char
+runParser :: Parser s a -> s -> ByteString -> Either String (a, s)
-oneOf charSet = lift $ Atto.satisfy (`elem` charSet)
+runParser parser initState = Atto.parseOnly (runStateT parser initState)
-option :: a -> Parser s a -> Parser s a
+evalParser :: Parser s a -> s -> ByteString -> Either String a
-option defaultValue p = p <|> pure defaultValue
+evalParser parser initState = Atto.parseOnly (evalStateT parser initState)
 runParser :: Parser s a -> s -> ByteString -> Either String a
 runParser parser initState =
  Atto.parseOnly (evalStateT parser initState)
 sepBy :: Parser s a -> Parser s b -> Parser s [a]
 sepBy parser separator =
  option [] $ (:) <$> parser <*> many (separator *> parser)
 string :: ByteString -> Parser s ByteString
 string = lift . Atto.string
 takeAll :: (Char -> Bool) -> Parser s ByteString
 takeAll = lift . Atto.takeWhile
 takeAll1 :: (Char -> Bool) -> Parser s ByteString
 takeAll1 = lift . Atto.takeWhile1
--- a/src/PDF/Update.hs
+++ b/src/PDF/Update.hs
@ -1,47 +0,0 @@
 {-# LANGUAGE NamedFieldPuns #-}
 module PDF.Update (
    unify
  ) where
 import Data.Map (member)
 import qualified Data.Map as Map (empty, union)
 import PDF.Object (
      Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
    , Structure(..)
  )
 emptyContent :: Content
 emptyContent = Content {
      docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
    , objects = Map.empty
    , occurrences = []
  }
 unify :: [Content] -> Content
 unify = foldl complete emptyContent
  where
    complete tmpContent older =
      let mergedObjects = Map.union (objects tmpContent) (objects older) in
      Content {
          docStructure =
            unifyDocStructure (docStructure tmpContent) (docStructure older)
        , objects = mergedObjects
        , occurrences = 
            unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
      }
 unifyDocStructure :: Structure -> Structure -> Structure
 unifyDocStructure update original = Structure {
      xRef = Map.union (xRef update) (xRef original)
    , trailer = Map.union (trailer update) (trailer original)
  }
 unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
 unifyOccurrences objects update = foldr addOlder update
  where
    addOlder occurrence@(Comment _) existing = occurrence : existing
    addOlder occurrence@(Indirect indirect) existing =
      if objectId indirect `member` objects
      then occurrence : existing
      else existing
--- a/test/Main.hs
+++ b/test/Main.hs
@ -0,0 +1,9 @@
 {-# LANGUAGE OverloadedStrings #-}
 import Test.HUnit
 import Object (testNumber, testString)
 main :: IO ()
 main = runTestTT (TestList [
      testNumber
    , testString
  ]) *> return ()
--- a/test/Object.hs
+++ b/test/Object.hs
@ -0,0 +1,50 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Object (
      testNumber
    , testString
  ) where
 import Data.ByteString.Char8 (pack)
 import Data.ByteString.Char8.Util (B16Int(..))
 import PDF.Object (Number(..), StringObject(..), name, number, stringObject)
 import PDF.Parser (MonadParser, Parser, evalParser)
 import Test.HUnit
 check :: (Eq a, Show a) => Parser () a -> (a, String) -> Test
 check parser (aim, input) =
  TestCase $ assertEqual message (Right aim) (parse input)
  where
    message = "parse: " ++ input
    parse = evalParser parser () . pack
 testName :: Test
 testName = TestLabel "Name" . TestList $ check name <$> [
  ]
 testNumber :: Test
 testNumber = TestLabel "Number" . TestList $ check number <$> [
      (Number 123, "123")
    , (Number 43445, "43445")
    , (Number 17, "+17")
    , (Number (-98), "-98")
    , (Number 0, "0")
    , (Number 34.5, "34.5")
    , (Number (-3.62), "-3.62")
    , (Number 123.6, "+123.6")
    , (Number 4, "4.")
    , (Number (-0.002), "-.002")
    , (Number 0, "0.0")
  ]
 testString :: Test
 testString = TestLabel "StringObject" . TestList $ check stringObject <$> [
      (Literal "This is a string", "(This is a string)")
    , (Literal "Strings may contain newlines\nand such .", "(Strings may contain newlines\nand such .)")
    , (Literal "Strings may contain balanced parentheses ( ) and\nspecial characters ( * ! & } ^ % and so on ) .", "(Strings may contain balanced parentheses ( ) and\nspecial characters ( * ! & } ^ % and so on ) .)")
    , (Literal "The following is an empty string .", "(The following is an empty string .)")
    , (Literal "", "()")
    , (Literal "It has zero ( 0 ) length .", "(It has zero ( 0 ) length .)")
    , (Hexadecimal $ B16Int "4E6F762073686D6F7A206B6120706F702E", "<4E6F762073686D6F7A206B6120706F702E>")
    , (Hexadecimal $ B16Int "901FA3", "<901FA3>")
    , (Hexadecimal $ B16Int "901FA0", "<901FA>")
  ]
Author	SHA1	Message	Date
Tissevert	6a7e9e9595	Expose a toList function similar to Data.Map in module Id for IdMap and allow mapping over keys and values in a monadic construct like mapM	2020-06-03 15:15:07 +02:00
Tissevert	d9f69014a0	Make a couple improvements in performance + add an example script to extract pages from a PDF	2020-05-28 18:54:15 +02:00
Tissevert	f6664683c7	Once again something that should never have been committed	2020-03-20 09:34:53 +01:00
Tissevert	c491e8a70c	Forgot to remove deprecated source file	2020-03-19 12:53:35 +01:00
Tissevert	09bd706748	Export Content operators, needed to write filters like reveal	2020-03-19 10:27:29 +01:00
Tissevert	729e312f90	Actually, the spec calls 'catalog' what we call 'origin' — use 'catalog' for more clarity in regard to the spec	2020-03-19 10:27:29 +01:00
Tissevert	6d265633e4	Export Instructions constructor from PDF.Content, used by reveal	2020-03-19 10:27:29 +01:00
Tissevert	1eb1c23053	Found a nicer way to handle the too long IndirectObjCoordinates for Object Navigation	2020-03-19 10:27:29 +01:00
Tissevert	44125f75a6	The orphan instance for MonadState s m => MonadReader s m really can't be used, so replace it with a mere function that runs an operation on a ReaderT into the monad State, allowing to borrow operations on MonadReader in a MonadState context	2020-03-19 10:27:29 +01:00
Tissevert	c8a5e2b191	Wait, CachedFonts are indexed by Id Object so it could be an IdMap actually	2020-03-19 10:27:29 +01:00
Tissevert	11640c8465	Replace 'cacheFonts' by more versatile 'withFonts' inspired by 'withResources' that avoid having to declare an inline function to capture the 'layer' argument and pass it twice	2020-03-19 10:27:29 +01:00
Tissevert	e94a09b3ec	Add a Traversable instance for IdMap, needed in reveal and useful in general to be able to use atAll	2020-03-19 10:27:29 +01:00
Tissevert	ba7dd6a690	Make cacheFonts slightly more useful by passing layer directly to it and run the ReaderT underneath	2020-03-19 10:27:29 +01:00
Tissevert	d21e14f9a4	Hey, zlib isn't needed anymore for getText since all decoding is done directly in the Box instance for Streams	2020-03-19 10:27:28 +01:00
Tissevert	a1c2fbf110	Add an alias to Id to lift type ambiguities like 'chunk' in PDF.Content.Text	2020-03-19 10:27:28 +01:00
Tissevert	24630a04a1	Implement 'w' for Pages Box instances	2020-03-19 10:27:28 +01:00
Tissevert	ee5e7500a8	Implement 'w' for Box m Chunks Content (Indexed Text)	2020-03-19 10:27:28 +01:00
Tissevert	d8aec5bf80	Add Box instance for IdMap a b, remove restriction on new keys in the Map instance since it's not really needed and could be better implemented like in OrderedMap by first using 'r'	2020-03-19 10:27:28 +01:00
Tissevert	25e2823c75	Generalize register to all IdMap a b, since it's gonna be needed by Indexed Text too	2020-03-19 10:27:28 +01:00
Tissevert	5027b079eb	Include page numbers in chunks label, needed for long documents with many pages	2020-03-19 10:27:28 +01:00
Tissevert	5722dd1a04	Use IntMap for all Maps on Ids	2020-03-19 10:27:28 +01:00
Tissevert	f31e9eb38b	Generalize Ids out of Content to handle Object Ids too	2020-03-19 10:27:21 +01:00
Tissevert	0f857c457d	Use a defined monadic stack in Pages to lift the MonadReader ambiguity and allow finishing to reimplement getText demo	2020-03-14 16:57:16 +01:00
Tissevert	40475a3093	Clean unneeded stuff separating the monadic type constraint from the actual monad stack used, one more step towrds MonadFail -> MonadError	2020-03-14 16:55:34 +01:00
Tissevert	a9d3e5d326	Clean unused dependencies from Map + use a more defined Monad for the Box Chunks instance, hoping we will be able to clear the whole stack someday and stop requiring that RoContext type, unboxing and reboxing the FontSet for no good	2020-03-14 16:27:56 +01:00
Tissevert	f2a99e1fd2	Reorder module PDF.Body in alphabetical order	2020-03-14 16:25:26 +01:00
Tissevert	5bf2b08fa9	Try replacing general monadic type constraint by a definite monad stack	2020-03-11 22:35:19 +01:00
Tissevert	5b8d951516	WIP: Try about everything that's possible to try, OrderedMap or [(,)], try to decouple Box instance for Content and the one for Indexed Text, breaks getText… will probably require some advanced effect library, there seems to be a weird MonadReader conflict in the errors messages	2020-03-11 18:55:18 +01:00
Tissevert	d3f1b97f3a	Replace the fake instance of Box for Content over Indexed Text with the true one using renderText	2020-03-11 18:53:41 +01:00
Tissevert	c4c3e35e09	Write said instance	2020-03-11 18:52:09 +01:00
Tissevert	10f8c711da	Implement set and mapi on OrderedMap for convenience and to write a Box instance over OrderedMap like the one over Map	2020-03-11 18:51:49 +01:00
Tissevert	b6c1f670ef	Generalize the search for FlateDecode (there can be several filters in an array)	2020-03-11 10:47:52 +01:00
Tissevert	3b1a5152e4	Try connecting all the Box instance in the getText demo, try to encode pages contents with a simple assoc list	2020-03-10 22:57:11 +01:00
Tissevert	a04adff1d2	Prepare real instance of Box using renderText	2020-03-10 22:55:16 +01:00
Tissevert	103037ffb2	Fix mistake in arity of operator "	2020-03-10 22:53:27 +01:00
Tissevert	dce10ae63a	Keep Page as only a reference object keeping the ObjectId explicit so we can modify the actual objects one day, write an OrderedMap data structure to help	2020-03-08 22:18:47 +01:00
Tissevert	f2986da96d	Simplify Content abstracting over MonadParser for no reason and provide instead an parse that's in MonadFail to avoid having to handle Either outside	2020-03-08 22:16:23 +01:00
Tissevert	673321bf0a	Implement encoder for good	2020-03-08 22:14:36 +01:00
Tissevert	0ade9cc2f5	Implement proper text formatting into PDF instructions using the new encode feature available in Fonts	2020-03-08 00:04:18 +01:00
Tissevert	457f1755e6	Prepare storing the reverse mapping for CMaps, divided by length to be able to implement encoding with a reasonable complexity	2020-03-08 00:02:24 +01:00
Tissevert	ca40d2df76	Don't use (!?) operator that doesn't exist before containers 0.5.9 for maximum compatibility	2020-03-08 00:00:24 +01:00
Tissevert	44bc898ed3	Generalize the Indexed type to handle both arbitrary Content instructions and text-related ones that can be viewed as text chunks	2020-03-06 19:21:16 +01:00
Tissevert	1ec47c5d07	Update Font type to cover both encoding and decoding — WIP for CMap, but complete though not tested yet for MacRoman encoding	2020-03-06 19:19:53 +01:00
Tissevert	6e245189fd	Add a simple Box instance that exposes IndexedInstructions within a Content	2020-03-05 17:44:38 +01:00
Tissevert	90348c57d6	Disable text rendering and font loading from the Page abstraction, this code will have to be moved into a separate Box instance	2020-03-05 17:40:58 +01:00
Tissevert	50ac0692b2	Implement r for access by PageNumber and clean the mess a bit	2020-03-05 10:09:09 +01:00
Tissevert	2b9abc24b6	Add a separate instance for Raw streams that don't try to decode them	2020-03-04 18:31:30 +01:00
Tissevert	309f6ed461	Actually re-implement getText with the simpler Box instance	2020-03-04 18:19:10 +01:00
Tissevert	93c9863426	Remove accidentally commited trailing space on a line	2020-03-04 18:14:54 +01:00
Tissevert	7cef65d799	Fixed vicious bug introduced by `6096a1a237` (since follow is now automatic for references, it's not called explicitely but should in case of 'several' Content, which is an array of references, each of which should be expended) — TODO: add a unit test for that	2020-03-04 18:14:33 +01:00
Tissevert	d288ecf0ac	Start reimplementing getAll as a Box instance and try to separate the various monad run steps	2020-03-03 18:17:44 +01:00
Tissevert	3b3eeef218	Maybe we need a MonadState s m => MonadReader s m instance some day ?	2020-03-03 18:16:49 +01:00
Tissevert	2c02e44adf	Export the PDFContent monadic type used in PDF.Pages	2020-03-03 18:16:12 +01:00
Tissevert	9ce1a48030	Optimistically prepare the instance declaration for Pages that should replace get / getAll, not really getting out of the Monad	2020-02-28 18:15:40 +01:00
Tissevert	4969c6442e	Simple String aliasing to prepare the day when we'll be able to have more complex Component than just PDF Names (and access elements in an array)	2020-02-28 18:14:27 +01:00
Tissevert	cb257fc07e	Rename function for clarity : actually it's doing just what w StreamContent does, but without checking the headers to re-zlib-encode the stream content	2020-02-27 17:30:42 +01:00
Tissevert	d90eaf6f1c	Add Box instances to allow handling some exceptions in monad and converting them to Traversable accessible from the data part of the type	2020-02-27 17:22:12 +01:00
Tissevert	99014ff30d	Recognize openStream was just an implementation of r for the Box m () Object ByteString, and extend it implementing the w operation while we're at it	2020-02-26 22:13:29 +01:00
Tissevert	f4df4aab22	Found a nicer formulation that doesn't require transitivity or index agglomeration and swapped argument of w for more reusability with at / atAll	2020-02-26 17:19:22 +01:00
Tissevert	bdbc5f7351	Generalize the Box instances on containers from the particular cases of Document/Layers and Layers/Objects and move them to PDF.Box	2020-02-25 17:36:54 +01:00
Tissevert	30fece6537	Notice the 'edit' I exported earlier could be reused to simplify the w implementation of the proof that Box is a transitive relation	2020-02-25 09:27:56 +01:00
Tissevert	1a70f2972b	Expose Box index flags in PDF and PDF.Layer	2020-02-24 21:37:09 +01:00
Tissevert	67faa06ea2	Lift unused restriction on MonadFail for AllObjects instance of Box Layer	2020-02-24 21:36:31 +01:00
Tissevert	83a63d4b02	Implement Box instance from Layer to Object, either all at once or indexed by an ObjectId	2020-02-24 17:29:22 +01:00
Tissevert	85ee8519c4	Implement Box instances from Document to Layers and EOLStyle	2020-02-24 17:28:17 +01:00
Tissevert	e607f9cd37	Implement transitivity instance, extract a part of modifyAt as a convenient 'edit' function useful elsewhere and present a right-infix version of (,) to allow writing the nested tuple indexes more conveniently	2020-02-24 17:27:37 +01:00
Tissevert	a9252b129a	Start a Box module to describe inclusion relations between different types and get a MonadState action on the top type for any modification down there	2020-02-23 22:24:59 +01:00
Tissevert	71e62ee732	Add IDs to Instructions so that they can be selected in a given Content (and modified one day…)	2020-02-23 22:21:09 +01:00
Tissevert	160999a7d7	A small renaming for more clarity and because I thought «update» could be needed for a function name but after all maybe not but it's still better that way	2020-02-23 22:17:09 +01:00
Tissevert	36b1782464	Follow previous renaming for a local variable in Navigation for more clarity	2020-02-23 22:15:52 +01:00
Tissevert	bcf2e05bfb	Move Content out of Object module into a separate one incorporating PDF.Update (which is actually an operation that is defined only on that structure), and rename it Layer to avoid confusion with Content streams as defined in the specs (which have their own PDF.Content module already)	2020-02-17 15:29:59 +01:00
Tissevert	6096a1a237	Simplify navigations by centering everything on Objects to avoid needing to many conversion tools between DirectObject / Object / Dictionary	2020-02-15 13:51:24 +01:00
Tissevert	23186100a8	Reimplement getObj with the newest tools in PDF.Object.Navigation, in particular implement browsing by paths or random objectId access	2020-02-15 10:25:09 +01:00
Tissevert	b916ab5206	Just noticed Streams are a kind of Dictionary too, since they have a header	2020-02-15 10:23:32 +01:00
Tissevert	4a6dbda7d3	Move Error type from Pages to Navigation as a candidate for MonadFail required by PDFContent defined there	2020-02-15 10:22:42 +01:00
Tissevert	923d1800b0	Gain a bit of speed by using native Attoparsec for number types instead of reimplementing them with ByteString conversion and call to read	2020-02-14 18:02:40 +01:00
Tissevert	1c457d71d8	Fix the reading of Hexadecimal string objects detected by running the tests implemented from the spec	2020-02-14 18:00:12 +01:00
Tissevert	a72d76e229	Add unit tests to make sure I'm not breaking things too much	2020-02-14 17:58:03 +01:00
Tissevert	919f640443	Merge branch 'extract-text' into navigation	2020-02-12 17:35:56 +01:00
Tissevert	ae938acc02	Merge branch 'main' into extract-text	2020-02-12 17:34:56 +01:00
Tissevert	32f9866106	Use peek to improve directObject parser avoiding a large <\|> disjunction	2020-02-12 17:34:27 +01:00
Tissevert	eb4d76002c	Finish the split of Navigation out of Page, generalize the use of MonadFail with a custom Error monad (~= Either String)	2020-02-11 22:41:46 +01:00
Tissevert	af994cb50c	WIP: in the process of migrating to Object.Navigation in Pages, still unsure how to manage simple Content parsing and efficient font loading (+ giving a way to edit Contents)	2020-02-11 17:59:15 +01:00
Tissevert	704d7a7fcf	It turns out Output.concat wasn't necessary, OBuilder seems already is a Monoid so mconcat works (that fact was used in the very implementation of concat…)	2020-02-11 17:36:29 +01:00
Tissevert	11647eb4eb	Implement output for Content streams	2020-02-11 17:26:47 +01:00
Tissevert	aed7af376a	WIP: still trying to figure things out, moved to a separate submodule for Navigation, proper naming is hell	2020-02-11 08:29:08 +01:00
Tissevert	e77bbbcda9	WIP: start moving some navigation-related routines from Pages into Object directly and generalize them to multi-component to allow easier browsing	2020-02-10 17:43:04 +01:00
Tissevert	195446e653	Allow resources with no /Font field, they won't cause any problem as long as no call to Tf (to load a font) is made	2020-02-10 17:41:44 +01:00
Tissevert	9f1b1afafe	Implement Text rendering from parsed Content	2020-02-10 10:54:44 +01:00
Tissevert	20466c4f13	WIP: Clean code parsing «pages» (now Content), separated from text rendering (will be reimplemented as an upper layer, also providing modification as stream filters) — Page is also forgotten for now, will need a big improvement in Object navigation	2020-02-09 22:42:57 +01:00
Tissevert	325250383a	Add support for fonts and implement MacRomanEncoding	2020-02-08 08:15:32 +01:00
Tissevert	c48ab22808	Forgot some useless parentheses when playing with operator precedences	2020-02-04 17:05:15 +01:00
Tissevert	a2b66ac6d6	Generalize the getFont function because some /Resources have a direct dictionary as value for their /Font property	2020-02-04 17:04:42 +01:00
Tissevert	cefb08ee50	Going a step further in «optimization» (slowing it even more…) by replacing choice by a search in a Map	2019-11-30 21:46:22 +01:00
Tissevert	afbbcbffc5	Finish implementing the new stack-based call parser	2019-11-30 12:39:40 +01:00
Tissevert	8373bd1ea0	Removing +x permission on getText source that shouldn't ever have been set	2019-11-29 19:07:54 +01:00
Tissevert	bac08446dd	WIP: starting to fix this criminally inefficient parser for PDF's postfix-operator instructions	2019-11-29 17:42:57 +01:00
Tissevert	f9f799c59b	Take the dirty code of «getText» and turn it into a relatively clean module exposing pages, that can be retrieved all at once or by page number (numbered human-style, starting from 1)	2019-11-29 11:51:35 +01:00
Tissevert	08a9717b3a	Get rid of wrapper PageContents structure returned by PageContent in the PDF.Text module (and return directly [ByteString] instead)	2019-11-29 11:48:28 +01:00
Tissevert	42a02808c1	Merge branch 'main' into extract-text	2019-11-27 18:05:47 +01:00
Tissevert	c9f050e64b	Remove deprecated debug script and forgotten comments to bypass the selective export of Text module	2019-10-14 10:17:15 +02:00
Tissevert	3a3e1533b4	Clean ByteString types to identify when a ByteString contains the representation of an integer in a given base and fix the last remaining PDF string (un)escaping issue	2019-10-14 10:17:15 +02:00
Tissevert	a96e36ec5a	Fix error silently discarding code ranges, make sure ByteString intervals are created with the correct byte length and decode utf16BE encoded values in single-value ranges	2019-10-14 10:17:15 +02:00
Tissevert	d07c286f8e	Clean exported ByteString custom functions	2019-10-14 10:17:15 +02:00
Tissevert	7a15113285	Try and re-implement string decoding — compiles but now fails to decode any string	2019-10-14 10:17:15 +02:00
Tissevert	36d7f9b819	Still debugging, broke pretty much everything and finally implementing a proper coderange parsing for CMap because apparently that's necessary	2019-10-14 10:17:15 +02:00
Tissevert	b8ca7281aa	Fix parsing errors forgetting to make sure there's a space after special operator arguments like names and stringObjects	2019-10-14 10:17:15 +02:00
Tissevert	32efdcdd6b	Try and fix stuff by generalizing a signature to ease debugging and add parenthesis which I think should have been here all along	2019-10-14 10:17:15 +02:00
Tissevert	3b59fd0c61	Separate CMap and Text in two distinct modules	2019-10-14 10:17:15 +02:00
Tissevert	0374b72920	Finish implementing reading, still bugs to investigate	2019-10-14 10:17:15 +02:00
Tissevert	1dd22c3889	Going to try with Text, naturally handling UTF-16 but will still have to parse «int codes» manually from strings	2019-10-14 10:17:15 +02:00
Tissevert	98d029c4d4	In complete debug, more or less implemented CMap parsing but apparently it uses UTF16 ?!	2019-10-14 10:17:15 +02:00
Tissevert	c349d9b4c2	Don't trust serializer, they have nothing todo with a reasonable binary encoding	2019-10-14 10:17:15 +02:00
Tissevert	e7484ef536	Completely lost, the same old Char8 / Word8 again, implemented all the text reading, still needing a couple details to parse CMaps	2019-10-14 10:17:15 +02:00
Tissevert	f9e5683bf4	WIP: Use previous changes to start implementing font caching and text parsing (still very broken, doesn't compile)	2019-10-14 10:17:15 +02:00
Tissevert	b8eb9e6856	Generalize the Parser type into a MonadParser class to use with MonadTrans and remove redundant code already defined in Applicative or Attoparsec	2019-10-14 10:17:15 +02:00
Tissevert	66d315b7fe	Reflect the distinction between eval and run from State monad into the Parser module	2019-10-14 10:17:15 +02:00
Tissevert	51db57ec67	Ugly commit, breaks everything, still trying to figure a grammar for text	2019-10-14 10:17:15 +02:00
Tissevert	6f3c159ea7	Adding a module to implement text reading and a demo program to go with it	2019-10-14 10:17:15 +02:00