Expose a toList function similar to Data.Map in module Id for IdMap and allow mapping over keys and values in a monadic construct like mapM

Make a couple improvements in performance + add an example script to extract pages from a PDF
Once again something that should never have been committed
2020-06-03 15:15:07 +02:00 · 2020-05-28 18:54:15 +02:00 · 2020-03-20 09:34:53 +01:00 · 2020-03-19 12:53:35 +01:00 · 2020-03-19 10:27:29 +01:00 · 2020-03-19 10:27:29 +01:00
34 changed files with 2224 additions and 334 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 dist*/
 .ghc.environment.*
+cabal.project.local
--- a/Hufflepdf.cabal
+++ b/Hufflepdf.cabal
@ -16,20 +16,40 @@ extra-source-files:  ChangeLog.md
 cabal-version:       >=1.10

 library
-  exposed-modules:     PDF
+  exposed-modules:     Data.OrderedMap
+                     , Data.Id
+                     , PDF
+                     , PDF.Box
+                     , PDF.CMap
+                     , PDF.Content
+                     , PDF.Content.Operator
+                     , PDF.Content.Operator.Color
+                     , PDF.Content.Operator.Common
+                     , PDF.Content.Operator.GraphicState
+                     , PDF.Content.Operator.Path
+                     , PDF.Content.Operator.Text
+                     , PDF.Content.Text
                     , PDF.EOL
+                     , PDF.Layer
                     , PDF.Object
+                     , PDF.Object.Navigation
                     , PDF.Output
-                     , PDF.Update
+                     , PDF.Parser
+                     , PDF.Pages
  other-modules:       Data.ByteString.Char8.Util
                     , PDF.Body
-                     , PDF.Parser
+                     , PDF.Encoding
+                     , PDF.Encoding.MacRoman
+                     , PDF.Font
  -- other-extensions:    
  build-depends:       attoparsec
                     , base >=4.9 && <4.13
                     , bytestring
                     , containers
                     , mtl
+                     , text
+                     , utf8-string
+                     , zlib
  hs-source-dirs:      src
  ghc-options:         -Wall
  default-language:    Haskell2010
@ -39,7 +59,7 @@ executable             equivalent
  build-depends:       base
                     , bytestring
                     , Hufflepdf
-  ghc-options:         -Wall
+  ghc-options:         -Wall -rtsopts
  default-language:    Haskell2010

 executable             getObj
@ -48,6 +68,63 @@ executable             getObj
                     , bytestring
                     , containers
                     , Hufflepdf
-                     , zlib
-  ghc-options:         -Wall
+                     , mtl
+  ghc-options:         -Wall -rtsopts
+  default-language:    Haskell2010
+
+executable             getText
+  main-is:             examples/getText.hs
+  build-depends:       base
+                     , bytestring
+                     , containers
+                     , ExceptIOH
+                     , Hufflepdf
+                     , mtl
+                     , text
+  ghc-options:         -Wall -rtsopts
+  default-language:    Haskell2010
+
+executable             pdfCut
+  main-is:             examples/pdfCut.hs
+  build-depends:       base
+                     , bytestring
+                     , containers
+                     , ExceptIOH
+                     , filepath
+                     , Hufflepdf
+                     , mtl
+                     , text
+  ghc-options:         -Wall -rtsopts
+  default-language:    Haskell2010
+
+executable             fixMermoz
+  main-is:             examples/fixMermoz.hs
+  build-depends:       base
+                     , bytestring
+                     , ExceptIOH
+                     , Hufflepdf
+                     , mtl
+  ghc-options:         -Wall -rtsopts
+  default-language:    Haskell2010
+
+Test-Suite unitTests
+  type:               exitcode-stdio-1.0
+  main-is:            Main.hs
+  other-modules:      Object
+                    , Data.ByteString.Char8.Util
+                    , Data.Id
+                    , PDF.EOL
+                    , PDF.Parser
+                    , PDF.Object
+                    , PDF.Output
+  hs-source-dirs:     test
+                    , src
+  build-depends:      attoparsec
+                    , base >=4.9 && <4.13
+                    , bytestring
+                    , containers
+                    , Hufflepdf
+                    , HUnit
+                    , mtl
+                    , text
  default-language:    Haskell2010
--- a/examples/getContent.hs
+++ b/examples/getContent.hs
@ -0,0 +1,11 @@
+main :: IO ()
+main = do
+  hSetBuffering stdout LineBuffering
+  args <- getArgs
+  case args of
+    --[inputFile] -> wholeDoc inputFile
+    [inputFile, pageNumber] -> do
+      content <- fmap (unify . updates) . parseDocument <$> BS.readFile inputFile
+      get content (read pageNumber)
+      singlePage inputFile (read pageNumber)
+    _ -> die "Syntax: getContent INPUT_FILE PAGE_NUMBER"
--- a/examples/getObj.hs
+++ b/examples/getObj.hs
@ -1,53 +1,52 @@
 {-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE FlexibleContexts #-}

-import Codec.Compression.Zlib (decompress)
+import Control.Monad.Reader (ReaderT, runReaderT)
 import Data.ByteString.Lazy.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (readFile)
-import qualified Data.ByteString.Lazy.Char8 as Lazy (fromStrict, putStr, toStrict)
-import Data.Map ((!?))
-import qualified Data.Map as Map (keys, lookup)
+import qualified Data.ByteString.Lazy.Char8 as Lazy (putStrLn)
+import Data.Id (Id(..))
 import PDF (Document(..), parseDocument)
-import qualified PDF.EOL as EOL (Style)
-import PDF.Object (Content(..), DirectObject(..), Object(..), Name(..))
-import PDF.Output (ObjectId(..))
+import PDF.Box (Box(..))
+import PDF.Layer (Layer(..), unify)
+import PDF.Object (Object(..))
+import PDF.Object.Navigation (
+    Nav(..), PPath(..), StreamContent(..), (//), objectById, catalog
+  )
+import PDF.Output (Output)
 import qualified PDF.Output as Output (render)
-import PDF.Update (unify)
+import Prelude hiding (fail)
 import System.Environment (getArgs, getProgName)
 import System.Exit (die)
 import Text.Printf (printf)
+import Text.Read (readMaybe)

-display :: EOL.Style -> Object -> ByteString
-display eolStyle d@(Direct _) = Output.render eolStyle d
-display eolStyle s@(Stream {header, streamContent}) = Output.render eolStyle $
-  case Map.lookup (Name "Filter") header of
-    Just (NameObject (Name "FlateDecode")) -> Stream {
-          header
-        , streamContent = Lazy.toStrict . decompress $ Lazy.fromStrict streamContent
-      }
-    _ -> s
+decodedStream :: Object -> Object
+decodedStream object =
+  either (const object) id $ r Clear object >>= flip (w Raw) object

-extractObject :: ObjectId -> Document -> Either String ByteString
-extractObject objectId (Document {eolStyle, updates}) =
-  case objects content !? objectId of
-    Nothing -> Left $ "No object has ID " ++ show (getObjectId objectId)
-    Just o -> Right $ display eolStyle o
+display :: Functor m => Output a => ReaderT Layer m a -> Document -> m ByteString
+display getter (Document {eolStyle, layers}) =
+  Output.render eolStyle <$> runReaderT getter (unify layers)
+
+parse :: [String] -> IO (FilePath, Document -> Either String ByteString)
+parse [inputFile] = return (inputFile, display $ value <$> catalog)
+parse [inputFile, key] =
+  return (inputFile, clear . maybe (byPath key) byId $ readMaybe key)
  where
-    content = unify updates
-
-listObjectIds :: Document -> Either String [String]
-listObjectIds =
-  Right . prependTitle . toString . Map.keys . objects . unify . updates
-  where
-    toString = fmap (show . getObjectId)
-    prependTitle = ("ObjectIds defined in this PDF:":)
+    byId = objectById . Id
+    byPath path = (catalog // PPath (explode path))
+    explode "" = []
+    explode path =
+      case break (== '.') path of
+        (name, "") -> [name]
+        (name, rest) -> name : explode (drop 1 rest)
+    clear = display . fmap (decodedStream . value)
+parse _ =
+  die . printf "Syntax: %s inputFile [OBJECT_ID | PATH_TO_OBJ]\n" =<< getProgName

 main :: IO ()
 main = do
  (inputFile, getData) <- parse =<< getArgs
  input <- BS.readFile inputFile
-  either die id $ (parseDocument input >>= getData)
-  where
-    parse [inputFile] = return (inputFile, fmap (mapM_ putStrLn) . listObjectIds)
-    parse [inputFile, objectId] = return
-      (inputFile, fmap Lazy.putStr . extractObject (ObjectId (read objectId)))
-    parse _ = die . printf "Syntax: %s inputFile [OBJECT_ID]\n" =<< getProgName
+  either die Lazy.putStrLn $ (parseDocument input >>= getData)
--- a/examples/getText.hs
+++ b/examples/getText.hs
@ -0,0 +1,54 @@
+{-# LANGUAGE FlexibleContexts #-}
+import Control.Monad ((>=>))
+import Control.Monad.Except (ExceptT(..))
+import Control.Monad.Except.IOH (handle)
+import Control.Monad.IO.Class (liftIO)
+import qualified Data.ByteString.Char8 as BS (readFile)
+import Data.Id (Id(..), mapWithKey)
+import qualified Data.Map as Map (mapWithKey)
+import Data.OrderedMap (mapi)
+import qualified Data.Text as Text (unpack)
+import PDF (UnifiedLayers(..), parseDocument)
+import PDF.Box (Box(..))
+import PDF.Content.Text (Chunks(..))
+import PDF.Layer (Layer, LayerReader)
+import PDF.Pages (
+      Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), withFonts
+    , withResources
+  )
+import System.Environment (getArgs)
+import System.Exit (die)
+import System.IO (BufferMode(..), hSetBuffering, stdout)
+import Text.Printf (printf)
+
+displayPage :: Int -> Page -> FontCache (LayerReader (ExceptT String IO)) ()
+displayPage n = withResources (
+      r Contents
+    >=> sequence_ . mapi (\objectId ->
+            r Chunks >=> sequence_ . mapWithKey (display objectId)
+          )
+  )
+  where
+    display a b v =
+      liftIO . putStrLn $
+        printf "p#%d obj#%d instr#%d: %s" n (getId a) (getId b) (Text.unpack v)
+
+getAll :: Layer -> ExceptT String IO ()
+getAll = withFonts $ r Pages >=> sequence_ . Map.mapWithKey displayPage
+
+get :: Int -> Layer -> ExceptT String IO ()
+get n = withFonts $ r (P n) >=> displayPage n
+
+onDoc :: FilePath -> (Layer -> ExceptT String IO ()) -> ExceptT String IO ()
+onDoc inputFile f =
+  ExceptT (parseDocument <$> BS.readFile inputFile) >>= r UnifiedLayers >>= f
+
+main :: IO ()
+main = do
+  hSetBuffering stdout LineBuffering
+  args <- getArgs
+  case args of
+    [inputFile] -> onDoc inputFile getAll `handle` die
+    [inputFile, pageNumber] ->
+      onDoc inputFile (get $ read pageNumber) `handle` die
+    _ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"
--- a/examples/pdfCut.hs
+++ b/examples/pdfCut.hs
@ -0,0 +1,41 @@
+import Control.Monad.State (MonadState(..), evalStateT)
+import Control.Monad.Except.IOH (handle)
+import Control.Monad.Trans (lift)
+import qualified Data.ByteString.Char8 as BS (readFile)
+import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
+import Data.Map ((!), fromSet)
+import qualified Data.Set as Set (fromList)
+import PDF (Document, UnifiedLayers(..), parseDocument, render)
+import PDF.Box (at)
+import PDF.Pages (Pages(..))
+import System.Environment (getArgs)
+import System.Exit (die)
+import System.FilePath (replaceBaseName, takeBaseName)
+import Text.Read (readEither)
+
+parseRange :: String -> Either String (Int, Int)
+parseRange = evalStateT $ do
+  from <- lift . readEither =<< state (break (== '-'))
+  get >>= makeRange from
+  where
+    makeRange from "" = return (from, from)
+    makeRange from (_:to) = (,) from <$> lift (readEither to)
+
+cut :: (Int, Int) -> Document -> IO Document
+cut (p1, p2) doc = ((
+      at UnifiedLayers .at Pages $ \pages ->
+      return $ fromSet (pages!) $ Set.fromList [pMin .. pMax]
+    ) doc) `handle` die
+  where
+    (pMin, pMax) = (min p1 p2, max p1 p2)
+
+main :: IO ()
+main = do
+  [inputPath, inputRange] <- getArgs
+  range <- catchLeft $ parseRange inputRange
+  doc <- catchLeft =<< parseDocument <$> BS.readFile inputPath
+  Lazy.writeFile (outputPath inputPath inputRange) . render =<< cut range doc
+  where
+    catchLeft = either die return
+    outputPath fileName range =
+      replaceBaseName fileName (takeBaseName fileName ++ "_p" ++ range)
--- a/src/Data/ByteString/Char8/Util.hs
+++ b/src/Data/ByteString/Char8/Util.hs
@ -1,16 +1,91 @@
 module Data.ByteString.Char8.Util (
-      previous
+      B16Int(..)
+    , B256Int(..)
+    , b8ToInt
+    , b16ToBytes
+    , b16ToInt
+    , b256ToInt
+    , intToB256
+    , previous
    , subBS
+    , toBytes
+    , unescape
+    , utf16BEToutf8
  ) where

-import Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString.Char8 as BS (drop, index, take)
+import Data.ByteString (ByteString, snoc)
+import qualified Data.ByteString as BS (empty, foldl', length, pack, singleton, splitAt)
+import qualified Data.ByteString.Char8 as Char8 (
+    cons, drop, index, splitAt, take, uncons, unpack
+  )
+import Data.Text (Text)
+import Data.Text.Encoding (decodeUtf16BE)
 import Prelude hiding (length)
+import Text.Printf (printf)
+
+newtype B8Int = B8Int ByteString deriving (Eq, Show)
+newtype B16Int = B16Int ByteString deriving (Eq, Show)
+newtype B256Int = B256Int ByteString deriving (Eq, Show)

 previous :: Char -> Int -> ByteString -> Int
 previous char position byteString
-  | BS.index byteString position == char = position
+  | Char8.index byteString position == char = position
  | otherwise = previous char (position - 1) byteString

 subBS :: Int -> Int -> ByteString -> ByteString
-subBS offset length = BS.take length . BS.drop offset
+subBS offset length = Char8.take length . Char8.drop offset
+
+intToB256 :: Int -> B256Int
+intToB256 n
+  | n < 0x100 = B256Int . BS.singleton $ toEnum n
+  | otherwise =
+    let B256Int begining = intToB256 (n `div` 0x100) in
+    B256Int $ begining `snoc` (toEnum (n `mod` 0x100))
+
+b256ToInt :: B256Int -> Int
+b256ToInt (B256Int n) = BS.foldl' (\k w -> 0x100*k + fromEnum w) 0 n
+
+toBytes :: Int -> Int -> ByteString
+toBytes 0 _ = BS.empty
+toBytes size n =
+  (toBytes (size - 1) (n `div` 0x100)) `snoc` (toEnum (n `mod` 0x100))
+
+b16ToBytes :: B16Int -> ByteString
+b16ToBytes (B16Int n) = BS.pack . fmap b16ToInt $ pairDigits n
+  where
+    pairDigits s =
+      case BS.length s of
+        0 -> []
+        1 -> [B16Int s]
+        _ ->
+          let (twoHexDigits, rest) = BS.splitAt 2 s in
+          (B16Int $ twoHexDigits):(pairDigits rest)
+
+fromBase :: (Num a, Read a) => Char -> ByteString -> a
+fromBase b = read . printf "0%c%s" b . Char8.unpack
+
+b16ToInt :: (Num a, Read a) => B16Int -> a
+b16ToInt (B16Int n) = fromBase 'x' n
+
+b8ToInt :: (Num a, Read a) => B8Int -> a
+b8ToInt (B8Int n) = fromBase 'o' n
+
+unescape :: ByteString -> ByteString
+unescape escapedBS =
+  case Char8.uncons escapedBS of
+    Nothing -> BS.empty
+    Just ('\\', s) -> unescapeChar s
+    Just (c, s) -> Char8.cons c (unescape s)
+  where
+    unescapeChar s =
+      case Char8.uncons s of
+        Nothing -> BS.empty
+        Just (c, s')
+          | c `elem` "()" -> Char8.cons c (unescape s')
+          | c `elem` "nrtbf" -> Char8.cons (read (printf "'\\%c'" c)) (unescape s')
+          | c `elem` ['0'..'7'] -> fromOctal (Char8.splitAt 3 s)
+          | otherwise -> Char8.cons c (unescape s')
+    fromOctal (code, s) = Char8.cons (toEnum $ b8ToInt (B8Int code)) (unescape s)
+
+utf16BEToutf8 :: ByteString -> Text
+utf16BEToutf8 = decodeUtf16BE
--- a/src/Data/Id.hs
+++ b/src/Data/Id.hs
@ -0,0 +1,95 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE FlexibleContexts #-}
+module Data.Id (
+      Id(..)
+    , IdMap
+    , Indexed
+    , at
+    , delete
+    , empty
+    , filterWithKey
+    , fromList
+    , insert
+    , keysSet
+    , lookup
+    , mapWithKey
+    , member
+    , minViewWithKey
+    , register
+    , singleton
+    , size
+    , toList
+    , union
+  ) where
+
+import Control.Monad.State.Strict (MonadState, modify, gets)
+import Data.IntMap (IntMap, (!))
+import qualified Data.IntMap as IntMap (
+      delete, empty, filterWithKey, fromList, keysSet, insert, lookup, mapWithKey
+    , maxViewWithKey, member, minViewWithKey, size, toList, union
+  )
+import Data.IntSet (IntSet)
+import Prelude hiding (lookup)
+
+newtype Id a = Id {
+    getId :: Int
+  } deriving (Eq, Enum, Ord, Show)
+newtype IdMap a b = IdMap {
+    intMap :: IntMap b
+  } deriving (Show, Functor, Semigroup, Monoid, Foldable, Traversable)
+type Indexed a = IdMap a a
+
+at :: IdMap a b -> Id a -> b
+at (IdMap idMap) = (idMap !) . getId
+
+lookup :: Id a -> IdMap a b -> Maybe b
+lookup (Id a) (IdMap idMap) = IntMap.lookup a idMap
+
+size :: IdMap a b -> Int
+size = IntMap.size . intMap
+
+member :: Id a -> IdMap a b -> Bool
+member (Id a) (IdMap idMap) = IntMap.member a idMap
+
+empty :: IdMap a b
+empty = IdMap {intMap = IntMap.empty}
+
+singleton :: Id a -> b -> IdMap a b
+singleton a b = fromList [(a, b)]
+
+insert :: Id a -> b -> IdMap a b -> IdMap a b
+insert (Id a) b (IdMap idMap) = IdMap {intMap = IntMap.insert a b idMap}
+
+delete :: Id a -> IdMap a b -> IdMap a b
+delete (Id a) (IdMap idMap) = IdMap {intMap = IntMap.delete a idMap}
+
+minViewWithKey :: IdMap a b -> Maybe ((Id a, b), IdMap a b)
+minViewWithKey = fmap wrap . IntMap.minViewWithKey . intMap
+  where
+    wrap ((key, b), idMap) = ((Id key, b), IdMap idMap)
+
+union :: IdMap a b -> IdMap a b -> IdMap a b
+union (IdMap intMap1) (IdMap intMap2) =
+  IdMap {intMap = IntMap.union intMap1 intMap2}
+
+mapWithKey :: (Id a -> b -> c) -> IdMap a b -> IdMap a c
+mapWithKey f (IdMap idMap) = IdMap {intMap = IntMap.mapWithKey (f . Id) idMap}
+
+filterWithKey :: (Id a -> b -> Bool) -> IdMap a b -> IdMap a b
+filterWithKey f = IdMap . IntMap.filterWithKey (f . Id) . intMap
+
+fromList :: [(Id a, b)] -> IdMap a b
+fromList = IdMap . IntMap.fromList . fmap (\(key, b) -> (getId key, b))
+
+toList :: IdMap a b -> [(Id a, b)]
+toList = fmap (\(key, b) -> (Id key, b)) . IntMap.toList . intMap
+
+keysSet :: IdMap a b -> IntSet
+keysSet = IntMap.keysSet . intMap
+
+register :: MonadState (IdMap a b) m => b -> m (Id a)
+register b = do
+  newId <- gets (Id . maybe 0 ((+1) . fst . fst) . IntMap.maxViewWithKey . intMap)
+  modify (insert newId b)
+  return newId
--- a/src/Data/OrderedMap.hs
+++ b/src/Data/OrderedMap.hs
@ -0,0 +1,79 @@
+{-# LANGUAGE NamedFieldPuns #-}
+module Data.OrderedMap (
+      OrderedMap
+    , build
+    , elems
+    , fromList
+    , get
+    , keys
+    , lookup
+    , mapi
+    , set
+    , toList
+  ) where
+
+import Data.Map (Map, (!), mapWithKey)
+import qualified Data.Map as Map (fromList, insert, lookup, member)
+import Prelude hiding (lookup)
+
+data OrderedMap k a = OrderedMap {
+      assoc :: Map k a
+    , keys :: [k]
+  } 
+
+instance (Ord k, Show k, Show a) => Show (OrderedMap k a) where
+  show = show . toList
+
+instance Functor (OrderedMap k) where
+  fmap f orderedMap = orderedMap {assoc = fmap f (assoc orderedMap)}
+
+instance Ord k => Foldable (OrderedMap k) where
+  foldMap f (OrderedMap {assoc, keys}) = foldMap f $ (assoc !) <$> keys
+
+instance Ord k => Traversable (OrderedMap k) where
+  sequenceA (OrderedMap {assoc, keys}) =
+    (flip OrderedMap keys) <$> sequenceA assoc 
+
+elems :: Ord k => OrderedMap k a -> [a]
+elems (OrderedMap {assoc, keys}) = (assoc !) <$> keys
+
+toList :: Ord k => OrderedMap k a -> [(k, a)]
+toList (OrderedMap {assoc, keys}) = (\k -> (k, assoc ! k)) <$> keys
+
+fromList :: Ord k => [(k, a)] -> OrderedMap k a
+fromList keyValueList = OrderedMap {
+      assoc = Map.fromList keyValueList
+    , keys = fst <$> keyValueList
+  }
+
+build :: Ord k => (k -> a) -> [k] -> OrderedMap k a
+build f keys = OrderedMap {
+      assoc = Map.fromList $ (\k -> (k, f k)) <$> keys
+    , keys
+  }
+
+get :: Ord k => k -> OrderedMap k a -> a
+get k = (! k) . assoc
+
+lookup :: Ord k => k -> OrderedMap k a -> Maybe a
+lookup k = (Map.lookup k) . assoc
+
+set :: Ord k => k -> a -> OrderedMap k a -> OrderedMap k a
+set k v orderedMap@(OrderedMap {assoc})
+  | Map.member k assoc = orderedMap {assoc = Map.insert k v assoc}
+  | otherwise = orderedMap
+
+mapi :: Ord k => (k -> a -> b) -> OrderedMap k a -> OrderedMap k b
+mapi f orderedMap = orderedMap {
+    assoc = mapWithKey f $ assoc orderedMap
+  }
+
+{-
+cons :: Ord k => k -> a -> OrderedMap k a -> OrderedMap k a
+cons k a orderedMap =
+
+snoc :: Ord k => k -> a -> OrderedMap k a -> OrderedMap k a
+
+alter :: Ord k => (Maybe a -> Maybe a) -> k -> OrderedMap k a -> OrderedMap k a
+alter 
+-}
--- a/src/PDF.hs
+++ b/src/PDF.hs
@ -1,6 +1,11 @@
 {-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
 module PDF (
      Document(..)
+    , EOLStyle(..)
+    , Layers(..)
+    , UnifiedLayers(..)
    , parseDocument
    , render
  ) where
@ -13,27 +18,45 @@ import Data.ByteString.Char8.Util (previous, subBS)
 import qualified Data.ByteString.Lazy as Lazy (ByteString)
 import qualified Data.Map as Map (lookup)
 import PDF.Body (populate)
+import PDF.Box (Box(..))
 import qualified PDF.EOL as EOL (Style(..), charset, parser)
+import PDF.Layer (Layer, unify)
 import PDF.Object (
-      Content(..), DirectObject(..), InputStructure(..), Name(..), Number(..)
+      DirectObject(..), InputStructure(..), Name(..), Number(..)
    , Structure(..)
    , eofMarker, magicNumber, structure
  )
 import qualified PDF.Output as Output (render, line)
 import PDF.Output (Output(..))
-import PDF.Parser (Parser, runParser, string, takeAll)
+import PDF.Parser (Parser, evalParser, string, takeAll)
 import Text.Printf (printf)

 data Document = Document {
      pdfVersion :: String
    , eolStyle :: EOL.Style
-    , updates :: [Content]
+    , layers :: [Layer]
  } deriving Show

 instance Output Document where
-  output (Document {pdfVersion, updates}) =
+  output (Document {pdfVersion, layers}) =
      Output.line (printf "%%PDF-%s" pdfVersion)
-    `mappend` output updates
+    `mappend` output layers
+
+data EOLStyle = EOLStyle
+data Layers = Layers
+data UnifiedLayers = UnifiedLayers
+
+instance Monad m => Box m EOLStyle Document EOL.Style where
+  r EOLStyle = return . eolStyle
+  w EOLStyle eolStyle document = return $ document {eolStyle}
+
+instance Monad m => Box m UnifiedLayers Document Layer where
+  r UnifiedLayers = return . unify . layers
+  w UnifiedLayers layer = w Layers [layer]
+
+instance Monad m => Box m Layers Document [Layer] where
+  r Layers = return . layers
+  w Layers layers document = return $ document {layers = layers}

 render :: Document -> Lazy.ByteString
 render document@(Document {eolStyle}) = Output.render eolStyle document
@ -83,7 +106,7 @@ findNextSection offset input =

 readStructures :: Int -> ByteString -> Either String [InputStructure]
 readStructures startXref input =
-  runParser structure () (BS.drop startXref input) >>= stopOrFollow
+  evalParser structure () (BS.drop startXref input) >>= stopOrFollow
  where
    stopOrFollow s@(Structure {trailer}) =
      case Map.lookup (Name "Prev") trailer of
@ -96,8 +119,8 @@ readStructures startXref input =

 parseDocument :: ByteString -> Either String Document
 parseDocument input = do
-  (pdfVersion, eolStyle) <- runParser ((,) <$> version <*> EOL.parser) () input
+  (pdfVersion, eolStyle) <- evalParser ((,) <$> version <*> EOL.parser) () input
  startXref <- readStartXref eolStyle input
  structuresRead <- readStructures startXref input
-  let updates = populate input <$> structuresRead
-  return $ Document {pdfVersion, eolStyle, updates}
+  let layers = populate input <$> structuresRead
+  return $ Document {pdfVersion, eolStyle, layers}
--- a/src/PDF/Body.hs
+++ b/src/PDF/Body.hs
@ -6,19 +6,22 @@ module PDF.Body (

 import Control.Applicative ((<|>))
 import Control.Monad.State (get, gets, modify)
+import Data.Attoparsec.ByteString.Char8 (option)
 import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
-import Data.Map ((!))
-import qualified Data.Map as Map (empty, insert, lookup)
+import Data.Id (Id(..), at, empty)
+import qualified Data.Id as Id (insert, lookup)
+import qualified Data.Map as Map (lookup)
 import qualified PDF.EOL as EOL (charset, parser)
+import PDF.Layer (Layer(..))
 import PDF.Object (
-      Content(..), DirectObject(..), Flow(..), IndirectObjCoordinates(..)
+      DirectObject(..), Flow(..), IndirectObjCoordinates(..)
    , InputStructure(..), Name(..), Number(..), Object(..), Occurrence(..)
    , Structure(..), XRefEntry(..), XRefSection
    , blank, dictionary, directObject, integer, line
  )
-import PDF.Output (ObjectId(..), Offset(..))
-import PDF.Parser (Parser, (<?>), block, char, on, option, runParser, takeAll)
+import PDF.Output (Offset(..))
+import PDF.Parser (MonadParser(..), Parser, (<?>), evalParser, on)

 data UserState = UserState {
      input :: ByteString
@ -31,9 +34,9 @@ type SParser = Parser UserState
 modifyFlow :: (Flow -> Flow) -> SParser ()
 modifyFlow f = modify $ \state -> state {flow = f $ flow state}

-addObject :: ObjectId -> Object -> SParser ()
+addObject :: (Id Object) -> Object -> SParser ()
 addObject objectId newObject = modifyFlow $ \flow -> flow {
-    tmpObjects = Map.insert objectId newObject $ tmpObjects flow
+    tmpObjects = Id.insert objectId newObject $ tmpObjects flow
  }

 pushOccurrence :: Occurrence -> SParser ()
@ -46,10 +49,10 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
  where
    afterPercent = BS.cons <$> char '%' <*> takeAll (not . (`elem` EOL.charset))

-lookupOffset :: ObjectId -> SParser Offset
+lookupOffset :: (Id Object) -> SParser Offset
 lookupOffset objectId = do
  table <- gets xreferences
-  case Map.lookup objectId table >>= entryOffset of
+  case Id.lookup objectId table >>= entryOffset of
    Nothing -> fail $
      "obj " ++ show objectId ++ " is referenced but missing in XRef table"
    Just offset -> return offset
@ -57,12 +60,12 @@ lookupOffset objectId = do
    entryOffset (InUse {offset}) = Just offset
    entryOffset _ = Nothing

-loadNumber :: ObjectId -> SParser Double
+loadNumber :: (Id Object) -> SParser Double
 loadNumber objectId = do
  offset <- getOffset <$> lookupOffset objectId
  objectStart <- BS.drop offset <$> gets input
  indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
-  objectValue <- (!objectId) . tmpObjects <$> gets flow
+  objectValue <- (`at` objectId) . tmpObjects <$> gets flow
  case objectValue of
    Direct (NumberObject (Number n)) -> return n
    obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
@ -75,7 +78,7 @@ getSize Nothing = fail "Missing '/Length' key on stream"
 getSize (Just (NumberObject (Number size))) = return size
 getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
  Flow {tmpObjects} <- gets flow
-  case Map.lookup objectId tmpObjects of
+  case Id.lookup objectId tmpObjects of
    Nothing -> loadNumber objectId
    Just (Direct (NumberObject (Number size))) -> return size
    Just v -> fail $
@ -96,7 +99,7 @@ object = streamObject <|> Direct <$> directObject

 indirectObjCoordinates :: SParser IndirectObjCoordinates
 indirectObjCoordinates = do
-  objectId <- ObjectId <$> integer
+  objectId <- Id <$> integer
  coordinates <- IndirectObjCoordinates objectId <$> integer
  objectValue <- line "obj" *> object <* blank <* line "endobj"
  addObject objectId objectValue
@ -106,14 +109,14 @@ occurrence :: SParser Occurrence
 occurrence =
  Comment <$> comment <|> Indirect <$> indirectObjCoordinates <?> "comment or object"

-populate :: ByteString -> InputStructure -> Content
+populate :: ByteString -> InputStructure -> Layer
 populate input structure =
  let bodyInput = BS.drop (startOffset structure) input in
-  case runParser recurseOnOccurrences initialState bodyInput of
-    Left _ -> Content {occurrences = [], objects = Map.empty, docStructure}
+  case evalParser recurseOnOccurrences initialState bodyInput of
+    Left _ -> Layer {occurrences = [], objects = empty, docStructure}
    Right finalState ->
      let Flow {occurrencesStack, tmpObjects} = flow finalState in
-      Content {
+      Layer {
          occurrences = reverse occurrencesStack, objects = tmpObjects, docStructure
        }
  where
@ -121,7 +124,7 @@ populate input structure =
    xreferences = xRef docStructure
    initialState = UserState {
        input, xreferences, flow = Flow {
-            occurrencesStack = [], tmpObjects = Map.empty
+            occurrencesStack = [], tmpObjects = empty
          }
      }

--- a/src/PDF/Box.hs
+++ b/src/PDF/Box.hs
@ -0,0 +1,80 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE FunctionalDependencies #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module PDF.Box (
+      Box(..)
+    , Index(..)
+    , Maybe_(..)
+    , Either_(..)
+    , at
+    , atAll
+    , edit
+    , runRO
+  ) where
+
+import Control.Monad.Except (ExceptT(..), runExceptT)
+import Control.Monad.Fail (MonadFail(..))
+import Control.Monad.Reader (ReaderT, runReaderT)
+import Control.Monad.State (MonadState(..))
+import Data.Id (Id, IdMap)
+import qualified Data.Id as Id (insert, lookup)
+import Data.Map (Map)
+import qualified Data.Map as Map (insert, lookup)
+import Data.OrderedMap (OrderedMap)
+import qualified Data.OrderedMap as OrderedMap (lookup, set)
+import Prelude hiding (fail)
+
+runRO :: MonadState s m => ReaderT s m a -> m a
+runRO ro = get >>= runReaderT ro
+
+newtype Index = Index Int
+newtype Maybe_ x = Maybe_ x
+newtype Either_ b x = Either_ x
+
+class Monad m => Box m i a b | m a i -> b where
+  r :: i -> a -> m b
+  w :: i -> b -> a -> m a
+
+at :: Box m i a b => i -> (b -> m b) -> a -> m a
+at i f a = r i a >>= f >>= flip (w i) a
+
+atAll :: (Traversable t, Monad m, Box m i a (t b)) => i -> (b -> m b) -> a -> m a
+atAll i f = at i $ (mapM f)
+
+edit :: MonadState a m => (a -> m a) -> m ()
+edit f = get >>= f >>= put
+
+instance MonadFail m => Box m Index [a] a where
+  r (Index i) [] = fail $ "Index out of bounds " ++ show i
+  r (Index 0) (x:_) = return x
+  r (Index i) (_:xs) = r (Index (i-1)) xs
+
+  w (Index i) _ [] = fail $ "Index out of bounds " ++ show i
+  w (Index 0) newX (_:xs) = return (newX:xs)
+  w (Index i) newX (x:xs) = (x:) <$> w (Index (i-1)) newX xs
+
+instance (Ord k, MonadFail m) => Box m k (Map k a) a where
+  r k = maybe (fail "Unknown key") return . Map.lookup k
+  w k a = return . Map.insert k a
+
+instance (Ord k, MonadFail m) => Box m k (OrderedMap k a) a where
+  r k = maybe (fail "Unknown key") return . OrderedMap.lookup k
+  w k a orderedMap = r k orderedMap >> return (OrderedMap.set k a orderedMap)
+
+instance MonadFail m => Box m (Id k) (IdMap k a) a where
+  r k = maybe (fail "Unknown key") return . Id.lookup k
+  w k a = return . Id.insert k a
+
+instance (Monad m, Box Maybe i a b) => Box m (Maybe_ i) a (Maybe b) where
+  r (Maybe_ i) = return . r i
+  w (Maybe_ i) (Just b) a =
+    return . maybe a id $ w i b a
+  w _ _ obj = return obj
+
+instance (Monad m, Box (ExceptT e m) i a b) => Box m (Either_ e i) a (Either e b) where
+  r (Either_ i) a = runExceptT (r i a)
+  w (Either_ i) (Right b) a =
+    either (const a) id <$> runExceptT (w i b a :: ExceptT e m a)
+  w _ _ a = return a
--- a/src/PDF/CMap.hs
+++ b/src/PDF/CMap.hs
@ -0,0 +1,196 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
+module PDF.CMap (
+      CMap
+    , CMappers
+    , CRange(..)
+    , matches
+    , parse
+  ) where
+
+import Control.Applicative ((<|>), many)
+import Control.Monad.Except (MonadError(..))
+import Control.Monad.Fail (fail)
+import Control.Monad.State (modify)
+import Data.Attoparsec.ByteString.Char8 (count)
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS (drop, length, null, take)
+import Data.ByteString.Char8 (unpack)
+import Data.ByteString.Char8.Util (
+    B16Int(..), b16ToBytes, b16ToInt, toBytes, utf16BEToutf8
+  )
+import Data.Foldable (foldl', foldr')
+import Data.Map (Map, mapWithKey, union)
+import qualified Data.Map as Map (
+    adjust, empty, fromList, insert, insertWith, lookup, toList
+  )
+import Data.Text (Text)
+import qualified Data.Text as Text (length, null, splitAt)
+import qualified PDF.EOL as EOL (charset, parser)
+import PDF.Font (Decoder, Encoder, Font(..))
+import PDF.Object (
+      DirectObject(..), Name, StringObject(..)
+    , blank, directObject, integer, line, stringObject
+  )
+import PDF.Parser (MonadParser, Parser, runParser, takeAll)
+import Prelude hiding (fail)
+
+type CMappers = Map Name CMap
+type ToUnicode = Map ByteString Text
+type FromUnicode = Map Text ByteString
+data CRange = CRange {
+      fromSequence :: ByteString
+    , toSequence :: ByteString
+    , toUnicode :: ToUnicode
+  } deriving Show
+type Size = Int
+data CMap = CMap {
+      toUnicodeBySize :: Map Size [CRange]
+    , fromUnicodeBySize :: Map Size FromUnicode
+  }
+
+toFont :: CMap -> Font
+toFont (CMap {toUnicodeBySize, fromUnicodeBySize})  =
+  Font {decode = decoder toUnicodeBySize, encode = encoder fromUnicodeBySize}
+
+decoder :: Map Size [CRange] -> Decoder
+decoder rangesBySize input
+  | BS.null input = Right ""
+  | otherwise = do
+    (output, remainingInput) <- trySizes input $ Map.toList rangesBySize
+    mappend output <$> decoder rangesBySize remainingInput
+  where
+    trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
+    trySizes s ((size, cRanges):others) =
+      let prefix = BS.take size s in
+      case tryRanges prefix cRanges of
+        Nothing -> trySizes s others
+        Just outputSequence -> Right (outputSequence, BS.drop size s)
+    tryRanges :: ByteString -> [CRange] -> Maybe Text
+    tryRanges _ [] = Nothing
+    tryRanges prefix ((CRange {toUnicode}):cRanges) =
+      case Map.lookup prefix toUnicode of
+        Nothing -> tryRanges prefix cRanges
+        outputSequence -> outputSequence
+
+encoder :: Map Size FromUnicode -> Encoder
+encoder fromUnicodes input
+  | Text.null input = Right ""
+  | otherwise =
+    foldr' (<>) (Left "No encoding found") $ mapWithKey tryOn fromUnicodes
+    where
+      tryOn size fromUnicode =
+        let (prefix, end) = Text.splitAt size input in
+        case Map.lookup prefix fromUnicode of
+          Nothing -> Left ""
+          Just code -> (code <>) <$> encoder fromUnicodes end
+
+matches :: ByteString -> CRange -> Bool
+matches code (CRange {fromSequence, toSequence}) =
+  fromSequence <= code && code <= toSequence
+
+parse :: MonadError String m => ByteString -> m Font
+parse = either throwError (return . toFont . snd) . runParser
+  (many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
+  emptyCMap
+  where
+    ignoredLine =
+      takeAll (not . (`elem` EOL.charset)) *> EOL.parser *> return ()
+    emptyCMap = CMap {toUnicodeBySize = Map.empty, fromUnicodeBySize = Map.empty}
+
+codeRanges :: Parser CMap ()
+codeRanges = do
+  size <- integer <* line "begincodespacerange"
+  mapM_ createMapping =<< count size codeRange
+  line "endcodespacerange"
+  where
+    codeRange =
+      (,) <$> stringObject <* blank <*> stringObject <* EOL.parser
+
+createMapping :: (StringObject, StringObject) -> Parser CMap ()
+createMapping (Hexadecimal from, Hexadecimal to) = modify $ \cmap -> cmap {
+      toUnicodeBySize = Map.insertWith (++) size [cRange] (toUnicodeBySize cmap)
+    }
+  where
+    fromSequence = b16ToBytes from
+    size = BS.length fromSequence
+    toSequence = b16ToBytes to
+    cRange = CRange {fromSequence, toSequence, toUnicode = Map.empty}
+createMapping _ = return ()
+
+cMapRange :: Parser CMap ()
+cMapRange = do
+  size <- integer <* line "beginbfrange"
+  mapM_ saveMapping =<< count size rangeMapping
+  line "endbfrange"
+  where
+    rangeMapping = do
+      from <- (stringObject <* blank)
+      to <- (stringObject <* blank)
+      mapFromTo from to =<< directObject <* EOL.parser
+
+saveMapping :: [(ByteString, Text)] -> Parser CMap ()
+saveMapping assoc =
+  modify $ \(CMap {toUnicodeBySize, fromUnicodeBySize}) -> CMap {
+        toUnicodeBySize = saveToUnicodeBySize assoc toUnicodeBySize
+      , fromUnicodeBySize = saveFromUnicodeBySize reversed fromUnicodeBySize
+    }
+    where
+      reversed = (\(a, b) -> (b, a)) <$> assoc
+
+saveToUnicodeBySize :: [(ByteString, Text)] -> Map Size [CRange] -> Map Size [CRange]
+saveToUnicodeBySize [] = id
+saveToUnicodeBySize assoc@((code, _):_) = Map.adjust insertCRange (BS.length code)
+  where
+    newMapping = Map.fromList assoc
+    appendMapping cRange =
+      cRange {toUnicode = toUnicode cRange `union` newMapping}
+    insertCRange = fmap (\cRange ->
+        if code `matches` cRange then appendMapping cRange else cRange
+      )
+
+saveFromUnicodeBySize :: [(Text, ByteString)] -> Map Size FromUnicode -> Map Size FromUnicode
+saveFromUnicodeBySize = flip (foldl' insertFromUnicode)
+  where
+    insertFromUnicode :: Map Size FromUnicode -> (Text, ByteString) -> Map Size FromUnicode
+    insertFromUnicode tmpFromUnicodeBySize (unicodeSequence, code) =
+      let size = Text.length unicodeSequence in
+      Map.adjust (Map.insert unicodeSequence code) size tmpFromUnicodeBySize
+
+cMapChar :: Parser CMap ()
+cMapChar = do
+  size <- integer <* line "beginbfchar"
+  saveMapping =<< count size charMapping <* line "endbfchar"
+  where
+    charMapping = do
+      from <- stringObject <* blank
+      pairMapping from =<< stringObject <* EOL.parser
+
+between :: B16Int -> B16Int -> [ByteString]
+between from@(B16Int s) to =
+  let size = BS.length s `div` 2 in
+  toBytes size <$> [b16ToInt from .. b16ToInt to]
+
+startFrom :: B16Int -> [ByteString]
+startFrom from@(B16Int s) =
+  let size = BS.length s `div` 2 in
+  toBytes size <$> [b16ToInt from .. ]
+
+mapFromTo :: MonadParser m => StringObject -> StringObject -> DirectObject -> m [(ByteString, Text)]
+mapFromTo (Hexadecimal from) (Hexadecimal to) (StringObject (Hexadecimal dstFrom)) =
+  return $ zip (between from to) (utf16BEToutf8 <$> startFrom dstFrom)
+
+mapFromTo (Hexadecimal from) (Hexadecimal to) (Array dstPoints) =
+  zip (between from to) <$> (mapM dstByteString dstPoints)
+  where
+    dstByteString (StringObject (Hexadecimal dst)) =
+      return . utf16BEToutf8 $ b16ToBytes dst
+    dstByteString _ = fail "Invalid for a replacement string"
+
+mapFromTo _ _ _ = fail "invalid range mapping found"
+
+pairMapping :: MonadParser m => StringObject -> StringObject -> m (ByteString, Text)
+pairMapping (Hexadecimal from) (Hexadecimal to) =
+  return (b16ToBytes  from, utf16BEToutf8 $ b16ToBytes  to)
+pairMapping _ _ = fail "invalid pair mapping found"
--- a/src/PDF/Content.hs
+++ b/src/PDF/Content.hs
@ -0,0 +1,117 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+module PDF.Content (
+      Content(..)
+    , ContentUnit(..)
+    , GraphicContextUnit(..)
+    , IdContentUnit
+    , IdGraphicContextUnit
+    , IdTextContext
+    , Instructions(..)
+    , TextContext
+    , parse
+  ) where
+
+import Control.Applicative ((<|>))
+import Control.Monad.Reader (asks, runReader)
+import Control.Monad.State.Strict (runState, evalStateT, modify)
+import Data.Attoparsec.ByteString.Char8 (sepBy)
+import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser, parseOnly)
+import Data.ByteString (ByteString)
+import Data.Id (Id(..), Indexed, at, empty, register)
+import PDF.Box (Box(..))
+import PDF.Content.Operator (Instruction, operator)
+import PDF.Object (blank, directObject)
+import PDF.Output (Output(..), line)
+import PDF.Parser (string)
+
+data Instructions = Instructions
+
+data GraphicContextUnit a =
+    GraphicInstruction a
+  | ContentUnit (ContentUnit a)
+  deriving Show
+type TextContext a = [a]
+data ContentUnit a =
+    GraphicContext [GraphicContextUnit a]
+  | TextContext (TextContext a)
+  deriving Show
+data Content = Content {
+      contentUnits :: [IdContentUnit]
+    , indexedInstructions :: Indexed Instruction
+    , firstError :: Maybe String
+  } deriving Show
+
+type TmpContentUnit = ContentUnit Instruction
+type TmpGraphicContextUnit = GraphicContextUnit Instruction
+type TmpTextContext = TextContext Instruction
+
+type IdContentUnit = ContentUnit (Id Instruction)
+type IdGraphicContextUnit = GraphicContextUnit (Id Instruction)
+type IdTextContext = TextContext (Id Instruction)
+
+instance Monad m => Box m Instructions Content (Indexed Instruction) where
+  r Instructions = return . indexedInstructions
+  w Instructions indexedInstructions someContent =
+    return $ someContent {indexedInstructions}
+
+parse :: ByteString -> Content
+parse input =
+  let result = Atto.parseOnly (contentUnit `sepBy` blank) input in
+  let (contentUnits, indexedInstructions) = either (const ([], empty)) buildContent result in
+  let firstError = either Just (const Nothing) result in
+  Content {contentUnits, indexedInstructions, firstError}
+
+buildContent :: [TmpContentUnit] -> ([IdContentUnit], Indexed Instruction)
+buildContent instructionContentUnits =
+  runState (mapM registerContentUnit instructionContentUnits) empty
+  where
+    registerContentUnit (GraphicContext gc) =
+      GraphicContext <$> (mapM registerGraphicContext gc)
+    registerContentUnit (TextContext tc) = TextContext <$> (mapM register tc)
+    registerGraphicContext (GraphicInstruction gi) =
+      GraphicInstruction <$> (register gi)
+    registerGraphicContext (ContentUnit cu) =
+      ContentUnit <$> (registerContentUnit cu)
+
+contentUnit :: Atto.Parser TmpContentUnit
+contentUnit =
+    (GraphicContext <$> graphicContext)
+  <|> (TextContext <$> textContext)
+  where
+    graphicContext =
+      string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
+
+graphicContextUnit :: Atto.Parser TmpGraphicContextUnit
+graphicContextUnit =
+    (GraphicInstruction <$> instruction)
+  <|> (ContentUnit <$> contentUnit)
+
+instruction :: Atto.Parser Instruction
+instruction = evalStateT stackParser []
+  where
+    stackParser = ((directObject <* blank) >>= push) <|> operator
+    push arg = modify (arg:) *> stackParser
+
+textContext :: Atto.Parser TmpTextContext
+textContext =
+  string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"
+
+instance Output Content where
+  output (Content {contentUnits, indexedInstructions}) =
+    runReader (mconcat <$> mapM outputCU contentUnits) indexedInstructions
+    where
+      outputCU (GraphicContext gc) = do
+        inside <- mconcat <$> mapM outputGCU gc
+        return (line "q" `mappend` inside `mappend` line "Q")
+      outputCU (TextContext tc) = do
+        inside <- mconcat <$> mapM outputIId tc
+        return (line "BT" `mappend` inside `mappend` line "ET")
+      outputGCU (GraphicInstruction gi) = outputIId gi
+      outputGCU (ContentUnit cu) = outputCU cu
+      outputIId instructionId = asks (output . (`at` instructionId))
--- a/src/PDF/Content/Operator.hs
+++ b/src/PDF/Content/Operator.hs
@ -0,0 +1,76 @@
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+module PDF.Content.Operator (
+      Instruction
+    , Operator(..)
+    , operator
+  ) where
+
+import Control.Monad.Fail (MonadFail(..))
+import Control.Monad.State (MonadState(..))
+import Data.ByteString.Char8 (ByteString, pack, unpack)
+import Data.Char (toLower)
+import Data.Map (Map)
+import qualified Data.Map as Map (fromList, lookup)
+import qualified PDF.Content.Operator.Color as Color (Operator, signature)
+import PDF.Content.Operator.Common (Signature)
+import qualified PDF.Content.Operator.GraphicState as GraphicState (Operator, signature)
+import qualified PDF.Content.Operator.Path as Path (Operator, signature)
+import qualified PDF.Content.Operator.Text as Text (Operator, signature)
+import PDF.Object (DirectObject, blank, regular)
+import PDF.Output (Output(..), join, line)
+import PDF.Parser (MonadParser, takeAll1)
+import Prelude hiding (fail)
+import Text.Printf (printf)
+
+data Operator =
+    GraphicState GraphicState.Operator
+  | Path Path.Operator
+  | Color Color.Operator
+  | Text Text.Operator 
+
+instance Show Operator where
+  show (GraphicState gcOp) = code gcOp
+  show (Path pOp) = code pOp
+  show (Color cOp) = code cOp
+  show (Text tOp) = code tOp
+
+type Instruction = (Operator, [DirectObject])
+
+instance Output Instruction where
+  output (op, args) = join " " ((output <$> args) ++ [line (show op)])
+
+operatorsTable :: Map ByteString (Signature Operator)
+operatorsTable = Map.fromList (
+      (prepare GraphicState <$> GraphicState.signature)
+    ++ (prepare Path <$> Path.signature)
+    ++ (prepare Color <$> Color.signature)
+    ++ (prepare Text <$> Text.signature)
+  )
+  where
+    prepare constructor (op, sig) = (pack $ code op, (constructor op, sig))
+
+code :: Show a => a -> String
+code = expand . show
+  where
+    expand "" = ""
+    expand (c:'_':s) = toLower c : expand s
+    expand ('s':'t':'a':'r':s) = '*' : expand s
+    expand ('Q':'u':'o':'t':'e':s) = '\'' : expand s
+    expand ('D':'Q':'u':'o':'t':'e':s) = '"' : expand s
+    expand (c:s) = c : expand s
+
+type StackParser m = (MonadState [DirectObject] m, MonadParser m)
+
+operator :: StackParser m => m Instruction
+operator = do
+  chunk <- takeAll1 regular <* blank
+  args <- reverse <$> get
+  case Map.lookup chunk operatorsTable of
+    Just (op, sig)
+      | sig args -> return (op, args)
+      | otherwise -> 
+        get >>= fail . printf "Operator %s with stack %s" (show op) . show
+    _ -> fail ("Unknown chunk " ++ unpack chunk)
--- a/src/PDF/Content/Operator/Color.hs
+++ b/src/PDF/Content/Operator/Color.hs
@ -0,0 +1,27 @@
+module PDF.Content.Operator.Color (
+      Operator(..)
+    , signature
+  ) where
+
+import PDF.Content.Operator.Common (Signature)
+import PDF.Object (DirectObject(..))
+
+data Operator =
+    CS | C_s | SC | SCN | S_c | S_cn | G | G_ | RG | R_g | K | K_
+  deriving (Bounded, Enum, Show)
+
+signature :: [Signature Operator]
+signature = [
+      (CS, \l -> case l of [NameObject _] -> True ; _ -> False)
+    , (C_s, \l -> case l of [NameObject _] -> True ; _ -> False)
+    , (SC, \_ -> True)
+    , (SCN, \_ -> True)
+    , (S_c, \_ -> True)
+    , (S_cn, \_ -> True)
+    , (G, \l -> case l of [_] -> True ; _ -> False)
+    , (G_, \l -> case l of [_] -> True ; _ -> False)
+    , (RG, \l -> case l of [_, _, _] -> True ; _ -> False)
+    , (R_g, \l -> case l of [_, _, _] -> True ; _ -> False)
+    , (K, \l -> case l of [_, _, _, _] -> True ; _ -> False)
+    , (K_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
+  ]
--- a/src/PDF/Content/Operator/Common.hs
+++ b/src/PDF/Content/Operator/Common.hs
@ -0,0 +1,7 @@
+module PDF.Content.Operator.Common (
+    Signature
+  ) where
+
+import PDF.Object (DirectObject)
+
+type Signature a = (a, [DirectObject] -> Bool)
--- a/src/PDF/Content/Operator/GraphicState.hs
+++ b/src/PDF/Content/Operator/GraphicState.hs
@ -0,0 +1,24 @@
+module PDF.Content.Operator.GraphicState (
+      Operator(..)
+    , signature
+  ) where
+
+import PDF.Content.Operator.Common (Signature)
+import PDF.Object (DirectObject(..))
+
+data Operator =
+  C_m | W_ | J | J_ | M | D_ | R_i | I_ | G_s -- general graphic state
+  deriving (Bounded, Enum, Show)
+
+signature :: [Signature Operator]
+signature = [
+      (C_m, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
+    , (W_, \l -> case l of [_] -> True ; _ -> False)
+    , (J, \l -> case l of [_] -> True ; _ -> False)
+    , (J_, \l -> case l of [_] -> True ; _ -> False)
+    , (M, \l -> case l of [_] -> True ; _ -> False)
+    , (D_, \l -> case l of [_, _] -> True ; _ -> False)
+    , (R_i, \l -> case l of [_] -> True ; _ -> False)
+    , (I_, \l -> case l of [_] -> True ; _ -> False)
+    , (G_s, \l -> case l of [NameObject _] -> True ; _ -> False)
+  ]
--- a/src/PDF/Content/Operator/Path.hs
+++ b/src/PDF/Content/Operator/Path.hs
@ -0,0 +1,35 @@
+module PDF.Content.Operator.Path (
+      Operator(..)
+    , signature
+  ) where
+
+import PDF.Content.Operator.Common (Signature)
+
+data Operator =
+    M_ | L_ | C_ | V_ | Y_ | H_ | R_e -- path construction
+  | S | S_ | F_ | F | Fstar | B | Bstar | B_ | B_star | N_ -- path painting
+  | W | Wstar -- clipping path
+  deriving (Bounded, Enum, Show)
+
+signature :: [Signature Operator]
+signature = [
+      (M_, \l -> case l of [_, _] -> True ; _ -> False)
+    , (L_, \l -> case l of [_, _] -> True ; _ -> False)
+    , (C_, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
+    , (V_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
+    , (Y_, \l -> case l of [_, _, _, _] -> True ; _ -> False)
+    , (H_, \l -> case l of [] -> True ; _ -> False)
+    , (R_e, \l -> case l of [_, _, _, _] -> True ; _ -> False)
+    , (S, \l -> case l of [] -> True ; _ -> False)
+    , (S_, \l -> case l of [] -> True ; _ -> False)
+    , (F_, \l -> case l of [] -> True ; _ -> False)
+    , (F, \l -> case l of [] -> True ; _ -> False)
+    , (Fstar, \l -> case l of [] -> True ; _ -> False)
+    , (B, \l -> case l of [] -> True ; _ -> False)
+    , (Bstar, \l -> case l of [] -> True ; _ -> False)
+    , (B_, \l -> case l of [] -> True ; _ -> False)
+    , (B_star, \l -> case l of [] -> True ; _ -> False)
+    , (N_, \l -> case l of [] -> True ; _ -> False)
+    , (W, \l -> case l of [] -> True ; _ -> False)
+    , (Wstar, \l -> case l of [] -> True ; _ -> False)
+  ]
--- a/src/PDF/Content/Operator/Text.hs
+++ b/src/PDF/Content/Operator/Text.hs
@ -0,0 +1,32 @@
+module PDF.Content.Operator.Text (
+      Operator(..)
+    , signature
+  ) where
+
+import PDF.Content.Operator.Common (Signature)
+import PDF.Object (DirectObject(..))
+
+data Operator =
+    Td | TD | Tm | Tstar -- text positioning
+  | TJ | Tj | Quote | DQuote -- text showing
+  | Tc | Tw | Tz | TL | Tf | Tr | Ts -- text state
+  deriving (Bounded, Enum, Show)
+
+signature :: [Signature Operator]
+signature = [
+      (Td, \l -> case l of [_, _] -> True ; _ -> False)
+    , (TD, \l -> case l of [_, _] -> True ; _ -> False)
+    , (Tm, \l -> case l of [_, _, _, _, _, _] -> True ; _ -> False)
+    , (Tstar, \l -> case l of [] -> True ; _ -> False)
+    , (TJ, \l -> case l of [Array _] -> True ; _ -> False)
+    , (Tj, \l -> case l of [StringObject _] -> True ; _ -> False)
+    , (Quote, \l -> case l of [StringObject _] -> True ; _ -> False)
+    , (DQuote, \l -> case l of [_, _, StringObject _] -> True ; _ -> False)
+    , (Tc, \l -> case l of [_] -> True ; _ -> False)
+    , (Tw, \l -> case l of [_] -> True ; _ -> False)
+    , (Tz, \l -> case l of [_] -> True ; _ -> False)
+    , (TL, \l -> case l of [_] -> True ; _ -> False)
+    , (Tf, \l -> case l of [NameObject _, _] -> True ; _ -> False)
+    , (Tr, \l -> case l of [_] -> True ; _ -> False)
+    , (Ts, \l -> case l of [_] -> True ; _ -> False)
+  ]
--- a/src/PDF/Content/Text.hs
+++ b/src/PDF/Content/Text.hs
@ -0,0 +1,151 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+module PDF.Content.Text (
+      Chunks(..)
+    , chunk
+    , format
+  ) where
+
+import Control.Monad (foldM)
+import Control.Monad.Fail (MonadFail(..))
+import Control.Monad.Reader (ReaderT, asks, runReaderT)
+import Control.Monad.State (
+    MonadState(..), StateT, evalStateT, gets, modify, runStateT
+  )
+import Control.Monad.Trans (lift)
+import qualified Data.ByteString.Char8 as BS (concatMap, singleton)
+import Data.Id (Id(..), Indexed, at, empty, singleton)
+import qualified Data.Id as Id (delete, lookup, register)
+import Data.Map ((!))
+import Data.Text (Text, breakOn)
+import qualified Data.Text as Text (drop)
+import PDF.Box (Box(..))
+import PDF.Content (
+      Content(..), ContentUnit(..), GraphicContextUnit(..), IdContentUnit
+    , IdGraphicContextUnit
+  )
+import PDF.Content.Operator (Instruction, Operator(..))
+import PDF.Content.Operator.Text (Operator(..))
+import PDF.Font (Font(..), FontSet, emptyFont)
+import PDF.Object (DirectObject(..), StringObject(..), toByteString)
+import Prelude hiding (fail)
+
+type TmpFont = StateT Font
+type Renderer m = ReaderT (Indexed Instruction) (ReaderT FontSet m)
+type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m))
+
+decodeString :: MonadFail m => StringObject -> TmpFont (Renderer m) Text
+decodeString input = do
+  Font {decode} <- get
+  either fail return . decode $ toByteString input
+
+data Chunks = Chunks
+
+chunk :: Int -> Id Text
+chunk = Id
+
+instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
+  r Chunks content =
+    runReaderT (mconcat <$> renderer) $ indexedInstructions content
+    where
+      renderer = mapM renderContentUnit (contentUnits content)
+
+  w Chunks indexedText content = do
+    (contentUnits, indexedInstructions) <- runReaderT readerUpdate indexedText
+    return $ content {contentUnits, indexedInstructions}
+    where
+      stateUpdate = mapM updateContentUnit (contentUnits content)
+      readerUpdate = runStateT stateUpdate (indexedInstructions content)
+
+renderContentUnit :: MonadFail m => IdContentUnit -> Renderer m (Indexed Text)
+renderContentUnit (GraphicContext graphicContextUnits) =
+  mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
+renderContentUnit (TextContext instructionIds) =
+  evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
+
+updateContentUnit :: MonadFail m => IdContentUnit -> Updater m IdContentUnit
+updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$>
+  mapM updateGraphicContextUnit graphicContextUnits
+updateContentUnit (TextContext instructionIds) = TextContext . concat <$>
+  evalStateT (mapM updateInstructionId instructionIds) emptyFont
+
+renderGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Renderer m (Indexed Text)
+renderGraphicContextUnit (GraphicInstruction _) = return empty
+renderGraphicContextUnit (ContentUnit contentUnit) =
+  renderContentUnit contentUnit
+
+updateGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Updater m IdGraphicContextUnit
+updateGraphicContextUnit gI@(GraphicInstruction _) = return gI
+updateGraphicContextUnit (ContentUnit contentUnit) =
+  ContentUnit <$> updateContentUnit contentUnit
+
+renderInstructionId :: MonadFail m => Id Instruction -> TmpFont (Renderer m) (Indexed Text)
+renderInstructionId instructionId@(Id n) = toMap <$>
+  (asks ((`at` instructionId)) >>= renderInstruction)
+  where
+    toMap = maybe empty (singleton (Id n))
+
+updateInstructionId :: MonadFail m => Id Instruction -> TmpFont (Updater m) [Id Instruction]
+updateInstructionId instructionId =
+  lift (gets (`at` instructionId)) >>= updateInstruction instructionId
+
+renderInstruction :: MonadFail m => Instruction -> TmpFont (Renderer m) (Maybe Text)
+renderInstruction (Text Tf, [NameObject fontName, _]) =
+  lift (lift $ asks (! fontName)) >>= put >> return Nothing
+
+renderInstruction (Text Tstar, []) = return $ Just "\n"
+
+renderInstruction (Text TJ, [Array arrayObject]) =
+  Just <$> foldM appendText "" arrayObject
+  where
+    appendText t (StringObject outputString) =
+      mappend t <$> decodeString outputString
+    appendText t _ = return t
+
+renderInstruction (Text Tj, [StringObject outputString]) =
+  Just <$> decodeString outputString
+
+renderInstruction (Text Quote, [StringObject outputString]) =
+  (Just . mappend "\n") <$> decodeString outputString
+
+renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
+  (Just . mappend "\n") <$> decodeString outputString
+
+renderInstruction _ = return Nothing
+
+updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TmpFont (Updater m) [Id Instruction]
+updateInstruction instructionId (Text Tf, [NameObject fontName, _]) =
+  (lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId]
+
+updateInstruction instructionId@(Id n) instruction = do
+  if emitsText $ fst instruction
+  then asks (Id.lookup (Id n)) >>= replaceText
+  else return [instructionId]
+  where
+    emitsText (Text Tstar) = True
+    emitsText (Text TJ) = True
+    emitsText (Text Tj) = True
+    emitsText (Text Quote) = True
+    emitsText (Text DQuote) = True
+    emitsText _ = False
+    replaceText = maybe (return []) $ \text -> do
+      lift $ modify (Id.delete instructionId)
+      format text >>= mapM (lift . Id.register)
+
+format :: MonadFail m => Text -> StateT Font m [Instruction]
+format input = do
+  case breakOn "\n" input of
+    ("", "") -> return []
+    ("", left) -> ((Text Tstar, []) :) <$> format (Text.drop 1 left)
+    (line, left) -> (:) <$> tj line <*> format left
+  where
+    tj t = do
+      encoded <- either fail return =<< gets (($t) . encode)
+      return (Text Tj, [StringObject . Literal $ BS.concatMap escape encoded])
+    escape '\\' = "\\\\"
+    escape '(' = "\\("
+    escape ')' = "\\)"
+    escape c = BS.singleton c
--- a/src/PDF/EOL.hs
+++ b/src/PDF/EOL.hs
@ -6,14 +6,14 @@ module PDF.EOL (
  ) where

 import Control.Applicative ((<|>))
-import PDF.Parser (Parser, string)
+import PDF.Parser (MonadParser, string)

 data Style = CR | LF | CRLF deriving Show

 charset :: String
 charset = "\r\n"

-parser :: Parser s Style
+parser :: MonadParser m => m Style
 parser =
    (string "\r\n" >> return CRLF)
  <|> (string "\r" >> return CR)
--- a/src/PDF/Encoding.hs
+++ b/src/PDF/Encoding.hs
@ -0,0 +1,13 @@
+{-# LANGUAGE FlexibleContexts #-}
+module PDF.Encoding (
+    encoding
+  ) where
+
+import Control.Monad.Except (MonadError(..))
+import PDF.Encoding.MacRoman (macRomanEncoding)
+import PDF.Font (Font)
+import Prelude hiding (fail)
+
+encoding :: MonadError String m => String -> m Font
+encoding "MacRomanEncoding" = return macRomanEncoding
+encoding s = throwError $ "Unknown encoding " ++ s
--- a/src/PDF/Encoding/MacRoman.hs
+++ b/src/PDF/Encoding/MacRoman.hs
@ -0,0 +1,162 @@
+module PDF.Encoding.MacRoman (
+    macRomanEncoding
+  ) where
+
+import qualified Data.ByteString.Char8 as BS (pack, unpack)
+import Data.Foldable (foldl')
+import Data.Map (Map)
+import qualified Data.Map as Map (empty, insert, lookup)
+import qualified Data.Text as Text (pack, unpack)
+import PDF.Font (Font(..))
+
+type Mapper = Map Char Char
+macRomanEncoding :: Font
+macRomanEncoding = Font {
+      decode = Right . Text.pack . fmap decodeChar . BS.unpack
+    , encode = fmap BS.pack . mapM encodeChar . Text.unpack
+  }
+  where
+    decodeChar k = maybe k id $ Map.lookup k (fst mappers)
+    encodeChar k =
+      case Map.lookup k (snd mappers) of
+        Just v -> Right v
+        Nothing
+          | k < '\x80' -> Right k
+          | otherwise -> Left ("Character '" ++ k :"' unavailable in MacRoman")
+
+mappers :: (Mapper, Mapper)
+mappers = foldl' generateMapers (Map.empty, Map.empty) [
+      ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
+    , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
+    , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
+    , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
+    , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
+    , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
+    , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
+    , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
+    , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
+    , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
+    , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
+    , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
+    , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
+    , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
+    , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
+    , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
+    , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
+    , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
+    , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
+    , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
+    , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
+    , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
+    , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
+    , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
+    , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
+    , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
+    , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
+    , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
+    , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
+    , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
+    , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
+    , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
+    , ('\xA0', '\x2020') -- DAGGER
+    , ('\xA1', '\x00B0') -- DEGREE SIGN
+    , ('\xA2', '\x00A2') -- CENT SIGN
+    , ('\xA3', '\x00A3') -- POUND SIGN
+    , ('\xA4', '\x00A7') -- SECTION SIGN
+    , ('\xA5', '\x2022') -- BULLET
+    , ('\xA6', '\x00B6') -- PILCROW SIGN
+    , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
+    , ('\xA8', '\x00AE') -- REGISTERED SIGN
+    , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
+    , ('\xAA', '\x2122') -- TRADE MARK SIGN
+    , ('\xAB', '\x00B4') -- ACUTE ACCENT
+    , ('\xAC', '\x00A8') -- DIAERESIS
+    , ('\xAD', '\x2260') -- NOT EQUAL TO
+    , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
+    , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
+    , ('\xB0', '\x221E') -- INFINITY
+    , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
+    , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
+    , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
+    , ('\xB4', '\x00A5') -- YEN SIGN
+    , ('\xB5', '\x00B5') -- MICRO SIGN
+    , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
+    , ('\xB7', '\x2211') -- N-ARY SUMMATION
+    , ('\xB8', '\x220F') -- N-ARY PRODUCT
+    , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
+    , ('\xBA', '\x222B') -- INTEGRAL
+    , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
+    , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
+    , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
+    , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
+    , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
+    , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
+    , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
+    , ('\xC2', '\x00AC') -- NOT SIGN
+    , ('\xC3', '\x221A') -- SQUARE ROOT
+    , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
+    , ('\xC5', '\x2248') -- ALMOST EQUAL TO
+    , ('\xC6', '\x2206') -- INCREMENT
+    , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
+    , ('\xCA', '\x00A0') -- NO-BREAK SPACE
+    , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
+    , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
+    , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
+    , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
+    , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
+    , ('\xD0', '\x2013') -- EN DASH
+    , ('\xD1', '\x2014') -- EM DASH
+    , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
+    , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
+    , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
+    , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
+    , ('\xD6', '\x00F7') -- DIVISION SIGN
+    , ('\xD7', '\x25CA') -- LOZENGE
+    , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
+    , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
+    , ('\xDA', '\x2044') -- FRACTION SLASH
+    , ('\xDB', '\x20AC') -- EURO SIGN
+    , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
+    , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
+    , ('\xE0', '\x2021') -- DOUBLE DAGGER
+    , ('\xE1', '\x00B7') -- MIDDLE DOT
+    , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
+    , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
+    , ('\xE4', '\x2030') -- PER MILLE SIGN
+    , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
+    , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
+    , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
+    , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
+    , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
+    , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
+    , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
+    , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    , ('\xF0', '\xF8FF') -- Apple logo
+    , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
+    , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
+    , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
+    , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
+    , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
+    , ('\xF7', '\x02DC') -- SMALL TILDE
+    , ('\xF8', '\x00AF') -- MACRON
+    , ('\xF9', '\x02D8') -- BREVE
+    , ('\xFA', '\x02D9') -- DOT ABOVE
+    , ('\xFB', '\x02DA') -- RING ABOVE
+    , ('\xFC', '\x00B8') -- CEDILLA
+    , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
+    , ('\xFE', '\x02DB') -- OGONEK
+    , ('\xFF', '\x02C7') -- CARON
+  ]
+  where
+    generateMapers (tmpDecoder, tmpEncoder) (macChar, utf8Char) = (
+          Map.insert macChar utf8Char tmpDecoder
+        , Map.insert utf8Char macChar tmpEncoder
+      )
--- a/src/PDF/Font.hs
+++ b/src/PDF/Font.hs
@ -0,0 +1,27 @@
+module PDF.Font (
+      Decoder
+    , Encoder
+    , Font(..)
+    , FontSet
+    , emptyFont
+  ) where
+
+import Data.ByteString (ByteString)
+import Data.Map (Map)
+import Data.Text (Text)
+import PDF.Object (Name)
+
+type Decoder = ByteString -> Either String Text
+type Encoder = Text -> Either String ByteString
+data Font = Font {
+      decode :: Decoder
+    , encode :: Encoder
+  }
+
+type FontSet = Map Name Font
+
+emptyFont :: Font
+emptyFont = Font {
+      decode = \_ -> Left "No fond loaded"
+    , encode = \_ -> Left "No fond loaded"
+  }
--- a/src/PDF/Layer.hs
+++ b/src/PDF/Layer.hs
@ -0,0 +1,129 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleContexts #-}
+module PDF.Layer (
+      Layer(..)
+    , LayerReader
+    , Objects(..)
+    , unify
+  ) where
+
+import Control.Monad.Except (MonadError(..))
+import Control.Monad.Reader (ReaderT)
+import Data.Foldable (foldl')
+import Data.Id (Id(..), Indexed, keysSet, mapWithKey, member)
+import qualified Data.Id as Id (empty, insert, lookup, union)
+import Data.Map (Map, (!))
+import qualified Data.Map as Map (empty, lookup, union)
+import qualified Data.IntSet as IntSet (delete, toList)
+import PDF.Box (Box(..))
+import PDF.Object (
+      IndirectObjCoordinates(..), Object, Occurrence(..), Structure(..)
+    , XRefEntry(..), XRefSection, eofMarker, outputBody
+  )
+import qualified PDF.Output as Output (line)
+import PDF.Output (
+      Offset(..), Output(..), Resource(..), byteString, getOffset
+    , getOffsets, newLine
+  )
+import Text.Printf (printf)
+
+data Layer = Layer {
+      occurrences :: [Occurrence]
+    , objects :: Indexed Object
+    , docStructure :: Structure
+  } deriving Show
+type LayerReader m = ReaderT Layer m
+
+updateXRefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
+updateXRefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
+  where
+    updateEntry objectId e@(InUse {offset}) =
+      case Map.lookup (ObjectId $ getId objectId) offsets of
+        Nothing -> Free {nextFree = Id $ getOffset offset, generation = 65535}
+        Just newOffset -> e {offset = newOffset}
+    updateEntry _ e = e
+
+instance Output Layer where
+  output (Layer {occurrences, objects, docStructure}) = do
+    (body, savedOffsets) <- getOffsets (outputBody (occurrences, objects))
+    let (newXRef, startXRef) = updateXRefs xRef savedOffsets
+    mconcat [
+          return body
+        , Output.line "xref"
+        , output newXRef
+        , Output.line "trailer"
+        , output trailer, newLine
+        , Output.line "startxref"
+        , Output.line (printf "%d" (getOffset startXRef))
+        , byteString eofMarker
+      ]
+    where
+      Structure {xRef, trailer} = docStructure
+
+data Objects = Objects
+
+instance Monad m => Box m Objects Layer (Indexed Object) where
+  r Objects = return . objects
+  w Objects newObjects layer@(Layer {occurrences, docStructure}) =
+    return $ layer {
+          occurrences = keptOccurrences ++ newOccurrences
+        , objects = newObjects
+        , docStructure = docStructure {
+              xRef = (const $ InUse (Offset 0) 0) <$> newObjects
+            }
+      }
+    where
+      filterOccurrences c@(Comment _) (occ, newObjIds) = (c:occ, newObjIds)
+      filterOccurrences i@(Indirect (IndirectObjCoordinates {objectId})) (occ, newObjIds)
+        | member objectId newObjects = (i:occ, IntSet.delete (getId objectId) newObjIds)
+        | otherwise = (occ, newObjIds)
+      (keptOccurrences, newObjectIds) =
+        foldr filterOccurrences ([], keysSet newObjects) occurrences
+      makeOccurrence objectId =
+        Indirect (IndirectObjCoordinates {objectId, versionNumber = 0})
+      newOccurrences = (makeOccurrence . Id) <$> IntSet.toList newObjectIds
+
+instance MonadError String m => Box m (Id Object) Layer Object where
+  r objectId =
+    maybe (throwError "Unknown key") return . Id.lookup objectId . objects
+  w objectId a layer@(Layer {objects})
+    | member objectId objects = return $
+      layer {objects = Id.insert objectId a objects}
+    | otherwise = throwError "Unknown key"
+
+emptyLayer :: Layer
+emptyLayer = Layer {
+      docStructure = Structure {xRef = Id.empty, trailer = Map.empty}
+    , objects = Id.empty
+    , occurrences = []
+  }
+
+unify :: [Layer] -> Layer
+unify = foldl' complete emptyLayer
+  where
+    complete tmpLayer older =
+      let mergedObjects = Id.union (objects tmpLayer) (objects older) in
+      Layer {
+          docStructure =
+            unifyDocStructure (docStructure tmpLayer) (docStructure older)
+        , objects = mergedObjects
+        , occurrences =
+            unifyOccurrences mergedObjects (occurrences tmpLayer) (occurrences older)
+      }
+
+unifyDocStructure :: Structure -> Structure -> Structure
+unifyDocStructure new old = Structure {
+      xRef = Id.union (xRef new) (xRef old)
+    , trailer = Map.union (trailer new) (trailer old)
+  }
+
+unifyOccurrences :: (Indexed Object) -> [Occurrence] -> [Occurrence] -> [Occurrence]
+unifyOccurrences objects new = foldr addOlder new
+  where
+    addOlder occurrence@(Comment _) existing = occurrence : existing
+    addOlder occurrence@(Indirect indirect) existing =
+      if objectId indirect `member` objects
+      then occurrence : existing
+      else existing
--- a/src/PDF/Object.hs
+++ b/src/PDF/Object.hs
@ -2,19 +2,20 @@
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE FlexibleInstances #-}
 module PDF.Object (
-      Content(..)
+      Dictionary
    , DirectObject(..)
    , Flow(..)
-    , IndexedObjects
    , IndirectObjCoordinates(..)
    , InputStructure(..)
    , Name(..)
    , Number(..)
    , Object(..)
    , Occurrence(..)
+    , StringObject(..)
    , Structure(..)
    , XRefEntry(..)
    , XRefSection
+    , array
    , blank
    , dictionary
    , directObject
@ -22,34 +23,43 @@ module PDF.Object (
    , integer
    , line
    , magicNumber
+    , name
+    , number
+    , object
+    , outputBody
+    , regular
+    , stringObject
    , structure
+    , toByteString
  ) where

-import Control.Applicative ((<|>))
-import Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString.Char8 as BS (
-    concat, cons, pack, singleton, unpack
+import Control.Applicative ((<|>), many)
+import Control.Monad.Reader (asks)
+import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS (concat)
+import qualified Data.ByteString.Char8 as Char8 (
+    cons, length, pack, singleton, snoc, unpack
  )
-import Data.Map (Map, (!), mapWithKey)
-import qualified Data.Map as Map (
-    delete, empty, fromList, lookup, minViewWithKey, toList, union
+import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
+import Data.Id (Id(..), IdMap, Indexed)
+import qualified Data.Id as Id (
+    at, delete, empty, fromList, lookup, minViewWithKey, union
  )
-import qualified PDF.EOL as EOL (charset, parser)
-import qualified PDF.Output as Output (concat, line, string)
+import Data.Map (Map)
+import qualified Data.Map as Map (fromList, toList)
+import qualified Data.Set as Set (fromList, member)
+import qualified PDF.EOL as EOL (Style(..), charset, parser)
+import qualified PDF.Output as Output (line, string)
 import PDF.Output (
-      OBuilder, ObjectId(..), Offset(..), Output(..), Resource(..)
-    , byteString, getObjectId, getOffset, getOffsets, join, newLine
-    , saveOffset
-  )
-import PDF.Parser (
-      Parser, (<?>)
-    , char, choice, count, decNumber, hexNumber, many, octDigit, oneOf, option
-    , sepBy, string, takeAll, takeAll1
+      OBuilder, OContext(..), Offset(..), Output(..), Resource(..), byteString
+    , getOffset, join, newLine, saveOffset
  )
+import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
 import Text.Printf (printf)

-line :: String -> Parser u ()
-line l = (string (BS.pack l) *> blank *> return ()) <?> printf "line «%s»" l
+line :: MonadParser m => String -> m ()
+line l = (string (Char8.pack l) *> blank *> return ()) <?> printf "line «%s»" l

 magicNumber :: ByteString
 magicNumber = "%PDF-"
@ -60,8 +70,8 @@ eofMarker = "%%EOF"
 whiteSpaceCharset :: String
 whiteSpaceCharset = "\0\t\12 "

-blank :: Parser u ()
-blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> return ()
+blank :: MonadParser m => m ()
+blank = takeAll (`elem` (EOL.charset ++ whiteSpaceCharset)) *> pure ()

 delimiterCharset :: String
 delimiterCharset = "()<>[]{}/%"
@ -69,26 +79,24 @@ delimiterCharset = "()<>[]{}/%"
 regular :: Char -> Bool
 regular = not . (`elem` (EOL.charset ++ whiteSpaceCharset ++ delimiterCharset))

-integer :: (Read a, Num a) => Parser u a
-integer = read . BS.unpack <$> decNumber <* blank <?> "decimal integer"
+integer :: MonadParser m => m Int
+integer = decNumber <* blank <?> "decimal integer"

 -------------------------------------
 --          OBJECTS
 -------------------------------------

-type IndexedObjects = Map ObjectId Object
-
 --
 -- Boolean
 --
-boolean :: Parser u Bool
+boolean :: MonadParser m => m Bool
 boolean =
  (string "true" *> return True) <|> (string "false" *> return False) <?> "boolean"

 --
 -- Number
 --
-newtype Number = Number Double deriving Show
+newtype Number = Number Double deriving (Eq, Show)

 instance Output Number where
  output (Number f) = Output.string $
@ -96,38 +104,44 @@ instance Output Number where
      (n, 0) -> printf "%d" (n :: Int)
      _ -> printf "%f" f

-number :: Parser u Number
-number = Number . read . BS.unpack <$>
-  (mappend <$> sign <*> (integerPart <|> BS.cons '0' <$> floatPart))
-  <?> "number"
+number :: MonadParser m => m Number
+number = Number <$> (sign <*> value) <?> "number"
  where
-    sign = string "-" <|> option "" (char '+' >> return "")
-    integerPart = mappend <$> decNumber <*> option "" floatPart
-    floatPart = BS.cons <$> char '.' <*> (option "0" $ decNumber)
+    sign = (string "-" *> return negate) <|> option id (char '+' >> return id)
+    value = floatNumber <|> (char '.' *> afterPoint)
+    afterPoint = read . ("0." ++) . Char8.unpack <$> takeAll (`Set.member` digits)
+    digits = Set.fromList ['0' .. '9']

 --
 -- StringObject
 --
-data StringObject = Literal String | Hexadecimal String deriving Show
+data StringObject = Literal ByteString | Hexadecimal B16Int deriving (Eq, Show)

 instance Output StringObject where
-  output (Literal s) = Output.string (printf "(%s)" s)
-  output (Hexadecimal s) = Output.string (printf "<%s>" s)
+  output (Literal s) = Output.string (printf "(%s)" (Char8.unpack s))
+  output (Hexadecimal (B16Int n)) = Output.string (printf "<%s>" (Char8.unpack n))

-stringObject :: Parser u StringObject
+stringObject :: MonadParser m => m StringObject
 stringObject =
-    Literal . BS.unpack <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
-  <|> Hexadecimal . BS.unpack <$> (char '<' *> hexNumber <* char '>')
+    Literal <$> (char '(' *> (BS.concat <$> literalString) <* char ')')
+  <|> Hexadecimal . roundBytes <$> (char '<' *> hexNumber <* char '>')
  <?> "string object (literal or hexadecimal)"
  where
    literalString = many literalStringBlock
    literalStringBlock = takeAll1 normalChar <|> matchingParenthesis <|> escapedChar
    normalChar = not . (`elem` ("\\()" :: String))
    matchingParenthesis =
-      mappend <$> (BS.cons <$> char '(' <*> literalStringBlock) <*> string ")"
+      mappend <$> (Char8.cons <$> char '(' <*> literalStringBlock) <*> string ")"
    escapedChar =
-      BS.cons <$> char '\\' <*> (BS.singleton <$> oneOf "nrtbf()\\" <|> octalCode)
-    octalCode = choice $ (\n -> BS.pack <$> count n octDigit) <$> [1..3]
+      Char8.cons <$> char '\\' <*> (Char8.singleton <$> oneOf "nrtbf()\\\n" <|> octalCode)
+    octalCode = choice $ (\n -> Char8.pack <$> count n octDigit) <$> [1..3]
+    roundBytes (B16Int bs)
+      | Char8.length bs `mod` 2 == 1 = B16Int (bs `Char8.snoc` '0')
+      | otherwise = B16Int bs
+
+toByteString :: StringObject -> ByteString
+toByteString (Hexadecimal h) = b16ToBytes h
+toByteString (Literal s) = unescape s

 --
 -- Name
@ -137,13 +151,13 @@ newtype Name = Name String deriving (Eq, Ord, Show)
 instance Output Name where
  output (Name n) = Output.string ('/':n)

-name :: Parser u Name
-name = Name . BS.unpack <$> (char '/' *> takeAll regular) <?> "name"
+name :: MonadParser m => m Name
+name = Name . Char8.unpack <$> (char '/' *> takeAll regular) <?> "name"

 --
 -- Array
 --
-array :: Parser u [DirectObject]
+array :: MonadParser m => m [DirectObject]
 array =
  char '[' *> blank *> directObject `sepBy` blank <* blank <* char ']' <?> "array"

@ -153,14 +167,14 @@ array =
 type Dictionary = Map Name DirectObject

 instance Output Dictionary where
-  output dict =
+  output aDictionary =
    "<<" `mappend` keyValues `mappend` ">>"
    where
-      keyValues = join " " $ outputKeyVal <$> Map.toList dict
+      keyValues = join " " $ outputKeyVal <$> Map.toList aDictionary
      outputKeyVal :: (Name, DirectObject) -> OBuilder
-      outputKeyVal (key, val) = Output.concat [output key, " ", output val]
+      outputKeyVal (key, val) = mconcat [output key, " ", output val]

-dictionary :: Parser u Dictionary
+dictionary :: MonadParser m => m Dictionary
 dictionary =
  string "<<" *> blank *> keyValPairs <* string ">>" <?> "dictionary"
  where
@ -170,33 +184,33 @@ dictionary =
 --
 -- Null
 --
-nullObject :: Parser u ()
+nullObject :: MonadParser m => m ()
 nullObject = string "null" *> return () <?> "null object"

 --
 -- Reference
 --
 data IndirectObjCoordinates = IndirectObjCoordinates {
-      objectId :: ObjectId
-    , versionNumber :: Int
+      objectId :: {-# UNPACK #-} !(Id Object)
+    , versionNumber :: {-# UNPACK #-} !Int
  } deriving Show

-reference :: Parser u IndirectObjCoordinates
+reference :: MonadParser m => m IndirectObjCoordinates
 reference = IndirectObjCoordinates
-  <$> (fmap ObjectId integer) <*> integer <* char 'R' <?> "reference to an object"
+  <$> (fmap Id integer) <*> integer <* char 'R' <?> "reference to an object"

 --
 -- DirectObject
 --
 data DirectObject =
-    Boolean Bool
-  | NumberObject Number
-  | StringObject StringObject
-  | NameObject Name
-  | Array [DirectObject]
-  | Dictionary Dictionary
+    Boolean !Bool
+  | NumberObject !Number
+  | StringObject !StringObject
+  | NameObject !Name
+  | Array ![DirectObject]
+  | Dictionary !Dictionary
  | Null
-  | Reference IndirectObjCoordinates
+  | Reference !IndirectObjCoordinates
  deriving Show

 instance Output DirectObject where
@ -204,23 +218,25 @@ instance Output DirectObject where
  output (NumberObject n) = output n
  output (StringObject s) = output s
  output (NameObject n) = output n
-  output (Array a) = Output.concat ["[", join " " a, "]"]
+  output (Array a) = mconcat ["[", join " " a, "]"]
  output (Dictionary d) = output d
  output (Null) = "null"
  output (Reference (IndirectObjCoordinates {objectId, versionNumber})) =
-    Output.string (printf "%d %d R" (getObjectId objectId) versionNumber)
+    Output.string (printf "%d %d R" (getId objectId) versionNumber)

-directObject :: Parser u DirectObject
-directObject =
-    Boolean <$> boolean
-  <|> Reference <$> reference {- defined before Number because Number is a prefix of it -}
+directObject :: MonadParser m => m DirectObject
+directObject = (peek >>= dispatch) <?> "direct object"
+  where
+    dispatch 't' = Boolean <$> boolean
+    dispatch 'f' = Boolean <$> boolean
+    dispatch '(' = StringObject <$> stringObject
+    dispatch '<' = StringObject <$> stringObject <|> Dictionary <$> dictionary
+    dispatch '/' = NameObject <$> name
+    dispatch '[' = Array <$> array
+    dispatch 'n' = nullObject *> return Null
+    dispatch _ =
+        Reference <$> reference {- defined before Number because Number is a prefix of it -}
      <|> NumberObject <$> number
-  <|> StringObject <$> stringObject
-  <|> NameObject <$> name
-  <|> Array <$> array
-  <|> Dictionary <$> dictionary
-  <|> const Null <$> nullObject
-  <?> "direct object"

 --
 -- Object
@ -235,27 +251,34 @@ data Object =

 instance Output Object where
  output (Direct d) = output d
-  output (Stream {header, streamContent}) = Output.concat [
+  output (Stream {header, streamContent}) = mconcat [
        output header, newLine
      , Output.line "stream"
      , byteString streamContent
      , "endstream"
    ]

+object :: Int -> Id Object
+object = Id
+
 --
 -- Occurrence
 --
 data Occurrence = Comment String | Indirect IndirectObjCoordinates deriving Show

-outputOccurrence :: IndexedObjects -> Occurrence -> OBuilder
+outputOccurrence :: (Indexed Object) -> Occurrence -> OBuilder
 outputOccurrence _ (Comment c) = Output.line c
 outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
-  saveOffset (Object objectId) >> Output.concat [
-        Output.line (printf "%d %d obj" (getObjectId objectId) versionNumber)
-      , output (objects ! objectId), newLine
+  saveOffset (ObjectId $ getId objectId) >> mconcat [
+        Output.line (printf "%d %d obj" (getId objectId) versionNumber)
+      , output (objects `Id.at` objectId), newLine
      , Output.line "endobj"
    ]

+outputBody :: ([Occurrence], (Indexed Object)) -> OBuilder
+outputBody (occurrences, objects) =
+  output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
+
 -------------------------------------
 --          XREF TABLE
 -------------------------------------
@ -267,15 +290,20 @@ data XRefEntry = InUse {
      offset :: Offset
    , generation :: Int
  } | Free {
-      nextFree :: ObjectId
+      nextFree :: (Id Object)
    , generation :: Int
  } deriving Show

 instance Output XRefEntry where
-  output (InUse {offset, generation}) =
-      Output.line (printf "%010d %05d n " (getOffset offset) generation)
-  output (Free {nextFree, generation}) =
-      Output.line (printf "%010d %05d f " (getObjectId nextFree) generation)
+  output xRefEntry = Output.string (build xRefEntry) `mappend` endXRefEntryLine
+    where
+      build (InUse {offset, generation}) =
+        printf "%010d %05d n" (getOffset offset) generation
+      build (Free {nextFree, generation}) =
+        printf "%010d %05d f" (getId nextFree) generation
+      endXRefEntryLine = OContext (asks padEOLToTwoBytes) >>= Output.line
+      padEOLToTwoBytes EOL.CRLF = ("" :: String)
+      padEOLToTwoBytes _ = " "

 entry :: Parser u XRefEntry
 entry = do
@ -287,52 +315,51 @@ entry = do
      char 'n' *> return (InUse {offset = Offset big, generation})
    free :: Int -> Int -> Parser u XRefEntry
    free big generation =
-      char 'f' *> return (Free {nextFree = ObjectId big, generation})
+      char 'f' *> return (Free {nextFree = Id big, generation})

 --
 -- XRefSubSection
 --
 data XRefSubSection = XRefSubSection {
-      firstObjectId :: ObjectId
+      firstObjectId :: (Id Object)
    , entries :: [XRefEntry]
  } deriving Show

 instance Output XRefSubSection where
  output (XRefSubSection {firstObjectId, entries}) =
-      Output.line (printf "%d %d" (getObjectId firstObjectId) (length entries))
+      Output.line (printf "%d %d" (getId firstObjectId) (length entries))
    `mappend` output entries

 xRefSubSection :: Parser u XRefSubSection
 xRefSubSection = do
  (firstId, entriesNumber) <- (,) <$> integer <*> integer <?> "XRef subsection"
  entries <- count entriesNumber entry
-  return $ XRefSubSection {firstObjectId = ObjectId firstId, entries}
+  return $ XRefSubSection {firstObjectId = Id firstId, entries}

-type XRefSection = Map ObjectId XRefEntry
+type XRefSection = IdMap Object XRefEntry

 instance Output XRefSection where
  output = output . sections
    where
      sections tmp =
-        case Map.minViewWithKey tmp of
+        case Id.minViewWithKey tmp of
          Nothing -> []
-          Just ((objectId@(ObjectId value), firstEntry), rest) ->
+          Just ((objectId@(Id value), firstEntry), rest) ->
            let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
            subSection : sections sndRest
      section firstObjectId stack nextValue tmp =
-        let nextId = ObjectId nextValue in
-        case Map.lookup nextId tmp of
+        let nextId = (Id nextValue :: Id Object) in
+        case Id.lookup nextId tmp of
          Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
          Just nextEntry ->
-            section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp)
-
+            section firstObjectId (nextEntry:stack) (nextValue + 1) (Id.delete nextId tmp)

 xRefSection :: Parser u XRefSection
-xRefSection = foldr addSubsection Map.empty <$>
+xRefSection = foldr addSubsection Id.empty <$>
  (line "xref" *> xRefSubSection `sepBy` many EOL.parser)
  where
    addSubsection (XRefSubSection {firstObjectId, entries}) =
-      Map.union . Map.fromList $ zip ([firstObjectId..]) entries
+      Id.union . Id.fromList $ zip ([firstObjectId..]) entries

 --
 -- Structure
@ -353,45 +380,10 @@ structure =
  <$> xRefSection
  <*> (string "trailer" *> blank *> dictionary <* EOL.parser)

-updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
-updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
-  where
-    updateEntry objectId e@(InUse {}) = e {offset = offsets ! (Object objectId)}
-    updateEntry _ e = e
-
 --
 -- Flow
 --
 data Flow = Flow {
      occurrencesStack :: [Occurrence]
-    , tmpObjects :: IndexedObjects
+    , tmpObjects :: (Indexed Object)
  } deriving Show
-
--
-- Content
--
-data Content = Content {
-      occurrences :: [Occurrence]
-    , objects :: IndexedObjects
-    , docStructure :: Structure
-  } deriving Show
-
-outputBody :: ([Occurrence], IndexedObjects) -> OBuilder
-outputBody (occurrences, objects) =
-  output (outputOccurrence objects <$> occurrences) <* saveOffset StartXRef
-
-instance Output Content where
-  output (Content {occurrences, objects, docStructure}) =
-    fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
-    >>= \(body, (xref, startXRef)) -> Output.concat [
-          body
-        , Output.line "xref"
-        , output xref
-        , Output.line "trailer"
-        , output trailer, newLine
-        , Output.line "startxref"
-        , Output.line (printf "%d" (getOffset startXRef))
-        , byteString eofMarker
-      ]
-    where
-      Structure {xRef, trailer} = docStructure
--- a/src/PDF/Object/Navigation.hs
+++ b/src/PDF/Object/Navigation.hs
@ -0,0 +1,180 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DeriveFunctor #-}
+module PDF.Object.Navigation (
+      Nav(..)
+    , PPath(..)
+    , ROLayer
+    , RWLayer
+    , StreamContent(..)
+    , (./)
+    , (//)
+    , (>./)
+    , (>//)
+    , castObject
+    , catalog
+    , getDictionary
+    , getKey
+    , objectById
+    , save
+  ) where
+
+import Codec.Compression.Zlib (compress, decompress)
+import Control.Monad.Except (MonadError(..))
+import Control.Monad.Reader (MonadReader(..))
+import Control.Monad.State (MonadState)
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS (length)
+import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
+import Data.Id (Id)
+import qualified Data.Id as Id (at)
+import qualified Data.Map as Map (adjust, insert, lookup)
+import PDF.Box (Box(..), at, edit{-, runRO-})
+import PDF.Layer (Layer(..))
+import PDF.Object (
+      Dictionary, DirectObject(..), IndirectObjCoordinates(..)
+    , Name(..), Number(..), Object(..), Structure(..)
+  )
+import Prelude hiding (fail)
+import Text.Printf (printf)
+
+newtype PPath = PPath [Component]
+data DPath = DPath {
+      root :: Id Object
+    , offset :: [Component]
+  } deriving Show
+
+push :: Component -> DPath -> DPath
+push component dPath = dPath {offset = (offset dPath) ++ [component]}
+
+data Nav a = Nav {
+      dPath :: DPath
+    , value :: a
+  } deriving (Functor)
+
+instance Show a => Show (Nav a) where
+  show (Nav {dPath, value}) = "Nav {dPath = " ++ show dPath ++ ", value = " ++ show value ++ "}"
+
+type ROLayer m = (MonadReader Layer m, MonadError String m)
+type RWLayer m = (MonadState Layer m, MonadError String m)
+type Component = String
+
+getDictionary :: ROLayer m => Nav Object -> m (Nav Dictionary)
+getDictionary (Nav {dPath, value}) =
+  case value of
+    (Direct (Dictionary aDict)) -> return $ Nav {dPath, value = aDict}
+    (Direct (Reference ref)) -> objectById (objectId ref) >>= getDictionary
+    (Stream {header}) -> return $ Nav {dPath, value = header}
+    obj -> expected "dictionary : " obj
+
+expected :: (MonadError String m, Show a) => String -> a -> m b
+expected name = throwError . printf "Not a %s: %s" name . show
+
+getKey :: ROLayer m => String -> Nav Object -> m (Nav DirectObject)
+getKey key navObject = getDictionary navObject >>= f
+  where
+    errorMessage =
+      printf "Key %s not found in object %s" key (show navObject)
+    f (Nav {dPath, value}) =
+      case Map.lookup (Name key) value of
+        Nothing -> throwError errorMessage
+        Just dObj -> return $ Nav {dPath = push key dPath, value = dObj}
+
+objectById :: ROLayer m => (Id Object) -> m (Nav Object)
+objectById objectId = do
+  layer <- ask
+  return $ Nav {
+        dPath = DPath {root = objectId, offset = []}
+      , value = objects layer `Id.at` objectId
+    }
+
+castObject :: ROLayer m => Nav DirectObject -> m (Nav Object)
+castObject (Nav {value = !(Reference (IndirectObjCoordinates {objectId}))}) =
+  objectById objectId
+castObject (Nav {dPath, value}) = return $ Nav {dPath, value = Direct value}
+
+(./) :: ROLayer m => m (Nav Object) -> Component -> m (Nav Object)
+(./) navObject key = (navObject >>= getKey key >>= castObject)
+
+(//) :: ROLayer m => m (Nav Object) -> PPath -> m (Nav Object)
+(//) navObject (PPath []) = navObject
+(//) navObject (PPath (key:keys)) = navObject ./ key // (PPath keys)
+
+(>./) :: ROLayer m => Nav Object -> Component -> m (Nav Object)
+(>./) navObject = (return navObject ./)
+
+(>//) :: ROLayer m => Nav Object -> PPath -> m (Nav Object)
+(>//) navObject = (return navObject //)
+
+catalog :: ROLayer m => m (Nav Object)
+catalog = do
+  value <- Direct . Dictionary . trailer . docStructure <$> ask
+  return $ Nav {dPath = undefined, value}
+
+setAt :: [Component] -> DirectObject -> Dictionary -> Dictionary
+setAt [] _ dict = dict
+setAt [component] directObject dict =
+  Map.insert (Name component) directObject dict
+setAt (component:components) directObject dict =
+  Map.adjust setDirObj (Name component) dict
+  where
+    setDirObj (Dictionary subDict) =
+      Dictionary $ setAt components directObject subDict
+    setDirObj x = x
+
+save :: RWLayer m => Nav Object -> m ()
+save nav@(Nav {dPath, value = Direct dObj}) =
+  edit .at (root dPath) $ return . setObj
+  where
+    setObj obj@(Stream {header}) =
+      obj {header = setAt (offset dPath) dObj header}
+    setObj (Direct (Dictionary dict)) =
+      Direct . Dictionary $ setAt (offset dPath) dObj dict
+    setObj _ = value nav
+save (Nav {dPath = DPath {root, offset = []}, value}) = edit $ w root value
+save _ = throwError "Streams can't be properties of PDF objects"
+
+data StreamContent = Clear | Raw
+
+onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
+onLazy f = Lazy.toStrict . f . Lazy.fromStrict
+
+contains :: String -> DirectObject -> Bool
+contains needle !(NameObject (Name n)) = needle == n
+contains needle !(Array directObjects) = oneOf directObjects (contains needle)
+  where
+    oneOf [] _ = False
+    oneOf (x:xs) p = p x || oneOf xs p
+contains _ _ = False
+
+instance MonadError String m => Box m StreamContent (Nav Object) ByteString where
+  r sc = r sc . value
+  w sc newStreamContent nav = setValue <$> w sc newStreamContent (value nav)
+    where
+      setValue value = nav {value}
+
+instance MonadError String m => Box m StreamContent Object ByteString where
+  r Raw (Stream {streamContent}) = return streamContent
+  r Clear (Stream {header, streamContent}) = return $
+    case Map.lookup (Name "Filter") header of
+      Just directObject
+        | contains "FlateDecode" directObject -> onLazy decompress streamContent
+      _ -> streamContent
+  r _ obj = expected "stream" obj
+
+  w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
+  w Clear newStreamContent (Stream {header}) =
+    let streamContent = getStreamContent (Map.lookup (Name "Filter") header) in
+    return $ Stream {header = fixLength streamContent, streamContent}
+    where
+      getStreamContent (Just directObject)
+        | contains "FlateDecode" directObject = onLazy compress newStreamContent
+      getStreamContent _ = newStreamContent
+      fixLength sc =
+        let newLength = NumberObject . Number . fromIntegral $ BS.length sc in
+        Map.insert (Name "Length") newLength header
+  w _ _ obj = expected "stream" obj
--- a/src/PDF/Output.hs
+++ b/src/PDF/Output.hs
@ -5,14 +5,12 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 module PDF.Output (
      OBuilder
-    , ObjectId(..)
    , OContext(..)
    , Offset(..)
    , Output(..)
    , Resource(..)
    , byteString
    , char
-    , concat
    , getOffsets
    , join
    , line
@ -32,12 +30,10 @@ import qualified Data.Map as Map (singleton)
 import Data.String (IsString(..))
 import Control.Monad.RWS (RWS, runRWS, ask, get, listen, modify, tell)
 import qualified PDF.EOL as EOL (Style(..))
-import Prelude hiding (concat)

-newtype ObjectId = ObjectId {getObjectId :: Int} deriving (Enum, Eq, Ord, Show)
 newtype Offset = Offset {getOffset :: Int} deriving (Show)

-data Resource = StartXRef | Object ObjectId deriving (Eq, Ord)
+data Resource = StartXRef | ObjectId Int deriving (Eq, Ord)

 newtype OContext a = OContext (RWS EOL.Style (Map Resource Offset) Offset a)
 type OBuilder = OContext Builder
@ -59,16 +55,12 @@ saveOffset resource = OContext $
 lift :: (a -> Builder) -> a -> OBuilder
 lift f a = return (f a)

-getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
-getOffsets (OContext builder) =
-  OContext (listen builder >>= \(a, w) -> return (return a, w))
+getOffsets :: OBuilder -> OContext (Builder, Map Resource Offset)
+getOffsets (OContext builder) = OContext $ listen builder

 append :: OBuilder -> OBuilder -> OBuilder
 append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)

-concat :: [OBuilder] -> OBuilder
-concat = foldl mappend mempty
-
 #if MIN_VERSION_base(4,11,0)
 instance Semigroup OBuilder where
  (<>) = append
@ -94,7 +86,7 @@ instance Output Bool where
  output True = string "true"

 instance Output a => Output [a] where
-  output = concat . fmap output
+  output = mconcat . fmap output

 join :: Output a => String -> [a] -> OBuilder
 join _ [] = mempty
@ -116,7 +108,7 @@ char :: Char -> OBuilder
 char c = lift char8 c <* offset (+1)

 string :: String -> OBuilder
-string s = lift string8 s <* offset (+ toEnum (length s))
+string s = lift string8 s <* offset (+ length s)

 line :: String -> OBuilder
 line l = string l `mappend` newLine
--- a/src/PDF/Pages.hs
+++ b/src/PDF/Pages.hs
@ -0,0 +1,181 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+module PDF.Pages (
+      Contents(..)
+    , FontCache
+    , Page(..)
+    , PageNumber(..)
+    , Pages(..)
+    , withFonts
+    , withResources
+  ) where
+
+import Control.Applicative (Alternative)
+import Control.Applicative ((<|>))
+import Control.Monad (foldM)
+import Control.Monad.Except (MonadError(..))
+import Control.Monad.Reader (ReaderT, runReaderT)
+import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
+import Control.Monad.Trans (lift)
+import Data.ByteString (ByteString)
+import Data.ByteString.Lazy (toStrict)
+import Data.Id (Id(..), IdMap)
+import qualified Data.Id as Id (empty, insert, lookup)
+import Data.Map (Map)
+import qualified Data.Map as Map (empty, elems, fromList, insert, toList)
+import Data.Maybe (listToMaybe)
+import Data.OrderedMap (OrderedMap, build, mapi)
+import PDF.Box (Box(..), at, edit, runRO)
+import qualified PDF.CMap as CMap (parse)
+import PDF.Content (Content(..))
+import qualified PDF.Content as Content (parse)
+import PDF.Encoding (encoding)
+import PDF.EOL (Style(..))
+import PDF.Font (Font, FontSet)
+import PDF.Layer (Layer(..), LayerReader)
+import PDF.Object (
+      Dictionary, DirectObject(..), IndirectObjCoordinates(..)
+    , Name(..), Number(..), Object(..)
+  ,)
+import PDF.Object.Navigation (
+      Nav(..), PPath(..), ROLayer, RWLayer, StreamContent(..), (//), (>./)
+    , (>//), catalog, getDictionary, getKey, objectById, save
+  )
+import PDF.Output (render)
+import Text.Printf (printf)
+
+type CachedFonts = IdMap Object Font
+type FontCache m = StateT CachedFonts m
+data Page = Page {
+      byteContents :: OrderedMap (Id Object) ByteString
+    , resources :: Dictionary
+    , source :: (Id Object)
+  }
+
+loadByteContents :: ROLayer m => DirectObject -> m (OrderedMap (Id Object) ByteString)
+loadByteContents directObject = do
+  objs <- sequence . build objectById $ objectIds directObject
+  mapM (r Clear) objs
+  where
+    objectIds (Array l) = l >>= getReference
+    objectIds dirObj = getReference dirObj
+
+getFontDictionary :: (Alternative m, ROLayer m) => Nav Object -> m Dictionary
+getFontDictionary pageObj =
+    (pageObj >// PPath ["Resources", "Font"] >>= fmap value . getDictionary)
+  <|> return Map.empty
+
+cache :: ROLayer m => (Id Object -> FontCache m Font) -> Id Object -> FontCache m Font
+cache loader objectId =
+  gets (Id.lookup objectId) >>= maybe load return
+  where
+    load = do
+      value <- loader objectId
+      modify $ Id.insert objectId value
+      return value
+
+loadFont :: (Alternative m, ROLayer m) => Id Object -> FontCache m Font
+loadFont objectId = lift $ objectById objectId >>= tryMappings
+  where
+    tryMappings object =
+        (object >./ "ToUnicode" >>= r Clear >>= CMap.parse)
+      <|> (object >./ "Encoding" >>= loadEncoding . value)
+      <|> (throwError $ unknownFormat (show objectId) (show object))
+    unknownFormat = printf "Unknown font format for object #%s : %s"
+    loadEncoding :: MonadError String m => Object -> m Font
+    loadEncoding (Direct (NameObject (Name name))) = encoding name
+    loadEncoding object =
+      throwError $ printf "Encoding must be a name, not that : %s" $ show object
+
+loadResources :: (Alternative m, ROLayer m) => Dictionary -> FontCache m FontSet
+loadResources = foldM addFont Map.empty . Map.toList
+  where
+    addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
+      flip (Map.insert name) output <$> cache loadFont objectId
+    addFont output _ = return output
+
+getReference :: DirectObject -> [Id Object]
+getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
+getReference _ = []
+
+loadPage :: (Alternative m, ROLayer m) => Id Object -> m Page
+loadPage source = do
+  page <- objectById source
+  byteContents <- loadByteContents . value =<< getKey "Contents" page
+  resources <- getFontDictionary page
+  return $ Page {byteContents, resources, source}
+
+pagesList :: (Alternative m, ROLayer m) => m [Id Object]
+pagesList =
+    (catalog // PPath ["Root", "Pages", "Kids"] >>= getReferences . value)
+  <|> return []
+  where
+    getReferences (Direct (Array kids)) = return $ getReference =<< kids
+    getReferences _ = throwError "Not a pages array"
+
+editPagesList :: RWLayer m => ([DirectObject] -> [DirectObject]) -> m ()
+editPagesList f = do
+  pages <- runRO (catalog // PPath ["Root", "Pages"])
+  kids <- runRO (pages >./ "Kids")
+  count <- runRO (pages >./ "Count")
+  (newSize, newKids) <- editKids (value kids)
+  save $ kids {value = newKids}
+  save $ count {value = Direct $ NumberObject newSize}
+  where
+    editKids (Direct (Array pageRefs)) =
+      let result = f pageRefs in
+      return (Number . fromIntegral $ length result, Direct $ Array result)
+    editKids _ = throwError "Invalid format for Root.Pages.Kids (not an array)"
+
+updatePage :: RWLayer m => Page -> m ()
+updatePage (Page {byteContents}) = sequence_ $ mapi updateByteContent byteContents
+  where
+    updateByteContent source byteContent =
+      edit .at source .at Clear $ \_ -> return byteContent
+
+data Pages = Pages
+newtype PageNumber = P Int
+data Contents = Contents
+
+instance (Alternative m, MonadError String m) => Box m Pages Layer (Map Int Page) where
+  r Pages = runReaderT (numbered <$> pagesList >>= mapM loadPage)
+    where
+      numbered :: [Id Object] -> Map Int (Id Object)
+      numbered = Map.fromList . zip [1..]
+
+  w Pages pages = execStateT $ do
+    mapM_ updatePage pages
+    setPagesList $ Map.elems (source <$> pages)
+    where
+      setPagesList =
+        editPagesList . const . fmap (Reference . flip IndirectObjCoordinates 0)
+
+instance (Alternative m, MonadError String m) => Box m PageNumber Layer Page where
+  r (P p) layer
+    | p < 1 = throwError "Pages start at 1"
+    | otherwise = runReaderT (drop (p - 1) <$> pagesList >>= firstPage) layer
+      where
+        firstPage =
+          maybe (throwError "Page is out of bounds") loadPage . listToMaybe
+
+  w (P p) page = execStateT $ do
+    updatePage page
+    editPagesList $ setPage (Reference $ IndirectObjCoordinates (source page) 0)
+    where
+      setPage ref l = take (p-1) l ++ ref : drop p l
+
+instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
+  r Contents = return . fmap Content.parse . byteContents
+  w Contents contents page = return $ page {byteContents}
+    where
+      byteContents = toStrict . render LF <$> contents
+
+withFonts :: MonadError String m => (Layer -> FontCache (LayerReader m) a) -> Layer -> m a
+withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer
+
+withResources :: (Alternative m, MonadError String m) => (Page -> ReaderT FontSet m b) -> Page -> FontCache (LayerReader m) b
+withResources f p =
+  loadResources (resources p) >>= lift . lift . runReaderT (f p)
--- a/src/PDF/Parser.hs
+++ b/src/PDF/Parser.hs
@ -1,56 +1,78 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE UndecidableInstances #-}
 module PDF.Parser (
-      Parser
+      MonadParser(..)
+    , Parser
    , (<?>)
-    , block
-    , char
-    , choice
-    , count
-    , decNumber
-    , hexNumber
-    , many
    , octDigit
    , on
-    , oneOf
-    , option
    , runParser
-    , sepBy
-    , string
-    , takeAll
-    , takeAll1
+    , evalParser
  ) where

-import Control.Applicative ((<|>), empty)
+import Control.Applicative (Alternative, (<|>))
+import Control.Monad (MonadPlus)
+import Control.Monad.Fail (MonadFail(..))
 import Control.Monad.State (StateT(..), evalStateT)
-import Control.Monad.Trans (lift)
+import Control.Monad.Trans (MonadTrans(..))
 import qualified Data.Attoparsec.ByteString.Char8 as Atto (
-    Parser, char, parseOnly, satisfy, string, take, takeWhile, takeWhile1
+      Parser, char, decimal, double, endOfInput, parseOnly, peekChar', satisfy, string, take
+    , takeWhile, takeWhile1
  )
 import Data.ByteString (ByteString)
+import Data.ByteString.Char8.Util (B16Int(..))
 import Data.Char (toLower)
 import Data.Set (Set)
 import qualified Data.Set as Set (fromList, member, unions)
+import Prelude hiding (fail)
+
+type MonadDeps m = (MonadFail m, MonadPlus m)
+
+class MonadDeps m => MonadParser m where
+  block :: Int -> m ByteString
+  char :: Char -> m Char
+  decNumber :: m Int
+  floatNumber :: m Double
+  endOfInput :: m ()
+  hexNumber :: m B16Int
+  oneOf :: String -> m Char
+  peek :: m Char
+  string :: ByteString -> m ByteString
+  takeAll :: (Char -> Bool) -> m ByteString
+  takeAll1 :: (Char -> Bool) -> m ByteString
+
+instance MonadParser Atto.Parser where
+  block = Atto.take
+  char = Atto.char
+  endOfInput = Atto.endOfInput
+  decNumber = Atto.decimal
+  floatNumber = Atto.double
+  hexNumber = B16Int <$> Atto.takeWhile1 (`Set.member` hexDigits)
+  oneOf charSet = Atto.satisfy (`elem` charSet)
+  peek = Atto.peekChar'
+  string s = Atto.string s <?> show s
+  takeAll = Atto.takeWhile
+  takeAll1 = Atto.takeWhile1
+
+instance (MonadParser m, MonadTrans t, MonadDeps (t m)) => MonadParser (t m) where
+  block = lift . block
+  char = lift . char
+  endOfInput = lift $ endOfInput
+  decNumber = lift $ decNumber
+  floatNumber = lift $ floatNumber
+  hexNumber = lift $ hexNumber
+  oneOf = lift . oneOf
+  peek = lift $ peek
+  string = lift . string
+  takeAll = lift . takeAll
+  takeAll1 = lift . takeAll1

 type Parser s = StateT s Atto.Parser

-(<?>) :: Parser s a -> String -> Parser s a
+(<?>) :: (Alternative m, MonadFail m) => m a -> String -> m a
 (<?>) parser debugMessage = parser <|> fail debugMessage

-block :: Int -> Parser s ByteString
-block = lift . Atto.take
-
-char :: Char -> Parser s Char
-char = lift . Atto.char
-
-choice :: [Parser s a] -> Parser s a
-choice = foldr (<|>) empty
-
-count :: Int -> Parser s a -> Parser s [a]
-count 0 _ = return []
-count n p = (:) <$> p <*> count (n-1) p
-
-decNumber :: Parser s ByteString
-decNumber = lift $ Atto.takeWhile1 (`Set.member` digits)
-
 digits :: Set Char
 digits = Set.fromList ['0'..'9']

@ -59,13 +81,7 @@ hexDigits = Set.unions [digits, Set.fromList af, Set.fromList $ toLower <$> af]
  where
    af = ['A'..'F']

-hexNumber :: Parser s ByteString
-hexNumber = lift $ Atto.takeWhile1 (`Set.member` hexDigits)
-
-many :: Parser s a -> Parser s [a]
-many parser = (:) <$> parser <*> many parser <|> return []
-
-octDigit :: Parser s Char
+octDigit :: MonadParser m => m Char
 octDigit = oneOf ['0'..'7']

 on :: Parser s a -> ByteString -> Parser s (Either String a)
@ -74,25 +90,8 @@ on (StateT parserF) input = StateT $ \state ->
    Left errorMsg -> return (Left errorMsg, state)
    Right (result, newState) -> return (Right result, newState)

-oneOf :: String -> Parser s Char
-oneOf charSet = lift $ Atto.satisfy (`elem` charSet)
+runParser :: Parser s a -> s -> ByteString -> Either String (a, s)
+runParser parser initState = Atto.parseOnly (runStateT parser initState)

-option :: a -> Parser s a -> Parser s a
-option defaultValue p = p <|> pure defaultValue
-
-runParser :: Parser s a -> s -> ByteString -> Either String a
-runParser parser initState =
-  Atto.parseOnly (evalStateT parser initState)
-
-sepBy :: Parser s a -> Parser s b -> Parser s [a]
-sepBy parser separator =
-  option [] $ (:) <$> parser <*> many (separator *> parser)
-
-string :: ByteString -> Parser s ByteString
-string = lift . Atto.string
-
-takeAll :: (Char -> Bool) -> Parser s ByteString
-takeAll = lift . Atto.takeWhile
-
-takeAll1 :: (Char -> Bool) -> Parser s ByteString
-takeAll1 = lift . Atto.takeWhile1
+evalParser :: Parser s a -> s -> ByteString -> Either String a
+evalParser parser initState = Atto.parseOnly (evalStateT parser initState)
--- a/src/PDF/Update.hs
+++ b/src/PDF/Update.hs
@ -1,47 +0,0 @@
-{-# LANGUAGE NamedFieldPuns #-}
-module PDF.Update (
-    unify
-  ) where
-
-import Data.Map (member)
-import qualified Data.Map as Map (empty, union)
-import PDF.Object (
-      Content(..), IndexedObjects, IndirectObjCoordinates(..), Occurrence(..)
-    , Structure(..)
-  )
-
-emptyContent :: Content
-emptyContent = Content {
-      docStructure = Structure {xRef = Map.empty, trailer = Map.empty}
-    , objects = Map.empty
-    , occurrences = []
-  }
-
-unify :: [Content] -> Content
-unify = foldl complete emptyContent
-  where
-    complete tmpContent older =
-      let mergedObjects = Map.union (objects tmpContent) (objects older) in
-      Content {
-          docStructure =
-            unifyDocStructure (docStructure tmpContent) (docStructure older)
-        , objects = mergedObjects
-        , occurrences = 
-            unifyOccurrences mergedObjects (occurrences tmpContent) (occurrences older)
-      }
-
-unifyDocStructure :: Structure -> Structure -> Structure
-unifyDocStructure update original = Structure {
-      xRef = Map.union (xRef update) (xRef original)
-    , trailer = Map.union (trailer update) (trailer original)
-  }
-
-unifyOccurrences :: IndexedObjects -> [Occurrence] -> [Occurrence] -> [Occurrence]
-unifyOccurrences objects update = foldr addOlder update
-  where
-    addOlder occurrence@(Comment _) existing = occurrence : existing
-    addOlder occurrence@(Indirect indirect) existing =
-      if objectId indirect `member` objects
-      then occurrence : existing
-      else existing
-  
--- a/test/Main.hs
+++ b/test/Main.hs
@ -0,0 +1,9 @@
+{-# LANGUAGE OverloadedStrings #-}
+import Test.HUnit
+import Object (testNumber, testString)
+
+main :: IO ()
+main = runTestTT (TestList [
+      testNumber
+    , testString
+  ]) *> return ()
--- a/test/Object.hs
+++ b/test/Object.hs
@ -0,0 +1,50 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Object (
+      testNumber
+    , testString
+  ) where
+
+import Data.ByteString.Char8 (pack)
+import Data.ByteString.Char8.Util (B16Int(..))
+import PDF.Object (Number(..), StringObject(..), name, number, stringObject)
+import PDF.Parser (MonadParser, Parser, evalParser)
+import Test.HUnit
+
+check :: (Eq a, Show a) => Parser () a -> (a, String) -> Test
+check parser (aim, input) =
+  TestCase $ assertEqual message (Right aim) (parse input)
+  where
+    message = "parse: " ++ input
+    parse = evalParser parser () . pack
+
+testName :: Test
+testName = TestLabel "Name" . TestList $ check name <$> [
+  ]
+
+testNumber :: Test
+testNumber = TestLabel "Number" . TestList $ check number <$> [
+      (Number 123, "123")
+    , (Number 43445, "43445")
+    , (Number 17, "+17")
+    , (Number (-98), "-98")
+    , (Number 0, "0")
+    , (Number 34.5, "34.5")
+    , (Number (-3.62), "-3.62")
+    , (Number 123.6, "+123.6")
+    , (Number 4, "4.")
+    , (Number (-0.002), "-.002")
+    , (Number 0, "0.0")
+  ]
+
+testString :: Test
+testString = TestLabel "StringObject" . TestList $ check stringObject <$> [
+      (Literal "This is a string", "(This is a string)")
+    , (Literal "Strings may contain newlines\nand such .", "(Strings may contain newlines\nand such .)")
+    , (Literal "Strings may contain balanced parentheses ( ) and\nspecial characters ( * ! & } ^ % and so on ) .", "(Strings may contain balanced parentheses ( ) and\nspecial characters ( * ! & } ^ % and so on ) .)")
+    , (Literal "The following is an empty string .", "(The following is an empty string .)")
+    , (Literal "", "()")
+    , (Literal "It has zero ( 0 ) length .", "(It has zero ( 0 ) length .)")
+    , (Hexadecimal $ B16Int "4E6F762073686D6F7A206B6120706F702E", "<4E6F762073686D6F7A206B6120706F702E>")
+    , (Hexadecimal $ B16Int "901FA3", "<901FA3>")
+    , (Hexadecimal $ B16Int "901FA0", "<901FA>")
+  ]
Author	SHA1	Message	Date
Tissevert	6a7e9e9595	Expose a toList function similar to Data.Map in module Id for IdMap and allow mapping over keys and values in a monadic construct like mapM	2020-06-03 15:15:07 +02:00
Tissevert	d9f69014a0	Make a couple improvements in performance + add an example script to extract pages from a PDF	2020-05-28 18:54:15 +02:00
Tissevert	f6664683c7	Once again something that should never have been committed	2020-03-20 09:34:53 +01:00
Tissevert	c491e8a70c	Forgot to remove deprecated source file	2020-03-19 12:53:35 +01:00
Tissevert	09bd706748	Export Content operators, needed to write filters like reveal	2020-03-19 10:27:29 +01:00
Tissevert	729e312f90	Actually, the spec calls 'catalog' what we call 'origin' — use 'catalog' for more clarity in regard to the spec	2020-03-19 10:27:29 +01:00
Tissevert	6d265633e4	Export Instructions constructor from PDF.Content, used by reveal	2020-03-19 10:27:29 +01:00
Tissevert	1eb1c23053	Found a nicer way to handle the too long IndirectObjCoordinates for Object Navigation	2020-03-19 10:27:29 +01:00
Tissevert	44125f75a6	The orphan instance for MonadState s m => MonadReader s m really can't be used, so replace it with a mere function that runs an operation on a ReaderT into the monad State, allowing to borrow operations on MonadReader in a MonadState context	2020-03-19 10:27:29 +01:00
Tissevert	c8a5e2b191	Wait, CachedFonts are indexed by Id Object so it could be an IdMap actually	2020-03-19 10:27:29 +01:00
Tissevert	11640c8465	Replace 'cacheFonts' by more versatile 'withFonts' inspired by 'withResources' that avoid having to declare an inline function to capture the 'layer' argument and pass it twice	2020-03-19 10:27:29 +01:00
Tissevert	e94a09b3ec	Add a Traversable instance for IdMap, needed in reveal and useful in general to be able to use atAll	2020-03-19 10:27:29 +01:00
Tissevert	ba7dd6a690	Make cacheFonts slightly more useful by passing layer directly to it and run the ReaderT underneath	2020-03-19 10:27:29 +01:00
Tissevert	d21e14f9a4	Hey, zlib isn't needed anymore for getText since all decoding is done directly in the Box instance for Streams	2020-03-19 10:27:28 +01:00
Tissevert	a1c2fbf110	Add an alias to Id to lift type ambiguities like 'chunk' in PDF.Content.Text	2020-03-19 10:27:28 +01:00
Tissevert	24630a04a1	Implement 'w' for Pages Box instances	2020-03-19 10:27:28 +01:00
Tissevert	ee5e7500a8	Implement 'w' for Box m Chunks Content (Indexed Text)	2020-03-19 10:27:28 +01:00
Tissevert	d8aec5bf80	Add Box instance for IdMap a b, remove restriction on new keys in the Map instance since it's not really needed and could be better implemented like in OrderedMap by first using 'r'	2020-03-19 10:27:28 +01:00
Tissevert	25e2823c75	Generalize register to all IdMap a b, since it's gonna be needed by Indexed Text too	2020-03-19 10:27:28 +01:00
Tissevert	5027b079eb	Include page numbers in chunks label, needed for long documents with many pages	2020-03-19 10:27:28 +01:00
Tissevert	5722dd1a04	Use IntMap for all Maps on Ids	2020-03-19 10:27:28 +01:00
Tissevert	f31e9eb38b	Generalize Ids out of Content to handle Object Ids too	2020-03-19 10:27:21 +01:00
Tissevert	0f857c457d	Use a defined monadic stack in Pages to lift the MonadReader ambiguity and allow finishing to reimplement getText demo	2020-03-14 16:57:16 +01:00
Tissevert	40475a3093	Clean unneeded stuff separating the monadic type constraint from the actual monad stack used, one more step towrds MonadFail -> MonadError	2020-03-14 16:55:34 +01:00
Tissevert	a9d3e5d326	Clean unused dependencies from Map + use a more defined Monad for the Box Chunks instance, hoping we will be able to clear the whole stack someday and stop requiring that RoContext type, unboxing and reboxing the FontSet for no good	2020-03-14 16:27:56 +01:00
Tissevert	f2a99e1fd2	Reorder module PDF.Body in alphabetical order	2020-03-14 16:25:26 +01:00
Tissevert	5bf2b08fa9	Try replacing general monadic type constraint by a definite monad stack	2020-03-11 22:35:19 +01:00
Tissevert	5b8d951516	WIP: Try about everything that's possible to try, OrderedMap or [(,)], try to decouple Box instance for Content and the one for Indexed Text, breaks getText… will probably require some advanced effect library, there seems to be a weird MonadReader conflict in the errors messages	2020-03-11 18:55:18 +01:00
Tissevert	d3f1b97f3a	Replace the fake instance of Box for Content over Indexed Text with the true one using renderText	2020-03-11 18:53:41 +01:00
Tissevert	c4c3e35e09	Write said instance	2020-03-11 18:52:09 +01:00
Tissevert	10f8c711da	Implement set and mapi on OrderedMap for convenience and to write a Box instance over OrderedMap like the one over Map	2020-03-11 18:51:49 +01:00
Tissevert	b6c1f670ef	Generalize the search for FlateDecode (there can be several filters in an array)	2020-03-11 10:47:52 +01:00
Tissevert	3b1a5152e4	Try connecting all the Box instance in the getText demo, try to encode pages contents with a simple assoc list	2020-03-10 22:57:11 +01:00
Tissevert	a04adff1d2	Prepare real instance of Box using renderText	2020-03-10 22:55:16 +01:00
Tissevert	103037ffb2	Fix mistake in arity of operator "	2020-03-10 22:53:27 +01:00
Tissevert	dce10ae63a	Keep Page as only a reference object keeping the ObjectId explicit so we can modify the actual objects one day, write an OrderedMap data structure to help	2020-03-08 22:18:47 +01:00
Tissevert	f2986da96d	Simplify Content abstracting over MonadParser for no reason and provide instead an parse that's in MonadFail to avoid having to handle Either outside	2020-03-08 22:16:23 +01:00
Tissevert	673321bf0a	Implement encoder for good	2020-03-08 22:14:36 +01:00
Tissevert	0ade9cc2f5	Implement proper text formatting into PDF instructions using the new encode feature available in Fonts	2020-03-08 00:04:18 +01:00
Tissevert	457f1755e6	Prepare storing the reverse mapping for CMaps, divided by length to be able to implement encoding with a reasonable complexity	2020-03-08 00:02:24 +01:00
Tissevert	ca40d2df76	Don't use (!?) operator that doesn't exist before containers 0.5.9 for maximum compatibility	2020-03-08 00:00:24 +01:00
Tissevert	44bc898ed3	Generalize the Indexed type to handle both arbitrary Content instructions and text-related ones that can be viewed as text chunks	2020-03-06 19:21:16 +01:00
Tissevert	1ec47c5d07	Update Font type to cover both encoding and decoding — WIP for CMap, but complete though not tested yet for MacRoman encoding	2020-03-06 19:19:53 +01:00
Tissevert	6e245189fd	Add a simple Box instance that exposes IndexedInstructions within a Content	2020-03-05 17:44:38 +01:00
Tissevert	90348c57d6	Disable text rendering and font loading from the Page abstraction, this code will have to be moved into a separate Box instance	2020-03-05 17:40:58 +01:00
Tissevert	50ac0692b2	Implement r for access by PageNumber and clean the mess a bit	2020-03-05 10:09:09 +01:00
Tissevert	2b9abc24b6	Add a separate instance for Raw streams that don't try to decode them	2020-03-04 18:31:30 +01:00
Tissevert	309f6ed461	Actually re-implement getText with the simpler Box instance	2020-03-04 18:19:10 +01:00
Tissevert	93c9863426	Remove accidentally commited trailing space on a line	2020-03-04 18:14:54 +01:00
Tissevert	7cef65d799	Fixed vicious bug introduced by `6096a1a237` (since follow is now automatic for references, it's not called explicitely but should in case of 'several' Content, which is an array of references, each of which should be expended) — TODO: add a unit test for that	2020-03-04 18:14:33 +01:00
Tissevert	d288ecf0ac	Start reimplementing getAll as a Box instance and try to separate the various monad run steps	2020-03-03 18:17:44 +01:00
Tissevert	3b3eeef218	Maybe we need a MonadState s m => MonadReader s m instance some day ?	2020-03-03 18:16:49 +01:00
Tissevert	2c02e44adf	Export the PDFContent monadic type used in PDF.Pages	2020-03-03 18:16:12 +01:00
Tissevert	9ce1a48030	Optimistically prepare the instance declaration for Pages that should replace get / getAll, not really getting out of the Monad	2020-02-28 18:15:40 +01:00
Tissevert	4969c6442e	Simple String aliasing to prepare the day when we'll be able to have more complex Component than just PDF Names (and access elements in an array)	2020-02-28 18:14:27 +01:00
Tissevert	cb257fc07e	Rename function for clarity : actually it's doing just what w StreamContent does, but without checking the headers to re-zlib-encode the stream content	2020-02-27 17:30:42 +01:00
Tissevert	d90eaf6f1c	Add Box instances to allow handling some exceptions in monad and converting them to Traversable accessible from the data part of the type	2020-02-27 17:22:12 +01:00
Tissevert	99014ff30d	Recognize openStream was just an implementation of r for the Box m () Object ByteString, and extend it implementing the w operation while we're at it	2020-02-26 22:13:29 +01:00
Tissevert	f4df4aab22	Found a nicer formulation that doesn't require transitivity or index agglomeration and swapped argument of w for more reusability with at / atAll	2020-02-26 17:19:22 +01:00
Tissevert	bdbc5f7351	Generalize the Box instances on containers from the particular cases of Document/Layers and Layers/Objects and move them to PDF.Box	2020-02-25 17:36:54 +01:00
Tissevert	30fece6537	Notice the 'edit' I exported earlier could be reused to simplify the w implementation of the proof that Box is a transitive relation	2020-02-25 09:27:56 +01:00
Tissevert	1a70f2972b	Expose Box index flags in PDF and PDF.Layer	2020-02-24 21:37:09 +01:00
Tissevert	67faa06ea2	Lift unused restriction on MonadFail for AllObjects instance of Box Layer	2020-02-24 21:36:31 +01:00
Tissevert	83a63d4b02	Implement Box instance from Layer to Object, either all at once or indexed by an ObjectId	2020-02-24 17:29:22 +01:00
Tissevert	85ee8519c4	Implement Box instances from Document to Layers and EOLStyle	2020-02-24 17:28:17 +01:00
Tissevert	e607f9cd37	Implement transitivity instance, extract a part of modifyAt as a convenient 'edit' function useful elsewhere and present a right-infix version of (,) to allow writing the nested tuple indexes more conveniently	2020-02-24 17:27:37 +01:00
Tissevert	a9252b129a	Start a Box module to describe inclusion relations between different types and get a MonadState action on the top type for any modification down there	2020-02-23 22:24:59 +01:00
Tissevert	71e62ee732	Add IDs to Instructions so that they can be selected in a given Content (and modified one day…)	2020-02-23 22:21:09 +01:00
Tissevert	160999a7d7	A small renaming for more clarity and because I thought «update» could be needed for a function name but after all maybe not but it's still better that way	2020-02-23 22:17:09 +01:00
Tissevert	36b1782464	Follow previous renaming for a local variable in Navigation for more clarity	2020-02-23 22:15:52 +01:00
Tissevert	bcf2e05bfb	Move Content out of Object module into a separate one incorporating PDF.Update (which is actually an operation that is defined only on that structure), and rename it Layer to avoid confusion with Content streams as defined in the specs (which have their own PDF.Content module already)	2020-02-17 15:29:59 +01:00
Tissevert	6096a1a237	Simplify navigations by centering everything on Objects to avoid needing to many conversion tools between DirectObject / Object / Dictionary	2020-02-15 13:51:24 +01:00
Tissevert	23186100a8	Reimplement getObj with the newest tools in PDF.Object.Navigation, in particular implement browsing by paths or random objectId access	2020-02-15 10:25:09 +01:00
Tissevert	b916ab5206	Just noticed Streams are a kind of Dictionary too, since they have a header	2020-02-15 10:23:32 +01:00
Tissevert	4a6dbda7d3	Move Error type from Pages to Navigation as a candidate for MonadFail required by PDFContent defined there	2020-02-15 10:22:42 +01:00
Tissevert	923d1800b0	Gain a bit of speed by using native Attoparsec for number types instead of reimplementing them with ByteString conversion and call to read	2020-02-14 18:02:40 +01:00
Tissevert	1c457d71d8	Fix the reading of Hexadecimal string objects detected by running the tests implemented from the spec	2020-02-14 18:00:12 +01:00
Tissevert	a72d76e229	Add unit tests to make sure I'm not breaking things too much	2020-02-14 17:58:03 +01:00
Tissevert	919f640443	Merge branch 'extract-text' into navigation	2020-02-12 17:35:56 +01:00
Tissevert	ae938acc02	Merge branch 'main' into extract-text	2020-02-12 17:34:56 +01:00
Tissevert	32f9866106	Use peek to improve directObject parser avoiding a large <\|> disjunction	2020-02-12 17:34:27 +01:00
Tissevert	eb4d76002c	Finish the split of Navigation out of Page, generalize the use of MonadFail with a custom Error monad (~= Either String)	2020-02-11 22:41:46 +01:00
Tissevert	af994cb50c	WIP: in the process of migrating to Object.Navigation in Pages, still unsure how to manage simple Content parsing and efficient font loading (+ giving a way to edit Contents)	2020-02-11 17:59:15 +01:00
Tissevert	704d7a7fcf	It turns out Output.concat wasn't necessary, OBuilder seems already is a Monoid so mconcat works (that fact was used in the very implementation of concat…)	2020-02-11 17:36:29 +01:00
Tissevert	11647eb4eb	Implement output for Content streams	2020-02-11 17:26:47 +01:00
Tissevert	aed7af376a	WIP: still trying to figure things out, moved to a separate submodule for Navigation, proper naming is hell	2020-02-11 08:29:08 +01:00
Tissevert	e77bbbcda9	WIP: start moving some navigation-related routines from Pages into Object directly and generalize them to multi-component to allow easier browsing	2020-02-10 17:43:04 +01:00
Tissevert	195446e653	Allow resources with no /Font field, they won't cause any problem as long as no call to Tf (to load a font) is made	2020-02-10 17:41:44 +01:00
Tissevert	9f1b1afafe	Implement Text rendering from parsed Content	2020-02-10 10:54:44 +01:00
Tissevert	20466c4f13	WIP: Clean code parsing «pages» (now Content), separated from text rendering (will be reimplemented as an upper layer, also providing modification as stream filters) — Page is also forgotten for now, will need a big improvement in Object navigation	2020-02-09 22:42:57 +01:00
Tissevert	325250383a	Add support for fonts and implement MacRomanEncoding	2020-02-08 08:15:32 +01:00
Tissevert	c48ab22808	Forgot some useless parentheses when playing with operator precedences	2020-02-04 17:05:15 +01:00
Tissevert	a2b66ac6d6	Generalize the getFont function because some /Resources have a direct dictionary as value for their /Font property	2020-02-04 17:04:42 +01:00
Tissevert	cefb08ee50	Going a step further in «optimization» (slowing it even more…) by replacing choice by a search in a Map	2019-11-30 21:46:22 +01:00
Tissevert	afbbcbffc5	Finish implementing the new stack-based call parser	2019-11-30 12:39:40 +01:00
Tissevert	8373bd1ea0	Removing +x permission on getText source that shouldn't ever have been set	2019-11-29 19:07:54 +01:00
Tissevert	bac08446dd	WIP: starting to fix this criminally inefficient parser for PDF's postfix-operator instructions	2019-11-29 17:42:57 +01:00
Tissevert	f9f799c59b	Take the dirty code of «getText» and turn it into a relatively clean module exposing pages, that can be retrieved all at once or by page number (numbered human-style, starting from 1)	2019-11-29 11:51:35 +01:00
Tissevert	08a9717b3a	Get rid of wrapper PageContents structure returned by PageContent in the PDF.Text module (and return directly [ByteString] instead)	2019-11-29 11:48:28 +01:00
Tissevert	42a02808c1	Merge branch 'main' into extract-text	2019-11-27 18:05:47 +01:00
Tissevert	c9f050e64b	Remove deprecated debug script and forgotten comments to bypass the selective export of Text module	2019-10-14 10:17:15 +02:00
Tissevert	3a3e1533b4	Clean ByteString types to identify when a ByteString contains the representation of an integer in a given base and fix the last remaining PDF string (un)escaping issue	2019-10-14 10:17:15 +02:00
Tissevert	a96e36ec5a	Fix error silently discarding code ranges, make sure ByteString intervals are created with the correct byte length and decode utf16BE encoded values in single-value ranges	2019-10-14 10:17:15 +02:00
Tissevert	d07c286f8e	Clean exported ByteString custom functions	2019-10-14 10:17:15 +02:00
Tissevert	7a15113285	Try and re-implement string decoding — compiles but now fails to decode any string	2019-10-14 10:17:15 +02:00
Tissevert	36d7f9b819	Still debugging, broke pretty much everything and finally implementing a proper coderange parsing for CMap because apparently that's necessary	2019-10-14 10:17:15 +02:00
Tissevert	b8ca7281aa	Fix parsing errors forgetting to make sure there's a space after special operator arguments like names and stringObjects	2019-10-14 10:17:15 +02:00
Tissevert	32efdcdd6b	Try and fix stuff by generalizing a signature to ease debugging and add parenthesis which I think should have been here all along	2019-10-14 10:17:15 +02:00
Tissevert	3b59fd0c61	Separate CMap and Text in two distinct modules	2019-10-14 10:17:15 +02:00
Tissevert	0374b72920	Finish implementing reading, still bugs to investigate	2019-10-14 10:17:15 +02:00
Tissevert	1dd22c3889	Going to try with Text, naturally handling UTF-16 but will still have to parse «int codes» manually from strings	2019-10-14 10:17:15 +02:00
Tissevert	98d029c4d4	In complete debug, more or less implemented CMap parsing but apparently it uses UTF16 ?!	2019-10-14 10:17:15 +02:00
Tissevert	c349d9b4c2	Don't trust serializer, they have nothing todo with a reasonable binary encoding	2019-10-14 10:17:15 +02:00
Tissevert	e7484ef536	Completely lost, the same old Char8 / Word8 again, implemented all the text reading, still needing a couple details to parse CMaps	2019-10-14 10:17:15 +02:00
Tissevert	f9e5683bf4	WIP: Use previous changes to start implementing font caching and text parsing (still very broken, doesn't compile)	2019-10-14 10:17:15 +02:00
Tissevert	b8eb9e6856	Generalize the Parser type into a MonadParser class to use with MonadTrans and remove redundant code already defined in Applicative or Attoparsec	2019-10-14 10:17:15 +02:00
Tissevert	66d315b7fe	Reflect the distinction between eval and run from State monad into the Parser module	2019-10-14 10:17:15 +02:00
Tissevert	51db57ec67	Ugly commit, breaks everything, still trying to figure a grammar for text	2019-10-14 10:17:15 +02:00
Tissevert	6f3c159ea7	Adding a module to implement text reading and a demo program to go with it	2019-10-14 10:17:15 +02:00