Make a couple improvements in performance + add an example script to extract pages from a PDF

This commit is contained in:
Tissevert 2020-05-28 18:54:15 +02:00
parent f6664683c7
commit d9f69014a0
17 changed files with 448 additions and 258 deletions

View File

@ -59,7 +59,7 @@ executable equivalent
build-depends: base
, bytestring
, Hufflepdf
ghc-options: -Wall
ghc-options: -Wall -rtsopts
default-language: Haskell2010
executable getObj
@ -69,7 +69,7 @@ executable getObj
, containers
, Hufflepdf
, mtl
ghc-options: -Wall
ghc-options: -Wall -rtsopts
default-language: Haskell2010
executable getText
@ -77,9 +77,34 @@ executable getText
build-depends: base
, bytestring
, containers
, ExceptIOH
, Hufflepdf
, mtl
, text
ghc-options: -Wall
ghc-options: -Wall -rtsopts
default-language: Haskell2010
executable pdfCut
main-is: examples/pdfCut.hs
build-depends: base
, bytestring
, containers
, ExceptIOH
, filepath
, Hufflepdf
, mtl
, text
ghc-options: -Wall -rtsopts
default-language: Haskell2010
executable fixMermoz
main-is: examples/fixMermoz.hs
build-depends: base
, bytestring
, ExceptIOH
, Hufflepdf
, mtl
ghc-options: -Wall -rtsopts
default-language: Haskell2010
Test-Suite unitTests

View File

@ -1,8 +1,6 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (ReaderT, runReaderT)
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (readFile)
@ -13,7 +11,7 @@ import PDF.Box (Box(..))
import PDF.Layer (Layer(..), unify)
import PDF.Object (Object(..))
import PDF.Object.Navigation (
Clear(..), Raw(..), (//), objectById, catalog
Nav(..), PPath(..), StreamContent(..), (//), objectById, catalog
)
import PDF.Output (Output)
import qualified PDF.Output as Output (render)
@ -23,33 +21,27 @@ import System.Exit (die)
import Text.Printf (printf)
import Text.Read (readMaybe)
newtype Error a = Error {
runError :: Either String a
} deriving (Functor, Applicative, Monad)
instance MonadFail Error where
fail = Error . Left
decodedStream :: Object -> Object
decodedStream object =
maybe object id $ r Clear object >>= flip (w Raw) object
either (const object) id $ r Clear object >>= flip (w Raw) object
display :: Output a => ReaderT Layer Error a -> Document -> Either String ByteString
display :: Functor m => Output a => ReaderT Layer m a -> Document -> m ByteString
display getter (Document {eolStyle, layers}) =
Output.render eolStyle <$> runError (runReaderT getter (unify layers))
Output.render eolStyle <$> runReaderT getter (unify layers)
parse :: [String] -> IO (FilePath, Document -> Either String ByteString)
parse [inputFile] = return (inputFile, display catalog)
parse [inputFile] = return (inputFile, display $ value <$> catalog)
parse [inputFile, key] =
return (inputFile, clear . maybe (byPath key) byId $ readMaybe key)
where
byId = objectById . Id
byPath path = catalog // (explode path)
byPath path = (catalog // PPath (explode path))
explode "" = []
explode path =
case break (== '.') path of
(name, "") -> [name]
(name, rest) -> name : explode (drop 1 rest)
clear = display . fmap decodedStream
clear = display . fmap (decodedStream . value)
parse _ =
die . printf "Syntax: %s inputFile [OBJECT_ID | PATH_TO_OBJ]\n" =<< getProgName

View File

@ -1,5 +1,7 @@
{-# LANGUAGE FlexibleContexts #-}
import Control.Monad ((>=>))
import Control.Monad.Except (ExceptT(..))
import Control.Monad.Except.IOH (handle)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as BS (readFile)
import Data.Id (Id(..), mapWithKey)
@ -9,7 +11,7 @@ import qualified Data.Text as Text (unpack)
import PDF (UnifiedLayers(..), parseDocument)
import PDF.Box (Box(..))
import PDF.Content.Text (Chunks(..))
import PDF.Layer (Layer)
import PDF.Layer (Layer, LayerReader)
import PDF.Pages (
Contents(..), FontCache, Page(..), PageNumber(..), Pages(..), withFonts
, withResources
@ -19,7 +21,7 @@ import System.Exit (die)
import System.IO (BufferMode(..), hSetBuffering, stdout)
import Text.Printf (printf)
displayPage :: Int -> Page -> FontCache IO ()
displayPage :: Int -> Page -> FontCache (LayerReader (ExceptT String IO)) ()
displayPage n = withResources (
r Contents
>=> sequence_ . mapi (\objectId ->
@ -31,22 +33,22 @@ displayPage n = withResources (
liftIO . putStrLn $
printf "p#%d obj#%d instr#%d: %s" n (getId a) (getId b) (Text.unpack v)
getAll :: Layer -> IO ()
getAll :: Layer -> ExceptT String IO ()
getAll = withFonts $ r Pages >=> sequence_ . Map.mapWithKey displayPage
get :: Int -> Layer -> IO ()
get :: Int -> Layer -> ExceptT String IO ()
get n = withFonts $ r (P n) >=> displayPage n
onDoc :: FilePath -> (Layer -> IO ()) -> IO ()
onDoc inputFile f = do
(parseDocument <$> BS.readFile inputFile)
>>= either die (r UnifiedLayers >=> f)
onDoc :: FilePath -> (Layer -> ExceptT String IO ()) -> ExceptT String IO ()
onDoc inputFile f =
ExceptT (parseDocument <$> BS.readFile inputFile) >>= r UnifiedLayers >>= f
main :: IO ()
main = do
hSetBuffering stdout LineBuffering
args <- getArgs
case args of
[inputFile] -> onDoc inputFile getAll
[inputFile, pageNumber] -> onDoc inputFile (get $ read pageNumber)
[inputFile] -> onDoc inputFile getAll `handle` die
[inputFile, pageNumber] ->
onDoc inputFile (get $ read pageNumber) `handle` die
_ -> die "Syntax: getText INPUT_FILE [PAGE_NUMBER]"

41
examples/pdfCut.hs Normal file
View File

@ -0,0 +1,41 @@
import Control.Monad.State (MonadState(..), evalStateT)
import Control.Monad.Except.IOH (handle)
import Control.Monad.Trans (lift)
import qualified Data.ByteString.Char8 as BS (readFile)
import qualified Data.ByteString.Lazy.Char8 as Lazy (writeFile)
import Data.Map ((!), fromSet)
import qualified Data.Set as Set (fromList)
import PDF (Document, UnifiedLayers(..), parseDocument, render)
import PDF.Box (at)
import PDF.Pages (Pages(..))
import System.Environment (getArgs)
import System.Exit (die)
import System.FilePath (replaceBaseName, takeBaseName)
import Text.Read (readEither)
parseRange :: String -> Either String (Int, Int)
parseRange = evalStateT $ do
from <- lift . readEither =<< state (break (== '-'))
get >>= makeRange from
where
makeRange from "" = return (from, from)
makeRange from (_:to) = (,) from <$> lift (readEither to)
cut :: (Int, Int) -> Document -> IO Document
cut (p1, p2) doc = ((
at UnifiedLayers .at Pages $ \pages ->
return $ fromSet (pages!) $ Set.fromList [pMin .. pMax]
) doc) `handle` die
where
(pMin, pMax) = (min p1 p2, max p1 p2)
main :: IO ()
main = do
[inputPath, inputRange] <- getArgs
range <- catchLeft $ parseRange inputRange
doc <- catchLeft =<< parseDocument <$> BS.readFile inputPath
Lazy.writeFile (outputPath inputPath inputRange) . render =<< cut range doc
where
catchLeft = either die return
outputPath fileName range =
replaceBaseName fileName (takeBaseName fileName ++ "_p" ++ range)

View File

@ -14,7 +14,7 @@ module Data.ByteString.Char8.Util (
) where
import Data.ByteString (ByteString, snoc)
import qualified Data.ByteString as BS (empty, foldl, length, pack, singleton, splitAt)
import qualified Data.ByteString as BS (empty, foldl', length, pack, singleton, splitAt)
import qualified Data.ByteString.Char8 as Char8 (
cons, drop, index, splitAt, take, uncons, unpack
)
@ -43,7 +43,7 @@ intToB256 n
B256Int $ begining `snoc` (toEnum (n `mod` 0x100))
b256ToInt :: B256Int -> Int
b256ToInt (B256Int n) = BS.foldl (\k w -> 0x100*k + fromEnum w) 0 n
b256ToInt (B256Int n) = BS.foldl' (\k w -> 0x100*k + fromEnum w) 0 n
toBytes :: Int -> Int -> ByteString
toBytes 0 _ = BS.empty

View File

@ -8,6 +8,7 @@ module Data.Id (
, at
, delete
, empty
, filterWithKey
, fromList
, insert
, keysSet
@ -21,11 +22,11 @@ module Data.Id (
, union
) where
import Control.Monad.State (MonadState, modify, gets)
import Control.Monad.State.Strict (MonadState, modify, gets)
import Data.IntMap (IntMap, (!))
import qualified Data.IntMap as IntMap (
delete, empty, fromList, keysSet, insert, lookup, mapWithKey, maxViewWithKey
, member, minViewWithKey, size, union
delete, empty, filterWithKey, fromList, keysSet, insert, lookup
, mapWithKey, maxViewWithKey, member, minViewWithKey, size, union
)
import Data.IntSet (IntSet)
import Prelude hiding (lookup)
@ -74,6 +75,9 @@ union (IdMap intMap1) (IdMap intMap2) =
mapWithKey :: (Id a -> b -> c) -> IdMap a b -> IdMap a c
mapWithKey f (IdMap idMap) = IdMap {intMap = IntMap.mapWithKey (f . Id) idMap}
filterWithKey :: (Id a -> b -> Bool) -> IdMap a b -> IdMap a b
filterWithKey f = IdMap . IntMap.filterWithKey (f . Id) . intMap
fromList :: [(Id a, b)] -> IdMap a b
fromList = IdMap . IntMap.fromList . fmap (\(key, b) -> (getId key, b))

View File

@ -1,7 +1,6 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
module PDF.Box (
@ -17,8 +16,8 @@ module PDF.Box (
import Control.Monad.Except (ExceptT(..), runExceptT)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.State (MonadState(..))
import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (MonadState(..))
import Data.Id (Id, IdMap)
import qualified Data.Id as Id (insert, lookup)
import Data.Map (Map)

View File

@ -1,15 +1,17 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.CMap (
CMap
, CMappers
, CRange(..)
, cMap
, matches
, parse
) where
import Control.Applicative ((<|>), many)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Except (MonadError(..))
import Control.Monad.Fail (fail)
import Control.Monad.State (modify)
import Data.Attoparsec.ByteString.Char8 (count)
import Data.ByteString (ByteString)
@ -18,6 +20,7 @@ import Data.ByteString.Char8 (unpack)
import Data.ByteString.Char8.Util (
B16Int(..), b16ToBytes, b16ToInt, toBytes, utf16BEToutf8
)
import Data.Foldable (foldl', foldr')
import Data.Map (Map, mapWithKey, union)
import qualified Data.Map as Map (
adjust, empty, fromList, insert, insertWith, lookup, toList
@ -75,7 +78,7 @@ encoder :: Map Size FromUnicode -> Encoder
encoder fromUnicodes input
| Text.null input = Right ""
| otherwise =
foldl (<>) (Left "No encoding found") $ mapWithKey tryOn fromUnicodes
foldr' (<>) (Left "No encoding found") $ mapWithKey tryOn fromUnicodes
where
tryOn size fromUnicode =
let (prefix, end) = Text.splitAt size input in
@ -87,8 +90,8 @@ matches :: ByteString -> CRange -> Bool
matches code (CRange {fromSequence, toSequence}) =
fromSequence <= code && code <= toSequence
cMap :: MonadFail m => ByteString -> m Font
cMap = either fail (return . toFont . snd) . runParser
parse :: MonadError String m => ByteString -> m Font
parse = either throwError (return . toFont . snd) . runParser
(many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
emptyCMap
where
@ -148,7 +151,7 @@ saveToUnicodeBySize assoc@((code, _):_) = Map.adjust insertCRange (BS.length cod
)
saveFromUnicodeBySize :: [(Text, ByteString)] -> Map Size FromUnicode -> Map Size FromUnicode
saveFromUnicodeBySize = flip (foldl insertFromUnicode)
saveFromUnicodeBySize = flip (foldl' insertFromUnicode)
where
insertFromUnicode :: Map Size FromUnicode -> (Text, ByteString) -> Map Size FromUnicode
insertFromUnicode tmpFromUnicodeBySize (unicodeSequence, code) =

View File

@ -9,54 +9,77 @@ module PDF.Content (
Content(..)
, ContentUnit(..)
, GraphicContextUnit(..)
, IdContentUnit
, IdGraphicContextUnit
, IdTextContext
, Instructions(..)
, TextContext
, parse
) where
import Control.Applicative ((<|>))
import Control.Monad.Fail (MonadFail)
import Control.Monad.Reader (asks, runReader)
import Control.Monad.State (evalStateT, modify)
import Control.Monad.State.Strict (runState, evalStateT, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy)
import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser, parseOnly)
import Data.ByteString (ByteString)
import Data.Id (Id(..), Indexed, at, empty, register)
import PDF.Box (Box(..))
import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject)
import PDF.Output (Output(..), line)
import PDF.Parser (Parser, runParser, string)
import PDF.Parser (string)
data Instructions = Instructions
data GraphicContextUnit =
GraphicInstruction (Id Instruction)
| ContentUnit ContentUnit
data GraphicContextUnit a =
GraphicInstruction a
| ContentUnit (ContentUnit a)
deriving Show
type TextContext = [Id Instruction]
data ContentUnit =
GraphicContext [GraphicContextUnit]
| TextContext TextContext
type TextContext a = [a]
data ContentUnit a =
GraphicContext [GraphicContextUnit a]
| TextContext (TextContext a)
deriving Show
data Content = Content {
contentUnits :: [ContentUnit]
contentUnits :: [IdContentUnit]
, indexedInstructions :: Indexed Instruction
, firstError :: Maybe String
} deriving Show
type InstructionParser = Parser (Indexed Instruction)
type TmpContentUnit = ContentUnit Instruction
type TmpGraphicContextUnit = GraphicContextUnit Instruction
type TmpTextContext = TextContext Instruction
type IdContentUnit = ContentUnit (Id Instruction)
type IdGraphicContextUnit = GraphicContextUnit (Id Instruction)
type IdTextContext = TextContext (Id Instruction)
instance Monad m => Box m Instructions Content (Indexed Instruction) where
r Instructions = return . indexedInstructions
w Instructions indexedInstructions someContent =
return $ someContent {indexedInstructions}
parse :: MonadFail m => ByteString -> m Content
parse =
either fail (return . uncurry Content) . runParser contentUnits empty
where
contentUnits = contentUnit `sepBy` blank
parse :: ByteString -> Content
parse input =
let result = Atto.parseOnly (contentUnit `sepBy` blank) input in
let (contentUnits, indexedInstructions) = either (const ([], empty)) buildContent result in
let firstError = either Just (const Nothing) result in
Content {contentUnits, indexedInstructions, firstError}
contentUnit :: InstructionParser ContentUnit
buildContent :: [TmpContentUnit] -> ([IdContentUnit], Indexed Instruction)
buildContent instructionContentUnits =
runState (mapM registerContentUnit instructionContentUnits) empty
where
registerContentUnit (GraphicContext gc) =
GraphicContext <$> (mapM registerGraphicContext gc)
registerContentUnit (TextContext tc) = TextContext <$> (mapM register tc)
registerGraphicContext (GraphicInstruction gi) =
GraphicInstruction <$> (register gi)
registerGraphicContext (ContentUnit cu) =
ContentUnit <$> (registerContentUnit cu)
contentUnit :: Atto.Parser TmpContentUnit
contentUnit =
(GraphicContext <$> graphicContext)
<|> (TextContext <$> textContext)
@ -64,18 +87,18 @@ contentUnit =
graphicContext =
string "q" *> blank *> graphicContextUnit `sepBy` blank <* blank <* string "Q"
graphicContextUnit :: InstructionParser GraphicContextUnit
graphicContextUnit :: Atto.Parser TmpGraphicContextUnit
graphicContextUnit =
(GraphicInstruction <$> instruction)
<|> (ContentUnit <$> contentUnit)
instruction :: InstructionParser (Id Instruction)
instruction = evalStateT stackParser [] >>= register
instruction :: Atto.Parser Instruction
instruction = evalStateT stackParser []
where
stackParser = ((directObject <* blank) >>= push) <|> operator
push arg = modify (arg:) *> stackParser
textContext :: InstructionParser TextContext
textContext :: Atto.Parser TmpTextContext
textContext =
string "BT" *> blank *> instruction `sepBy` blank <* blank <* string "ET"

View File

@ -1,24 +1,22 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module PDF.Content.Text (
Chunks(..)
, chunk
, format
, renderText
) where
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
import Control.Monad.Reader (ReaderT, asks, runReaderT)
import Control.Monad.State (
MonadState(..), StateT, evalStateT, gets, modify, runStateT
)
import Control.Monad.Trans (lift)
import qualified Data.ByteString.Char8 as BS (concatMap, singleton)
import Data.Id (Id(..), Indexed, at, empty, singleton)
import qualified Data.Id as Id (delete, lookup, register)
import Data.Map ((!))
@ -26,27 +24,20 @@ import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop)
import PDF.Box (Box(..))
import PDF.Content (
Content, ContentUnit(..), GraphicContextUnit(..), contentUnits
Content(..), ContentUnit(..), GraphicContextUnit(..), IdContentUnit
, IdGraphicContextUnit
)
import qualified PDF.Content as Content (Content(..))
import PDF.Content.Operator (Instruction, Operator(..))
import PDF.Content.Operator.Text (Operator(..))
import PDF.Font (Font(..), FontSet, emptyFont)
import PDF.Object (DirectObject(..), StringObject(..), toByteString)
import Prelude hiding (fail)
data ROContext = ROContext {
indexedInstructions :: Indexed Instruction
, fontSet :: FontSet
}
type TextContent m = ReaderT ROContext m
type FontContext m = StateT Font (TextContent m)
type TmpFont = StateT Font
type Renderer m = ReaderT (Indexed Instruction) (ReaderT FontSet m)
type Updater m = StateT (Indexed Instruction) (ReaderT (Indexed Text) (ReaderT FontSet m))
type TextUpdater m = StateT Font (Updater m)
decodeString :: MonadFail m => StringObject -> FontContext m Text
decodeString :: MonadFail m => StringObject -> TmpFont (Renderer m) Text
decodeString input = do
Font {decode} <- get
either fail return . decode $ toByteString input
@ -57,56 +48,53 @@ chunk :: Int -> Id Text
chunk = Id
instance MonadFail m => Box (ReaderT FontSet m) Chunks Content (Indexed Text) where
r Chunks content = do
fontSet <- ask
renderText fontSet content
r Chunks content =
runReaderT (mconcat <$> renderer) $ indexedInstructions content
where
renderer = mapM renderContentUnit (contentUnits content)
w Chunks indexedText (Content.Content {contentUnits, Content.indexedInstructions}) =
uncurry Content.Content <$> runReaderT (
runStateT (mapM updateContentUnit contentUnits) indexedInstructions
) indexedText
w Chunks indexedText content = do
(contentUnits, indexedInstructions) <- runReaderT readerUpdate indexedText
return $ content {contentUnits, indexedInstructions}
where
stateUpdate = mapM updateContentUnit (contentUnits content)
readerUpdate = runStateT stateUpdate (indexedInstructions content)
renderText :: MonadFail m => FontSet -> Content -> m (Indexed Text)
renderText fontSet (Content.Content {contentUnits, Content.indexedInstructions}) =
runReaderT (mconcat <$> mapM renderContentUnit contentUnits) roContext
where
roContext = ROContext {indexedInstructions, fontSet}
renderContentUnit :: MonadFail m => ContentUnit -> TextContent m (Indexed Text)
renderContentUnit :: MonadFail m => IdContentUnit -> Renderer m (Indexed Text)
renderContentUnit (GraphicContext graphicContextUnits) =
mconcat <$> mapM renderGraphicContextUnit graphicContextUnits
renderContentUnit (TextContext instructionIds) =
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
updateContentUnit :: MonadFail m => ContentUnit -> Updater m ContentUnit
updateContentUnit :: MonadFail m => IdContentUnit -> Updater m IdContentUnit
updateContentUnit (GraphicContext graphicContextUnits) = GraphicContext <$>
mapM updateGraphicContextUnit graphicContextUnits
updateContentUnit (TextContext instructionIds) = TextContext . concat <$>
evalStateT (mapM updateInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text)
renderGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Renderer m (Indexed Text)
renderGraphicContextUnit (GraphicInstruction _) = return empty
renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit
updateGraphicContextUnit :: MonadFail m => GraphicContextUnit -> Updater m GraphicContextUnit
updateGraphicContextUnit :: MonadFail m => IdGraphicContextUnit -> Updater m IdGraphicContextUnit
updateGraphicContextUnit gI@(GraphicInstruction _) = return gI
updateGraphicContextUnit (ContentUnit contentUnit) =
ContentUnit <$> updateContentUnit contentUnit
renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text)
renderInstructionId :: MonadFail m => Id Instruction -> TmpFont (Renderer m) (Indexed Text)
renderInstructionId instructionId@(Id n) = toMap <$>
(asks ((`at` instructionId) . indexedInstructions) >>= renderInstruction)
(asks ((`at` instructionId)) >>= renderInstruction)
where
toMap = maybe empty (singleton (Id n))
updateInstructionId :: MonadFail m => Id Instruction -> TextUpdater m [Id Instruction]
updateInstructionId :: MonadFail m => Id Instruction -> TmpFont (Updater m) [Id Instruction]
updateInstructionId instructionId =
lift (gets (`at` instructionId)) >>= updateInstruction instructionId
renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text)
renderInstruction :: MonadFail m => Instruction -> TmpFont (Renderer m) (Maybe Text)
renderInstruction (Text Tf, [NameObject fontName, _]) =
asks ((! fontName) . fontSet) >>= put >> return Nothing
lift (lift $ asks (! fontName)) >>= put >> return Nothing
renderInstruction (Text Tstar, []) = return $ Just "\n"
@ -128,7 +116,7 @@ renderInstruction (Text DQuote, [_, _, StringObject outputString]) =
renderInstruction _ = return Nothing
updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TextUpdater m [Id Instruction]
updateInstruction :: MonadFail m => Id Instruction -> Instruction -> TmpFont (Updater m) [Id Instruction]
updateInstruction instructionId (Text Tf, [NameObject fontName, _]) =
(lift . lift . lift $ asks (!fontName)) >>= put >> return [instructionId]
@ -155,5 +143,9 @@ format input = do
(line, left) -> (:) <$> tj line <*> format left
where
tj t = do
literal <- either fail (return . Literal) =<< gets (($t) . encode)
return (Text Tj, [StringObject literal])
encoded <- either fail return =<< gets (($t) . encode)
return (Text Tj, [StringObject . Literal $ BS.concatMap escape encoded])
escape '\\' = "\\\\"
escape '(' = "\\("
escape ')' = "\\)"
escape c = BS.singleton c

View File

@ -1,12 +1,13 @@
{-# LANGUAGE FlexibleContexts #-}
module PDF.Encoding (
encoding
) where
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Except (MonadError(..))
import PDF.Encoding.MacRoman (macRomanEncoding)
import PDF.Font (Font)
import Prelude hiding (fail)
encoding :: MonadFail m => String -> m Font
encoding :: MonadError String m => String -> m Font
encoding "MacRomanEncoding" = return macRomanEncoding
encoding s = fail $ "Unknown encoding " ++ s
encoding s = throwError $ "Unknown encoding " ++ s

View File

@ -3,6 +3,7 @@ module PDF.Encoding.MacRoman (
) where
import qualified Data.ByteString.Char8 as BS (pack, unpack)
import Data.Foldable (foldl')
import Data.Map (Map)
import qualified Data.Map as Map (empty, insert, lookup)
import qualified Data.Text as Text (pack, unpack)
@ -24,7 +25,7 @@ macRomanEncoding = Font {
| otherwise -> Left ("Character '" ++ k :"' unavailable in MacRoman")
mappers :: (Mapper, Mapper)
mappers = foldl generateMapers (Map.empty, Map.empty) [
mappers = foldl' generateMapers (Map.empty, Map.empty) [
('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
, ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
, ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA

View File

@ -1,21 +1,26 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleContexts #-}
module PDF.Layer (
Layer(..)
, LayerReader
, Objects(..)
, unify
) where
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (ReaderT)
import Data.Foldable (foldl')
import Data.Id (Id(..), Indexed, keysSet, mapWithKey, member)
import qualified Data.Id as Id (empty, union)
import qualified Data.Id as Id (empty, insert, lookup, union)
import Data.Map (Map, (!))
import qualified Data.Map as Map (empty, union)
import qualified Data.Map as Map (empty, lookup, union)
import qualified Data.IntSet as IntSet (delete, toList)
import PDF.Box (Box(..))
import PDF.Object (
IndirectObjCoordinates(..), Object, Occurrence(..)
, Structure(..), XRefEntry(..), XRefSection, eofMarker, outputBody
IndirectObjCoordinates(..), Object, Occurrence(..), Structure(..)
, XRefEntry(..), XRefSection, eofMarker, outputBody
)
import qualified PDF.Output as Output (line)
import PDF.Output (
@ -26,24 +31,28 @@ import Text.Printf (printf)
data Layer = Layer {
occurrences :: [Occurrence]
, objects :: (Indexed Object)
, objects :: Indexed Object
, docStructure :: Structure
} deriving Show
type LayerReader m = ReaderT Layer m
updateXrefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
updateXrefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
updateXRefs :: XRefSection -> Map Resource Offset -> (XRefSection, Offset)
updateXRefs xRef offsets = (mapWithKey updateEntry xRef, offsets ! StartXRef)
where
updateEntry objectId e@(InUse {}) =
e {offset = offsets ! (ObjectId $ getId objectId)}
updateEntry objectId e@(InUse {offset}) =
case Map.lookup (ObjectId $ getId objectId) offsets of
Nothing -> Free {nextFree = Id $ getOffset offset, generation = 65535}
Just newOffset -> e {offset = newOffset}
updateEntry _ e = e
instance Output Layer where
output (Layer {occurrences, objects, docStructure}) =
fmap (updateXrefs xRef) <$> getOffsets (outputBody (occurrences, objects))
>>= \(body, (xref, startXRef)) -> mconcat [
body
output (Layer {occurrences, objects, docStructure}) = do
(body, savedOffsets) <- getOffsets (outputBody (occurrences, objects))
let (newXRef, startXRef) = updateXRefs xRef savedOffsets
mconcat [
return body
, Output.line "xref"
, output xref
, output newXRef
, Output.line "trailer"
, output trailer, newLine
, Output.line "startxref"
@ -76,6 +85,14 @@ instance Monad m => Box m Objects Layer (Indexed Object) where
Indirect (IndirectObjCoordinates {objectId, versionNumber = 0})
newOccurrences = (makeOccurrence . Id) <$> IntSet.toList newObjectIds
instance MonadError String m => Box m (Id Object) Layer Object where
r objectId =
maybe (throwError "Unknown key") return . Id.lookup objectId . objects
w objectId a layer@(Layer {objects})
| member objectId objects = return $
layer {objects = Id.insert objectId a objects}
| otherwise = throwError "Unknown key"
emptyLayer :: Layer
emptyLayer = Layer {
docStructure = Structure {xRef = Id.empty, trailer = Map.empty}
@ -84,7 +101,7 @@ emptyLayer = Layer {
}
unify :: [Layer] -> Layer
unify = foldl complete emptyLayer
unify = foldl' complete emptyLayer
where
complete tmpLayer older =
let mergedObjects = Id.union (objects tmpLayer) (objects older) in

View File

@ -34,6 +34,7 @@ module PDF.Object (
) where
import Control.Applicative ((<|>), many)
import Control.Monad.Reader (asks)
import Data.Attoparsec.ByteString.Char8 (choice, count, option, sepBy)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (concat)
@ -48,11 +49,11 @@ import qualified Data.Id as Id (
import Data.Map (Map)
import qualified Data.Map as Map (fromList, toList)
import qualified Data.Set as Set (fromList, member)
import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.EOL as EOL (Style(..), charset, parser)
import qualified PDF.Output as Output (line, string)
import PDF.Output (
OBuilder, Offset(..), Output(..), Resource(..)
, byteString, getOffset, join, newLine, saveOffset
OBuilder, OContext(..), Offset(..), Output(..), Resource(..), byteString
, getOffset, join, newLine, saveOffset
)
import PDF.Parser (MonadParser(..), Parser, (<?>), octDigit, oneOf)
import Text.Printf (printf)
@ -190,8 +191,8 @@ nullObject = string "null" *> return () <?> "null object"
-- Reference
--
data IndirectObjCoordinates = IndirectObjCoordinates {
objectId :: (Id Object)
, versionNumber :: Int
objectId :: {-# UNPACK #-} !(Id Object)
, versionNumber :: {-# UNPACK #-} !Int
} deriving Show
reference :: MonadParser m => m IndirectObjCoordinates
@ -202,14 +203,14 @@ reference = IndirectObjCoordinates
-- DirectObject
--
data DirectObject =
Boolean Bool
| NumberObject Number
| StringObject StringObject
| NameObject Name
| Array [DirectObject]
| Dictionary Dictionary
Boolean !Bool
| NumberObject !Number
| StringObject !StringObject
| NameObject !Name
| Array ![DirectObject]
| Dictionary !Dictionary
| Null
| Reference IndirectObjCoordinates
| Reference !IndirectObjCoordinates
deriving Show
instance Output DirectObject where
@ -294,10 +295,15 @@ data XRefEntry = InUse {
} deriving Show
instance Output XRefEntry where
output (InUse {offset, generation}) =
Output.line (printf "%010d %05d n " (getOffset offset) generation)
output (Free {nextFree, generation}) =
Output.line (printf "%010d %05d f " (getId nextFree) generation)
output xRefEntry = Output.string (build xRefEntry) `mappend` endXRefEntryLine
where
build (InUse {offset, generation}) =
printf "%010d %05d n" (getOffset offset) generation
build (Free {nextFree, generation}) =
printf "%010d %05d f" (getId nextFree) generation
endXRefEntryLine = OContext (asks padEOLToTwoBytes) >>= Output.line
padEOLToTwoBytes EOL.CRLF = ("" :: String)
padEOLToTwoBytes _ = " "
entry :: Parser u XRefEntry
entry = do

View File

@ -3,98 +3,162 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
module PDF.Object.Navigation (
Clear(..)
, PDFContent
, Raw(..)
Nav(..)
, PPath(..)
, ROLayer
, RWLayer
, StreamContent(..)
, (./)
, (//)
, (>./)
, (>//)
, castObject
, catalog
, getDictionary
, getKey
, objectById
, catalog
, save
) where
import Codec.Compression.Zlib (compress, decompress)
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (MonadReader(..))
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.State (MonadState)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (length)
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
import Data.Id (Id, at)
import qualified Data.Map as Map (lookup)
import PDF.Box (Box(..))
import Data.Id (Id)
import qualified Data.Id as Id (at)
import qualified Data.Map as Map (adjust, insert, lookup)
import PDF.Box (Box(..), at, edit{-, runRO-})
import PDF.Layer (Layer(..))
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..), Structure(..)
, Name(..), Number(..), Object(..), Structure(..)
)
import Prelude hiding (fail)
import Text.Printf (printf)
type PDFContent m = (MonadReader Layer m, MonadFail m)
newtype PPath = PPath [Component]
data DPath = DPath {
root :: Id Object
, offset :: [Component]
} deriving Show
push :: Component -> DPath -> DPath
push component dPath = dPath {offset = (offset dPath) ++ [component]}
data Nav a = Nav {
dPath :: DPath
, value :: a
} deriving (Functor)
instance Show a => Show (Nav a) where
show (Nav {dPath, value}) = "Nav {dPath = " ++ show dPath ++ ", value = " ++ show value ++ "}"
type ROLayer m = (MonadReader Layer m, MonadError String m)
type RWLayer m = (MonadState Layer m, MonadError String m)
type Component = String
getDictionary :: PDFContent m => Object -> m Dictionary
getDictionary (Direct (Dictionary aDict)) = return aDict
getDictionary (Direct (Reference ref)) =
objectById (objectId ref) >>= getDictionary
getDictionary (Stream {header}) = return header
getDictionary obj = expected "dictionary : " obj
getDictionary :: ROLayer m => Nav Object -> m (Nav Dictionary)
getDictionary (Nav {dPath, value}) =
case value of
(Direct (Dictionary aDict)) -> return $ Nav {dPath, value = aDict}
(Direct (Reference ref)) -> objectById (objectId ref) >>= getDictionary
(Stream {header}) -> return $ Nav {dPath, value = header}
obj -> expected "dictionary : " obj
expected :: (MonadFail m, Show a) => String -> a -> m b
expected name = fail . printf "Not a %s: %s" name . show
expected :: (MonadError String m, Show a) => String -> a -> m b
expected name = throwError . printf "Not a %s: %s" name . show
getKey :: PDFContent m => String -> Object -> m DirectObject
getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
getKey :: ROLayer m => String -> Nav Object -> m (Nav DirectObject)
getKey key navObject = getDictionary navObject >>= f
where
errorMessage =
printf "Key %s not found in object %s" key (show object)
catchMaybe = maybe (fail errorMessage) return
printf "Key %s not found in object %s" key (show navObject)
f (Nav {dPath, value}) =
case Map.lookup (Name key) value of
Nothing -> throwError errorMessage
Just dObj -> return $ Nav {dPath = push key dPath, value = dObj}
objectById :: PDFContent m => (Id Object) -> m Object
objectById :: ROLayer m => (Id Object) -> m (Nav Object)
objectById objectId = do
layer <- ask
return (objects layer `at` objectId)
return $ Nav {
dPath = DPath {root = objectId, offset = []}
, value = objects layer `Id.at` objectId
}
(./) :: PDFContent m => m Object -> Component -> m Object
(./) object key = (object >>= getKey key >>= castObject)
castObject :: PDFContent m => DirectObject -> m Object
castObject (Reference (IndirectObjCoordinates {objectId})) =
castObject :: ROLayer m => Nav DirectObject -> m (Nav Object)
castObject (Nav {value = !(Reference (IndirectObjCoordinates {objectId}))}) =
objectById objectId
castObject directObject = return $ Direct directObject
castObject (Nav {dPath, value}) = return $ Nav {dPath, value = Direct value}
(//) :: PDFContent m => m Object -> [Component] -> m Object
(//) object [] = object
(//) object (key:keys) = object ./ key // keys
(./) :: ROLayer m => m (Nav Object) -> Component -> m (Nav Object)
(./) navObject key = (navObject >>= getKey key >>= castObject)
(>./) :: PDFContent m => Object -> Component -> m Object
(>./) object = (return object ./)
(//) :: ROLayer m => m (Nav Object) -> PPath -> m (Nav Object)
(//) navObject (PPath []) = navObject
(//) navObject (PPath (key:keys)) = navObject ./ key // (PPath keys)
(>//) :: PDFContent m => Object -> [Component] -> m Object
(>//) object = (return object //)
(>./) :: ROLayer m => Nav Object -> Component -> m (Nav Object)
(>./) navObject = (return navObject ./)
catalog :: PDFContent m => m Object
catalog = Direct . Dictionary . trailer . docStructure <$> ask
(>//) :: ROLayer m => Nav Object -> PPath -> m (Nav Object)
(>//) navObject = (return navObject //)
data Clear = Clear
data Raw = Raw
catalog :: ROLayer m => m (Nav Object)
catalog = do
value <- Direct . Dictionary . trailer . docStructure <$> ask
return $ Nav {dPath = undefined, value}
setAt :: [Component] -> DirectObject -> Dictionary -> Dictionary
setAt [] _ dict = dict
setAt [component] directObject dict =
Map.insert (Name component) directObject dict
setAt (component:components) directObject dict =
Map.adjust setDirObj (Name component) dict
where
setDirObj (Dictionary subDict) =
Dictionary $ setAt components directObject subDict
setDirObj x = x
save :: RWLayer m => Nav Object -> m ()
save nav@(Nav {dPath, value = Direct dObj}) =
edit .at (root dPath) $ return . setObj
where
setObj obj@(Stream {header}) =
obj {header = setAt (offset dPath) dObj header}
setObj (Direct (Dictionary dict)) =
Direct . Dictionary $ setAt (offset dPath) dObj dict
setObj _ = value nav
save (Nav {dPath = DPath {root, offset = []}, value}) = edit $ w root value
save _ = throwError "Streams can't be properties of PDF objects"
data StreamContent = Clear | Raw
onLazy :: (Lazy.ByteString -> Lazy.ByteString) -> ByteString -> ByteString
onLazy f = Lazy.toStrict . f . Lazy.fromStrict
contains :: String -> DirectObject -> Bool
contains needle (NameObject (Name n)) = needle == n
contains needle (Array directObjects) = oneOf directObjects (contains needle)
contains needle !(NameObject (Name n)) = needle == n
contains needle !(Array directObjects) = oneOf directObjects (contains needle)
where
oneOf [] _ = False
oneOf (x:xs) p = p x || oneOf xs p
contains _ _ = False
instance MonadFail m => Box m Clear Object ByteString where
instance MonadError String m => Box m StreamContent (Nav Object) ByteString where
r sc = r sc . value
w sc newStreamContent nav = setValue <$> w sc newStreamContent (value nav)
where
setValue value = nav {value}
instance MonadError String m => Box m StreamContent Object ByteString where
r Raw (Stream {streamContent}) = return streamContent
r Clear (Stream {header, streamContent}) = return $
case Map.lookup (Name "Filter") header of
Just directObject
@ -102,17 +166,15 @@ instance MonadFail m => Box m Clear Object ByteString where
_ -> streamContent
r _ obj = expected "stream" obj
w Clear streamContent obj@(Stream {header}) = return $
case Map.lookup (Name "Filter") header of
Just directObject
| contains "FlateDecode" directObject ->
obj {streamContent = onLazy compress streamContent}
_ -> obj {streamContent}
w _ _ obj = expected "stream" obj
instance MonadFail m => Box m Raw Object ByteString where
r Raw (Stream {streamContent}) = return streamContent
r _ obj = expected "stream" obj
w Raw streamContent obj@(Stream {}) = return $ obj {streamContent}
w Clear newStreamContent (Stream {header}) =
let streamContent = getStreamContent (Map.lookup (Name "Filter") header) in
return $ Stream {header = fixLength streamContent, streamContent}
where
getStreamContent (Just directObject)
| contains "FlateDecode" directObject = onLazy compress newStreamContent
getStreamContent _ = newStreamContent
fixLength sc =
let newLength = NumberObject . Number . fromIntegral $ BS.length sc in
Map.insert (Name "Length") newLength header
w _ _ obj = expected "stream" obj

View File

@ -55,9 +55,8 @@ saveOffset resource = OContext $
lift :: (a -> Builder) -> a -> OBuilder
lift f a = return (f a)
getOffsets :: OBuilder -> OContext (OBuilder, Map Resource Offset)
getOffsets (OContext builder) =
OContext (listen builder >>= \(a, w) -> return (return a, w))
getOffsets :: OBuilder -> OContext (Builder, Map Resource Offset)
getOffsets (OContext builder) = OContext $ listen builder
append :: OBuilder -> OBuilder -> OBuilder
append (OContext a) (OContext b) = OContext (mappend <$> a <*> b)

View File

@ -13,63 +13,62 @@ module PDF.Pages (
, withResources
) where
import Control.Applicative (Alternative, (<|>))
import Control.Applicative (Alternative)
import Control.Applicative ((<|>))
import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Except (MonadError(..))
import Control.Monad.Reader (ReaderT, runReaderT)
import Control.Monad.State (StateT(..), evalStateT, execStateT, gets, modify)
import Control.Monad.Trans (lift)
import Data.ByteString (ByteString)
import Data.ByteString.Lazy (toStrict)
import Data.Id (Id, IdMap)
import Data.Id (Id(..), IdMap)
import qualified Data.Id as Id (empty, insert, lookup)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, toList)
import qualified Data.Map as Map (empty, elems, fromList, insert, toList)
import Data.Maybe (listToMaybe)
import Data.OrderedMap (OrderedMap, build, mapi)
import PDF.Box (Box(..), at, edit)
import PDF.CMap (cMap)
import PDF.Box (Box(..), at, edit, runRO)
import qualified PDF.CMap as CMap (parse)
import PDF.Content (Content(..))
import qualified PDF.Content as Content (parse)
import PDF.Encoding (encoding)
import PDF.EOL (Style(..))
import PDF.Font (Font, FontSet)
import PDF.Layer (Layer(..), Objects(..))
import PDF.Layer (Layer(..), LayerReader)
import PDF.Object (
Dictionary, DirectObject(..), IndirectObjCoordinates(..)
, Name(..), Object(..)
, Name(..), Number(..), Object(..)
,)
import PDF.Object.Navigation (
Clear(..), (//), (>./), (>//), getDictionary
, getKey, objectById, origin
Nav(..), PPath(..), ROLayer, RWLayer, StreamContent(..), (//), (>./)
, (>//), catalog, getDictionary, getKey, objectById, save
)
import PDF.Output (render)
import Prelude hiding (fail)
import Text.Printf (printf)
type Except m = (Alternative m, MonadFail m)
type InLayer m = ReaderT Layer m
type CachedFonts = IdMap Object Font
type FontCache m = StateT CachedFonts (InLayer m)
type FontCache m = StateT CachedFonts m
data Page = Page {
contents :: OrderedMap (Id Object) Content
byteContents :: OrderedMap (Id Object) ByteString
, resources :: Dictionary
, source :: (Id Object)
}
loadContents :: Except m => DirectObject -> InLayer m (OrderedMap (Id Object) Content)
loadContents directObject =
sequenceA . build loadContent $ objectIds directObject
loadByteContents :: ROLayer m => DirectObject -> m (OrderedMap (Id Object) ByteString)
loadByteContents directObject = do
objs <- sequence . build objectById $ objectIds directObject
mapM (r Clear) objs
where
loadContent :: Except m => (Id Object) -> InLayer m Content
loadContent objectId = objectById objectId >>= r Clear >>= Content.parse
objectIds (Array l) = l >>= getReference
objectIds dirObj = getReference dirObj
getFontDictionary :: Except m => Object -> InLayer m Dictionary
getFontDictionary :: (Alternative m, ROLayer m) => Nav Object -> m Dictionary
getFontDictionary pageObj =
(pageObj >// ["Resources", "Font"] >>= getDictionary) <|> return Map.empty
(pageObj >// PPath ["Resources", "Font"] >>= fmap value . getDictionary)
<|> return Map.empty
cache :: Except m => ((Id Object) -> FontCache m Font) -> (Id Object) -> FontCache m Font
cache :: ROLayer m => (Id Object -> FontCache m Font) -> Id Object -> FontCache m Font
cache loader objectId =
gets (Id.lookup objectId) >>= maybe load return
where
@ -78,81 +77,105 @@ cache loader objectId =
modify $ Id.insert objectId value
return value
loadFont :: Except m => (Id Object) -> FontCache m Font
loadFont :: (Alternative m, ROLayer m) => Id Object -> FontCache m Font
loadFont objectId = lift $ objectById objectId >>= tryMappings
where
tryMappings object =
(object >./ "ToUnicode" >>= r Clear >>= cMap)
<|> (object >./ "Encoding" >>= loadEncoding)
<|> (fail $ unknownFormat (show objectId) (show object))
(object >./ "ToUnicode" >>= r Clear >>= CMap.parse)
<|> (object >./ "Encoding" >>= loadEncoding . value)
<|> (throwError $ unknownFormat (show objectId) (show object))
unknownFormat = printf "Unknown font format for object #%s : %s"
loadEncoding :: MonadFail m => Object -> m Font
loadEncoding :: MonadError String m => Object -> m Font
loadEncoding (Direct (NameObject (Name name))) = encoding name
loadEncoding object =
fail $ printf "Encoding must be a name, not that : %s" $ show object
throwError $ printf "Encoding must be a name, not that : %s" $ show object
loadResources :: Except m => Dictionary -> FontCache m FontSet
loadResources :: (Alternative m, ROLayer m) => Dictionary -> FontCache m FontSet
loadResources = foldM addFont Map.empty . Map.toList
where
addFont output (name, Reference (IndirectObjCoordinates {objectId})) =
flip (Map.insert name) output <$> cache loadFont objectId
addFont output _ = return output
getReference :: DirectObject -> [(Id Object)]
getReference :: DirectObject -> [Id Object]
getReference (Reference (IndirectObjCoordinates {objectId})) = [objectId]
getReference _ = []
loadPage :: Except m => (Id Object) -> InLayer m Page
loadPage :: (Alternative m, ROLayer m) => Id Object -> m Page
loadPage source = do
page <- objectById source
contents <- getKey "Contents" page >>= loadContents
byteContents <- loadByteContents . value =<< getKey "Contents" page
resources <- getFontDictionary page
return $ Page {contents, resources, source}
return $ Page {byteContents, resources, source}
pagesList :: Except m => InLayer m [(Id Object)]
pagesList :: (Alternative m, ROLayer m) => m [Id Object]
pagesList =
(origin // ["Root", "Pages"] >>= getKey "Kids" >>= return . getReferences)
(catalog // PPath ["Root", "Pages", "Kids"] >>= getReferences . value)
<|> return []
where
getReferences (Array kids) = kids >>= getReference
getReferences _ = fail "Not a pages array"
getReferences (Direct (Array kids)) = return $ getReference =<< kids
getReferences _ = throwError "Not a pages array"
updatePage :: MonadFail m => Page -> StateT Layer m ()
updatePage (Page {contents}) = sequence_ $ mapi updateContent contents
editPagesList :: RWLayer m => ([DirectObject] -> [DirectObject]) -> m ()
editPagesList f = do
pages <- runRO (catalog // PPath ["Root", "Pages"])
kids <- runRO (pages >./ "Kids")
count <- runRO (pages >./ "Count")
(newSize, newKids) <- editKids (value kids)
save $ kids {value = newKids}
save $ count {value = Direct $ NumberObject newSize}
where
updateContent source content =
edit . at Objects . at source . at Clear $ setContent content
setContent content _ = return . toStrict $ render LF content
editKids (Direct (Array pageRefs)) =
let result = f pageRefs in
return (Number . fromIntegral $ length result, Direct $ Array result)
editKids _ = throwError "Invalid format for Root.Pages.Kids (not an array)"
updatePage :: RWLayer m => Page -> m ()
updatePage (Page {byteContents}) = sequence_ $ mapi updateByteContent byteContents
where
updateByteContent source byteContent =
edit .at source .at Clear $ \_ -> return byteContent
data Pages = Pages
newtype PageNumber = P Int
data Contents = Contents
instance (Alternative m, MonadFail m) => Box m Pages Layer (Map Int Page) where
r Pages layer = runReaderT (numbered <$> (mapM loadPage =<< pagesList)) layer
instance (Alternative m, MonadError String m) => Box m Pages Layer (Map Int Page) where
r Pages = runReaderT (numbered <$> pagesList >>= mapM loadPage)
where
numbered :: [Page] -> Map Int Page
numbered :: [Id Object] -> Map Int (Id Object)
numbered = Map.fromList . zip [1..]
w Pages pages = execStateT $ mapM_ updatePage pages
instance (Alternative m, MonadFail m) => Box m PageNumber Layer Page where
r (P i) layer
| i < 1 = fail "Pages start at 1"
| otherwise = runReaderT (drop (i - 1) <$> pagesList >>= firstPage) layer
w Pages pages = execStateT $ do
mapM_ updatePage pages
setPagesList $ Map.elems (source <$> pages)
where
firstPage [] = fail "Page is out of bounds"
firstPage (p:_) = loadPage p
setPagesList =
editPagesList . const . fmap (Reference . flip IndirectObjCoordinates 0)
w _ page = execStateT $ updatePage page
instance (Alternative m, MonadError String m) => Box m PageNumber Layer Page where
r (P p) layer
| p < 1 = throwError "Pages start at 1"
| otherwise = runReaderT (drop (p - 1) <$> pagesList >>= firstPage) layer
where
firstPage =
maybe (throwError "Page is out of bounds") loadPage . listToMaybe
w (P p) page = execStateT $ do
updatePage page
editPagesList $ setPage (Reference $ IndirectObjCoordinates (source page) 0)
where
setPage ref l = take (p-1) l ++ ref : drop p l
instance Monad m => Box m Contents Page (OrderedMap (Id Object) Content) where
r Contents = return . contents
w _ contents page = return $ page {contents}
r Contents = return . fmap Content.parse . byteContents
w Contents contents page = return $ page {byteContents}
where
byteContents = toStrict . render LF <$> contents
withFonts :: Monad m => (Layer -> FontCache m a) -> Layer -> m a
withFonts :: MonadError String m => (Layer -> FontCache (LayerReader m) a) -> Layer -> m a
withFonts f layer = runReaderT (evalStateT (f layer) Id.empty) layer
withResources :: Except m => (Page -> ReaderT FontSet m b) -> Page -> FontCache m b
withResources :: (Alternative m, MonadError String m) => (Page -> ReaderT FontSet m b) -> Page -> FontCache (LayerReader m) b
withResources f p =
loadResources (resources p) >>= lift . lift . runReaderT (f p)