Use IntMap for all Maps on Ids

This commit is contained in:
Tissevert 2020-03-15 15:13:00 +01:00
parent f31e9eb38b
commit 5722dd1a04
8 changed files with 113 additions and 52 deletions

View file

@ -3,8 +3,7 @@ import Control.Monad ((>=>))
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Monad.Reader (runReaderT) import Control.Monad.Reader (runReaderT)
import qualified Data.ByteString.Char8 as BS (readFile) import qualified Data.ByteString.Char8 as BS (readFile)
import Data.Id (Id(..)) import Data.Id (Id(..), mapWithKey)
import Data.Map (mapWithKey)
import qualified Data.Map as Map (toList) import qualified Data.Map as Map (toList)
import Data.OrderedMap (mapi) import Data.OrderedMap (mapi)
import qualified Data.Text as Text (unpack) import qualified Data.Text as Text (unpack)

View file

@ -1,12 +1,77 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Id ( module Data.Id (
Id(..) Id(..)
, IdMap
, Indexed , Indexed
, at
, delete
, empty
, fromList
, insert
, keysSet
, lookup
, mapWithKey
, member
, minViewWithKey
, singleton
, size
, union
) where ) where
import Data.Map (Map) import Data.IntMap (IntMap, (!))
import qualified Data.IntMap as IntMap (
delete, empty, fromList, keysSet, insert, lookup, mapWithKey, member
, minViewWithKey, size, union
)
import Data.IntSet (IntSet)
import Prelude hiding (lookup)
newtype Id a = Id { newtype Id a = Id {
getId :: Int getId :: Int
} deriving (Eq, Enum, Ord, Show) } deriving (Eq, Enum, Ord, Show)
type Indexed a = Map (Id a) a newtype IdMap a b = IdMap {
intMap :: IntMap b
} deriving (Show, Functor, Semigroup, Monoid, Foldable)
type Indexed a = IdMap a a
at :: IdMap a b -> Id a -> b
at (IdMap idMap) = (idMap !) . getId
lookup :: Id a -> IdMap a b -> Maybe b
lookup (Id a) (IdMap idMap) = IntMap.lookup a idMap
size :: IdMap a b -> Int
size = IntMap.size . intMap
member :: Id a -> IdMap a b -> Bool
member (Id a) (IdMap idMap) = IntMap.member a idMap
empty :: IdMap a b
empty = IdMap {intMap = IntMap.empty}
singleton :: Id a -> b -> IdMap a b
singleton a b = fromList [(a, b)]
insert :: Id a -> b -> IdMap a b -> IdMap a b
insert (Id a) b (IdMap idMap) = IdMap {intMap = IntMap.insert a b idMap}
delete :: Id a -> IdMap a b -> IdMap a b
delete (Id a) (IdMap idMap) = IdMap {intMap = IntMap.delete a idMap}
minViewWithKey :: IdMap a b -> Maybe ((Id a, b), IdMap a b)
minViewWithKey = fmap wrap . IntMap.minViewWithKey . intMap
where
wrap ((key, b), idMap) = ((Id key, b), IdMap idMap)
union :: IdMap a b -> IdMap a b -> IdMap a b
union (IdMap intMap1) (IdMap intMap2) =
IdMap {intMap = IntMap.union intMap1 intMap2}
mapWithKey :: (Id a -> b -> c) -> IdMap a b -> IdMap a c
mapWithKey f (IdMap idMap) = IdMap {intMap = IntMap.mapWithKey (f . Id) idMap}
fromList :: [(Id a, b)] -> IdMap a b
fromList = IdMap . IntMap.fromList . fmap (\(key, b) -> (getId key, b))
keysSet :: IdMap a b -> IntSet
keysSet = IntMap.keysSet . intMap

View file

@ -9,9 +9,9 @@ import Control.Monad.State (get, gets, modify)
import Data.Attoparsec.ByteString.Char8 (option) import Data.Attoparsec.ByteString.Char8 (option)
import Data.ByteString.Char8 (ByteString) import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS (cons, drop, unpack) import qualified Data.ByteString.Char8 as BS (cons, drop, unpack)
import Data.Id (Id(..)) import Data.Id (Id(..), at, empty)
import Data.Map ((!)) import qualified Data.Id as Id (insert, lookup)
import qualified Data.Map as Map (empty, insert, lookup) import qualified Data.Map as Map (lookup)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import PDF.Layer (Layer(..)) import PDF.Layer (Layer(..))
import PDF.Object ( import PDF.Object (
@ -36,7 +36,7 @@ modifyFlow f = modify $ \state -> state {flow = f $ flow state}
addObject :: (Id Object) -> Object -> SParser () addObject :: (Id Object) -> Object -> SParser ()
addObject objectId newObject = modifyFlow $ \flow -> flow { addObject objectId newObject = modifyFlow $ \flow -> flow {
tmpObjects = Map.insert objectId newObject $ tmpObjects flow tmpObjects = Id.insert objectId newObject $ tmpObjects flow
} }
pushOccurrence :: Occurrence -> SParser () pushOccurrence :: Occurrence -> SParser ()
@ -52,7 +52,7 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser)
lookupOffset :: (Id Object) -> SParser Offset lookupOffset :: (Id Object) -> SParser Offset
lookupOffset objectId = do lookupOffset objectId = do
table <- gets xreferences table <- gets xreferences
case Map.lookup objectId table >>= entryOffset of case Id.lookup objectId table >>= entryOffset of
Nothing -> fail $ Nothing -> fail $
"obj " ++ show objectId ++ " is referenced but missing in XRef table" "obj " ++ show objectId ++ " is referenced but missing in XRef table"
Just offset -> return offset Just offset -> return offset
@ -65,7 +65,7 @@ loadNumber objectId = do
offset <- getOffset <$> lookupOffset objectId offset <- getOffset <$> lookupOffset objectId
objectStart <- BS.drop offset <$> gets input objectStart <- BS.drop offset <$> gets input
indirectObjCoordinates `on` (objectStart :: ByteString) >> return () indirectObjCoordinates `on` (objectStart :: ByteString) >> return ()
objectValue <- (!objectId) . tmpObjects <$> gets flow objectValue <- (`at` objectId) . tmpObjects <$> gets flow
case objectValue of case objectValue of
Direct (NumberObject (Number n)) -> return n Direct (NumberObject (Number n)) -> return n
obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number"
@ -78,7 +78,7 @@ getSize Nothing = fail "Missing '/Length' key on stream"
getSize (Just (NumberObject (Number size))) = return size getSize (Just (NumberObject (Number size))) = return size
getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do
Flow {tmpObjects} <- gets flow Flow {tmpObjects} <- gets flow
case Map.lookup objectId tmpObjects of case Id.lookup objectId tmpObjects of
Nothing -> loadNumber objectId Nothing -> loadNumber objectId
Just (Direct (NumberObject (Number size))) -> return size Just (Direct (NumberObject (Number size))) -> return size
Just v -> fail $ Just v -> fail $
@ -113,7 +113,7 @@ populate :: ByteString -> InputStructure -> Layer
populate input structure = populate input structure =
let bodyInput = BS.drop (startOffset structure) input in let bodyInput = BS.drop (startOffset structure) input in
case evalParser recurseOnOccurrences initialState bodyInput of case evalParser recurseOnOccurrences initialState bodyInput of
Left _ -> Layer {occurrences = [], objects = Map.empty, docStructure} Left _ -> Layer {occurrences = [], objects = empty, docStructure}
Right finalState -> Right finalState ->
let Flow {occurrencesStack, tmpObjects} = flow finalState in let Flow {occurrencesStack, tmpObjects} = flow finalState in
Layer { Layer {
@ -124,7 +124,7 @@ populate input structure =
xreferences = xRef docStructure xreferences = xRef docStructure
initialState = UserState { initialState = UserState {
input, xreferences, flow = Flow { input, xreferences, flow = Flow {
occurrencesStack = [], tmpObjects = Map.empty occurrencesStack = [], tmpObjects = empty
} }
} }

View file

@ -19,9 +19,7 @@ import Control.Monad.Reader (asks, runReader)
import Control.Monad.State (evalStateT, gets, modify) import Control.Monad.State (evalStateT, gets, modify)
import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.Attoparsec.ByteString.Char8 (sepBy)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Id (Id(..), Indexed) import Data.Id (Id(..), Indexed, at, empty, insert, size)
import Data.Map ((!))
import qualified Data.Map as Map (empty, insert, size)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Content.Operator (Instruction, operator) import PDF.Content.Operator (Instruction, operator)
import PDF.Object (blank, directObject) import PDF.Object (blank, directObject)
@ -53,13 +51,13 @@ instance Monad m => Box m Instructions Content (Indexed Instruction) where
register :: Instruction -> InstructionParser (Id Instruction) register :: Instruction -> InstructionParser (Id Instruction)
register newInstruction = do register newInstruction = do
newInstructionID <- gets (Id . Map.size) newInstructionID <- gets (Id . size)
modify (Map.insert newInstructionID newInstruction) modify (insert newInstructionID newInstruction)
return newInstructionID return newInstructionID
parse :: MonadFail m => ByteString -> m Content parse :: MonadFail m => ByteString -> m Content
parse = parse =
either fail (return . uncurry Content) . runParser contentUnits Map.empty either fail (return . uncurry Content) . runParser contentUnits empty
where where
contentUnits = contentUnit `sepBy` blank contentUnits = contentUnit `sepBy` blank
@ -98,4 +96,4 @@ instance Output Content where
return (line "BT" `mappend` inside `mappend` line "ET") return (line "BT" `mappend` inside `mappend` line "ET")
outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (GraphicInstruction gi) = outputIId gi
outputGCU (ContentUnit cu) = outputCU cu outputGCU (ContentUnit cu) = outputCU cu
outputIId instructionId = asks (output . (! instructionId)) outputIId instructionId = asks (output . (`at` instructionId))

View file

@ -15,9 +15,8 @@ import Control.Monad (foldM)
import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Fail (MonadFail(..))
import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT) import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT)
import Control.Monad.State (MonadState(..), StateT, evalStateT) import Control.Monad.State (MonadState(..), StateT, evalStateT)
import Data.Id (Id(..), Indexed) import Data.Id (Id(..), Indexed, at, empty, singleton)
import Data.Map ((!)) import Data.Map ((!))
import qualified Data.Map as Map (empty, singleton)
import Data.Text (Text, breakOn) import Data.Text (Text, breakOn)
import qualified Data.Text as Text (drop) import qualified Data.Text as Text (drop)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
@ -64,15 +63,15 @@ renderContentUnit (TextContext instructionIds) =
evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont
renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text) renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text)
renderGraphicContextUnit (GraphicInstruction _) = return Map.empty renderGraphicContextUnit (GraphicInstruction _) = return empty
renderGraphicContextUnit (ContentUnit contentUnit) = renderGraphicContextUnit (ContentUnit contentUnit) =
renderContentUnit contentUnit renderContentUnit contentUnit
renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text) renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text)
renderInstructionId instructionId@(Id n) = toMap <$> renderInstructionId instructionId@(Id n) = toMap <$>
(asks ((! instructionId) . indexedInstructions) >>= renderInstruction) (asks ((`at` instructionId) . indexedInstructions) >>= renderInstruction)
where where
toMap = maybe Map.empty (Map.singleton (Id n)) toMap = maybe empty (singleton (Id n))
renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text) renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text)
renderInstruction (Text Tf, [NameObject fontName, _]) = renderInstruction (Text Tf, [NameObject fontName, _]) =

View file

@ -7,10 +7,11 @@ module PDF.Layer (
, unify , unify
) where ) where
import Data.Id (Id(..), Indexed) import Data.Id (Id(..), Indexed, keysSet, mapWithKey, member)
import Data.Map (Map, (!), mapWithKey, member) import qualified Data.Id as Id (empty, union)
import qualified Data.Map as Map (empty, keysSet, member, union) import Data.Map (Map, (!))
import qualified Data.Set as Set (delete, toList) import qualified Data.Map as Map (empty, union)
import qualified Data.IntSet as IntSet (delete, toList)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Object ( import PDF.Object (
IndirectObjCoordinates(..), Object, Occurrence(..) IndirectObjCoordinates(..), Object, Occurrence(..)
@ -67,18 +68,18 @@ instance Monad m => Box m Objects Layer (Indexed Object) where
where where
filterOccurrences c@(Comment _) (occ, newObjIds) = (c:occ, newObjIds) filterOccurrences c@(Comment _) (occ, newObjIds) = (c:occ, newObjIds)
filterOccurrences i@(Indirect (IndirectObjCoordinates {objectId})) (occ, newObjIds) filterOccurrences i@(Indirect (IndirectObjCoordinates {objectId})) (occ, newObjIds)
| Map.member objectId newObjects = (i:occ, Set.delete objectId newObjIds) | member objectId newObjects = (i:occ, IntSet.delete (getId objectId) newObjIds)
| otherwise = (occ, newObjIds) | otherwise = (occ, newObjIds)
(keptOccurrences, newObjectIds) = (keptOccurrences, newObjectIds) =
foldr filterOccurrences ([], Map.keysSet newObjects) occurrences foldr filterOccurrences ([], keysSet newObjects) occurrences
makeOccurrence objectId = makeOccurrence objectId =
Indirect (IndirectObjCoordinates {objectId, versionNumber = 0}) Indirect (IndirectObjCoordinates {objectId, versionNumber = 0})
newOccurrences = makeOccurrence <$> Set.toList newObjectIds newOccurrences = (makeOccurrence . Id) <$> IntSet.toList newObjectIds
emptyLayer :: Layer emptyLayer :: Layer
emptyLayer = Layer { emptyLayer = Layer {
docStructure = Structure {xRef = Map.empty, trailer = Map.empty} docStructure = Structure {xRef = Id.empty, trailer = Map.empty}
, objects = Map.empty , objects = Id.empty
, occurrences = [] , occurrences = []
} }
@ -86,7 +87,7 @@ unify :: [Layer] -> Layer
unify = foldl complete emptyLayer unify = foldl complete emptyLayer
where where
complete tmpLayer older = complete tmpLayer older =
let mergedObjects = Map.union (objects tmpLayer) (objects older) in let mergedObjects = Id.union (objects tmpLayer) (objects older) in
Layer { Layer {
docStructure = docStructure =
unifyDocStructure (docStructure tmpLayer) (docStructure older) unifyDocStructure (docStructure tmpLayer) (docStructure older)
@ -97,7 +98,7 @@ unify = foldl complete emptyLayer
unifyDocStructure :: Structure -> Structure -> Structure unifyDocStructure :: Structure -> Structure -> Structure
unifyDocStructure new old = Structure { unifyDocStructure new old = Structure {
xRef = Map.union (xRef new) (xRef old) xRef = Id.union (xRef new) (xRef old)
, trailer = Map.union (trailer new) (trailer old) , trailer = Map.union (trailer new) (trailer old)
} }

View file

@ -40,11 +40,12 @@ import qualified Data.ByteString.Char8 as Char8 (
cons, length, pack, singleton, snoc, unpack cons, length, pack, singleton, snoc, unpack
) )
import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape)
import Data.Id (Id(..), Indexed) import Data.Id (Id(..), IdMap, Indexed)
import Data.Map (Map, (!)) import qualified Data.Id as Id (
import qualified Data.Map as Map ( at, delete, empty, fromList, lookup, minViewWithKey, union
delete, empty, fromList, lookup, minViewWithKey, toList, union
) )
import Data.Map (Map)
import qualified Data.Map as Map (fromList, toList)
import qualified Data.Set as Set (fromList, member) import qualified Data.Set as Set (fromList, member)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import qualified PDF.Output as Output (line, string) import qualified PDF.Output as Output (line, string)
@ -265,7 +266,7 @@ outputOccurrence _ (Comment c) = Output.line c
outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) =
saveOffset (ObjectId $ getId objectId) >> mconcat [ saveOffset (ObjectId $ getId objectId) >> mconcat [
Output.line (printf "%d %d obj" (getId objectId) versionNumber) Output.line (printf "%d %d obj" (getId objectId) versionNumber)
, output (objects ! objectId), newLine , output (objects `Id.at` objectId), newLine
, Output.line "endobj" , Output.line "endobj"
] ]
@ -325,31 +326,30 @@ xRefSubSection = do
entries <- count entriesNumber entry entries <- count entriesNumber entry
return $ XRefSubSection {firstObjectId = Id firstId, entries} return $ XRefSubSection {firstObjectId = Id firstId, entries}
type XRefSection = Map (Id Object) XRefEntry type XRefSection = IdMap Object XRefEntry
instance Output XRefSection where instance Output XRefSection where
output = output . sections output = output . sections
where where
sections tmp = sections tmp =
case Map.minViewWithKey tmp of case Id.minViewWithKey tmp of
Nothing -> [] Nothing -> []
Just ((objectId@(Id value), firstEntry), rest) -> Just ((objectId@(Id value), firstEntry), rest) ->
let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in
subSection : sections sndRest subSection : sections sndRest
section firstObjectId stack nextValue tmp = section firstObjectId stack nextValue tmp =
let nextId = Id nextValue in let nextId = (Id nextValue :: Id Object) in
case Map.lookup nextId tmp of case Id.lookup nextId tmp of
Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp)
Just nextEntry -> Just nextEntry ->
section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp) section firstObjectId (nextEntry:stack) (nextValue + 1) (Id.delete nextId tmp)
xRefSection :: Parser u XRefSection xRefSection :: Parser u XRefSection
xRefSection = foldr addSubsection Map.empty <$> xRefSection = foldr addSubsection Id.empty <$>
(line "xref" *> xRefSubSection `sepBy` many EOL.parser) (line "xref" *> xRefSubSection `sepBy` many EOL.parser)
where where
addSubsection (XRefSubSection {firstObjectId, entries}) = addSubsection (XRefSubSection {firstObjectId, entries}) =
Map.union . Map.fromList $ zip ([firstObjectId..]) entries Id.union . Id.fromList $ zip ([firstObjectId..]) entries
-- --
-- Structure -- Structure

View file

@ -23,8 +23,7 @@ import Control.Monad.Reader (MonadReader(..))
import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Fail (MonadFail(..))
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict) import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict)
import Data.Id (Id) import Data.Id (Id, at)
import Data.Map ((!))
import qualified Data.Map as Map (lookup) import qualified Data.Map as Map (lookup)
import PDF.Box (Box(..)) import PDF.Box (Box(..))
import PDF.Layer (Layer(..)) import PDF.Layer (Layer(..))
@ -58,7 +57,7 @@ getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key)
objectById :: PDFContent m => (Id Object) -> m Object objectById :: PDFContent m => (Id Object) -> m Object
objectById objectId = do objectById objectId = do
layer <- ask layer <- ask
return (objects layer ! objectId) return (objects layer `at` objectId)
(./) :: PDFContent m => m Object -> Component -> m Object (./) :: PDFContent m => m Object -> Component -> m Object
(./) object key = (object >>= getKey key >>= castObject) (./) object key = (object >>= getKey key >>= castObject)