From 5722dd1a0479c054417600a9a2fab39ec93234b1 Mon Sep 17 00:00:00 2001 From: Tissevert Date: Sun, 15 Mar 2020 15:13:00 +0100 Subject: [PATCH] Use IntMap for all Maps on Ids --- examples/getText.hs | 3 +- src/Data/Id.hs | 69 ++++++++++++++++++++++++++++++++++-- src/PDF/Body.hs | 18 +++++----- src/PDF/Content.hs | 12 +++---- src/PDF/Content/Text.hs | 9 +++-- src/PDF/Layer.hs | 23 ++++++------ src/PDF/Object.hs | 26 +++++++------- src/PDF/Object/Navigation.hs | 5 ++- 8 files changed, 113 insertions(+), 52 deletions(-) diff --git a/examples/getText.hs b/examples/getText.hs index 7ec1eca..1d96951 100644 --- a/examples/getText.hs +++ b/examples/getText.hs @@ -3,8 +3,7 @@ import Control.Monad ((>=>)) import Control.Monad.IO.Class (liftIO) import Control.Monad.Reader (runReaderT) import qualified Data.ByteString.Char8 as BS (readFile) -import Data.Id (Id(..)) -import Data.Map (mapWithKey) +import Data.Id (Id(..), mapWithKey) import qualified Data.Map as Map (toList) import Data.OrderedMap (mapi) import qualified Data.Text as Text (unpack) diff --git a/src/Data/Id.hs b/src/Data/Id.hs index f7e4683..22d7d09 100644 --- a/src/Data/Id.hs +++ b/src/Data/Id.hs @@ -1,12 +1,77 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} module Data.Id ( Id(..) + , IdMap , Indexed + , at + , delete + , empty + , fromList + , insert + , keysSet + , lookup + , mapWithKey + , member + , minViewWithKey + , singleton + , size + , union ) where -import Data.Map (Map) +import Data.IntMap (IntMap, (!)) +import qualified Data.IntMap as IntMap ( + delete, empty, fromList, keysSet, insert, lookup, mapWithKey, member + , minViewWithKey, size, union + ) +import Data.IntSet (IntSet) +import Prelude hiding (lookup) newtype Id a = Id { getId :: Int } deriving (Eq, Enum, Ord, Show) -type Indexed a = Map (Id a) a +newtype IdMap a b = IdMap { + intMap :: IntMap b + } deriving (Show, Functor, Semigroup, Monoid, Foldable) +type Indexed a = IdMap a a + +at :: IdMap a b -> Id a -> b +at (IdMap idMap) = (idMap !) . getId + +lookup :: Id a -> IdMap a b -> Maybe b +lookup (Id a) (IdMap idMap) = IntMap.lookup a idMap + +size :: IdMap a b -> Int +size = IntMap.size . intMap + +member :: Id a -> IdMap a b -> Bool +member (Id a) (IdMap idMap) = IntMap.member a idMap + +empty :: IdMap a b +empty = IdMap {intMap = IntMap.empty} + +singleton :: Id a -> b -> IdMap a b +singleton a b = fromList [(a, b)] + +insert :: Id a -> b -> IdMap a b -> IdMap a b +insert (Id a) b (IdMap idMap) = IdMap {intMap = IntMap.insert a b idMap} + +delete :: Id a -> IdMap a b -> IdMap a b +delete (Id a) (IdMap idMap) = IdMap {intMap = IntMap.delete a idMap} + +minViewWithKey :: IdMap a b -> Maybe ((Id a, b), IdMap a b) +minViewWithKey = fmap wrap . IntMap.minViewWithKey . intMap + where + wrap ((key, b), idMap) = ((Id key, b), IdMap idMap) + +union :: IdMap a b -> IdMap a b -> IdMap a b +union (IdMap intMap1) (IdMap intMap2) = + IdMap {intMap = IntMap.union intMap1 intMap2} + +mapWithKey :: (Id a -> b -> c) -> IdMap a b -> IdMap a c +mapWithKey f (IdMap idMap) = IdMap {intMap = IntMap.mapWithKey (f . Id) idMap} + +fromList :: [(Id a, b)] -> IdMap a b +fromList = IdMap . IntMap.fromList . fmap (\(key, b) -> (getId key, b)) + +keysSet :: IdMap a b -> IntSet +keysSet = IntMap.keysSet . intMap diff --git a/src/PDF/Body.hs b/src/PDF/Body.hs index 5741a24..8560393 100644 --- a/src/PDF/Body.hs +++ b/src/PDF/Body.hs @@ -9,9 +9,9 @@ import Control.Monad.State (get, gets, modify) import Data.Attoparsec.ByteString.Char8 (option) import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS (cons, drop, unpack) -import Data.Id (Id(..)) -import Data.Map ((!)) -import qualified Data.Map as Map (empty, insert, lookup) +import Data.Id (Id(..), at, empty) +import qualified Data.Id as Id (insert, lookup) +import qualified Data.Map as Map (lookup) import qualified PDF.EOL as EOL (charset, parser) import PDF.Layer (Layer(..)) import PDF.Object ( @@ -36,7 +36,7 @@ modifyFlow f = modify $ \state -> state {flow = f $ flow state} addObject :: (Id Object) -> Object -> SParser () addObject objectId newObject = modifyFlow $ \flow -> flow { - tmpObjects = Map.insert objectId newObject $ tmpObjects flow + tmpObjects = Id.insert objectId newObject $ tmpObjects flow } pushOccurrence :: Occurrence -> SParser () @@ -52,7 +52,7 @@ comment = BS.unpack <$> (option "" afterPercent <* EOL.parser) lookupOffset :: (Id Object) -> SParser Offset lookupOffset objectId = do table <- gets xreferences - case Map.lookup objectId table >>= entryOffset of + case Id.lookup objectId table >>= entryOffset of Nothing -> fail $ "obj " ++ show objectId ++ " is referenced but missing in XRef table" Just offset -> return offset @@ -65,7 +65,7 @@ loadNumber objectId = do offset <- getOffset <$> lookupOffset objectId objectStart <- BS.drop offset <$> gets input indirectObjCoordinates `on` (objectStart :: ByteString) >> return () - objectValue <- (!objectId) . tmpObjects <$> gets flow + objectValue <- (`at` objectId) . tmpObjects <$> gets flow case objectValue of Direct (NumberObject (Number n)) -> return n obj -> fail $ "Expected obj@" ++ show offset ++ " (" ++ show obj ++ ") to be a Number" @@ -78,7 +78,7 @@ getSize Nothing = fail "Missing '/Length' key on stream" getSize (Just (NumberObject (Number size))) = return size getSize (Just (Reference (IndirectObjCoordinates {objectId}))) = do Flow {tmpObjects} <- gets flow - case Map.lookup objectId tmpObjects of + case Id.lookup objectId tmpObjects of Nothing -> loadNumber objectId Just (Direct (NumberObject (Number size))) -> return size Just v -> fail $ @@ -113,7 +113,7 @@ populate :: ByteString -> InputStructure -> Layer populate input structure = let bodyInput = BS.drop (startOffset structure) input in case evalParser recurseOnOccurrences initialState bodyInput of - Left _ -> Layer {occurrences = [], objects = Map.empty, docStructure} + Left _ -> Layer {occurrences = [], objects = empty, docStructure} Right finalState -> let Flow {occurrencesStack, tmpObjects} = flow finalState in Layer { @@ -124,7 +124,7 @@ populate input structure = xreferences = xRef docStructure initialState = UserState { input, xreferences, flow = Flow { - occurrencesStack = [], tmpObjects = Map.empty + occurrencesStack = [], tmpObjects = empty } } diff --git a/src/PDF/Content.hs b/src/PDF/Content.hs index ab425d2..c87bf5f 100644 --- a/src/PDF/Content.hs +++ b/src/PDF/Content.hs @@ -19,9 +19,7 @@ import Control.Monad.Reader (asks, runReader) import Control.Monad.State (evalStateT, gets, modify) import Data.Attoparsec.ByteString.Char8 (sepBy) import Data.ByteString (ByteString) -import Data.Id (Id(..), Indexed) -import Data.Map ((!)) -import qualified Data.Map as Map (empty, insert, size) +import Data.Id (Id(..), Indexed, at, empty, insert, size) import PDF.Box (Box(..)) import PDF.Content.Operator (Instruction, operator) import PDF.Object (blank, directObject) @@ -53,13 +51,13 @@ instance Monad m => Box m Instructions Content (Indexed Instruction) where register :: Instruction -> InstructionParser (Id Instruction) register newInstruction = do - newInstructionID <- gets (Id . Map.size) - modify (Map.insert newInstructionID newInstruction) + newInstructionID <- gets (Id . size) + modify (insert newInstructionID newInstruction) return newInstructionID parse :: MonadFail m => ByteString -> m Content parse = - either fail (return . uncurry Content) . runParser contentUnits Map.empty + either fail (return . uncurry Content) . runParser contentUnits empty where contentUnits = contentUnit `sepBy` blank @@ -98,4 +96,4 @@ instance Output Content where return (line "BT" `mappend` inside `mappend` line "ET") outputGCU (GraphicInstruction gi) = outputIId gi outputGCU (ContentUnit cu) = outputCU cu - outputIId instructionId = asks (output . (! instructionId)) + outputIId instructionId = asks (output . (`at` instructionId)) diff --git a/src/PDF/Content/Text.hs b/src/PDF/Content/Text.hs index ff4f0b9..1f05ada 100644 --- a/src/PDF/Content/Text.hs +++ b/src/PDF/Content/Text.hs @@ -15,9 +15,8 @@ import Control.Monad (foldM) import Control.Monad.Fail (MonadFail(..)) import Control.Monad.Reader (MonadReader(..), ReaderT, asks, runReaderT) import Control.Monad.State (MonadState(..), StateT, evalStateT) -import Data.Id (Id(..), Indexed) +import Data.Id (Id(..), Indexed, at, empty, singleton) import Data.Map ((!)) -import qualified Data.Map as Map (empty, singleton) import Data.Text (Text, breakOn) import qualified Data.Text as Text (drop) import PDF.Box (Box(..)) @@ -64,15 +63,15 @@ renderContentUnit (TextContext instructionIds) = evalStateT (mconcat <$> mapM renderInstructionId instructionIds) emptyFont renderGraphicContextUnit :: MonadFail m => GraphicContextUnit -> TextContent m (Indexed Text) -renderGraphicContextUnit (GraphicInstruction _) = return Map.empty +renderGraphicContextUnit (GraphicInstruction _) = return empty renderGraphicContextUnit (ContentUnit contentUnit) = renderContentUnit contentUnit renderInstructionId :: MonadFail m => Id Instruction -> FontContext m (Indexed Text) renderInstructionId instructionId@(Id n) = toMap <$> - (asks ((! instructionId) . indexedInstructions) >>= renderInstruction) + (asks ((`at` instructionId) . indexedInstructions) >>= renderInstruction) where - toMap = maybe Map.empty (Map.singleton (Id n)) + toMap = maybe empty (singleton (Id n)) renderInstruction :: MonadFail m => Instruction -> FontContext m (Maybe Text) renderInstruction (Text Tf, [NameObject fontName, _]) = diff --git a/src/PDF/Layer.hs b/src/PDF/Layer.hs index f178511..8daa3a2 100644 --- a/src/PDF/Layer.hs +++ b/src/PDF/Layer.hs @@ -7,10 +7,11 @@ module PDF.Layer ( , unify ) where -import Data.Id (Id(..), Indexed) -import Data.Map (Map, (!), mapWithKey, member) -import qualified Data.Map as Map (empty, keysSet, member, union) -import qualified Data.Set as Set (delete, toList) +import Data.Id (Id(..), Indexed, keysSet, mapWithKey, member) +import qualified Data.Id as Id (empty, union) +import Data.Map (Map, (!)) +import qualified Data.Map as Map (empty, union) +import qualified Data.IntSet as IntSet (delete, toList) import PDF.Box (Box(..)) import PDF.Object ( IndirectObjCoordinates(..), Object, Occurrence(..) @@ -67,18 +68,18 @@ instance Monad m => Box m Objects Layer (Indexed Object) where where filterOccurrences c@(Comment _) (occ, newObjIds) = (c:occ, newObjIds) filterOccurrences i@(Indirect (IndirectObjCoordinates {objectId})) (occ, newObjIds) - | Map.member objectId newObjects = (i:occ, Set.delete objectId newObjIds) + | member objectId newObjects = (i:occ, IntSet.delete (getId objectId) newObjIds) | otherwise = (occ, newObjIds) (keptOccurrences, newObjectIds) = - foldr filterOccurrences ([], Map.keysSet newObjects) occurrences + foldr filterOccurrences ([], keysSet newObjects) occurrences makeOccurrence objectId = Indirect (IndirectObjCoordinates {objectId, versionNumber = 0}) - newOccurrences = makeOccurrence <$> Set.toList newObjectIds + newOccurrences = (makeOccurrence . Id) <$> IntSet.toList newObjectIds emptyLayer :: Layer emptyLayer = Layer { - docStructure = Structure {xRef = Map.empty, trailer = Map.empty} - , objects = Map.empty + docStructure = Structure {xRef = Id.empty, trailer = Map.empty} + , objects = Id.empty , occurrences = [] } @@ -86,7 +87,7 @@ unify :: [Layer] -> Layer unify = foldl complete emptyLayer where complete tmpLayer older = - let mergedObjects = Map.union (objects tmpLayer) (objects older) in + let mergedObjects = Id.union (objects tmpLayer) (objects older) in Layer { docStructure = unifyDocStructure (docStructure tmpLayer) (docStructure older) @@ -97,7 +98,7 @@ unify = foldl complete emptyLayer unifyDocStructure :: Structure -> Structure -> Structure unifyDocStructure new old = Structure { - xRef = Map.union (xRef new) (xRef old) + xRef = Id.union (xRef new) (xRef old) , trailer = Map.union (trailer new) (trailer old) } diff --git a/src/PDF/Object.hs b/src/PDF/Object.hs index 6c3efdc..4aef0ee 100644 --- a/src/PDF/Object.hs +++ b/src/PDF/Object.hs @@ -40,11 +40,12 @@ import qualified Data.ByteString.Char8 as Char8 ( cons, length, pack, singleton, snoc, unpack ) import Data.ByteString.Char8.Util (B16Int(..), b16ToBytes, unescape) -import Data.Id (Id(..), Indexed) -import Data.Map (Map, (!)) -import qualified Data.Map as Map ( - delete, empty, fromList, lookup, minViewWithKey, toList, union +import Data.Id (Id(..), IdMap, Indexed) +import qualified Data.Id as Id ( + at, delete, empty, fromList, lookup, minViewWithKey, union ) +import Data.Map (Map) +import qualified Data.Map as Map (fromList, toList) import qualified Data.Set as Set (fromList, member) import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.Output as Output (line, string) @@ -265,7 +266,7 @@ outputOccurrence _ (Comment c) = Output.line c outputOccurrence objects (Indirect (IndirectObjCoordinates {objectId, versionNumber})) = saveOffset (ObjectId $ getId objectId) >> mconcat [ Output.line (printf "%d %d obj" (getId objectId) versionNumber) - , output (objects ! objectId), newLine + , output (objects `Id.at` objectId), newLine , Output.line "endobj" ] @@ -325,31 +326,30 @@ xRefSubSection = do entries <- count entriesNumber entry return $ XRefSubSection {firstObjectId = Id firstId, entries} -type XRefSection = Map (Id Object) XRefEntry +type XRefSection = IdMap Object XRefEntry instance Output XRefSection where output = output . sections where sections tmp = - case Map.minViewWithKey tmp of + case Id.minViewWithKey tmp of Nothing -> [] Just ((objectId@(Id value), firstEntry), rest) -> let (subSection, sndRest) = section objectId [firstEntry] (value + 1) rest in subSection : sections sndRest section firstObjectId stack nextValue tmp = - let nextId = Id nextValue in - case Map.lookup nextId tmp of + let nextId = (Id nextValue :: Id Object) in + case Id.lookup nextId tmp of Nothing -> (XRefSubSection {firstObjectId, entries = reverse stack}, tmp) Just nextEntry -> - section firstObjectId (nextEntry:stack) (nextValue + 1) (Map.delete nextId tmp) - + section firstObjectId (nextEntry:stack) (nextValue + 1) (Id.delete nextId tmp) xRefSection :: Parser u XRefSection -xRefSection = foldr addSubsection Map.empty <$> +xRefSection = foldr addSubsection Id.empty <$> (line "xref" *> xRefSubSection `sepBy` many EOL.parser) where addSubsection (XRefSubSection {firstObjectId, entries}) = - Map.union . Map.fromList $ zip ([firstObjectId..]) entries + Id.union . Id.fromList $ zip ([firstObjectId..]) entries -- -- Structure diff --git a/src/PDF/Object/Navigation.hs b/src/PDF/Object/Navigation.hs index 9bff17b..6d8a4b3 100644 --- a/src/PDF/Object/Navigation.hs +++ b/src/PDF/Object/Navigation.hs @@ -23,8 +23,7 @@ import Control.Monad.Reader (MonadReader(..)) import Control.Monad.Fail (MonadFail(..)) import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as Lazy (ByteString, fromStrict, toStrict) -import Data.Id (Id) -import Data.Map ((!)) +import Data.Id (Id, at) import qualified Data.Map as Map (lookup) import PDF.Box (Box(..)) import PDF.Layer (Layer(..)) @@ -58,7 +57,7 @@ getKey key object = getDictionary object >>= catchMaybe . Map.lookup (Name key) objectById :: PDFContent m => (Id Object) -> m Object objectById objectId = do layer <- ask - return (objects layer ! objectId) + return (objects layer `at` objectId) (./) :: PDFContent m => m Object -> Component -> m Object (./) object key = (object >>= getKey key >>= castObject)