{-# LANGUAGE NamedFieldPuns #-} module PDF.CMap ( CMap , CMappers , cMap , emptyCMap ) where import Control.Applicative ((<|>), many) import Control.Monad.State (modify) import Data.Attoparsec.ByteString.Char8 (count) import Data.ByteString (ByteString) import qualified Data.ByteString as BS (length) import Data.ByteString.Char8.Util ( decodeHex, fromInt, toInt, utf16BEToutf8 ) import Data.Map (Map, union) import qualified Data.Map as Map (adjust, empty, fromList, insertWith) import qualified PDF.EOL as EOL (charset, parser) import PDF.Object ( DirectObject(..), Name, StringObject(..) , blank, directObject, integer, line, stringObject ) import PDF.Parser (MonadParser, Parser, runParser, takeAll) type CMappers = Map Name CMap type Mapping = Map ByteString ByteString data CRange = CRange { fromSequence :: ByteString , toSequence :: ByteString , mapping :: Mapping } type RangeSize = Int type CMap = Map RangeSize [CRange] emptyCMap :: CMap emptyCMap = Map.empty cMap :: ByteString -> Either String CMap cMap = fmap snd <$> runParser (many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine)) emptyCMap where ignoredLine = takeAll (not . (`elem` EOL.charset)) *> EOL.parser *> return () codeRanges :: Parser CMap () codeRanges = do size <- integer <* line "begincodespacerange" count size (createMapping <$> codeRange) *> return () line "endcodespacerange" where codeRange = (,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser createMapping :: (StringObject, StringObject) -> Parser CMap () createMapping (Hexadecimal from, Hexadecimal to) = modify $ Map.insertWith (++) size [CRange {fromSequence, toSequence, mapping}] where fromSequence = decodeHex from size = BS.length fromSequence toSequence = decodeHex to mapping = Map.empty createMapping _ = return () cMapRange :: Parser CMap () cMapRange = do size <- integer <* line "beginbfrange" mapM_ saveMapping =<< count size rangeMapping <* line "endbfrange" where rangeMapping = (,,) <$> (stringObject <* blank) <*> (stringObject <* blank) <*> directObject <* EOL.parser >>= mapFromTo saveMapping :: [(ByteString, ByteString)] -> Parser CMap () saveMapping [] = return () saveMapping assoc@((code, _):_) = modify $ Map.adjust insertCRange mappingSize where newMapping = Map.fromList assoc mappingSize = BS.length code matchingRange (CRange {fromSequence, toSequence}) = fromSequence <= code && code <= toSequence appendMapping cRange = cRange {mapping = mapping cRange `union` newMapping} insertCRange = fmap (\cRange -> if matchingRange cRange then appendMapping cRange else cRange ) cMapChar :: Parser CMap () cMapChar = do size <- integer <* line "beginbfchar" saveMapping =<< count size charMapping <* line "endbfchar" where charMapping = (,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser >>= pairMapping between :: ByteString -> ByteString -> [ByteString] between from to = fromInt <$> [toInt from .. toInt to] startFrom :: ByteString -> [ByteString] startFrom from = fromInt <$> [toInt from .. ] mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, ByteString)] mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) = return $ zip (between fromBS toBS) (startFrom dstBS) where (fromBS, toBS, dstBS) = (decodeHex from, decodeHex to, decodeHex dstFrom) mapFromTo (Hexadecimal from, Hexadecimal to, Array dstPoints) = zip (between fromBS toBS) <$> (mapM dstByteString dstPoints) where (fromBS, toBS) = (decodeHex from, decodeHex to) dstByteString (StringObject (Hexadecimal dst)) = return . utf16BEToutf8 $ decodeHex dst dstByteString _ = fail "Invalid for a replacement string" mapFromTo _ = fail "invalid range mapping found" pairMapping :: MonadParser m => (StringObject, StringObject) -> m (ByteString, ByteString) pairMapping (Hexadecimal from, Hexadecimal to) = return (decodeHex from, utf16BEToutf8 $ decodeHex to) pairMapping _ = fail "invalid pair mapping found"