75 lines
2.5 KiB
Haskell
75 lines
2.5 KiB
Haskell
module PDF.CMap (
|
|
CMap
|
|
, CMappers
|
|
, cMap
|
|
, emptyCMap
|
|
) where
|
|
|
|
import Control.Applicative ((<|>), many)
|
|
import Data.Attoparsec.ByteString.Char8 (count, parseOnly)
|
|
import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser)
|
|
import Data.ByteString (ByteString, snoc)
|
|
import qualified Data.ByteString as BS (init, last, null)
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map (empty, fromList)
|
|
import qualified PDF.EOL as EOL (charset, parser)
|
|
import PDF.Object (
|
|
DirectObject(..), Name, StringObject(..)
|
|
, blank, directObject, hexString, integer, line, parseBytes, stringObject
|
|
)
|
|
import PDF.Parser (takeAll)
|
|
|
|
type CMappers = Map Name CMap
|
|
type CMap = Map Int ByteString
|
|
|
|
emptyCMap :: CMap
|
|
emptyCMap = Map.empty
|
|
|
|
cMap :: ByteString -> Either String CMap
|
|
cMap = parseOnly $ mconcat <$> many (cMapRange <|> cMapChar <|> ignoredLine)
|
|
where
|
|
ignoredLine =
|
|
takeAll (not . (`elem` EOL.charset)) *> EOL.parser *> return Map.empty
|
|
|
|
cMapRange :: Atto.Parser CMap
|
|
cMapRange = do
|
|
size <- integer <* line "beginbfrange"
|
|
mconcat <$> count size (Map.fromList <$> rangeMapping) <* line "endbfrange"
|
|
where
|
|
rangeMapping = (,,)
|
|
<$> (stringObject <* blank)
|
|
<*> (stringObject <* blank)
|
|
<*> directObject <* EOL.parser
|
|
>>= mapFromTo
|
|
|
|
cMapChar :: Atto.Parser CMap
|
|
cMapChar = do
|
|
size <- integer <* line "beginbfchar"
|
|
Map.fromList <$> count size charMapping <* line "endbfchar"
|
|
where
|
|
charMapping =
|
|
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser
|
|
>>= pairMapping
|
|
|
|
mapFromTo :: (StringObject, StringObject, DirectObject) -> Atto.Parser [(Int, ByteString)]
|
|
mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
|
|
let dstString = parseBytes dstFrom in
|
|
return $ zip [hexString from .. hexString to] (textsFrom dstString)
|
|
where
|
|
textsFrom t
|
|
| BS.null t = [t]
|
|
| otherwise = (BS.init t `snoc`) <$> [BS.last t ..]
|
|
|
|
mapFromTo (Hexadecimal from, Hexadecimal to, Array dstPoints) =
|
|
zip [hexString from .. hexString to] <$> (mapM dstString dstPoints)
|
|
where
|
|
dstString (StringObject (Hexadecimal dstPoint)) = return $ parseBytes dstPoint
|
|
dstString _ = fail "Invalid for a replacement string"
|
|
|
|
mapFromTo _ = fail "invalid range mapping found"
|
|
|
|
pairMapping :: (StringObject, StringObject) -> Atto.Parser (Int, ByteString)
|
|
pairMapping (Hexadecimal from, Hexadecimal to) =
|
|
return (hexString from, parseBytes to)
|
|
pairMapping _ = fail "invalid pair mapping found"
|