Hufflepdf/src/PDF/CMap.hs

75 lines
2.5 KiB
Haskell

module PDF.CMap (
CMap
, CMappers
, cMap
, emptyCMap
) where
import Control.Applicative ((<|>), many)
import Data.Attoparsec.ByteString.Char8 (count, parseOnly)
import qualified Data.Attoparsec.ByteString.Char8 as Atto (Parser)
import Data.ByteString (ByteString, snoc)
import qualified Data.ByteString as BS (init, last, null)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Object (
DirectObject(..), Name, StringObject(..)
, blank, directObject, hexString, integer, line, parseBytes, stringObject
)
import PDF.Parser (takeAll)
type CMappers = Map Name CMap
type CMap = Map Int ByteString
emptyCMap :: CMap
emptyCMap = Map.empty
cMap :: ByteString -> Either String CMap
cMap = parseOnly $ mconcat <$> many (cMapRange <|> cMapChar <|> ignoredLine)
where
ignoredLine =
takeAll (not . (`elem` EOL.charset)) *> EOL.parser *> return Map.empty
cMapRange :: Atto.Parser CMap
cMapRange = do
size <- integer <* line "beginbfrange"
mconcat <$> count size (Map.fromList <$> rangeMapping) <* line "endbfrange"
where
rangeMapping = (,,)
<$> (stringObject <* blank)
<*> (stringObject <* blank)
<*> directObject <* EOL.parser
>>= mapFromTo
cMapChar :: Atto.Parser CMap
cMapChar = do
size <- integer <* line "beginbfchar"
Map.fromList <$> count size charMapping <* line "endbfchar"
where
charMapping =
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser
>>= pairMapping
mapFromTo :: (StringObject, StringObject, DirectObject) -> Atto.Parser [(Int, ByteString)]
mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
let dstString = parseBytes dstFrom in
return $ zip [hexString from .. hexString to] (textsFrom dstString)
where
textsFrom t
| BS.null t = [t]
| otherwise = (BS.init t `snoc`) <$> [BS.last t ..]
mapFromTo (Hexadecimal from, Hexadecimal to, Array dstPoints) =
zip [hexString from .. hexString to] <$> (mapM dstString dstPoints)
where
dstString (StringObject (Hexadecimal dstPoint)) = return $ parseBytes dstPoint
dstString _ = fail "Invalid for a replacement string"
mapFromTo _ = fail "invalid range mapping found"
pairMapping :: (StringObject, StringObject) -> Atto.Parser (Int, ByteString)
pairMapping (Hexadecimal from, Hexadecimal to) =
return (hexString from, parseBytes to)
pairMapping _ = fail "invalid pair mapping found"