Try and re-implement string decoding — compiles but now fails to decode any string
This commit is contained in:
parent
36d7f9b819
commit
7a15113285
2 changed files with 37 additions and 7 deletions
|
@ -2,8 +2,10 @@
|
|||
module PDF.CMap (
|
||||
CMap
|
||||
, CMappers
|
||||
, CRange(..)
|
||||
, cMap
|
||||
, emptyCMap
|
||||
, matches
|
||||
) where
|
||||
|
||||
import Control.Applicative ((<|>), many)
|
||||
|
@ -36,6 +38,10 @@ type CMap = Map RangeSize [CRange]
|
|||
emptyCMap :: CMap
|
||||
emptyCMap = Map.empty
|
||||
|
||||
matches :: ByteString -> CRange -> Bool
|
||||
matches code (CRange {fromSequence, toSequence}) =
|
||||
fromSequence <= code && code <= toSequence
|
||||
|
||||
cMap :: ByteString -> Either String CMap
|
||||
cMap = fmap snd <$> runParser
|
||||
(many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
|
||||
|
@ -80,12 +86,10 @@ saveMapping assoc@((code, _):_) = modify $ Map.adjust insertCRange mappingSize
|
|||
where
|
||||
newMapping = Map.fromList assoc
|
||||
mappingSize = BS.length code
|
||||
matchingRange (CRange {fromSequence, toSequence}) =
|
||||
fromSequence <= code && code <= toSequence
|
||||
appendMapping cRange =
|
||||
cRange {mapping = mapping cRange `union` newMapping}
|
||||
insertCRange = fmap (\cRange ->
|
||||
if matchingRange cRange then appendMapping cRange else cRange
|
||||
if code `matches` cRange then appendMapping cRange else cRange
|
||||
)
|
||||
|
||||
cMapChar :: Parser CMap ()
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
{-# LANGUAGE NamedFieldPuns #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module PDF.Text {-(
|
||||
PageContents(..)
|
||||
|
@ -10,11 +11,12 @@ import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
|||
import Control.Monad.State (get, put)
|
||||
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString.Char8 as Char8 (unpack)
|
||||
import qualified Data.ByteString as BS (drop, null, take)
|
||||
import Data.ByteString.Char8.Util (decodeHex)
|
||||
import Data.List (find)
|
||||
import Data.Map ((!))
|
||||
import qualified Data.Map as Map (lookup)
|
||||
import PDF.CMap (CMappers, CMap, emptyCMap)
|
||||
import qualified Data.Map as Map (lookup, toList)
|
||||
import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap)
|
||||
import PDF.Object (
|
||||
DirectObject(..), StringObject(..)
|
||||
, array, blank, name, parseBytes, regular, stringObject
|
||||
|
@ -127,8 +129,31 @@ runOperator _ = return []
|
|||
|
||||
decodeString :: StringObject -> ParserWithFont ByteString
|
||||
decodeString (Hexadecimal h) = decodeString (Literal (decodeHex h))
|
||||
decodeString (Literal litString) =
|
||||
decodeString (Literal litString) = do
|
||||
cRangesBySize <- Map.toList <$> get
|
||||
f cRangesBySize litString
|
||||
where
|
||||
f :: [(Int, [CRange])] -> ByteString -> ParserWithFont ByteString
|
||||
f cRangesBySize input
|
||||
| BS.null input = return ""
|
||||
| otherwise = do
|
||||
(output, newInput) <- g cRangesBySize input
|
||||
mappend output <$> f cRangesBySize newInput
|
||||
g :: [(Int, [CRange])] -> ByteString -> ParserWithFont (ByteString, ByteString)
|
||||
g [] _ = fail "No matching code found in font"
|
||||
g ((size, cRanges):others) s =
|
||||
let prefix = BS.take size s in
|
||||
case h prefix cRanges of
|
||||
Nothing -> g others s
|
||||
Just outputSequence -> return (outputSequence, BS.drop size s)
|
||||
h :: ByteString -> [CRange] -> Maybe ByteString
|
||||
h prefix [] = Nothing
|
||||
h prefix ((CRange {mapping}):cRanges) =
|
||||
case Map.lookup prefix mapping of
|
||||
Nothing -> h prefix cRanges
|
||||
outputSequence -> outputSequence
|
||||
|
||||
{-
|
||||
get >>= convertBytes litString
|
||||
where
|
||||
convertBytes :: String -> CMap -> ParserWithFont ByteString
|
||||
|
@ -143,3 +168,4 @@ get >>= convertBytes litString
|
|||
(Nothing, (c:cs)) -> convertBytesAux (code * 256 + fromEnum c) (size + 1) cs someCMap
|
||||
(Nothing, []) -> fail "No character left to read but no code recognized"
|
||||
(Just outputText, _) -> mappend outputText <$> convertBytes s someCMap
|
||||
-}
|
||||
|
|
Loading…
Reference in a new issue