Try and re-implement string decoding — compiles but now fails to decode any string
This commit is contained in:
parent
36d7f9b819
commit
7a15113285
2 changed files with 37 additions and 7 deletions
|
@ -2,8 +2,10 @@
|
||||||
module PDF.CMap (
|
module PDF.CMap (
|
||||||
CMap
|
CMap
|
||||||
, CMappers
|
, CMappers
|
||||||
|
, CRange(..)
|
||||||
, cMap
|
, cMap
|
||||||
, emptyCMap
|
, emptyCMap
|
||||||
|
, matches
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Applicative ((<|>), many)
|
import Control.Applicative ((<|>), many)
|
||||||
|
@ -36,6 +38,10 @@ type CMap = Map RangeSize [CRange]
|
||||||
emptyCMap :: CMap
|
emptyCMap :: CMap
|
||||||
emptyCMap = Map.empty
|
emptyCMap = Map.empty
|
||||||
|
|
||||||
|
matches :: ByteString -> CRange -> Bool
|
||||||
|
matches code (CRange {fromSequence, toSequence}) =
|
||||||
|
fromSequence <= code && code <= toSequence
|
||||||
|
|
||||||
cMap :: ByteString -> Either String CMap
|
cMap :: ByteString -> Either String CMap
|
||||||
cMap = fmap snd <$> runParser
|
cMap = fmap snd <$> runParser
|
||||||
(many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
|
(many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
|
||||||
|
@ -80,12 +86,10 @@ saveMapping assoc@((code, _):_) = modify $ Map.adjust insertCRange mappingSize
|
||||||
where
|
where
|
||||||
newMapping = Map.fromList assoc
|
newMapping = Map.fromList assoc
|
||||||
mappingSize = BS.length code
|
mappingSize = BS.length code
|
||||||
matchingRange (CRange {fromSequence, toSequence}) =
|
|
||||||
fromSequence <= code && code <= toSequence
|
|
||||||
appendMapping cRange =
|
appendMapping cRange =
|
||||||
cRange {mapping = mapping cRange `union` newMapping}
|
cRange {mapping = mapping cRange `union` newMapping}
|
||||||
insertCRange = fmap (\cRange ->
|
insertCRange = fmap (\cRange ->
|
||||||
if matchingRange cRange then appendMapping cRange else cRange
|
if code `matches` cRange then appendMapping cRange else cRange
|
||||||
)
|
)
|
||||||
|
|
||||||
cMapChar :: Parser CMap ()
|
cMapChar :: Parser CMap ()
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
{-# LANGUAGE NamedFieldPuns #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
module PDF.Text {-(
|
module PDF.Text {-(
|
||||||
PageContents(..)
|
PageContents(..)
|
||||||
|
@ -10,11 +11,12 @@ import Control.Monad.Reader (ReaderT, runReaderT, asks)
|
||||||
import Control.Monad.State (get, put)
|
import Control.Monad.State (get, put)
|
||||||
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import qualified Data.ByteString.Char8 as Char8 (unpack)
|
import qualified Data.ByteString as BS (drop, null, take)
|
||||||
import Data.ByteString.Char8.Util (decodeHex)
|
import Data.ByteString.Char8.Util (decodeHex)
|
||||||
|
import Data.List (find)
|
||||||
import Data.Map ((!))
|
import Data.Map ((!))
|
||||||
import qualified Data.Map as Map (lookup)
|
import qualified Data.Map as Map (lookup, toList)
|
||||||
import PDF.CMap (CMappers, CMap, emptyCMap)
|
import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap)
|
||||||
import PDF.Object (
|
import PDF.Object (
|
||||||
DirectObject(..), StringObject(..)
|
DirectObject(..), StringObject(..)
|
||||||
, array, blank, name, parseBytes, regular, stringObject
|
, array, blank, name, parseBytes, regular, stringObject
|
||||||
|
@ -127,8 +129,31 @@ runOperator _ = return []
|
||||||
|
|
||||||
decodeString :: StringObject -> ParserWithFont ByteString
|
decodeString :: StringObject -> ParserWithFont ByteString
|
||||||
decodeString (Hexadecimal h) = decodeString (Literal (decodeHex h))
|
decodeString (Hexadecimal h) = decodeString (Literal (decodeHex h))
|
||||||
decodeString (Literal litString) =
|
decodeString (Literal litString) = do
|
||||||
|
cRangesBySize <- Map.toList <$> get
|
||||||
|
f cRangesBySize litString
|
||||||
|
where
|
||||||
|
f :: [(Int, [CRange])] -> ByteString -> ParserWithFont ByteString
|
||||||
|
f cRangesBySize input
|
||||||
|
| BS.null input = return ""
|
||||||
|
| otherwise = do
|
||||||
|
(output, newInput) <- g cRangesBySize input
|
||||||
|
mappend output <$> f cRangesBySize newInput
|
||||||
|
g :: [(Int, [CRange])] -> ByteString -> ParserWithFont (ByteString, ByteString)
|
||||||
|
g [] _ = fail "No matching code found in font"
|
||||||
|
g ((size, cRanges):others) s =
|
||||||
|
let prefix = BS.take size s in
|
||||||
|
case h prefix cRanges of
|
||||||
|
Nothing -> g others s
|
||||||
|
Just outputSequence -> return (outputSequence, BS.drop size s)
|
||||||
|
h :: ByteString -> [CRange] -> Maybe ByteString
|
||||||
|
h prefix [] = Nothing
|
||||||
|
h prefix ((CRange {mapping}):cRanges) =
|
||||||
|
case Map.lookup prefix mapping of
|
||||||
|
Nothing -> h prefix cRanges
|
||||||
|
outputSequence -> outputSequence
|
||||||
|
|
||||||
|
{-
|
||||||
get >>= convertBytes litString
|
get >>= convertBytes litString
|
||||||
where
|
where
|
||||||
convertBytes :: String -> CMap -> ParserWithFont ByteString
|
convertBytes :: String -> CMap -> ParserWithFont ByteString
|
||||||
|
@ -143,3 +168,4 @@ get >>= convertBytes litString
|
||||||
(Nothing, (c:cs)) -> convertBytesAux (code * 256 + fromEnum c) (size + 1) cs someCMap
|
(Nothing, (c:cs)) -> convertBytesAux (code * 256 + fromEnum c) (size + 1) cs someCMap
|
||||||
(Nothing, []) -> fail "No character left to read but no code recognized"
|
(Nothing, []) -> fail "No character left to read but no code recognized"
|
||||||
(Just outputText, _) -> mappend outputText <$> convertBytes s someCMap
|
(Just outputText, _) -> mappend outputText <$> convertBytes s someCMap
|
||||||
|
-}
|
||||||
|
|
Loading…
Reference in a new issue