Try and re-implement string decoding — compiles but now fails to decode any string

This commit is contained in:
Tissevert 2019-10-03 07:59:09 +02:00
parent 36d7f9b819
commit 7a15113285
2 changed files with 37 additions and 7 deletions

View file

@ -2,8 +2,10 @@
module PDF.CMap (
CMap
, CMappers
, CRange(..)
, cMap
, emptyCMap
, matches
) where
import Control.Applicative ((<|>), many)
@ -36,6 +38,10 @@ type CMap = Map RangeSize [CRange]
emptyCMap :: CMap
emptyCMap = Map.empty
matches :: ByteString -> CRange -> Bool
matches code (CRange {fromSequence, toSequence}) =
fromSequence <= code && code <= toSequence
cMap :: ByteString -> Either String CMap
cMap = fmap snd <$> runParser
(many (codeRanges <|> cMapRange <|> cMapChar <|> ignoredLine))
@ -80,12 +86,10 @@ saveMapping assoc@((code, _):_) = modify $ Map.adjust insertCRange mappingSize
where
newMapping = Map.fromList assoc
mappingSize = BS.length code
matchingRange (CRange {fromSequence, toSequence}) =
fromSequence <= code && code <= toSequence
appendMapping cRange =
cRange {mapping = mapping cRange `union` newMapping}
insertCRange = fmap (\cRange ->
if matchingRange cRange then appendMapping cRange else cRange
if code `matches` cRange then appendMapping cRange else cRange
)
cMapChar :: Parser CMap ()

View file

@ -1,3 +1,4 @@
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
module PDF.Text {-(
PageContents(..)
@ -10,11 +11,12 @@ import Control.Monad.Reader (ReaderT, runReaderT, asks)
import Control.Monad.State (get, put)
import Data.Attoparsec.ByteString.Char8 (choice, count, sepBy)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as Char8 (unpack)
import qualified Data.ByteString as BS (drop, null, take)
import Data.ByteString.Char8.Util (decodeHex)
import Data.List (find)
import Data.Map ((!))
import qualified Data.Map as Map (lookup)
import PDF.CMap (CMappers, CMap, emptyCMap)
import qualified Data.Map as Map (lookup, toList)
import PDF.CMap (CMappers, CMap, CRange(..), emptyCMap)
import PDF.Object (
DirectObject(..), StringObject(..)
, array, blank, name, parseBytes, regular, stringObject
@ -127,8 +129,31 @@ runOperator _ = return []
decodeString :: StringObject -> ParserWithFont ByteString
decodeString (Hexadecimal h) = decodeString (Literal (decodeHex h))
decodeString (Literal litString) =
decodeString (Literal litString) = do
cRangesBySize <- Map.toList <$> get
f cRangesBySize litString
where
f :: [(Int, [CRange])] -> ByteString -> ParserWithFont ByteString
f cRangesBySize input
| BS.null input = return ""
| otherwise = do
(output, newInput) <- g cRangesBySize input
mappend output <$> f cRangesBySize newInput
g :: [(Int, [CRange])] -> ByteString -> ParserWithFont (ByteString, ByteString)
g [] _ = fail "No matching code found in font"
g ((size, cRanges):others) s =
let prefix = BS.take size s in
case h prefix cRanges of
Nothing -> g others s
Just outputSequence -> return (outputSequence, BS.drop size s)
h :: ByteString -> [CRange] -> Maybe ByteString
h prefix [] = Nothing
h prefix ((CRange {mapping}):cRanges) =
case Map.lookup prefix mapping of
Nothing -> h prefix cRanges
outputSequence -> outputSequence
{-
get >>= convertBytes litString
where
convertBytes :: String -> CMap -> ParserWithFont ByteString
@ -143,3 +168,4 @@ get >>= convertBytes litString
(Nothing, (c:cs)) -> convertBytesAux (code * 256 + fromEnum c) (size + 1) cs someCMap
(Nothing, []) -> fail "No character left to read but no code recognized"
(Just outputText, _) -> mappend outputText <$> convertBytes s someCMap
-}