Fix error silently discarding code ranges, make sure ByteString intervals are created with the correct byte length and decode utf16BE encoded values in single-value ranges

This commit is contained in:
Tissevert 2019-10-03 14:59:06 +02:00
parent d07c286f8e
commit a96e36ec5a
1 changed files with 14 additions and 9 deletions

View File

@ -14,7 +14,7 @@ import Data.Attoparsec.ByteString.Char8 (count)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (length)
import Data.ByteString.Char8.Util (
decodeHex, fromInt, toInt, utf16BEToutf8
decodeHex, toBytes, toInt, utf16BEToutf8
)
import Data.Map (Map, union)
import qualified Data.Map as Map (adjust, empty, fromList, insertWith)
@ -31,7 +31,7 @@ data CRange = CRange {
fromSequence :: ByteString
, toSequence :: ByteString
, mapping :: Mapping
}
} deriving Show
type RangeSize = Int
type CMap = Map RangeSize [CRange]
@ -53,11 +53,11 @@ cMap = fmap snd <$> runParser
codeRanges :: Parser CMap ()
codeRanges = do
size <- integer <* line "begincodespacerange"
count size (createMapping <$> codeRange) *> return ()
mapM_ createMapping =<< count size codeRange
line "endcodespacerange"
where
codeRange =
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser
(,) <$> stringObject <* blank <*> stringObject <* EOL.parser
createMapping :: (StringObject, StringObject) -> Parser CMap ()
createMapping (Hexadecimal from, Hexadecimal to) = modify $
@ -72,7 +72,8 @@ createMapping _ = return ()
cMapRange :: Parser CMap ()
cMapRange = do
size <- integer <* line "beginbfrange"
mapM_ saveMapping =<< count size rangeMapping <* line "endbfrange"
mapM_ saveMapping =<< count size rangeMapping
line "endbfrange"
where
rangeMapping = (,,)
<$> (stringObject <* blank)
@ -98,18 +99,22 @@ cMapChar = do
saveMapping =<< count size charMapping <* line "endbfchar"
where
charMapping =
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser
(,) <$> stringObject <* blank <*> stringObject <* EOL.parser
>>= pairMapping
between :: ByteString -> ByteString -> [ByteString]
between from to = fromInt <$> [toInt from .. toInt to]
between from to =
let size = BS.length from in
toBytes size <$> [toInt from .. toInt to]
startFrom :: ByteString -> [ByteString]
startFrom from = fromInt <$> [toInt from .. ]
startFrom from =
let size = BS.length from in
toBytes size <$> [toInt from .. ]
mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, ByteString)]
mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
return $ zip (between fromBS toBS) (startFrom dstBS)
return $ zip (between fromBS toBS) (utf16BEToutf8 <$> startFrom dstBS)
where
(fromBS, toBS, dstBS) = (decodeHex from, decodeHex to, decodeHex dstFrom)