Fix error silently discarding code ranges, make sure ByteString intervals are created with the correct byte length and decode utf16BE encoded values in single-value ranges

This commit is contained in:
Tissevert 2019-10-03 14:59:06 +02:00
parent d07c286f8e
commit a96e36ec5a

View file

@ -14,7 +14,7 @@ import Data.Attoparsec.ByteString.Char8 (count)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import qualified Data.ByteString as BS (length) import qualified Data.ByteString as BS (length)
import Data.ByteString.Char8.Util ( import Data.ByteString.Char8.Util (
decodeHex, fromInt, toInt, utf16BEToutf8 decodeHex, toBytes, toInt, utf16BEToutf8
) )
import Data.Map (Map, union) import Data.Map (Map, union)
import qualified Data.Map as Map (adjust, empty, fromList, insertWith) import qualified Data.Map as Map (adjust, empty, fromList, insertWith)
@ -31,7 +31,7 @@ data CRange = CRange {
fromSequence :: ByteString fromSequence :: ByteString
, toSequence :: ByteString , toSequence :: ByteString
, mapping :: Mapping , mapping :: Mapping
} } deriving Show
type RangeSize = Int type RangeSize = Int
type CMap = Map RangeSize [CRange] type CMap = Map RangeSize [CRange]
@ -53,11 +53,11 @@ cMap = fmap snd <$> runParser
codeRanges :: Parser CMap () codeRanges :: Parser CMap ()
codeRanges = do codeRanges = do
size <- integer <* line "begincodespacerange" size <- integer <* line "begincodespacerange"
count size (createMapping <$> codeRange) *> return () mapM_ createMapping =<< count size codeRange
line "endcodespacerange" line "endcodespacerange"
where where
codeRange = codeRange =
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser (,) <$> stringObject <* blank <*> stringObject <* EOL.parser
createMapping :: (StringObject, StringObject) -> Parser CMap () createMapping :: (StringObject, StringObject) -> Parser CMap ()
createMapping (Hexadecimal from, Hexadecimal to) = modify $ createMapping (Hexadecimal from, Hexadecimal to) = modify $
@ -72,7 +72,8 @@ createMapping _ = return ()
cMapRange :: Parser CMap () cMapRange :: Parser CMap ()
cMapRange = do cMapRange = do
size <- integer <* line "beginbfrange" size <- integer <* line "beginbfrange"
mapM_ saveMapping =<< count size rangeMapping <* line "endbfrange" mapM_ saveMapping =<< count size rangeMapping
line "endbfrange"
where where
rangeMapping = (,,) rangeMapping = (,,)
<$> (stringObject <* blank) <$> (stringObject <* blank)
@ -98,18 +99,22 @@ cMapChar = do
saveMapping =<< count size charMapping <* line "endbfchar" saveMapping =<< count size charMapping <* line "endbfchar"
where where
charMapping = charMapping =
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser (,) <$> stringObject <* blank <*> stringObject <* EOL.parser
>>= pairMapping >>= pairMapping
between :: ByteString -> ByteString -> [ByteString] between :: ByteString -> ByteString -> [ByteString]
between from to = fromInt <$> [toInt from .. toInt to] between from to =
let size = BS.length from in
toBytes size <$> [toInt from .. toInt to]
startFrom :: ByteString -> [ByteString] startFrom :: ByteString -> [ByteString]
startFrom from = fromInt <$> [toInt from .. ] startFrom from =
let size = BS.length from in
toBytes size <$> [toInt from .. ]
mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, ByteString)] mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, ByteString)]
mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) = mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
return $ zip (between fromBS toBS) (startFrom dstBS) return $ zip (between fromBS toBS) (utf16BEToutf8 <$> startFrom dstBS)
where where
(fromBS, toBS, dstBS) = (decodeHex from, decodeHex to, decodeHex dstFrom) (fromBS, toBS, dstBS) = (decodeHex from, decodeHex to, decodeHex dstFrom)