Fix error silently discarding code ranges, make sure ByteString intervals are created with the correct byte length and decode utf16BE encoded values in single-value ranges
This commit is contained in:
parent
d07c286f8e
commit
a96e36ec5a
1 changed files with 14 additions and 9 deletions
|
@ -14,7 +14,7 @@ import Data.Attoparsec.ByteString.Char8 (count)
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import qualified Data.ByteString as BS (length)
|
import qualified Data.ByteString as BS (length)
|
||||||
import Data.ByteString.Char8.Util (
|
import Data.ByteString.Char8.Util (
|
||||||
decodeHex, fromInt, toInt, utf16BEToutf8
|
decodeHex, toBytes, toInt, utf16BEToutf8
|
||||||
)
|
)
|
||||||
import Data.Map (Map, union)
|
import Data.Map (Map, union)
|
||||||
import qualified Data.Map as Map (adjust, empty, fromList, insertWith)
|
import qualified Data.Map as Map (adjust, empty, fromList, insertWith)
|
||||||
|
@ -31,7 +31,7 @@ data CRange = CRange {
|
||||||
fromSequence :: ByteString
|
fromSequence :: ByteString
|
||||||
, toSequence :: ByteString
|
, toSequence :: ByteString
|
||||||
, mapping :: Mapping
|
, mapping :: Mapping
|
||||||
}
|
} deriving Show
|
||||||
type RangeSize = Int
|
type RangeSize = Int
|
||||||
type CMap = Map RangeSize [CRange]
|
type CMap = Map RangeSize [CRange]
|
||||||
|
|
||||||
|
@ -53,11 +53,11 @@ cMap = fmap snd <$> runParser
|
||||||
codeRanges :: Parser CMap ()
|
codeRanges :: Parser CMap ()
|
||||||
codeRanges = do
|
codeRanges = do
|
||||||
size <- integer <* line "begincodespacerange"
|
size <- integer <* line "begincodespacerange"
|
||||||
count size (createMapping <$> codeRange) *> return ()
|
mapM_ createMapping =<< count size codeRange
|
||||||
line "endcodespacerange"
|
line "endcodespacerange"
|
||||||
where
|
where
|
||||||
codeRange =
|
codeRange =
|
||||||
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser
|
(,) <$> stringObject <* blank <*> stringObject <* EOL.parser
|
||||||
|
|
||||||
createMapping :: (StringObject, StringObject) -> Parser CMap ()
|
createMapping :: (StringObject, StringObject) -> Parser CMap ()
|
||||||
createMapping (Hexadecimal from, Hexadecimal to) = modify $
|
createMapping (Hexadecimal from, Hexadecimal to) = modify $
|
||||||
|
@ -72,7 +72,8 @@ createMapping _ = return ()
|
||||||
cMapRange :: Parser CMap ()
|
cMapRange :: Parser CMap ()
|
||||||
cMapRange = do
|
cMapRange = do
|
||||||
size <- integer <* line "beginbfrange"
|
size <- integer <* line "beginbfrange"
|
||||||
mapM_ saveMapping =<< count size rangeMapping <* line "endbfrange"
|
mapM_ saveMapping =<< count size rangeMapping
|
||||||
|
line "endbfrange"
|
||||||
where
|
where
|
||||||
rangeMapping = (,,)
|
rangeMapping = (,,)
|
||||||
<$> (stringObject <* blank)
|
<$> (stringObject <* blank)
|
||||||
|
@ -98,18 +99,22 @@ cMapChar = do
|
||||||
saveMapping =<< count size charMapping <* line "endbfchar"
|
saveMapping =<< count size charMapping <* line "endbfchar"
|
||||||
where
|
where
|
||||||
charMapping =
|
charMapping =
|
||||||
(,) <$> (stringObject <* blank) <*> (stringObject <* blank) <* EOL.parser
|
(,) <$> stringObject <* blank <*> stringObject <* EOL.parser
|
||||||
>>= pairMapping
|
>>= pairMapping
|
||||||
|
|
||||||
between :: ByteString -> ByteString -> [ByteString]
|
between :: ByteString -> ByteString -> [ByteString]
|
||||||
between from to = fromInt <$> [toInt from .. toInt to]
|
between from to =
|
||||||
|
let size = BS.length from in
|
||||||
|
toBytes size <$> [toInt from .. toInt to]
|
||||||
|
|
||||||
startFrom :: ByteString -> [ByteString]
|
startFrom :: ByteString -> [ByteString]
|
||||||
startFrom from = fromInt <$> [toInt from .. ]
|
startFrom from =
|
||||||
|
let size = BS.length from in
|
||||||
|
toBytes size <$> [toInt from .. ]
|
||||||
|
|
||||||
mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, ByteString)]
|
mapFromTo :: MonadParser m => (StringObject, StringObject, DirectObject) -> m [(ByteString, ByteString)]
|
||||||
mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
|
mapFromTo (Hexadecimal from, Hexadecimal to, StringObject (Hexadecimal dstFrom)) =
|
||||||
return $ zip (between fromBS toBS) (startFrom dstBS)
|
return $ zip (between fromBS toBS) (utf16BEToutf8 <$> startFrom dstBS)
|
||||||
where
|
where
|
||||||
(fromBS, toBS, dstBS) = (decodeHex from, decodeHex to, decodeHex dstFrom)
|
(fromBS, toBS, dstBS) = (decodeHex from, decodeHex to, decodeHex dstFrom)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue