2019-05-24 10:48:09 +02:00
|
|
|
module Data.ByteString.Char8.Util (
|
2019-10-04 18:46:07 +02:00
|
|
|
B16Int(..)
|
|
|
|
, B256Int(..)
|
|
|
|
, b8ToInt
|
|
|
|
, b16ToBytes
|
|
|
|
, b16ToInt
|
|
|
|
, b256ToInt
|
|
|
|
, intToB256
|
2019-09-30 14:13:12 +02:00
|
|
|
, previous
|
2019-05-24 10:48:09 +02:00
|
|
|
, subBS
|
2019-10-03 14:43:56 +02:00
|
|
|
, toBytes
|
2019-10-04 18:46:07 +02:00
|
|
|
, unescape
|
2019-09-30 14:13:12 +02:00
|
|
|
, utf16BEToutf8
|
2019-05-24 10:48:09 +02:00
|
|
|
) where
|
|
|
|
|
2019-09-30 14:13:12 +02:00
|
|
|
import Data.ByteString (ByteString, snoc)
|
2019-10-04 18:46:07 +02:00
|
|
|
import qualified Data.ByteString as BS (empty, foldl, length, pack, singleton, splitAt)
|
|
|
|
import qualified Data.ByteString.Char8 as Char8 (
|
|
|
|
cons, drop, index, splitAt, take, uncons, unpack
|
|
|
|
)
|
2020-02-08 08:15:32 +01:00
|
|
|
import Data.Text (Text)
|
|
|
|
import Data.Text.Encoding (decodeUtf16BE)
|
2019-05-24 10:48:09 +02:00
|
|
|
import Prelude hiding (length)
|
2019-10-04 18:46:07 +02:00
|
|
|
import Text.Printf (printf)
|
|
|
|
|
2020-02-14 11:53:05 +01:00
|
|
|
newtype B8Int = B8Int ByteString deriving (Eq, Show)
|
|
|
|
newtype B16Int = B16Int ByteString deriving (Eq, Show)
|
|
|
|
newtype B256Int = B256Int ByteString deriving (Eq, Show)
|
2019-05-24 10:48:09 +02:00
|
|
|
|
|
|
|
previous :: Char -> Int -> ByteString -> Int
|
|
|
|
previous char position byteString
|
2019-09-30 14:13:12 +02:00
|
|
|
| Char8.index byteString position == char = position
|
2019-05-24 10:48:09 +02:00
|
|
|
| otherwise = previous char (position - 1) byteString
|
|
|
|
|
|
|
|
subBS :: Int -> Int -> ByteString -> ByteString
|
2019-09-30 14:13:12 +02:00
|
|
|
subBS offset length = Char8.take length . Char8.drop offset
|
|
|
|
|
2019-10-04 18:46:07 +02:00
|
|
|
intToB256 :: Int -> B256Int
|
|
|
|
intToB256 n
|
|
|
|
| n < 0x100 = B256Int . BS.singleton $ toEnum n
|
|
|
|
| otherwise =
|
|
|
|
let B256Int begining = intToB256 (n `div` 0x100) in
|
|
|
|
B256Int $ begining `snoc` (toEnum (n `mod` 0x100))
|
2019-09-30 14:13:12 +02:00
|
|
|
|
2019-10-04 18:46:07 +02:00
|
|
|
b256ToInt :: B256Int -> Int
|
|
|
|
b256ToInt (B256Int n) = BS.foldl (\k w -> 0x100*k + fromEnum w) 0 n
|
2019-09-30 14:13:12 +02:00
|
|
|
|
2019-10-03 14:43:56 +02:00
|
|
|
toBytes :: Int -> Int -> ByteString
|
|
|
|
toBytes 0 _ = BS.empty
|
2019-10-04 18:46:07 +02:00
|
|
|
toBytes size n =
|
|
|
|
(toBytes (size - 1) (n `div` 0x100)) `snoc` (toEnum (n `mod` 0x100))
|
|
|
|
|
|
|
|
b16ToBytes :: B16Int -> ByteString
|
|
|
|
b16ToBytes (B16Int n) = BS.pack . fmap b16ToInt $ pairDigits n
|
|
|
|
where
|
|
|
|
pairDigits s =
|
|
|
|
case BS.length s of
|
|
|
|
0 -> []
|
|
|
|
1 -> [B16Int s]
|
|
|
|
_ ->
|
|
|
|
let (twoHexDigits, rest) = BS.splitAt 2 s in
|
|
|
|
(B16Int $ twoHexDigits):(pairDigits rest)
|
|
|
|
|
|
|
|
fromBase :: (Num a, Read a) => Char -> ByteString -> a
|
|
|
|
fromBase b = read . printf "0%c%s" b . Char8.unpack
|
|
|
|
|
|
|
|
b16ToInt :: (Num a, Read a) => B16Int -> a
|
|
|
|
b16ToInt (B16Int n) = fromBase 'x' n
|
2019-10-03 14:43:56 +02:00
|
|
|
|
2019-10-04 18:46:07 +02:00
|
|
|
b8ToInt :: (Num a, Read a) => B8Int -> a
|
|
|
|
b8ToInt (B8Int n) = fromBase 'o' n
|
2019-09-30 14:13:12 +02:00
|
|
|
|
2019-10-04 18:46:07 +02:00
|
|
|
unescape :: ByteString -> ByteString
|
|
|
|
unescape escapedBS =
|
|
|
|
case Char8.uncons escapedBS of
|
|
|
|
Nothing -> BS.empty
|
|
|
|
Just ('\\', s) -> unescapeChar s
|
|
|
|
Just (c, s) -> Char8.cons c (unescape s)
|
2019-09-30 14:13:12 +02:00
|
|
|
where
|
2019-10-04 18:46:07 +02:00
|
|
|
unescapeChar s =
|
|
|
|
case Char8.uncons s of
|
|
|
|
Nothing -> BS.empty
|
|
|
|
Just (c, s')
|
|
|
|
| c `elem` "()" -> Char8.cons c (unescape s')
|
|
|
|
| c `elem` "nrtbf" -> Char8.cons (read (printf "'\\%c'" c)) (unescape s')
|
|
|
|
| c `elem` ['0'..'7'] -> fromOctal (Char8.splitAt 3 s)
|
|
|
|
| otherwise -> Char8.cons c (unescape s')
|
|
|
|
fromOctal (code, s) = Char8.cons (toEnum $ b8ToInt (B8Int code)) (unescape s)
|
2019-09-30 14:13:12 +02:00
|
|
|
|
2020-02-08 08:15:32 +01:00
|
|
|
utf16BEToutf8 :: ByteString -> Text
|
|
|
|
utf16BEToutf8 = decodeUtf16BE
|