61 lines
2.1 KiB
Haskell
61 lines
2.1 KiB
Haskell
{-# LANGUAGE NamedFieldPuns #-}
|
|
module Data.ByteString.Lazy.Char8.Util (
|
|
previous
|
|
, search
|
|
, subBS
|
|
) where
|
|
|
|
import Data.ByteString.Lazy.Char8 (ByteString)
|
|
import qualified Data.ByteString.Lazy.Char8 as BS (drop, index, isPrefixOf, length, tails, take, uncons)
|
|
import Data.Int (Int64)
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map (empty, fromList, insert, lookup, member)
|
|
import Prelude hiding (length)
|
|
|
|
previous :: Char -> Int64 -> ByteString -> Int64
|
|
previous char position byteString
|
|
| BS.index byteString position == char = position
|
|
| otherwise = previous char (position - 1) byteString
|
|
|
|
data BMTable = BMTable {
|
|
length :: Int64
|
|
, offsets :: Map Int64 (Map Char Int64)
|
|
} deriving Show
|
|
|
|
prepare :: ByteString -> BMTable
|
|
prepare needle =
|
|
let length = BS.length needle in
|
|
let offsets = Map.fromList $ generateSuffixOffsets <$> [0..length - 1] in
|
|
BMTable {length, offsets}
|
|
where
|
|
generateSuffixOffsets l =
|
|
let suffix = BS.drop l needle in
|
|
let prefixes = fmap (l -) <$> zip (BS.tails needle) [0..l] in
|
|
(l, foldl (addOffset suffix) Map.empty prefixes)
|
|
addOffset suffix tmpMap ((aSuffix, delta)) = maybe tmpMap id $ do
|
|
(initial, rest) <- BS.uncons aSuffix
|
|
if BS.isPrefixOf suffix rest && not (Map.member initial tmpMap)
|
|
then return (Map.insert initial delta tmpMap)
|
|
else Nothing
|
|
|
|
jump :: BMTable -> Int64 -> Char -> Int64
|
|
jump (BMTable {length, offsets}) index char =
|
|
maybe length (maybe length id . Map.lookup char) (Map.lookup index offsets)
|
|
|
|
search :: ByteString -> ByteString -> Maybe Int64
|
|
search needle = boyerMoore 0
|
|
where
|
|
table = prepare needle
|
|
tryMatch offset haystack n
|
|
| n < 0 = Just offset
|
|
| BS.index haystack n == BS.index needle n = tryMatch offset haystack (n-1)
|
|
| otherwise =
|
|
let delta = jump table n (BS.index haystack n) in
|
|
boyerMoore (offset + delta) (BS.drop delta haystack)
|
|
boyerMoore offset haystack
|
|
| BS.length haystack < 1 = Nothing
|
|
| otherwise = tryMatch offset haystack (BS.length needle - 1)
|
|
|
|
subBS :: Int64 -> Int64 -> ByteString -> ByteString
|
|
subBS offset length = BS.take length . BS.drop offset
|