Hufflepdf/src/Data/ByteString/Lazy/Char8/Util.hs

61 lines
2.1 KiB
Haskell

{-# LANGUAGE NamedFieldPuns #-}
module Data.ByteString.Lazy.Char8.Util (
previous
, search
, subBS
) where
import Data.ByteString.Lazy.Char8 (ByteString)
import qualified Data.ByteString.Lazy.Char8 as BS (drop, index, isPrefixOf, length, tails, take, uncons)
import Data.Int (Int64)
import Data.Map (Map)
import qualified Data.Map as Map (empty, fromList, insert, lookup, member)
import Prelude hiding (length)
previous :: Char -> Int64 -> ByteString -> Int64
previous char position byteString
| BS.index byteString position == char = position
| otherwise = previous char (position - 1) byteString
data BMTable = BMTable {
length :: Int64
, offsets :: Map Int64 (Map Char Int64)
} deriving Show
prepare :: ByteString -> BMTable
prepare needle =
let length = BS.length needle in
let offsets = Map.fromList $ generateSuffixOffsets <$> [0..length - 1] in
BMTable {length, offsets}
where
generateSuffixOffsets l =
let suffix = BS.drop l needle in
let prefixes = fmap (l -) <$> zip (BS.tails needle) [0..l] in
(l, foldl (addOffset suffix) Map.empty prefixes)
addOffset suffix tmpMap ((aSuffix, delta)) = maybe tmpMap id $ do
(initial, rest) <- BS.uncons aSuffix
if BS.isPrefixOf suffix rest && not (Map.member initial tmpMap)
then return (Map.insert initial delta tmpMap)
else Nothing
jump :: BMTable -> Int64 -> Char -> Int64
jump (BMTable {length, offsets}) index char =
maybe length (maybe length id . Map.lookup char) (Map.lookup index offsets)
search :: ByteString -> ByteString -> Maybe Int64
search needle = boyerMoore 0
where
table = prepare needle
tryMatch offset haystack n
| n < 0 = Just offset
| BS.index haystack n == BS.index needle n = tryMatch offset haystack (n-1)
| otherwise =
let delta = jump table n (BS.index haystack n) in
boyerMoore (offset + delta) (BS.drop delta haystack)
boyerMoore offset haystack
| BS.length haystack < 1 = Nothing
| otherwise = tryMatch offset haystack (BS.length needle - 1)
subBS :: Int64 -> Int64 -> ByteString -> ByteString
subBS offset length = BS.take length . BS.drop offset