{-|
Module      : Z.Data.Vector.Search
Description : Searching vectors
Copyright   : (c) Dong Han, 2017-2018
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

This module provides:

  * Element-wise searching within vectors

  * Fast sub-vector searching algorithm based on KMP string searching.

  * A hybrid sub-vector searching algorithm for 'Bytes'.

  * Rewrite rules to use 'Bytes' specialized version if possible.

-}

module Z.Data.Vector.Search (
  -- * Element-wise search
    findIndices, elemIndices
  , find, findR
  , findIndex, findIndexR
  , filter, partition
  -- * Sub-vector search
  , indicesOverlapping
  , indices
  -- * 'Bytes' specialized combinators
  , elemIndicesBytes, findByte, findByteR
  , indicesOverlappingBytes, indicesBytes
  -- * Helpers
  , kmpNextTable
  , sundayBloom
  , elemSundayBloom
  ) where

import           Control.Monad.ST
import           Data.Bits
import           GHC.Word
import           Prelude                       hiding (filter)
import           Z.Data.Array
import           Z.Data.Vector.Base

--------------------------------------------------------------------------------
-- Searching by equality or predicate

-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
-- the indices of all elements equal to the query element, in ascending order.
elemIndices :: (Vec v a, Eq a) => a -> v a -> [Int]
{-# INLINE [1] elemIndices #-}
{-# RULES "elemIndices/Bytes" elemIndices = elemIndicesBytes #-}
elemIndices :: a -> v a -> [Int]
elemIndices a
w (Vec IArray v a
arr Int
s Int
l) = Int -> [Int]
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> [Int]
go !Int
i
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = []
        | a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
w    = let !i' :: Int
i' = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
s in Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        | Bool
otherwise = Int -> [Int]
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        where (# a
x #) = IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
arr Int
i

-- | The 'findIndex' function takes a predicate and a vector and
-- returns the index of the first element in the vector
-- satisfying the predicate.
findIndices :: Vec v a => (a -> Bool) -> v a -> [Int]
{-# INLINE [1] findIndices #-}
{-# RULES "findIndices/Bytes1" forall w. findIndices (w `eqWord8`) = elemIndicesBytes w #-}
{-# RULES "findIndices/Bytes2" forall w. findIndices (`eqWord8` w) = elemIndicesBytes w #-}
findIndices :: (a -> Bool) -> v a -> [Int]
findIndices a -> Bool
f (Vec IArray v a
arr Int
s Int
l) = Int -> [Int]
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> [Int]
go !Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = []
          | a -> Bool
f a
x       = Int
p Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
go (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
          | Bool
otherwise = Int -> [Int]
go (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        where (# a
x #) = IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
arr Int
p

-- | /O(n)/ Special 'elemIndices' for 'Bytes' using @memchr(3)@
elemIndicesBytes :: Word8 -> Bytes -> [Int]
{-# INLINE elemIndicesBytes #-}
elemIndicesBytes :: Word8 -> Bytes -> [Int]
elemIndicesBytes Word8
w (PrimVector (PrimArray ByteArray#
ba#) Int
s Int
l) = Int -> [Int]
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> [Int]
go !Int
i
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = []
        | Bool
otherwise =
            case ByteArray# -> Int -> Word8 -> Int -> Int
c_memchr ByteArray#
ba# Int
i Word8
w (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) of
                -1 -> []
                Int
r  -> let !i' :: Int
i' = (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
r) in Int
i'Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
go (Int
i'Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- | @findIndex f v = fst (find f v)@
findIndex :: Vec v a => (a -> Bool) -> v a -> Int
{-# INLINE findIndex #-}
findIndex :: (a -> Bool) -> v a -> Int
findIndex a -> Bool
f v a
v = (Int, Maybe a) -> Int
forall a b. (a, b) -> a
fst ((a -> Bool) -> v a -> (Int, Maybe a)
forall (v :: * -> *) a.
Vec v a =>
(a -> Bool) -> v a -> (Int, Maybe a)
find a -> Bool
f v a
v)

-- | @findIndexR f v = fst (findR f v)@
findIndexR :: Vec v a => (a -> Bool) -> v a -> Int
{-# INLINE findIndexR #-}
findIndexR :: (a -> Bool) -> v a -> Int
findIndexR a -> Bool
f v a
v = (Int, Maybe a) -> Int
forall a b. (a, b) -> a
fst ((a -> Bool) -> v a -> (Int, Maybe a)
forall (v :: * -> *) a.
Vec v a =>
(a -> Bool) -> v a -> (Int, Maybe a)
findR a -> Bool
f v a
v)

-- | /O(n)/ find the first index and element matching the predicate in a vector
-- from left to right, if there isn't one, return (length of the vector, Nothing).
--
find :: Vec v a => (a -> Bool) -> v a -> (Int, Maybe a)
{-# INLINE [1] find #-}
{-# RULES "find/Bytes1" forall w. find (w `eqWord8`) = findByte w #-}
{-# RULES "find/Bytes2" forall w. find (`eqWord8` w) = findByte w #-}
find :: (a -> Bool) -> v a -> (Int, Maybe a)
find a -> Bool
f (Vec IArray v a
arr Int
s Int
l) = Int -> (Int, Maybe a)
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> (Int, Maybe a)
go !Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = (Int
l, Maybe a
forall a. Maybe a
Nothing)
          | a -> Bool
f a
x       = let !i :: Int
i = Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s in (Int
i, a -> Maybe a
forall a. a -> Maybe a
Just a
x)
          | Bool
otherwise = Int -> (Int, Maybe a)
go (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        where (# a
x #) = IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
arr Int
p

-- | /O(n)/ Special 'findByte' for 'Word8' using @memchr(3)@
findByte :: Word8 -> Bytes -> (Int, Maybe Word8)
{-# INLINE findByte #-}
findByte :: Word8 -> Bytes -> (Int, Maybe Word8)
findByte Word8
w (PrimVector (PrimArray ByteArray#
ba#) Int
s Int
l) =
    case ByteArray# -> Int -> Word8 -> Int -> Int
c_memchr ByteArray#
ba# Int
s Word8
w Int
l of
        -1 -> (Int
l, Maybe Word8
forall a. Maybe a
Nothing)
        Int
r  -> (Int
r, Word8 -> Maybe Word8
forall a. a -> Maybe a
Just Word8
w)


-- | /O(n)/ Find the first index and element matching the predicate
-- in a vector from right to left, if there isn't one, return '(-1, Nothing)'.
findR :: Vec v a => (a -> Bool) -> v a -> (Int, Maybe a)
{-# INLINE [1] findR #-}
{-# RULES "findR/Bytes1" forall w. findR (w `eqWord8`) = findByteR w #-}
{-# RULES "findR/Bytes2" forall w. findR (`eqWord8` w) = findByteR w #-}
findR :: (a -> Bool) -> v a -> (Int, Maybe a)
findR a -> Bool
f (Vec IArray v a
arr Int
s Int
l) = Int -> (Int, Maybe a)
go (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    go :: Int -> (Int, Maybe a)
go !Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s     = (-Int
1, Maybe a
forall a. Maybe a
Nothing)
          | a -> Bool
f a
x       = let !i :: Int
i = Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s in (Int
i, a -> Maybe a
forall a. a -> Maybe a
Just a
x)
          | Bool
otherwise = Int -> (Int, Maybe a)
go (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
        where (# a
x #) = IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
arr Int
p

-- | /O(n)/ Special 'findR' for 'Bytes' with @memrchr@.
findByteR :: Word8 -> Bytes -> (Int, Maybe Word8)
{-# INLINE findByteR #-}
findByteR :: Word8 -> Bytes -> (Int, Maybe Word8)
findByteR Word8
w (PrimVector (PrimArray ByteArray#
ba#) Int
s Int
l) =
    case ByteArray# -> Int -> Word8 -> Int -> Int
c_memrchr ByteArray#
ba# Int
s Word8
w Int
l of
        -1 -> (-Int
1, Maybe Word8
forall a. Maybe a
Nothing)
        Int
r  -> (Int
r, Word8 -> Maybe Word8
forall a. a -> Maybe a
Just Word8
w)

-- | /O(n)/ 'filter', applied to a predicate and a vector,
-- returns a vector containing those elements that satisfy the
-- predicate.
filter :: forall v a. Vec v a => (a -> Bool) -> v a -> v a
{-# INLINE filter #-}
filter :: (a -> Bool) -> v a -> v a
filter a -> Bool
g (Vec IArray v a
arr Int
s Int
l)
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    = v a
forall (v :: * -> *) a. Vec v a => v a
empty
    | Bool
otherwise = Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
forall (v :: * -> *) a.
(Vec v a, HasCallStack) =>
Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
createN Int
l ((a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
forall s.
(a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
go a -> Bool
g Int
0 Int
s)
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: (a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
    go :: (a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
go a -> Bool
f !Int
i !Int
p !MArr (IArray v) s a
marr
        | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end    = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
i
        | a -> Bool
f a
x         = MArr (IArray v) s a -> Int -> a -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> a -> m ()
writeArr MArr (IArray v) s a
marr Int
i a
x ST s () -> ST s Int -> ST s Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
forall s.
(a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
go a -> Bool
f (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) MArr (IArray v) s a
marr
        | Bool
otherwise   = (a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
forall s.
(a -> Bool) -> Int -> Int -> MArr (IArray v) s a -> ST s Int
go a -> Bool
f Int
i (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) MArr (IArray v) s a
marr
        where (# a
x #) = IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
arr Int
p

-- | /O(n)/ The 'partition' function takes a predicate, a vector, returns
-- a pair of vector with elements which do and do not satisfy the
-- predicate, respectively; i.e.,
--
-- > partition p vs == (filter p vs, filter (not . p) vs)
partition :: forall v a. Vec v a => (a -> Bool) -> v a -> (v a, v a)
{-# INLINE partition #-}
partition :: (a -> Bool) -> v a -> (v a, v a)
partition a -> Bool
g (Vec IArray v a
arr Int
s Int
l)
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    = (v a
forall (v :: * -> *) a. Vec v a => v a
empty, v a
forall (v :: * -> *) a. Vec v a => v a
empty)
    | Bool
otherwise = Int
-> Int
-> (forall s.
    MArr (IArray v) s a -> MArr (IArray v) s a -> ST s (Int, Int))
-> (v a, v a)
forall (v :: * -> *) a (u :: * -> *) b.
(Vec v a, Vec u b, HasCallStack) =>
Int
-> Int
-> (forall s.
    MArr (IArray v) s a -> MArr (IArray u) s b -> ST s (Int, Int))
-> (v a, u b)
createN2 Int
l Int
l ((a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
forall s.
(a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
go a -> Bool
g Int
0 Int
0 Int
s)
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: (a -> Bool) -> Int -> Int -> Int -> MArr (IArray v) s a -> MArr (IArray v) s a -> ST s (Int, Int)
    go :: (a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
go a -> Bool
f !Int
i !Int
j !Int
p !MArr (IArray v) s a
mba0 !MArr (IArray v) s a
mba1
        | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end   = (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Int
j)
        | a -> Bool
f a
x        = MArr (IArray v) s a -> Int -> a -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> a -> m ()
writeArr MArr (IArray v) s a
mba0 Int
i a
x ST s () -> ST s (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
forall s.
(a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
go a -> Bool
f (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) MArr (IArray v) s a
mba0 MArr (IArray v) s a
mba1
        | Bool
otherwise  = MArr (IArray v) s a -> Int -> a -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> a -> m ()
writeArr MArr (IArray v) s a
mba1 Int
j a
x ST s () -> ST s (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
forall s.
(a -> Bool)
-> Int
-> Int
-> Int
-> MArr (IArray v) s a
-> MArr (IArray v) s a
-> ST s (Int, Int)
go a -> Bool
f Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) MArr (IArray v) s a
mba0 MArr (IArray v) s a
mba1
        where (# a
x #) = IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
arr Int
p

--------------------------------------------------------------------------------
-- Sub vector search

-- | /O(n+m)/ Find the offsets of all indices (possibly overlapping) of @needle@
-- within @haystack@ using KMP algorithm.
--
-- The KMP algorithm need pre-calculate a shift table in /O(m)/ time and space,
-- the worst case time complexity is /O(n+m)/. Partial apply this function to
-- reuse pre-calculated table between same needles.
--
-- Chunked input are support via partial match argument, if set we will return an
-- extra negative index in case of partial match at the end of input chunk, e.g.
--
-- > indicesOverlapping [ascii|ada|]  [ascii|adadad|] True == [0,2,-2]
--
-- Where @-2@ is the length of the partial match part @ad@ 's negation.
--
-- If an empty pattern is supplied, we will return every possible index of haystack,
-- e.g.
--
-- > indicesOverlapping "" "abc" = [0,1,2]
--
-- References:
--
--  * Knuth, Donald; Morris, James H.; Pratt, Vaughan: "Fast pattern matching in strings" (1977)
--  * <http://www-igm.univ-mlv.fr/~lecroq/string/node8.html#SECTION0080>
indicesOverlapping :: (Vec v a, Eq a)
        => v a -- ^ vector to search for (@needle@)
        -> v a -- ^ vector to search in (@haystack@)
        -> Bool -- ^ report partial match at the end of haystack
        -> [Int]
{-# INLINABLE[1] indicesOverlapping #-}
{-# RULES "indicesOverlapping/Bytes" indicesOverlapping = indicesOverlappingBytes #-}
indicesOverlapping :: v a -> v a -> Bool -> [Int]
indicesOverlapping needle :: v a
needle@(Vec IArray v a
narr Int
noff Int
nlen) = v a -> Bool -> [Int]
search
  where
    next :: PrimArray Int
next = v a -> PrimArray Int
forall (v :: * -> *) a. (Vec v a, Eq a) => v a -> PrimArray Int
kmpNextTable v a
needle
    search :: v a -> Bool -> [Int]
search haystack :: v a
haystack@(Vec IArray v a
harr Int
hoff Int
hlen) Bool
reportPartial
        | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int
0..Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
        | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = case IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
narr Int
0 of
                       (# a
x #) -> a -> v a -> [Int]
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> [Int]
elemIndices a
x v a
haystack
        | Bool
otherwise = Int -> Int -> [Int]
kmp Int
0 Int
0
      where
        kmp :: Int -> Int -> [Int]
kmp !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = if Bool
reportPartial Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 then [-Int
j] else []
                  | IArray v a
narr IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== IArray v a
harr IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                        let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                        in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                        then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j
                            in case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j' of
                                -1 -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                Int
j'' -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j''
                        else Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                  | Bool
otherwise = case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                    -1 -> Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j' -> Int -> Int -> [Int]
kmp Int
i Int
j'

-- | /O(n\/m)/ Find the offsets of all indices (possibly overlapping) of @needle@
-- within @haystack@ using KMP algorithm, combined with simplified sunday's
-- rule to obtain /O(n\/m)/ complexity in average use case.
--
-- The hybrid algorithm need pre-calculate a shift table in /O(m)/ time and space,
-- and a bad character bloom filter in /O(m)/ time and /O(1)/ space, the worst case
-- time complexity is /O(n+m)/.
--
-- References:
--
-- * Frantisek FranekChristopher G. JenningsWilliam F. Smyth A Simple Fast Hybrid Pattern-Matching Algorithm (2005)
-- * D. M. Sunday: A Very Fast Substring Search Algorithm. Communications of the ACM, 33, 8, 132-142 (1990)
-- * F. Lundh: The Fast Search Algorithm. <http://effbot.org/zone/stringlib.htm> (2006)
indicesOverlappingBytes :: Bytes -- ^ bytes to search for (@needle@)
                        -> Bytes -- ^ bytes to search in (@haystack@)
                        -> Bool -- ^ report partial match at the end of haystack
                        -> [Int]
{-# INLINABLE indicesOverlappingBytes #-}
indicesOverlappingBytes :: Bytes -> Bytes -> Bool -> [Int]
indicesOverlappingBytes needle :: Bytes
needle@(Vec IArray PrimVector Word8
narr Int
noff Int
nlen) | Word64 -> Int
forall a. Bits a => a -> Int
popCount Word64
bloom Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
48 = Bytes -> Bool -> [Int]
search
                                                    | Bool
otherwise = Bytes -> Bool -> [Int]
search'
  where
    next :: PrimArray Int
next = Bytes -> PrimArray Int
forall (v :: * -> *) a. (Vec v a, Eq a) => v a -> PrimArray Int
kmpNextTable Bytes
needle
    bloom :: Word64
bloom = Bytes -> Word64
sundayBloom Bytes
needle
    search :: Bytes -> Bool -> [Int]
search haystack :: Bytes
haystack@(Vec IArray PrimVector Word8
harr Int
hoff Int
hlen) Bool
reportPartial
        | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int
0..Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
        | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = case PrimArray Word8 -> Int -> (# Word8 #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' PrimArray Word8
IArray PrimVector Word8
narr Int
0 of
                       (# Word8
x #) -> Word8 -> Bytes -> [Int]
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> [Int]
elemIndices Word8
x Bytes
haystack
        | Bool
otherwise = Int -> Int -> [Int]
kmp Int
0 Int
0
      where
        kmp :: Int -> Int -> [Int]
kmp !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = if Bool
reportPartial Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 then [-Int
j] else []
                  | PrimArray Word8
IArray PrimVector Word8
narr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word8
IArray PrimVector Word8
harr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                        let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                        in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                        then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j
                            in case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j' of
                                -1 -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                Int
j'' -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j''
                        else Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                  | Bool
otherwise = case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                    -1 -> Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j' -> Int -> Int -> [Int]
kmp Int
i Int
j'
    search' :: Bytes -> Bool -> [Int]
search' haystack :: Bytes
haystack@(Vec IArray PrimVector Word8
harr Int
hoff Int
hlen) Bool
reportPartial
        | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int
0..Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
        | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Word8 -> Bytes -> [Int]
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> [Int]
elemIndices (PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr PrimArray Word8
IArray PrimVector Word8
narr Int
0) Bytes
haystack
        | Bool
otherwise = Int -> Int -> [Int]
sunday Int
0 Int
0
      where
        kmp :: Int -> Int -> [Int]
kmp !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = if Bool
reportPartial Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 then [-Int
j] else []
                  | PrimArray Word8
IArray PrimVector Word8
narr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word8
IArray PrimVector Word8
harr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                        let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                        in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                        then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j
                            in case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j' of
                                -1 -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                Int
j'' -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j''
                        else Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                  | Bool
otherwise = case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                    -1 -> Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j' -> Int -> Int -> [Int]
kmp Int
i Int
j'
        !hlen' :: Int
hlen' = Int
hlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nlen
        sunday :: Int -> Int -> [Int]
sunday !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen' = Int -> Int -> [Int]
kmp Int
i Int
j
                     | PrimArray Word8
IArray PrimVector Word8
narr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word8
IArray PrimVector Word8
harr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                            let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                            in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                            then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j
                                in case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j' of
                                    -1 -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j'' -> Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j''
                            else Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                     | Bool
otherwise = let !k :: Int
k = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j
                                       !afterNeedle :: Word8
afterNeedle = PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr PrimArray Word8
IArray PrimVector Word8
harr (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff)
                                   in if Word64 -> Word8 -> Bool
elemSundayBloom Word64
bloom Word8
afterNeedle
                                      -- fallback to KMP
                                      then case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                               -1 -> Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                               Int
j' -> Int -> Int -> [Int]
sunday Int
i Int
j'
                                      -- sunday's shifting
                                      else Int -> Int -> [Int]
sunday (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0

-- | /O(n+m)/ Find the offsets of all non-overlapping indices of @needle@
-- within @haystack@ using KMP algorithm.
--
-- If an empty pattern is supplied, we will return every possible index of haystack,
-- e.g.
--
-- > indicesOverlapping "" "abc" = [0,1,2]
indices :: (Vec v a, Eq a) => v a -> v a -> Bool -> [Int]
{-# INLINABLE[1] indices #-}
{-# RULES "indices/Bytes" indices = indicesBytes #-}
indices :: v a -> v a -> Bool -> [Int]
indices needle :: v a
needle@(Vec IArray v a
narr Int
noff Int
nlen) = v a -> Bool -> [Int]
search
  where
    next :: PrimArray Int
next = v a -> PrimArray Int
forall (v :: * -> *) a. (Vec v a, Eq a) => v a -> PrimArray Int
kmpNextTable v a
needle
    search :: v a -> Bool -> [Int]
search haystack :: v a
haystack@(Vec IArray v a
harr Int
hoff Int
hlen) Bool
reportPartial
        | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int
0..Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
        | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = case IArray v a -> Int -> (# a #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' IArray v a
narr Int
0 of
                       (# a
x #) -> a -> v a -> [Int]
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> [Int]
elemIndices a
x v a
haystack
        | Bool
otherwise = Int -> Int -> [Int]
kmp Int
0 Int
0
      where
        kmp :: Int -> Int -> [Int]
kmp !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = if Bool
reportPartial Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 then [-Int
j] else []
                  | IArray v a
narr IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== IArray v a
harr IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                        let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                        in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                            then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j in Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                            else Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                  | Bool
otherwise = case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                    -1 -> Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j' -> Int -> Int -> [Int]
kmp Int
i Int
j'

-- | /O(n\/m)/ Find the offsets of all non-overlapping indices of @needle@
-- within @haystack@ using KMP algorithm, combined with simplified sunday's
-- rule to obtain /O(m\/n)/ complexity in average use case.
indicesBytes :: Bytes -- ^ bytes to search for (@needle@)
             -> Bytes -- ^ bytes to search in (@haystack@)
             -> Bool -- ^ report partial match at the end of haystack
             -> [Int]
{-# INLINABLE indicesBytes #-}
indicesBytes :: Bytes -> Bytes -> Bool -> [Int]
indicesBytes needle :: Bytes
needle@(Vec IArray PrimVector Word8
narr Int
noff Int
nlen) | Word64 -> Int
forall a. Bits a => a -> Int
popCount Word64
bloom Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
48 = Bytes -> Bool -> [Int]
search
                                         | Bool
otherwise = Bytes -> Bool -> [Int]
search'
  where
    next :: PrimArray Int
next = Bytes -> PrimArray Int
forall (v :: * -> *) a. (Vec v a, Eq a) => v a -> PrimArray Int
kmpNextTable Bytes
needle
    bloom :: Word64
bloom = Bytes -> Word64
sundayBloom Bytes
needle
    search :: Bytes -> Bool -> [Int]
search haystack :: Bytes
haystack@(Vec IArray PrimVector Word8
harr Int
hoff Int
hlen) Bool
reportPartial
        | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int
0..Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
        | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = case PrimArray Word8 -> Int -> (# Word8 #)
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> (# a #)
indexArr' PrimArray Word8
IArray PrimVector Word8
narr Int
0 of
                       (# Word8
x #) -> Word8 -> Bytes -> [Int]
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> [Int]
elemIndices Word8
x Bytes
haystack
        | Bool
otherwise = Int -> Int -> [Int]
kmp Int
0 Int
0
      where
        kmp :: Int -> Int -> [Int]
kmp !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = if Bool
reportPartial Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 then [-Int
j] else []
                  | PrimArray Word8
IArray PrimVector Word8
narr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word8
IArray PrimVector Word8
harr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                        let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                        in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                            then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j in Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                            else Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                  | Bool
otherwise = case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                    -1 -> Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j' -> Int -> Int -> [Int]
kmp Int
i Int
j'
    search' :: Bytes -> Bool -> [Int]
search' haystack :: Bytes
haystack@(Vec IArray PrimVector Word8
harr Int
hoff Int
hlen) Bool
reportPartial
        | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = [Int
0..Int
hlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]
        | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Word8 -> Bytes -> [Int]
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> [Int]
elemIndices (PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr PrimArray Word8
IArray PrimVector Word8
narr Int
0) Bytes
haystack
        | Bool
otherwise = Int -> Int -> [Int]
sunday Int
0 Int
0
      where
        kmp :: Int -> Int -> [Int]
kmp !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen = if Bool
reportPartial Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0 then [-Int
j] else []
                  | PrimArray Word8
IArray PrimVector Word8
narr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word8
IArray PrimVector Word8
harr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                        let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                        in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                            then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j in Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                            else Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                  | Bool
otherwise = case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                    -1 -> Int -> Int -> [Int]
kmp (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                    Int
j' -> Int -> Int -> [Int]
kmp Int
i Int
j'
        !hlen' :: Int
hlen' = Int
hlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nlen
        sunday :: Int -> Int -> [Int]
sunday !Int
i !Int
j | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen' = Int -> Int -> [Int]
kmp Int
i Int
j
                     | PrimArray Word8
IArray PrimVector Word8
narr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
noff) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word8
IArray PrimVector Word8
harr PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff) =
                            let !j' :: Int
j' = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                            in if Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlen
                                then let !i' :: Int
i' = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j in Int
i' Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                else Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
                     | Bool
otherwise = let !k :: Int
k = Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j
                                       !afterNeedle :: Word8
afterNeedle = PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr PrimArray Word8
IArray PrimVector Word8
harr (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
hoff)
                                   in if Word64 -> Word8 -> Bool
elemSundayBloom Word64
bloom Word8
afterNeedle
                                      -- fallback to KMP
                                      then case PrimArray Int
next PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
`indexArr` Int
j of
                                               -1 -> Int -> Int -> [Int]
sunday (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0
                                               Int
j' -> Int -> Int -> [Int]
sunday Int
i Int
j'
                                      -- sunday's shifting
                                      else Int -> Int -> [Int]
sunday (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
0

-- | /O(m)/ Calculate the KMP next shift table.
--
-- The shifting rules is: when a mismatch between @needle[j]@ and @haystack[i]@
-- is found, check if @next[j] == -1@, if so next search continue with @needle[0]@
-- and @haystack[i+1]@, otherwise continue with @needle[next[j]]@ and @haystack[i]@.
kmpNextTable :: (Vec v a, Eq a) => v a -> PrimArray Int
{-# INLINE kmpNextTable #-}
kmpNextTable :: v a -> PrimArray Int
kmpNextTable (Vec IArray v a
arr Int
s Int
l) = (forall s. ST s (PrimArray Int)) -> PrimArray Int
forall a. (forall s. ST s a) -> a
runST (do
    MutablePrimArray s Int
ma <- Int -> ST s (MArr PrimArray s Int)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
    MArr PrimArray s Int -> Int -> Int -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> a -> m ()
writeArr MutablePrimArray s Int
MArr PrimArray s Int
ma Int
0 (-Int
1)
    let dec :: a -> Int -> ST s Int
dec !a
w !Int
j
            | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| a
w a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr IArray v a
arr (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j) = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> ST s Int) -> Int -> ST s Int
forall a b. (a -> b) -> a -> b
$! Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
            | Bool
otherwise = MArr PrimArray s Int -> Int -> ST s Int
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> m a
readArr MutablePrimArray s Int
MArr PrimArray s Int
ma Int
j ST s Int -> (Int -> ST s Int) -> ST s Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= a -> Int -> ST s Int
dec a
w
        go :: Int -> Int -> ST s (PrimArray Int)
go !Int
i !Int
j
            | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
l    = MArr PrimArray s Int -> ST s (PrimArray Int)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray s Int
MArr PrimArray s Int
ma
            | Bool
otherwise = do
                let !w :: a
w = IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr IArray v a
arr (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                Int
j' <- a -> Int -> ST s Int
dec a
w Int
j
                if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr IArray v a
arr (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j') a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== IArray v a -> Int -> a
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr IArray v a
arr (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
                    then MArr PrimArray s Int -> Int -> ST s Int
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> m a
readArr MutablePrimArray s Int
MArr PrimArray s Int
ma Int
j' ST s Int -> (Int -> ST s ()) -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MArr PrimArray s Int -> Int -> Int -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> a -> m ()
writeArr MutablePrimArray s Int
MArr PrimArray s Int
ma Int
i
                    else MArr PrimArray s Int -> Int -> Int -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> a -> m ()
writeArr MutablePrimArray s Int
MArr PrimArray s Int
ma Int
i Int
j'
                Int -> Int -> ST s (PrimArray Int)
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
    Int -> Int -> ST s (PrimArray Int)
go Int
1 (-Int
1))

-- | /O(m)/ Calculate a simple bloom filter for simplified sunday's rule.
--
-- The shifting rules is: when a mismatch between @needle[j]@ and @haystack[i]@
-- is found, check if @elemSundayBloom bloom haystack[i+n-j]@, where n is the
-- length of needle, if not then next search can be safely continued with
-- @haystack[i+n-j+1]@ and @needle[0]@, otherwise next searh should continue with
-- @haystack[i]@ and @needle[0]@, or fallback to other shifting rules such as KMP.
--
-- The algorithm is very simple: for a given 'Word8' @w@, we set the bloom's bit
-- at @unsafeShiftL 0x01 (w .&. 0x3f)@, so there're three false positives per bit.
-- This's particularly suitable for search UTF-8 bytes since the significant bits
-- of a beginning byte is usually the same.
sundayBloom :: Bytes -> Word64
{-# INLINE sundayBloom #-}
sundayBloom :: Bytes -> Word64
sundayBloom (Vec IArray PrimVector Word8
arr Int
s Int
l) = Word64 -> Int -> Word64
go Word64
0x00000000 Int
s
  where
    !end :: Int
end = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l
    go :: Word64 -> Int -> Word64
go !Word64
b !Int
i
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = Word64
b
        | Bool
otherwise =
            let !w :: Word8
w = PrimArray Word8 -> Int -> Word8
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
indexArr PrimArray Word8
IArray PrimVector Word8
arr Int
i
                !b' :: Word64
b' = Word64
b Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
0x00000001 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f))
            in Word64 -> Int -> Word64
go Word64
b' (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- | O(1) Test if a bloom filter contain a certain 'Word8'.
--
elemSundayBloom :: Word64 -> Word8 -> Bool
{-# INLINE elemSundayBloom #-}
elemSundayBloom :: Word64 -> Word8 -> Bool
elemSundayBloom Word64
b Word8
w = Word64
b Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. (Word64
0x01 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftL` (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)) Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word64
0