{-|
Module      : Z.Data.Text.Search
Description : Searching text
Copyright   : (c) Dong Han, 2017-2018
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

-}

module Z.Data.Text.Search (
  -- * element-wise search
    elem, notElem
  -- * Searching by equality
  , findIndices
  , findBytesIndices
  , find, findR
  , findIndex
  , findIndexR
  , findBytesIndex
  , findBytesIndexR
  , filter, partition
  ) where


import           Control.Monad.ST
import           Data.Word
import           Prelude                 hiding (elem, notElem, filter)
import           Z.Data.Array
import           Z.Data.Text.Base
import           Z.Data.Text.UTF8Codec
import qualified Z.Data.Vector.Base    as V

-- | find all char index matching the predicate.
findIndices :: (Char -> Bool) -> Text -> [Int]
{-# INLINE findIndices #-}
findIndices :: (Char -> Bool) -> Text -> [Int]
findIndices Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int -> [Int]
go Int
0 Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> [Int]
go !Int
i !Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = []
             | Char -> Bool
f Char
x       = Int
i Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)
             | Bool
otherwise = Int -> Int -> [Int]
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)
        where (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
p

-- | find all char's byte index matching the predicate.
findBytesIndices :: (Char -> Bool) -> Text -> [Int]
{-# INLINE findBytesIndices #-}
findBytesIndices :: (Char -> Bool) -> Text -> [Int]
findBytesIndices Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> [Int]
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> [Int]
go !Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = []
          | Char -> Bool
f Char
x       = (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s) Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
go (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)
          | Bool
otherwise = Int -> [Int]
go (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)
        where (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
p

-- | /O(n)/ find the first char matching the predicate in a text
-- from left to right, if there isn't one, return the text length.
find :: (Char -> Bool)
     -> Text
     -> (Int, Maybe Char)  -- ^ (char index, matching char)
{-# INLINE find #-}
find :: (Char -> Bool) -> Text -> (Int, Maybe Char)
find Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int -> (Int, Maybe Char)
go Int
0 Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> (Int, Maybe Char)
go !Int
i !Int
j | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = (Int
i, Maybe Char
forall a. Maybe a
Nothing)
             | Bool
otherwise =
                let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
j
                in if Char -> Bool
f Char
x
                    then (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x)
                    else Int -> Int -> (Int, Maybe Char)
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)

-- | /O(n)/ find the first char matching the predicate in a text
-- from right to left.
--
findR :: (Char -> Bool)
      -> Text
      -> (Int, Maybe Char)  -- ^ (char index(counting backwards), matching char)
{-# INLINE findR #-}
findR :: (Char -> Bool) -> Text -> (Int, Maybe Char)
findR Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int -> (Int, Maybe Char)
go Int
0 (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    go :: Int -> Int -> (Int, Maybe Char)
go !Int
i !Int
j | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s     = (Int
i, Maybe Char
forall a. Maybe a
Nothing)
             | Bool
otherwise =
                let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
arr Int
j
                in if Char -> Bool
f Char
x
                    then (Int
i, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x)
                    else Int -> Int -> (Int, Maybe Char)
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
off)

--------------------------------------------------------------------------------

-- | /O(n)/ find the char index.
findIndex :: (Char -> Bool) -> Text -> Int
{-# INLINE findIndex #-}
findIndex :: (Char -> Bool) -> Text -> Int
findIndex Char -> Bool
f Text
t = case (Char -> Bool) -> Text -> (Int, Maybe Char)
find Char -> Bool
f Text
t of (Int
i, Maybe Char
_) -> Int
i

-- | /O(n)/ find the char index in reverse order.
findIndexR ::  (Char -> Bool) -> Text -> Int
{-# INLINE findIndexR #-}
findIndexR :: (Char -> Bool) -> Text -> Int
findIndexR Char -> Bool
f Text
t = case (Char -> Bool) -> Text -> (Int, Maybe Char)
findR Char -> Bool
f Text
t of (Int
i, Maybe Char
_) -> Int
i

-- | /O(n)/ find the char's byte slice index.
findBytesIndex :: (Char -> Bool) -> Text -> Int
{-# INLINE findBytesIndex #-}
findBytesIndex :: (Char -> Bool) -> Text -> Int
findBytesIndex Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int
go !Int
j | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s
          | Bool
otherwise =
              let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
j
              in if Char -> Bool
f Char
x
                  then Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s
                  else Int -> Int
go (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)

-- | /O(n)/ find the char's byte slice index in reverse order(pointing to the right char's first byte).
findBytesIndexR ::  (Char -> Bool) -> Text -> Int
{-# INLINE findBytesIndexR #-}
findBytesIndexR :: (Char -> Bool) -> Text -> Int
findBytesIndexR Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int
go (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    go :: Int -> Int
go !Int
j | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s     = Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
          | Bool
otherwise =
              let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
arr Int
j
              in if Char -> Bool
f Char
x
                  then Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
                  else Int -> Int
go (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
off)

-- | /O(n)/ 'filter', applied to a predicate and a text,
-- returns a text containing those chars that satisfy the
-- predicate.
filter :: (Char -> Bool) -> Text -> Text
{-# INLINE filter #-}
filter :: (Char -> Bool) -> Text -> Text
filter Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = PrimVector Word8 -> Text
Text (Int
-> (forall s. MArr (IArray PrimVector) s Word8 -> ST s Int)
-> PrimVector Word8
forall (v :: * -> *) a.
(Vec v a, HasCallStack) =>
Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
V.createN Int
l (Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go Int
s Int
0))
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s Int
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go !Int
i !Int
j MutablePrimArray s Word8
marr
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
j
        | Bool
otherwise =
            let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i
            in if Char -> Bool
f Char
x
                then do
                    Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
copyChar Int
off MutablePrimArray s Word8
marr Int
j PrimArray Word8
arr Int
i
                    Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) MutablePrimArray s Word8
marr
                else Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) Int
j MutablePrimArray s Word8
marr

-- | /O(n)/ The 'partition' function takes a predicate, a text, returns
-- a pair of text with codepoints which do and do not satisfy the
-- predicate, respectively; i.e.,
--
-- > partition p txt == (filter p txt, filter (not . p) txt)
partition :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE partition #-}
partition :: (Char -> Bool) -> Text -> (Text, Text)
partition Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    = (Text
empty, Text
empty)
    | Bool
otherwise = let !(PrimVector Word8
bs1, PrimVector Word8
bs2) = Int
-> Int
-> (forall s.
    MArr (IArray PrimVector) s Word8
    -> MArr (IArray PrimVector) s Word8 -> ST s (Int, Int))
-> (PrimVector Word8, PrimVector Word8)
forall (v :: * -> *) a (u :: * -> *) b.
(Vec v a, Vec u b, HasCallStack) =>
Int
-> Int
-> (forall s.
    MArr (IArray v) s a -> MArr (IArray u) s b -> ST s (Int, Int))
-> (v a, u b)
V.createN2 Int
l Int
l (Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go Int
0 Int
0 Int
s) in (PrimVector Word8 -> Text
Text PrimVector Word8
bs1, PrimVector Word8 -> Text
Text PrimVector Word8
bs2)
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> Int -> MutablePrimArray s Word8 -> MutablePrimArray s Word8 -> ST s (Int, Int)
    go :: Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go !Int
i !Int
j !Int
p !MutablePrimArray s Word8
mba0 !MutablePrimArray s Word8
mba1
        | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end   = (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Int
j)
        | Bool
otherwise =
            let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
p
            in if Char -> Bool
f Char
x
                then Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
copyChar Int
off MutablePrimArray s Word8
mba0 Int
i PrimArray Word8
arr Int
p ST s () -> ST s (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) Int
j (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) MutablePrimArray s Word8
mba0 MutablePrimArray s Word8
mba1
                else Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
copyChar Int
off MutablePrimArray s Word8
mba1 Int
j PrimArray Word8
arr Int
p ST s () -> ST s (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) MutablePrimArray s Word8
mba0 MutablePrimArray s Word8
mba1

--------------------------------------------------------------------------------
-- Searching by equality

-- | /O(n)/ 'elem' test if given char is in given text.
elem :: Char -> Text -> Bool
{-# INLINE elem #-}
elem :: Char -> Text -> Bool
elem Char
x Text
t = case (Char -> Bool) -> Text -> (Int, Maybe Char)
find (Char
xChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) Text
t of (Int
_,Maybe Char
Nothing) -> Bool
False
                                (Int, Maybe Char)
_           -> Bool
True

-- | /O(n)/ @not . elem@
notElem ::  Char -> Text -> Bool
{-# INLINE notElem #-}
notElem :: Char -> Text -> Bool
notElem Char
x = Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text -> Bool
elem Char
x