{-| Module : Z.Data.Text.Search Description : Searching text Copyright : (c) Dong Han, 2017-2018 License : BSD Maintainer : winterland1989@gmail.com Stability : experimental Portability : non-portable -} module Z.Data.Text.Search ( -- * element-wise search elem, notElem -- * Searching by equality , findIndices , find, findR , findIndex , findIndexR , filter, partition ) where import Control.Monad.ST import Data.Word import Prelude hiding (elem, notElem, filter) import Z.Data.Array import Z.Data.Text.Base import Z.Data.Text.UTF8Codec import qualified Z.Data.Vector.Base as V findIndices :: (Char -> Bool) -> Text -> [Int] {-# INLINE findIndices #-} findIndices f (Text (V.PrimVector arr s l)) = go 0 s where !end = s + l go !i !p | p >= end = [] | f x = i : go (i+1) (p+off) | otherwise = go (i+1) (p+off) where (# x, off #) = decodeChar arr p -- | /O(n)/ find the first char matching the predicate in a text -- from left to right, if there isn't one, return the index point to the end of the byte slice. find :: (Char -> Bool) -> Text -> (Int, Int, Maybe Char) -- ^ (char index, byte index in slice, matching char) {-# INLINE find #-} find f (Text (V.PrimVector arr s l)) = go 0 s where !end = s + l go !i !j | j >= end = (i, j, Nothing) | otherwise = let (# x, off #) = decodeChar arr j in if f x then (i, j, Just x) else go (i+1) (j+off) -- | /O(n)/ find the first char matching the predicate in a text -- from right to left, if there isn't one, return the index point to the start of the byte slice. -- findR :: (Char -> Bool) -> Text -> (Int, Int, Maybe Char) -- ^ (char index(counting backwards), byte index in slice, matching char) {-# INLINE findR #-} findR f (Text (V.PrimVector arr s l)) = go 0 (s+l-1) where go !i !j | j < s = (i, j, Nothing) | otherwise = let (# x, off #) = decodeCharReverse arr j in if f x then (i, j, Just x) else go (i+1) (j-off) -------------------------------------------------------------------------------- -- | /O(n)/ find the index of the byte slice. findIndex :: (Char -> Bool) -> Text -> Int {-# INLINE findIndex #-} findIndex f t = case find f t of (_, i, _) -> i -- | /O(n)/ find the index of the byte slice in reverse order. findIndexR :: (Char -> Bool) -> Text -> Int {-# INLINE findIndexR #-} findIndexR f t = case findR f t of (_, i, _) -> i -- | /O(n)/ 'filter', applied to a predicate and a text, -- returns a text containing those chars that satisfy the -- predicate. filter :: (Char -> Bool) -> Text -> Text {-# INLINE filter #-} filter f (Text (V.PrimVector arr s l)) = Text (V.createN l (go s 0)) where !end = s + l go :: Int -> Int -> MutablePrimArray s Word8 -> ST s Int go !i !j marr | i >= end = return j | otherwise = let (# x, off #) = decodeChar arr i in if f x then do copyChar off marr j arr i go (i+off) (j+off) marr else go (i+off) j marr -- | /O(n)/ The 'partition' function takes a predicate, a text, returns -- a pair of text with codepoints which do and do not satisfy the -- predicate, respectively; i.e., -- -- > partition p txt == (filter p txt, filter (not . p) txt) partition :: (Char -> Bool) -> Text -> (Text, Text) {-# INLINE partition #-} partition f (Text (V.PrimVector arr s l)) | l == 0 = (empty, empty) | otherwise = let !(bs1, bs2) = V.createN2 l l (go 0 0 s) in (Text bs1, Text bs2) where !end = s + l go :: Int -> Int -> Int -> MutablePrimArray s Word8 -> MutablePrimArray s Word8 -> ST s (Int, Int) go !i !j !p !mba0 !mba1 | p >= end = return (i, j) | otherwise = let (# x, off #) = decodeChar arr p in if f x then copyChar off mba0 i arr p >> go (i+off) j (p+off) mba0 mba1 else copyChar off mba1 j arr p >> go i (j+off) (p+off) mba0 mba1 -------------------------------------------------------------------------------- -- Searching by equality -- | /O(n)/ 'elem' test if given char is in given text. elem :: Char -> Text -> Bool {-# INLINE elem #-} elem x t = case find (x==) t of (_,_,Nothing) -> False _ -> True -- | /O(n)/ @not . elem@ notElem :: Char -> Text -> Bool {-# INLINE notElem #-} notElem x = not . elem x