module Z.Data.Text.Extra (
cons, snoc
, uncons, unsnoc
, headMaybe, tailMayEmpty, lastMaybe, initMayEmpty
, head, tail, last, init
, inits, tails
, take, drop, takeR, dropR
, slice
, splitAt
, takeWhile, takeWhileR, dropWhile, dropWhileR, dropAround
, break, span
, breakR, spanR, breakOn, breakOnAll
, group, groupBy
, stripPrefix, stripSuffix
, split, splitWith, splitOn
, isPrefixOf, isSuffixOf, isInfixOf
, commonPrefix
, words, lines, unwords, unlines
, padLeft, padRight
, reverse
, intersperse
, intercalate
, intercalateElem
, transpose
) where
import Data.Primitive.PrimArray
import qualified Z.Data.Vector.Base as V
import qualified Z.Data.Vector.Extra as V
import qualified Z.Data.Vector.Search as V
import Data.Coerce
import qualified Data.List as List
import Z.Data.Text.Base
import Z.Data.Text.UTF8Codec
import Z.Data.Text.Search
import Control.Monad.ST
import Data.Char
import Data.Word
import Prelude hiding (concat, concatMap,
elem, notElem, null, length, map,
foldl, foldl1, foldr, foldr1,
maximum, minimum, product, sum,
all, any, replicate, traverse,
head, tail, init, last,
take, drop, splitAt,
takeWhile, dropWhile,
break, span, reverse,
words, lines, unwords, unlines)
cons :: Char -> Text -> Text
{-# INLINABLE cons #-}
cons c (Text (V.PrimVector ba s l)) = Text (V.createN (4 + l) (\ mba -> do
i <- encodeChar mba 0 c
copyPrimArray mba i ba s l
return $! i + l))
snoc :: Text -> Char -> Text
{-# INLINABLE snoc #-}
snoc (Text (V.PrimVector ba s l)) c = Text (V.createN (4 + l) (\ mba -> do
copyPrimArray mba 0 ba s l
encodeChar mba l c))
uncons :: Text -> Maybe (Char, Text)
{-# INLINE uncons #-}
uncons (Text (V.PrimVector ba s l))
| l == 0 = Nothing
| otherwise =
let (# c, i #) = decodeChar ba s
in Just (c, Text (V.PrimVector ba (s+i) (l-i)))
unsnoc :: Text -> Maybe (Text, Char)
{-# INLINE unsnoc #-}
unsnoc (Text (V.PrimVector ba s l))
| l == 0 = Nothing
| otherwise =
let (# c, i #) = decodeCharReverse ba (s + l - 1)
in Just (Text (V.PrimVector ba s (l-i)), c)
head :: Text -> Char
{-# INLINABLE head #-}
head t = case uncons t of { Just (c, _) -> c; _ -> errorEmptyText }
tail :: Text -> Text
{-# INLINABLE tail #-}
tail t = case uncons t of { Nothing -> errorEmptyText; Just (_, t') -> t' }
last :: Text -> Char
{-# INLINABLE last #-}
last t = case unsnoc t of { Just (_, c) -> c; _ -> errorEmptyText }
init :: Text -> Text
{-# INLINABLE init #-}
init t = case unsnoc t of { Just (t', _) -> t'; _ -> errorEmptyText }
headMaybe :: Text -> Maybe Char
{-# INLINABLE headMaybe #-}
headMaybe t = case uncons t of { Just (c, _) -> Just c; _ -> Nothing }
tailMayEmpty :: Text -> Text
{-# INLINABLE tailMayEmpty #-}
tailMayEmpty t = case uncons t of { Nothing -> empty; Just (_, t') -> t' }
lastMaybe :: Text -> Maybe Char
{-# INLINABLE lastMaybe #-}
lastMaybe t = case unsnoc t of { Just (_, c) -> Just c; _ -> Nothing }
initMayEmpty :: Text -> Text
{-# INLINABLE initMayEmpty #-}
initMayEmpty t = case unsnoc t of { Just (t', _) -> t'; _ -> empty }
inits :: Text -> [Text]
{-# INLINABLE inits #-}
inits t0 = go t0 [t0]
where go t acc = case unsnoc t of Just (t', _) -> go t' (t':acc)
Nothing -> acc
tails :: Text -> [Text]
{-# INLINABLE tails #-}
tails t = t : case uncons t of Just (_, t') -> tails t'
Nothing -> []
take :: Int -> Text -> Text
{-# INLINABLE take #-}
take n t@(Text (V.PrimVector ba s _))
| n <= 0 = empty
| otherwise = case charByteIndex t n of i -> Text (V.PrimVector ba s (i-s))
drop :: Int -> Text -> Text
{-# INLINABLE drop #-}
drop n t@(Text (V.PrimVector ba s l))
| n <= 0 = t
| otherwise = case charByteIndex t n of i -> Text (V.PrimVector ba i (l+s-i))
takeR :: Int -> Text -> Text
{-# INLINABLE takeR #-}
takeR n t@(Text (V.PrimVector ba s l))
| n <= 0 = empty
| otherwise = case charByteIndexR t n of i -> Text (V.PrimVector ba (i+1) (s+l-1-i))
dropR :: Int -> Text -> Text
{-# INLINABLE dropR #-}
dropR n t@(Text (V.PrimVector ba s _))
| n <= 0 = t
| otherwise = case charByteIndexR t n of i -> Text (V.PrimVector ba s (i-s+1))
slice :: Int -> Int -> Text -> Text
{-# INLINE slice #-}
slice x y t | y <= 0 = empty
| end <= 0 = empty
| x <= 0 = take end t
| otherwise = take y (drop x t)
where
!end = x + y
splitAt :: Int -> Text -> (Text, Text)
{-# INLINE splitAt #-}
splitAt n t@(Text (V.PrimVector ba s l))
| n <= 0 = (empty, t)
| otherwise = case charByteIndex t n of
i -> (Text (V.PrimVector ba s (i-s)), Text (V.PrimVector ba i (s+l-i)))
takeWhile :: (Char -> Bool) -> Text -> Text
{-# INLINE takeWhile #-}
takeWhile f t@(Text (V.PrimVector arr s _)) =
let !i = findIndex (not . f) t in Text (V.PrimVector arr s (i-s))
takeWhileR :: (Char -> Bool) -> Text -> Text
{-# INLINE takeWhileR #-}
takeWhileR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR (not . f) t in Text (V.PrimVector arr (i+1) (s+l-i-1))
dropWhile :: (Char -> Bool) -> Text -> Text
{-# INLINE dropWhile #-}
dropWhile f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex (not . f) t in Text (V.PrimVector arr i (s+l-i))
dropWhileR :: (Char -> Bool) -> Text -> Text
{-# INLINE dropWhileR #-}
dropWhileR f t@(Text (V.PrimVector arr s _)) =
let !i = findIndexR (not . f) t in Text (V.PrimVector arr s (i-s+1))
dropAround :: (Char -> Bool) -> Text -> Text
{-# INLINE dropAround #-}
dropAround f = dropWhileR f . dropWhile f
break :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE break #-}
break f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex f t
in (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
span :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE span #-}
span f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex (not . f) t
in (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
breakR :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE breakR #-}
breakR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR f t
in (Text (V.PrimVector arr s (i-s+1)), Text (V.PrimVector arr (i+1) (s+l-i-1)))
spanR :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE spanR #-}
spanR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR (not . f) t
in (Text (V.PrimVector arr s (i-s+1)), Text (V.PrimVector arr (i+1) (s+l-i-1)))
breakOn :: Text -> Text -> (Text, Text)
{-# INLINE breakOn #-}
breakOn (Text needle) (Text haystack) =
case V.breakOn needle haystack of (v1, v2) -> (Text v1, Text v2)
breakOnAll :: Text
-> Text
-> [(Text, Text)]
{-# INLINE breakOnAll #-}
breakOnAll (Text needle) (Text haystack@(V.PrimVector arr s l)) =
List.map breaker (V.indices needle haystack False)
where
breaker i = (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
group :: Text -> [Text]
{-# INLINE group #-}
group = groupBy (==)
groupBy :: (Char -> Char -> Bool) -> Text -> [Text]
{-# INLINE groupBy #-}
groupBy f (Text (V.PrimVector arr s l))
| l == 0 = []
| otherwise = Text (V.PrimVector arr s (s'-s)) : groupBy f (Text (V.PrimVector arr s' (l+s-s')))
where
(# c0, s0 #) = decodeChar arr s
end = s + l
s' = go arr (s+s0)
go arr' !i
| i >= end = i
| otherwise = let (# c1, s1 #) = decodeChar arr' i
in if f c0 c1 then go arr' (i+s1) else i
stripPrefix :: Text -> Text -> Maybe Text
{-# INLINE stripPrefix #-}
stripPrefix = coerce (V.stripPrefix @V.PrimVector @Word8)
stripSuffix :: Text -> Text -> Maybe Text
{-# INLINE stripSuffix #-}
stripSuffix = coerce (V.stripSuffix @V.PrimVector @Word8)
split :: Char -> Text -> [Text]
{-# INLINE split #-}
split x = splitWith (==x)
splitWith :: (Char -> Bool) -> Text -> [Text]
{-# INLINE splitWith #-}
splitWith f (Text (V.PrimVector arr s l)) = go s s
where
!end = s + l
go !p !q | q >= end = let !v = V.PrimVector arr p (q-p) in [Text v]
| f c = let !v = V.PrimVector arr p (q-p) in Text v:go (q+n) (q+n)
| otherwise = go p (q+n)
where (# c, n #) = decodeChar arr q
splitOn :: Text -> Text -> [Text]
{-# INLINE splitOn #-}
splitOn = coerce (V.splitOn @V.PrimVector @Word8)
isPrefixOf :: Text -> Text -> Bool
{-# INLINE isPrefixOf #-}
isPrefixOf = coerce (V.isPrefixOf @V.PrimVector @Word8)
isSuffixOf :: Text -> Text -> Bool
{-# INLINE isSuffixOf #-}
isSuffixOf = coerce (V.isSuffixOf @V.PrimVector @Word8)
isInfixOf :: Text -> Text -> Bool
{-# INLINE isInfixOf #-}
isInfixOf = coerce (V.isInfixOf @V.PrimVector @Word8)
commonPrefix :: Text -> Text -> (Text, Text, Text)
{-# INLINE commonPrefix #-}
commonPrefix = coerce (V.commonPrefix @V.PrimVector @Word8)
words :: Text -> [Text]
{-# INLINE words #-}
words (Text (V.PrimVector arr s l)) = go s s
where
!end = s + l
go !s' !i | i >= end =
if s' == end
then []
else let !v = V.PrimVector arr s' (end-s') in [Text v]
| otherwise =
let (# c, n #) = decodeChar arr i
in if isSpace c
then if s' == i
then go (i+n) (i+n)
else let !v = V.PrimVector arr s' (i-s') in Text v : go (i+n) (i+n)
else go s' (i+n)
lines :: Text -> [Text]
{-# INLINE lines #-}
lines = coerce V.lines
unwords :: [Text] -> Text
{-# INLINE unwords #-}
unwords = coerce V.unwords
unlines :: [Text] -> Text
{-# INLINE unlines #-}
unlines = coerce V.unlines
padLeft :: Int -> Char -> Text -> Text
{-# INLINE padLeft #-}
padLeft n c t@(Text (V.PrimVector arr s l))
| n <= tsiz = t
| otherwise =
let psiz = (n-tsiz)*csiz
siz = psiz + l
in Text (V.create siz (\ marr -> do
_ <- encodeChar marr 0 c
go marr csiz psiz
copyPrimArray marr (siz-l) arr s l))
where
tsiz = length t
csiz = encodeCharLength c
go :: forall s. MutablePrimArray s Word8 -> Int -> Int -> ST s ()
go marr s' psiz
| s' >= psiz = return ()
| otherwise = copyChar' csiz marr s' marr (s'-csiz) >> go marr (s'+csiz) psiz
padRight :: Int -> Char -> Text -> Text
{-# INLINE padRight #-}
padRight n c t@(Text (V.PrimVector arr s l))
| n <= tsiz = t
| otherwise =
let psiz = (n-tsiz)*csiz
siz = psiz + l
in Text (V.create siz (\ marr -> do
copyPrimArray marr 0 arr s l
_ <- encodeChar marr l c
go marr (l+csiz) siz))
where
tsiz = length t
csiz = encodeCharLength c
go :: forall s. MutablePrimArray s Word8 -> Int -> Int -> ST s ()
go marr s' siz
| s' >= siz = return ()
| otherwise = copyChar' csiz marr s' marr (s'-csiz) >> go marr (s'+csiz) siz
intersperse :: Char -> Text -> Text
{-# INLINE intersperse #-}
intersperse c = \ t@(Text (V.PrimVector ba s l)) ->
let tlen = length t
in if length t < 2
then t
else (runST (do
mbaC <- newPrimArray 4
clen <- encodeChar mbaC 0 c
shrinkMutablePrimArray mbaC clen
baC <- unsafeFreezePrimArray mbaC
let e = decodeCharLenReverse ba (s+l-1)
return . Text $ V.create (l + (tlen-1) * clen) (go baC ba s 0 (s+l-e))
))
where
go :: PrimArray Word8
-> PrimArray Word8
-> Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s ()
go !baC !ba !i !j !end !mba
| i >= end = do
let l = decodeCharLen ba i
copyChar l mba j ba i
| otherwise = do
let l = decodeCharLen ba i
copyChar l mba j ba i
let i' = i + l
j' = j + l
let clen = sizeofPrimArray baC
copyChar clen mba j' baC 0
go baC ba i' (j'+clen) end mba
reverse :: Text -> Text
{-# INLINE reverse #-}
reverse = \ (Text (V.PrimVector ba s l)) -> Text $ V.create l (go ba s l (s+l))
where
go :: PrimArray Word8 -> Int -> Int -> Int -> MutablePrimArray s Word8 -> ST s ()
go !ba !i !j !end !mba
| i >= end = return ()
| otherwise = do
let l = decodeCharLen ba i
j' = j - l
copyChar l mba j' ba i
go ba (i+l) j' end mba
intercalate :: Text -> [Text] -> Text
{-# INLINE intercalate #-}
intercalate s = concat . List.intersperse s
intercalateElem :: Char -> [Text] -> Text
{-# INLINE intercalateElem #-}
intercalateElem c = concat . List.intersperse (singleton c)
transpose :: [Text] -> [Text]
{-# INLINE transpose #-}
transpose ts = List.map pack . List.transpose . List.map unpack $ ts