{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE TypeApplications #-}
module Std.Data.Text.Extra (
cons, snoc
, uncons, unsnoc
, headMaybe, tailMayEmpty
, lastMaybe, initMayEmpty
, inits, tails
, take, drop, takeR, dropR
, slice
, splitAt
, takeWhile, takeWhileR, dropWhile, dropWhileR, dropAround
, break, span
, breakR, spanR, breakOn
, breakOnAll, breakOnAllOverlapping
, group, groupBy
, stripPrefix, stripSuffix
, split, splitWith, splitOn
, isPrefixOf, isSuffixOf, isInfixOf
, commonPrefix
, words, lines, unwords, unlines
, padLeft, padRight
, reverse
, intersperse
, intercalate
, intercalateElem
, transpose
) where
import Data.Primitive.PrimArray
import qualified Std.Data.Vector.Base as V
import qualified Std.Data.Vector.Extra as V
import qualified Std.Data.Vector.Search as V
import Data.Coerce
import qualified Data.List as List
import Std.Data.Text.Base
import Std.Data.Text.UTF8Codec
import Std.Data.Text.Search
import Control.Monad.ST
import GHC.Stack
import Data.Char
import Data.Word
import Prelude hiding (concat, concatMap,
elem, notElem, null, length, map,
foldl, foldl1, foldr, foldr1,
maximum, minimum, product, sum,
all, any, replicate, traverse,
take, drop, splitAt,
takeWhile, dropWhile,
break, span, reverse,
words, lines, unwords, unlines)
cons :: Char -> Text -> Text
{-# INLINABLE cons #-}
cons c (Text (V.PrimVector ba s l)) = Text (V.createN (4 + l) (\ mba -> do
i <- encodeChar mba 0 c
copyPrimArray mba i ba s l
return $! i + l))
snoc :: Text -> Char -> Text
{-# INLINABLE snoc #-}
snoc (Text (V.PrimVector ba s l)) c = Text (V.createN (4 + l) (\ mba -> do
copyPrimArray mba 0 ba s l
encodeChar mba l c))
uncons :: Text -> Maybe (Char, Text)
{-# INLINE uncons #-}
uncons (Text (V.PrimVector ba s l))
| l == 0 = Nothing
| otherwise =
let (# c, i #) = decodeChar ba s
in Just (c, Text (V.PrimVector ba (s+i) (l-i)))
unsnoc :: Text -> Maybe (Text, Char)
{-# INLINE unsnoc #-}
unsnoc (Text (V.PrimVector ba s l))
| l == 0 = Nothing
| otherwise =
let (# c, i #) = decodeCharReverse ba (s + l - 1)
in Just (Text (V.PrimVector ba s (l-i)), c)
headMaybe :: Text -> Maybe Char
{-# INLINABLE headMaybe #-}
headMaybe t = case uncons t of { Just (c, _) -> Just c; _ -> Nothing }
tailMayEmpty :: Text -> Text
{-# INLINABLE tailMayEmpty #-}
tailMayEmpty t = case uncons t of { Nothing -> empty; Just (_, t) -> t }
lastMaybe :: Text -> Maybe Char
{-# INLINABLE lastMaybe #-}
lastMaybe t = case unsnoc t of { Just (_, c) -> Just c; _ -> Nothing }
initMayEmpty :: Text -> Text
{-# INLINABLE initMayEmpty #-}
initMayEmpty t = case unsnoc t of { Just (t, _) -> t; _ -> empty }
inits :: Text -> [Text]
{-# INLINABLE inits #-}
inits t = go t [t]
where go t acc = case unsnoc t of Just (t', _) -> go t' (t':acc)
Nothing -> acc
tails :: Text -> [Text]
{-# INLINABLE tails #-}
tails t = t : case uncons t of Just (_, t') -> tails t'
Nothing -> []
take :: Int -> Text -> Text
{-# INLINABLE take #-}
take n t@(Text (V.PrimVector ba s l))
| n <= 0 = empty
| otherwise = case charByteIndex t n of i -> Text (V.PrimVector ba s (i-s))
drop :: Int -> Text -> Text
{-# INLINABLE drop #-}
drop n t@(Text (V.PrimVector ba s l))
| n <= 0 = t
| otherwise = case charByteIndex t n of i -> Text (V.PrimVector ba i (l+s-i))
takeR :: Int -> Text -> Text
{-# INLINABLE takeR #-}
takeR n t@(Text (V.PrimVector ba s l))
| n <= 0 = empty
| otherwise = case charByteIndexR t n of i -> Text (V.PrimVector ba (i+1) (s+l-1-i))
dropR :: Int -> Text -> Text
{-# INLINABLE dropR #-}
dropR n t@(Text (V.PrimVector ba s l))
| n <= 0 = t
| otherwise = case charByteIndexR t n of i -> Text (V.PrimVector ba s (i-s+1))
slice :: Int -> Int -> Text -> Text
{-# INLINE slice #-}
slice x y t | y <= 0 = empty
| end <= 0 = empty
| x <= 0 = take end t
| otherwise = take y (drop x t)
where
!end = x + y
splitAt :: Int -> Text -> (Text, Text)
{-# INLINE splitAt #-}
splitAt n t@(Text (V.PrimVector ba s l))
| n <= 0 = (empty, t)
| otherwise = case charByteIndex t n of
i -> (Text (V.PrimVector ba s (i-s)), Text (V.PrimVector ba i (s+l-i)))
takeWhile :: (Char -> Bool) -> Text -> Text
{-# INLINE takeWhile #-}
takeWhile f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex (not . f) t in Text (V.PrimVector arr s (i-s))
takeWhileR :: (Char -> Bool) -> Text -> Text
{-# INLINE takeWhileR #-}
takeWhileR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR (not . f) t in Text (V.PrimVector arr (i+1) (s+l-i-1))
dropWhile :: (Char -> Bool) -> Text -> Text
{-# INLINE dropWhile #-}
dropWhile f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex (not . f) t in Text (V.PrimVector arr i (s+l-i))
dropWhileR :: (Char -> Bool) -> Text -> Text
{-# INLINE dropWhileR #-}
dropWhileR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR (not . f) t in Text (V.PrimVector arr s (i-s+1))
dropAround :: (Char -> Bool) -> Text -> Text
{-# INLINE dropAround #-}
dropAround f = dropWhileR f . dropWhile f
break :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE break #-}
break f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex f t
in (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
span :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE span #-}
span f t@(Text (V.PrimVector arr s l)) =
let !i = findIndex (not . f) t
in (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
breakR :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE breakR #-}
breakR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR f t
in (Text (V.PrimVector arr s (i-s+1)), Text (V.PrimVector arr (i+1) (s+l-i-1)))
spanR :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE spanR #-}
spanR f t@(Text (V.PrimVector arr s l)) =
let !i = findIndexR (not . f) t
in (Text (V.PrimVector arr s (i-s+1)), Text (V.PrimVector arr (i+1) (s+l-i-1)))
breakOn :: Text -> Text -> (Text, Text)
{-# INLINE breakOn #-}
breakOn (Text needle) (Text haystack) =
case V.breakOn needle haystack of (v1, v2) -> (Text v1, Text v2)
breakOnAll :: Text
-> Text
-> [(Text, Text)]
{-# INLINE breakOnAll #-}
breakOnAll (Text needle) (Text haystack@(V.PrimVector arr s l)) =
List.map breaker (V.indices needle haystack False)
where
breaker i = (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
breakOnAllOverlapping :: Text -> Text -> [(Text, Text)]
{-# INLINE breakOnAllOverlapping #-}
breakOnAllOverlapping (Text needle) (Text haystack@(V.PrimVector arr s l)) =
List.map breaker (V.indicesOverlapping needle haystack False)
where
breaker i = (Text (V.PrimVector arr s (i-s)), Text (V.PrimVector arr i (s+l-i)))
group :: Text -> [Text]
{-# INLINE group #-}
group = groupBy (==)
groupBy :: (Char -> Char -> Bool) -> Text -> [Text]
{-# INLINE groupBy #-}
groupBy f (Text (V.PrimVector arr s l))
| l == 0 = []
| otherwise = Text (V.PrimVector arr s (s'-s)) : groupBy f (Text (V.PrimVector arr s' (l+s-s')))
where
(# c0, s0 #) = decodeChar arr s
end = s + l
s' = go arr (s+s0)
go arr !i
| i >= end = i
| otherwise = let (# c1, s1 #) = decodeChar arr i
in if f c0 c1 then go arr (i+s1) else i
stripPrefix :: Text -> Text -> Maybe Text
{-# INLINE stripPrefix #-}
stripPrefix = coerce (V.stripPrefix @V.PrimVector @Word8)
stripSuffix :: Text -> Text -> Maybe Text
{-# INLINE stripSuffix #-}
stripSuffix = coerce (V.stripSuffix @V.PrimVector @Word8)
split :: Char -> Text -> [Text]
{-# INLINE split #-}
split x = splitWith (==x)
splitWith :: (Char -> Bool) -> Text -> [Text]
{-# INLINE splitWith #-}
splitWith f (Text (V.PrimVector arr s l)) = go s s
where
!end = s + l
go !p !q | q >= end = let !v = V.PrimVector arr p (q-p) in [Text v]
| f c = let !v = V.PrimVector arr p (q-p) in Text v:go (q+n) (q+n)
| otherwise = go p (q+n)
where (# c, n #) = decodeChar arr q
splitOn :: Text -> Text -> [Text]
{-# INLINE splitOn #-}
splitOn = coerce (V.splitOn @V.PrimVector @Word8)
isPrefixOf :: Text -> Text -> Bool
{-# INLINE isPrefixOf #-}
isPrefixOf = coerce (V.isPrefixOf @V.PrimVector @Word8)
isSuffixOf :: Text -> Text -> Bool
{-# INLINE isSuffixOf #-}
isSuffixOf = coerce (V.isSuffixOf @V.PrimVector @Word8)
isInfixOf :: Text -> Text -> Bool
{-# INLINE isInfixOf #-}
isInfixOf = coerce (V.isInfixOf @V.PrimVector @Word8)
commonPrefix :: Text -> Text -> (Text, Text, Text)
{-# INLINE commonPrefix #-}
commonPrefix = coerce (V.commonPrefix @V.PrimVector @Word8)
words :: Text -> [Text]
{-# INLINE words #-}
words (Text (V.PrimVector arr s l)) = go s s
where
!end = s + l
go !s' !i | i >= end =
if s' == end
then []
else let !v = V.PrimVector arr s' (end-s') in [Text v]
| otherwise =
let (# c, n #) = decodeChar arr i
in if isSpace c
then if s' == i
then go (i+n) (i+n)
else let !v = V.PrimVector arr s' (i-s') in Text v : go (i+n) (i+n)
else go s' (i+n)
lines :: Text -> [Text]
{-# INLINE lines #-}
lines = coerce V.lines
unwords :: [Text] -> Text
{-# INLINE unwords #-}
unwords = coerce V.unwords
unlines :: [Text] -> Text
{-# INLINE unlines #-}
unlines = coerce V.unlines
padLeft :: Int -> Char -> Text -> Text
{-# INLINE padLeft #-}
padLeft n c t@(Text (V.PrimVector arr s l))
| n <= tsiz = t
| otherwise =
let psiz = (n-tsiz)*csiz
siz = psiz + l
in Text (V.create siz (\ marr -> do
encodeChar marr 0 c
go marr csiz psiz
copyPrimArray marr (siz-l) arr s l))
where
tsiz = length t
csiz = encodeCharLength c
go marr s psiz
| s >= psiz = return ()
| otherwise = copyChar' csiz marr s marr (s-csiz) >> go marr (s+csiz) psiz
padRight :: Int -> Char -> Text -> Text
{-# INLINE padRight #-}
padRight n c t@(Text (V.PrimVector arr s l))
| n <= tsiz = t
| otherwise =
let psiz = (n-tsiz)*csiz
siz = psiz + l
in Text (V.create siz (\ marr -> do
copyPrimArray marr 0 arr s l
encodeChar marr l c
go marr (l+csiz) siz))
where
tsiz = length t
csiz = encodeCharLength c
go marr s siz
| s >= siz = return ()
| otherwise = copyChar' csiz marr s marr (s-csiz) >> go marr (s+csiz) siz
intersperse :: Char -> Text -> Text
{-# INLINE intersperse #-}
intersperse c = \ t@(Text (V.PrimVector ba s l)) ->
let tlen = length t
in if length t < 2
then t
else (runST (do
mbaC <- newPrimArray 4
clen <- encodeChar mbaC 0 c
shrinkMutablePrimArray mbaC clen
baC <- unsafeFreezePrimArray mbaC
let e = decodeCharLenReverse ba (s+l-1)
return . Text $ V.create (l + (tlen-1) * clen) (go baC ba s 0 (s+l-e))
))
where
go :: PrimArray Word8
-> PrimArray Word8
-> Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s ()
go !baC !ba !i !j !end !mba
| i >= end = do
let l = decodeCharLen ba i
copyChar l mba j ba i
| otherwise = do
let l = decodeCharLen ba i
copyChar l mba j ba i
let i' = i + l
j' = j + l
let clen = sizeofPrimArray baC
copyChar clen mba j' baC 0
go baC ba i' (j'+clen) end mba
reverse :: Text -> Text
{-# INLINE reverse #-}
reverse = \ (Text (V.PrimVector ba s l)) -> Text $ V.create l (go ba s l (s+l))
where
go :: PrimArray Word8 -> Int -> Int -> Int -> MutablePrimArray s Word8 -> ST s ()
go !ba !i !j !end !mba
| i >= end = return ()
| otherwise = do
let l = decodeCharLen ba i
j' = j - l
copyChar l mba j' ba i
go ba (i+l) j' end mba
intercalate :: Text -> [Text] -> Text
{-# INLINE intercalate #-}
intercalate s = concat . List.intersperse s
intercalateElem :: Char -> [Text] -> Text
{-# INLINE intercalateElem #-}
intercalateElem c = concat . List.intersperse (singleton c)
transpose :: [Text] -> [Text]
{-# INLINE transpose #-}
transpose ts = List.map pack . List.transpose . List.map unpack $ ts