{-# LANGUAGE MagicHash, UnboxedTuples #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE CPP #-}

{-|
Module      : Z.Data.Text.Base
Description : Unicode text processing
Copyright   : (c) Dong Han, 2017-2018
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

A 'Text' wrap a 'Bytes' which will be interpreted using UTF-8 encoding. User should always use 'validate' to construt a 'Text' (instead of using construtor directly or coercing), otherwise illegal UTF-8 encoded codepoints will cause undefined behaviours.

-}

module Z.Data.Text.Base (
  -- * Text type
    Text(..)
  -- * Building text
  , validate
  , InvalidUTF8Exception(..)
  , validateMaybe
  , replicate
  , cycleN
  , indexMaybe, charByteIndex, indexMaybeR, charByteIndexR
  -- * Basic creating
  , empty, singleton, copy
  -- * Conversion between list
  , pack, packN, packR, packRN
  , unpack, unpackR
  -- * Conversion between codepoint vector
  , fromVector
  , toVector
  -- * Basic interface
  , null
  , length
  , append
  , map', imap'
  , foldl', ifoldl'
  , foldr', ifoldr'
  , concat, concatMap
    -- ** Special folds
  , count, all, any
    -- ** normalization
  , NormalizationResult(..), NormalizeMode(..)
  , isNormalized, isNormalizedTo, normalize, normalizeTo
    -- ** Case conversion
    -- $case
  , Locale, localeDefault, localeLithuanian, localeTurkishAndAzeriLatin
  , caseFold, caseFoldWith, toLower, toLowerWith, toUpper, toUpperWith, toTitle, toTitleWith
    -- ** Unicode category
  , isCategory, spanCategory
  , Category
  , categoryLetterUppercase
  , categoryLetterLowercase
  , categoryLetterTitlecase
  , categoryLetterOther
  , categoryLetter
  , categoryCaseMapped

  , categoryMarkNonSpacing
  , categoryMarkSpacing
  , categoryMarkEnclosing
  , categoryMark

  , categoryNumberDecimal
  , categoryNumberLetter
  , categoryNumberOther
  , categoryNumber

  , categoryPunctuationConnector
  , categoryPunctuationDash
  , categoryPunctuationOpen
  , categoryPunctuationClose
  , categoryPunctuationInitial
  , categoryPunctuationFinal
  , categoryPunctuationOther
  , categoryPunctuation

  , categorySymbolMath
  , categorySymbolCurrency
  , categorySymbolModifier
  , categorySymbolOther
  , categorySymbol

  , categorySeparatorSpace
  , categorySeparatorLine
  , categorySeparatorParagraph
  , categorySeparator
  , categoryControl
  , categoryFormat
  , categorySurrogate
  , categoryPrivateUse
  , categoryUnassigned
  , categoryCompatibility
  , categoryIgnoreGraphemeCluste
  , categoryIscntrl

  , categoryIsprint
  , categoryIsspace
  , categoryIsblank
  , categoryIsgraph
  , categoryIspunct
  , categoryIsalnum
  , categoryIsalpha
  , categoryIsupper
  , categoryIslower
  , categoryIsdigit
  , categoryIsxdigit
  -- * Misc
  , c_utf8_validate_ba
  , c_utf8_validate_addr
 ) where

import           Control.DeepSeq
import           Control.Exception
import           Control.Monad.ST
import           Control.Monad
import           Data.Bits
import           Data.Char          hiding (toLower, toUpper, toTitle)
import           Data.Foldable            (foldlM)
import           Data.Hashable            (Hashable(..))
import qualified Data.List                as List
import           Data.Primitive.PrimArray
import           Data.Typeable
import           Data.Word
import           Foreign.C.Types          (CSize(..))
import           GHC.Exts
import           GHC.Types
import           GHC.Stack
import           GHC.CString              (unpackCString#, unpackCStringUtf8#)
import           Z.Data.Array
import           Z.Data.Text.UTF8Codec
import           Z.Data.Text.UTF8Rewind
import           Z.Data.Vector.Base     (Bytes, PrimVector(..), c_strlen)
import qualified Z.Data.Vector.Base     as V
import qualified Z.Data.Vector.Search   as V
import           System.IO.Unsafe (unsafeDupablePerformIO)

import           Prelude                       hiding (concat, concatMap,
                                                elem, notElem, null, length, map,
                                                foldl, foldl1, foldr, foldr1,
                                                maximum, minimum, product, sum,
                                                all, any, replicate, traverse)

import           Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))

-- | 'Text' represented as UTF-8 encoded 'Bytes'
--
newtype Text = Text
    { Text -> Bytes
getUTF8Bytes :: Bytes -- ^ Extract UTF-8 encoded 'Bytes' from 'Text'
    } deriving (b -> Text -> Text
NonEmpty Text -> Text
Text -> Text -> Text
(Text -> Text -> Text)
-> (NonEmpty Text -> Text)
-> (forall b. Integral b => b -> Text -> Text)
-> Semigroup Text
forall b. Integral b => b -> Text -> Text
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> Text -> Text
$cstimes :: forall b. Integral b => b -> Text -> Text
sconcat :: NonEmpty Text -> Text
$csconcat :: NonEmpty Text -> Text
<> :: Text -> Text -> Text
$c<> :: Text -> Text -> Text
Semigroup, Semigroup Text
Text
Semigroup Text
-> Text
-> (Text -> Text -> Text)
-> ([Text] -> Text)
-> Monoid Text
[Text] -> Text
Text -> Text -> Text
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [Text] -> Text
$cmconcat :: [Text] -> Text
mappend :: Text -> Text -> Text
$cmappend :: Text -> Text -> Text
mempty :: Text
$cmempty :: Text
$cp1Monoid :: Semigroup Text
Monoid, Typeable)

instance Eq Text where
    Text Bytes
b1 == :: Text -> Text -> Bool
== Text Bytes
b2 = Bytes
b1 Bytes -> Bytes -> Bool
forall a. Eq a => a -> a -> Bool
== Bytes
b2
    {-# INLINE (==) #-}

instance Ord Text where
    Text Bytes
b1 compare :: Text -> Text -> Ordering
`compare` Text Bytes
b2 = Bytes
b1 Bytes -> Bytes -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` Bytes
b2 -- UTF-8 encoding property
    {-# INLINE compare #-}

instance Show Text where
    showsPrec :: Int -> Text -> ShowS
showsPrec Int
p Text
t = Int -> String -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p (Text -> String
unpack Text
t)

instance Read Text where
    readsPrec :: Int -> ReadS Text
readsPrec Int
p String
str = [ (String -> Text
pack String
x, String
y) | (String
x, String
y) <- Int -> ReadS String
forall a. Read a => Int -> ReadS a
readsPrec Int
p String
str ]

instance NFData Text where
    rnf :: Text -> ()
rnf (Text Bytes
bs) = Bytes -> ()
forall a. NFData a => a -> ()
rnf Bytes
bs

instance Arbitrary Text where
    arbitrary :: Gen Text
arbitrary = String -> Text
pack (String -> Text) -> Gen String -> Gen Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Gen String
forall a. Arbitrary a => Gen a
arbitrary
    shrink :: Text -> [Text]
shrink Text
a = String -> Text
pack (String -> Text) -> [String] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> [String]
forall a. Arbitrary a => a -> [a]
shrink (Text -> String
unpack Text
a)

instance CoArbitrary Text where
    coarbitrary :: Text -> Gen b -> Gen b
coarbitrary = String -> Gen b -> Gen b
forall a b. CoArbitrary a => a -> Gen b -> Gen b
coarbitrary (String -> Gen b -> Gen b)
-> (Text -> String) -> Text -> Gen b -> Gen b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
unpack

instance Hashable Text where
    {-# INLINE hashWithSalt #-}
    hashWithSalt :: Int -> Text -> Int
hashWithSalt Int
salt (Text Bytes
bs) = Int -> Bytes -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
salt Bytes
bs

instance IsString Text where
    {-# INLINE fromString #-}
    fromString :: String -> Text
fromString = String -> Text
pack

packASCIIAddr :: Addr# -> Text
packASCIIAddr :: Addr# -> Text
packASCIIAddr Addr#
addr0# = Addr# -> Text
go Addr#
addr0#
  where
    len :: Int
len = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSize -> Int) -> (IO CSize -> CSize) -> IO CSize -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO CSize -> CSize
forall a. IO a -> a
unsafeDupablePerformIO (IO CSize -> Int) -> IO CSize -> Int
forall a b. (a -> b) -> a -> b
$ Addr# -> IO CSize
c_strlen Addr#
addr0#
    go :: Addr# -> Text
go Addr#
addr# = (forall s. ST s Text) -> Text
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Text) -> Text) -> (forall s. ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ do
        MutablePrimArray s Word8
marr <- Int -> ST s (MutablePrimArray (PrimState (ST s)) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
newPrimArray Int
len
        MutablePrimArray (PrimState (ST s)) Word8
-> Int -> Ptr Word8 -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> Int -> Ptr a -> Int -> m ()
copyPtrToMutablePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr Int
0 (Addr# -> Ptr Word8
forall a. Addr# -> Ptr a
Ptr Addr#
addr#) Int
len
        PrimArray Word8
arr <- MutablePrimArray (PrimState (ST s)) Word8 -> ST s (PrimArray Word8)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
unsafeFreezePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr
        Text -> ST s Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ST s Text) -> Text -> ST s Text
forall a b. (a -> b) -> a -> b
$ Bytes -> Text
Text (PrimArray Word8 -> Int -> Int -> Bytes
forall a. PrimArray a -> Int -> Int -> PrimVector a
PrimVector PrimArray Word8
arr Int
0 Int
len)

packUTF8Addr :: Addr# -> Text
packUTF8Addr :: Addr# -> Text
packUTF8Addr Addr#
addr0# = Addr# -> Text
validateAndCopy Addr#
addr0#
  where
    len :: Int
len = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSize -> Int) -> (IO CSize -> CSize) -> IO CSize -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO CSize -> CSize
forall a. IO a -> a
unsafeDupablePerformIO (IO CSize -> Int) -> IO CSize -> Int
forall a b. (a -> b) -> a -> b
$ Addr# -> IO CSize
c_strlen Addr#
addr0#
    valid :: Int
valid = IO Int -> Int
forall a. IO a -> a
unsafeDupablePerformIO (IO Int -> Int) -> IO Int -> Int
forall a b. (a -> b) -> a -> b
$ Addr# -> Int -> IO Int
c_utf8_validate_addr Addr#
addr0# Int
len
    validateAndCopy :: Addr# -> Text
validateAndCopy Addr#
addr#
        | Int
valid Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int -> String -> Text
packN Int
len (Addr# -> String
unpackCStringUtf8# Addr#
addr#) -- three bytes surrogate -> three bytes replacement
                                                        -- two bytes NUL -> \NUL
                                                        -- the result's length will either smaller or equal
        | Bool
otherwise  = (forall s. ST s Text) -> Text
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Text) -> Text) -> (forall s. ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ do
            MutablePrimArray s Word8
marr <- Int -> ST s (MutablePrimArray (PrimState (ST s)) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
newPrimArray Int
len
            MutablePrimArray (PrimState (ST s)) Word8
-> Int -> Ptr Word8 -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> Int -> Ptr a -> Int -> m ()
copyPtrToMutablePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr Int
0 (Addr# -> Ptr Word8
forall a. Addr# -> Ptr a
Ptr Addr#
addr#) Int
len
            PrimArray Word8
arr <- MutablePrimArray (PrimState (ST s)) Word8 -> ST s (PrimArray Word8)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
unsafeFreezePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr
            Text -> ST s Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ST s Text) -> Text -> ST s Text
forall a b. (a -> b) -> a -> b
$ Bytes -> Text
Text (PrimArray Word8 -> Int -> Int -> Bytes
forall a. PrimArray a -> Int -> Int -> PrimVector a
PrimVector PrimArray Word8
arr Int
0 Int
len)

-- | /O(n)/ Get the nth codepoint from 'Text'.
indexMaybe :: Text -> Int -> Maybe Char
{-# INLINABLE indexMaybe #-}
indexMaybe :: Text -> Int -> Maybe Char
indexMaybe (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) Int
n
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Maybe Char
forall a. Maybe a
Nothing
    | Bool
otherwise = Int -> Int -> Maybe Char
go Int
s Int
0
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> Maybe Char
go !Int
i !Int
j
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = Maybe Char
forall a. Maybe a
Nothing
        | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = let !c :: Char
c = PrimArray Word8 -> Int -> Char
decodeChar_ PrimArray Word8
ba Int
i in Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c
        | Bool
otherwise = let l' :: Int
l' = PrimArray Word8 -> Int -> Int
decodeCharLen PrimArray Word8
ba Int
i in Int -> Int -> Maybe Char
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l') (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- | /O(n)/ Find the nth codepoint's byte index (pointing to the nth char's begining byte).
--
-- The index is only meaningful to the whole byte slice, if there's less than n codepoints,
-- the index will point to next byte after the end.
charByteIndex :: Text -> Int -> Int
{-# INLINABLE charByteIndex #-}
charByteIndex :: Text -> Int -> Int
charByteIndex (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) Int
n
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Int
s
    | Bool
otherwise = Int -> Int -> Int
go Int
s Int
0
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> Int
go !Int
i !Int
j
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = Int
i
        | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = Int
i
        | Bool
otherwise = let l' :: Int
l' = PrimArray Word8 -> Int -> Int
decodeCharLen PrimArray Word8
ba Int
i in Int -> Int -> Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l') (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- | /O(n)/ Get the nth codepoint from 'Text' counting from the end.
indexMaybeR :: Text -> Int -> Maybe Char
{-# INLINABLE indexMaybeR #-}
indexMaybeR :: Text -> Int -> Maybe Char
indexMaybeR (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) Int
n
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Maybe Char
forall a. Maybe a
Nothing
    | Bool
otherwise = Int -> Int -> Maybe Char
go (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int
0
  where
    go :: Int -> Int -> Maybe Char
go !Int
i !Int
j
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s = Maybe Char
forall a. Maybe a
Nothing
        | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = let !c :: Char
c = PrimArray Word8 -> Int -> Char
decodeCharReverse_ PrimArray Word8
ba Int
i in Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c
        | Bool
otherwise = let l' :: Int
l' = PrimArray Word8 -> Int -> Int
decodeCharLenReverse PrimArray Word8
ba Int
i in Int -> Int -> Maybe Char
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l') (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- | /O(n)/ Find the nth codepoint's byte index from the end
-- (pointing to the previous char's ending byte).
--
-- The index is only meaningful to the whole byte slice, if there's less than n codepoints,
-- the index will point to previous byte before the start.
charByteIndexR :: Text -> Int -> Int
{-# INLINABLE charByteIndexR #-}
charByteIndexR :: Text -> Int -> Int
charByteIndexR (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) Int
n
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l
    | Bool
otherwise = Int -> Int -> Int
go (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int
0
  where
    go :: Int -> Int -> Int
go !Int
i !Int
j
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s = Int
i
        | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = Int
i
        | Bool
otherwise = let l' :: Int
l' = PrimArray Word8 -> Int -> Int
decodeCharLenReverse PrimArray Word8
ba Int
i in Int -> Int -> Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l') (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

--------------------------------------------------------------------------------

-- | /O(n)/ Validate a sequence of bytes is UTF-8 encoded.
--
-- Throw 'InvalidUTF8Exception' in case of invalid codepoint.
--
validate :: HasCallStack => Bytes -> Text
{-# INLINE validate #-}
validate :: Bytes -> Text
validate bs :: Bytes
bs@(V.PrimVector (PrimArray ByteArray#
ba#) (I# Int#
s#) l :: Int
l@(I# Int#
l#))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Bytes -> Text
Text Bytes
bs
    | ByteArray# -> Int# -> Int# -> Int
c_utf8_validate_ba ByteArray#
ba# Int#
s# Int#
l# Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Bytes -> Text
Text Bytes
bs
    | Bool
otherwise = InvalidUTF8Exception -> Text
forall a e. Exception e => e -> a
throw (CallStack -> InvalidUTF8Exception
InvalidUTF8Exception CallStack
HasCallStack => CallStack
callStack)

validateMaybe :: Bytes -> Maybe Text
{-# INLINE validateMaybe #-}
validateMaybe :: Bytes -> Maybe Text
validateMaybe bs :: Bytes
bs@(V.PrimVector (PrimArray ByteArray#
ba#) (I# Int#
s#) l :: Int
l@(I# Int#
l#))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text -> Maybe Text
forall a. a -> Maybe a
Just (Bytes -> Text
Text Bytes
bs)
    | ByteArray# -> Int# -> Int# -> Int
c_utf8_validate_ba ByteArray#
ba# Int#
s# Int#
l# Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Text -> Maybe Text
forall a. a -> Maybe a
Just (Bytes -> Text
Text Bytes
bs)
    | Bool
otherwise = Maybe Text
forall a. Maybe a
Nothing

foreign import ccall unsafe "text.h utf8_validate"
    c_utf8_validate_ba :: ByteArray# -> Int# -> Int# -> Int
foreign import ccall unsafe "text.h utf8_validate_addr"
    c_utf8_validate_addr :: Addr# -> Int -> IO Int

data InvalidUTF8Exception = InvalidUTF8Exception CallStack
                    deriving (Int -> InvalidUTF8Exception -> ShowS
[InvalidUTF8Exception] -> ShowS
InvalidUTF8Exception -> String
(Int -> InvalidUTF8Exception -> ShowS)
-> (InvalidUTF8Exception -> String)
-> ([InvalidUTF8Exception] -> ShowS)
-> Show InvalidUTF8Exception
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [InvalidUTF8Exception] -> ShowS
$cshowList :: [InvalidUTF8Exception] -> ShowS
show :: InvalidUTF8Exception -> String
$cshow :: InvalidUTF8Exception -> String
showsPrec :: Int -> InvalidUTF8Exception -> ShowS
$cshowsPrec :: Int -> InvalidUTF8Exception -> ShowS
Show, Typeable)
instance Exception InvalidUTF8Exception

--------------------------------------------------------------------------------

-- | /O(n)/ Convert a string into a text
--
-- Alias for @'packN' 'defaultInitSize'@, will be rewritten to a memcpy if possible.
pack :: String -> Text
pack :: String -> Text
pack = Int -> String -> Text
packN Int
V.defaultInitSize
{-# INLINE CONLIKE [0] pack #-}
{-# RULES "pack/packASCIIAddr" forall addr . pack (unpackCString# addr) = packASCIIAddr addr #-}
{-# RULES "pack/packUTF8Addr" forall addr . pack (unpackCStringUtf8# addr) = packUTF8Addr addr #-}

-- | /O(n)/ Convert a list into a text with an approximate size(in bytes, not codepoints).
--
-- If the encoded bytes length is larger than the size given, we simply double the buffer size
-- and continue building.
--
-- This function is a /good consumer/ in the sense of build/foldr fusion.
--
packN :: Int -> String -> Text
{-# INLINE packN #-}
packN :: Int -> String -> Text
packN Int
n0 = \ String
ws0 ->
    Bytes -> Text
Text (Int
-> (forall s.
    MArr (IArray PrimVector) s Word8
    -> ST s (IPair (MArr (IArray PrimVector) s Word8)))
-> Bytes
forall (v :: * -> *) a.
Vec v a =>
Int
-> (forall s.
    MArr (IArray v) s a -> ST s (IPair (MArr (IArray v) s a)))
-> v a
V.create' (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
4 Int
n0) (\ MArr (IArray PrimVector) s Word8
marr -> (IPair (MutablePrimArray s Word8)
 -> Char -> ST s (IPair (MutablePrimArray s Word8)))
-> IPair (MutablePrimArray s Word8)
-> String
-> ST s (IPair (MutablePrimArray s Word8))
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldlM IPair (MutablePrimArray s Word8)
-> Char -> ST s (IPair (MutablePrimArray s Word8))
forall s.
IPair (MutablePrimArray s Word8)
-> Char -> ST s (IPair (MutablePrimArray s Word8))
go (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
0 MutablePrimArray s Word8
MArr (IArray PrimVector) s Word8
marr) String
ws0))
  where
    -- It's critical that this function get specialized and unboxed
    -- Keep an eye on its core!
    go :: V.IPair (MutablePrimArray s Word8) -> Char -> ST s (V.IPair (MutablePrimArray s Word8))
    go :: IPair (MutablePrimArray s Word8)
-> Char -> ST s (IPair (MutablePrimArray s Word8))
go (V.IPair Int
i MutablePrimArray s Word8
marr) !Char
c = do
        Int
siz <- MutablePrimArray (PrimState (ST s)) Word8 -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> m Int
getSizeofMutablePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr
        if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
siz Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
3  -- we need at least 4 bytes for safety
        then do
            Int
i' <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr Int
i Char
c
            IPair (MutablePrimArray s Word8)
-> ST s (IPair (MutablePrimArray s Word8))
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
i' MutablePrimArray s Word8
marr)
        else do
            let !siz' :: Int
siz' = Int
siz Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
            !MutablePrimArray s Word8
marr' <- MutablePrimArray (PrimState (ST s)) Word8
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
resizeMutablePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr Int
siz'
            Int
i' <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr' Int
i Char
c
            IPair (MutablePrimArray s Word8)
-> ST s (IPair (MutablePrimArray s Word8))
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
i' MutablePrimArray s Word8
marr')

-- | /O(n)/ Alias for @'packRN' 'defaultInitSize'@.
--
packR :: String -> Text
{-# INLINE packR #-}
packR :: String -> Text
packR = Int -> String -> Text
packRN Int
V.defaultInitSize

-- | /O(n)/ 'packN' in reverse order.
--
-- This function is a /good consumer/ in the sense of build/foldr fusion.
--
packRN :: Int -> String -> Text
{-# INLINE packRN #-}
packRN :: Int -> String -> Text
packRN Int
n0 = \ String
ws0 -> (forall s. ST s Text) -> Text
forall a. (forall s. ST s a) -> a
runST (do let n :: Int
n = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
4 Int
n0
                               MutablePrimArray s Word8
marr <- Int -> ST s (MArr PrimArray s Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
n
                               (V.IPair Int
i MutablePrimArray s Word8
marr') <- (IPair (MutablePrimArray s Word8)
 -> Char -> ST s (IPair (MutablePrimArray s Word8)))
-> IPair (MutablePrimArray s Word8)
-> String
-> ST s (IPair (MutablePrimArray s Word8))
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM IPair (MutablePrimArray s Word8)
-> Char -> ST s (IPair (MutablePrimArray s Word8))
forall s.
IPair (MutablePrimArray s Word8)
-> Char -> ST s (IPair (MutablePrimArray s Word8))
go (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
n MutablePrimArray s Word8
marr) String
ws0
                               PrimArray Word8
ba <- MArr PrimArray s Word8 -> ST s (PrimArray Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray s Word8
MArr PrimArray s Word8
marr'
                               Text -> ST s Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ST s Text) -> Text -> ST s Text
forall a b. (a -> b) -> a -> b
$! Bytes -> Text
Text (IArray PrimVector Word8 -> Int -> Int -> Bytes
forall (v :: * -> *) a. Vec v a => IArray v a -> Int -> Int -> v a
V.fromArr PrimArray Word8
IArray PrimVector Word8
ba Int
i (PrimArray Word8 -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int
sizeofArr PrimArray Word8
baInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i))
                           )
  where
    go :: V.IPair (MutablePrimArray s Word8) -> Char -> ST s (V.IPair (MutablePrimArray s Word8))
    go :: IPair (MutablePrimArray s Word8)
-> Char -> ST s (IPair (MutablePrimArray s Word8))
go (V.IPair Int
i MutablePrimArray s Word8
marr) !Char
c = do
        Int
n <- MArr PrimArray s Word8 -> ST s Int
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m Int
sizeofMutableArr MutablePrimArray s Word8
MArr PrimArray s Word8
marr
        let l :: Int
l = Char -> Int
encodeCharLength Char
c
        if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l
        then do Int
_ <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l) Char
c
                IPair (MutablePrimArray s Word8)
-> ST s (IPair (MutablePrimArray s Word8))
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l) MutablePrimArray s Word8
marr)
        else do let !n' :: Int
n' = Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1  -- double the buffer
                !MutablePrimArray s Word8
marr' <- Int -> ST s (MArr PrimArray s Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
n'
                MArr PrimArray s Word8
-> Int -> MArr PrimArray s Word8 -> Int -> Int -> ST s ()
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> Int -> MArr arr s a -> Int -> Int -> m ()
copyMutableArr MutablePrimArray s Word8
MArr PrimArray s Word8
marr' (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i) MutablePrimArray s Word8
MArr PrimArray s Word8
marr Int
i (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i)
                let i' :: Int
i' = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l
                Int
_ <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr' Int
i' Char
c
                IPair (MutablePrimArray s Word8)
-> ST s (IPair (MutablePrimArray s Word8))
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
i' MutablePrimArray s Word8
marr')

-- | /O(n)/ Convert text to a char list.
--
-- Unpacking is done lazily. i.e. we will retain reference to the array until all element are consumed.
--
-- This function is a /good producer/ in the sense of build/foldr fusion.
unpack :: Text -> String
{-# INLINE [1] unpack #-}
unpack :: Text -> String
unpack (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) = Int -> String
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> String
go !Int
idx
        | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = []
        | Bool
otherwise = let (# Char
c, Int
i #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
ba Int
idx in Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: Int -> String
go (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i)

unpackFB :: Text -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackFB #-}
unpackFB :: Text -> (Char -> a -> a) -> a -> a
unpackFB (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) Char -> a -> a
k a
z = Int -> a
go Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> a
go !Int
idx
        | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = a
z
        | Bool
otherwise = let (# Char
c, Int
i #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
ba Int
idx in Char
c Char -> a -> a
`k` Int -> a
go (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i)

{-# RULES
"unpack" [~1] forall t . unpack t = build (\ k z -> unpackFB t k z)
"unpackFB" [1] forall t . unpackFB t (:) [] = unpack t
 #-}

-- | /O(n)/ Convert text to a list in reverse order.
--
-- This function is a /good producer/ in the sense of build/foldr fusion.
unpackR :: Text -> String
{-# INLINE [1] unpackR #-}
unpackR :: Text -> String
unpackR (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) = Int -> String
go (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    go :: Int -> String
go !Int
idx
        | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s = []
        | Bool
otherwise = let (# Char
c, Int
i #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
ba Int
idx in Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: Int -> String
go (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i)

unpackRFB :: Text -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackRFB #-}
unpackRFB :: Text -> (Char -> a -> a) -> a -> a
unpackRFB (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) Char -> a -> a
k a
z = Int -> a
go (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    go :: Int -> a
go !Int
idx
        | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s = a
z
        | Bool
otherwise = let (# Char
c, Int
i #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
ba Int
idx in Char
c Char -> a -> a
`k` Int -> a
go (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i)

{-# RULES
"unpackR" [~1] forall t . unpackR t = build (\ k z -> unpackRFB t k z)
"unpackRFB" [1] forall t . unpackRFB t (:) [] = unpackR t
 #-}

-- | /O(1)/. Single char text.
singleton :: Char -> Text
{-# INLINABLE singleton #-}
singleton :: Char -> Text
singleton Char
c = Bytes -> Text
Text (Bytes -> Text) -> Bytes -> Text
forall a b. (a -> b) -> a -> b
$ Int
-> (forall s. MArr (IArray PrimVector) s Word8 -> ST s Int)
-> Bytes
forall (v :: * -> *) a.
(Vec v a, HasCallStack) =>
Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
V.createN Int
4 ((forall s. MArr (IArray PrimVector) s Word8 -> ST s Int) -> Bytes)
-> (forall s. MArr (IArray PrimVector) s Word8 -> ST s Int)
-> Bytes
forall a b. (a -> b) -> a -> b
$ \ MArr (IArray PrimVector) s Word8
marr -> MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
MArr (IArray PrimVector) s Word8
marr Int
0 Char
c

-- | /O(1)/. Empty text.
empty :: Text
{-# INLINABLE empty #-}
empty :: Text
empty = Bytes -> Text
Text Bytes
forall (v :: * -> *) a. Vec v a => v a
V.empty

-- | /O(n)/. Copy a text from slice.
copy :: Text -> Text
{-# INLINE copy #-}
copy :: Text -> Text
copy (Text Bytes
bs) = Bytes -> Text
Text (Bytes -> Bytes
forall (v :: * -> *) a. Vec v a => v a -> v a
V.copy Bytes
bs)

--------------------------------------------------------------------------------
-- * Basic interface

-- | /O(m+n)/
--
-- There's no need to guard empty vector because we guard them for you, so
-- appending empty text are no-ops.
append :: Text -> Text -> Text
append :: Text -> Text -> Text
append Text
ta Text
tb = Bytes -> Text
Text ( Text -> Bytes
getUTF8Bytes Text
ta Bytes -> Bytes -> Bytes
forall (v :: * -> *) a. Vec v a => v a -> v a -> v a
`V.append` Text -> Bytes
getUTF8Bytes Text
tb )
{-# INLINE append #-}

-- | /O(1)/ Test whether a text is empty.
null :: Text -> Bool
{-# INLINABLE null #-}
null :: Text -> Bool
null (Text Bytes
bs) = Bytes -> Bool
forall (v :: * -> *) a. Vec v a => v a -> Bool
V.null Bytes
bs

-- |  /O(n)/ The char length of a text.
length :: Text -> Int
{-# INLINABLE length #-}
length :: Text -> Int
length (Text (V.PrimVector PrimArray Word8
ba Int
s Int
l)) = Int -> Int -> Int
forall t. Num t => Int -> t -> t
go Int
s Int
0
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> t -> t
go !Int
i !t
acc | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = t
acc
               | Bool
otherwise = let j :: Int
j = PrimArray Word8 -> Int -> Int
decodeCharLen PrimArray Word8
ba Int
i in Int -> t -> t
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j) (t
1t -> t -> t
forall a. Num a => a -> a -> a
+t
acc)

--------------------------------------------------------------------------------
-- * Transformations
--
-- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
-- each char of @t@. Performs replacement on invalid scalar values.
map' :: (Char -> Char) -> Text -> Text
{-# INLINE map' #-}
map' :: (Char -> Char) -> Text -> Text
map' Char -> Char
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
                                     | Bool
otherwise = Bytes -> Text
Text (Int
-> (forall s.
    MArr (IArray PrimVector) s Word8
    -> ST s (IPair (MArr (IArray PrimVector) s Word8)))
-> Bytes
forall (v :: * -> *) a.
Vec v a =>
Int
-> (forall s.
    MArr (IArray v) s a -> ST s (IPair (MArr (IArray v) s a)))
-> v a
V.create' (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3) (Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
forall s.
Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go Int
s Int
0))
  where
    end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    -- the 3 bytes buffer is here for optimizing ascii mapping
    -- we do resize if less than 4 bytes left when building
    -- to save us from pre-checking encoding char length everytime
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s (V.IPair (MutablePrimArray s Word8))
    go :: Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go !Int
i !Int
j !MutablePrimArray s Word8
marr
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = IPair (MutablePrimArray s Word8)
-> ST s (IPair (MutablePrimArray s Word8))
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
j MutablePrimArray s Word8
marr)
        | Bool
otherwise = do
            let (# Char
c, Int
d #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i
            Int
j' <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr Int
j (Char -> Char
f Char
c)
            let !i' :: Int
i' = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d
            Int
siz <- MArr PrimArray s Word8 -> ST s Int
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m Int
sizeofMutableArr MutablePrimArray s Word8
MArr PrimArray s Word8
marr
            if  Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
siz Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
3
            then Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
forall s.
Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go Int
i' Int
j' MutablePrimArray s Word8
marr
            else do
                let !siz' :: Int
siz' = Int
siz Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
                !MutablePrimArray s Word8
marr' <- MutablePrimArray (PrimState (ST s)) Word8
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
resizeMutablePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr Int
siz'
                Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
forall s.
Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go Int
i' Int
j' MutablePrimArray s Word8
marr'

-- | Strict mapping with index.
imap' :: (Int -> Char -> Char) -> Text -> Text
{-# INLINE imap' #-}
imap' :: (Int -> Char -> Char) -> Text -> Text
imap' Int -> Char -> Char
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
                                      | Bool
otherwise = Bytes -> Text
Text (Int
-> (forall s.
    MArr (IArray PrimVector) s Word8
    -> ST s (IPair (MArr (IArray PrimVector) s Word8)))
-> Bytes
forall (v :: * -> *) a.
Vec v a =>
Int
-> (forall s.
    MArr (IArray v) s a -> ST s (IPair (MArr (IArray v) s a)))
-> v a
V.create' (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3) (Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go Int
s Int
0 Int
0))
  where
    end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> Int -> MutablePrimArray s Word8 -> ST s (V.IPair (MutablePrimArray s Word8))
    go :: Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go !Int
i !Int
j !Int
k !MutablePrimArray s Word8
marr
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = IPair (MutablePrimArray s Word8)
-> ST s (IPair (MutablePrimArray s Word8))
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> MutablePrimArray s Word8 -> IPair (MutablePrimArray s Word8)
forall a. Int -> a -> IPair a
V.IPair Int
j MutablePrimArray s Word8
marr)
        | Bool
otherwise = do
            let (# Char
c, Int
d #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i
            Int
j' <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr Int
j (Int -> Char -> Char
f Int
k Char
c)
            let !i' :: Int
i' = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d
                !k' :: Int
k' = Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
            Int
siz <- MArr PrimArray s Word8 -> ST s Int
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m Int
sizeofMutableArr MutablePrimArray s Word8
MArr PrimArray s Word8
marr
            if  Int
j' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
siz Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
3
            then Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go Int
i' Int
j' Int
k' MutablePrimArray s Word8
marr
            else do
                let !siz' :: Int
siz' = Int
siz Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
                !MutablePrimArray s Word8
marr' <- MutablePrimArray (PrimState (ST s)) Word8
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
resizeMutablePrimArray MutablePrimArray s Word8
MutablePrimArray (PrimState (ST s)) Word8
marr Int
siz'
                Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> ST s (IPair (MutablePrimArray s Word8))
go Int
i' Int
j' Int
k' MutablePrimArray s Word8
marr'

--------------------------------------------------------------------------------
--
-- Strict folds
--

-- | Strict left to right fold.
foldl' :: (b -> Char -> b) -> b -> Text -> b
{-# INLINE foldl' #-}
foldl' :: (b -> Char -> b) -> b -> Text -> b
foldl' b -> Char -> b
f b
z (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = b -> Int -> b
go b
z Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    -- tail recursive; traverses array left to right
    go :: b -> Int -> b
go !b
acc !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
end  = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i of
                                (# Char
x, Int
d #) -> b -> Int -> b
go (b -> Char -> b
f b
acc Char
x) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d)
               | Bool
otherwise = b
acc

-- | Strict left to right fold with index.
ifoldl' :: (b -> Int ->  Char -> b) -> b -> Text -> b
{-# INLINE ifoldl' #-}
ifoldl' :: (b -> Int -> Char -> b) -> b -> Text -> b
ifoldl' b -> Int -> Char -> b
f b
z (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = b -> Int -> Int -> b
go b
z Int
s Int
0
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: b -> Int -> Int -> b
go !b
acc !Int
i !Int
k | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
end  = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i of
                                    (# Char
x, Int
d #) -> b -> Int -> Int -> b
go (b -> Int -> Char -> b
f b
acc Int
k Char
x) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d) (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                  | Bool
otherwise = b
acc

-- | Strict right to left fold
foldr' :: (Char -> b -> b) -> b -> Text -> b
{-# INLINE foldr' #-}
foldr' :: (Char -> b -> b) -> b -> Text -> b
foldr' Char -> b -> b
f b
z (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = b -> Int -> b
go b
z (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    -- tail recursive; traverses array right to left
    go :: b -> Int -> b
go !b
acc !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
s    = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
arr Int
i of
                                (# Char
x, Int
d #) -> b -> Int -> b
go (Char -> b -> b
f Char
x b
acc) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
d)
               | Bool
otherwise = b
acc

-- | Strict right to left fold with index
--
-- NOTE: the index is counting from 0, not backwards
ifoldr' :: (Int -> Char -> b -> b) -> b -> Text -> b
{-# INLINE ifoldr' #-}
ifoldr' :: (Int -> Char -> b -> b) -> b -> Text -> b
ifoldr' Int -> Char -> b -> b
f b
z (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = b -> Int -> Int -> b
go b
z (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int
0
  where
    go :: b -> Int -> Int -> b
go !b
acc !Int
i !Int
k | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
s    = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
arr Int
i of
                                    (# Char
x, Int
d #) -> b -> Int -> Int -> b
go (Int -> Char -> b -> b
f Int
k Char
x b
acc) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
d) (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                  | Bool
otherwise = b
acc


-- | /O(n)/ Concatenate a list of text.
--
-- Note: 'concat' have to force the entire list to filter out empty text and calculate
-- the length for allocation.
concat :: [Text] -> Text
concat :: [Text] -> Text
concat = Bytes -> Text
Text (Bytes -> Text) -> ([Text] -> Bytes) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Bytes] -> Bytes
forall (v :: * -> *) a. Vec v a => [v a] -> v a
V.concat ([Bytes] -> Bytes) -> ([Text] -> [Bytes]) -> [Text] -> Bytes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> [Bytes]
coerce
{-# INLINE concat #-}

-- | Map a function over a text and concatenate the results
concatMap :: (Char -> Text) -> Text -> Text
{-# INLINE concatMap #-}
concatMap :: (Char -> Text) -> Text -> Text
concatMap Char -> Text
f = [Text] -> Text
concat ([Text] -> Text) -> (Text -> [Text]) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> [Text] -> [Text]) -> [Text] -> Text -> [Text]
forall b. (Char -> b -> b) -> b -> Text -> b
foldr' ((:) (Text -> [Text] -> [Text])
-> (Char -> Text) -> Char -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
f) []

-- | /O(n)/ 'count' returns count of an element from a text.
count :: Char -> Text -> Int
{-# INLINE count #-}
count :: Char -> Text -> Int
count Char
c (Text Bytes
v)
    | Char -> Int
encodeCharLength Char
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = let w :: Word8
w = Char -> Word8
V.c2w Char
c in Word8 -> Bytes -> Int
forall (v :: * -> *) a. (Vec v a, Eq a) => a -> v a -> Int
V.count Word8
w Bytes
v
    | Bool
otherwise = let (Text Bytes
pat) = Char -> Text
singleton Char
c
                  in [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
List.length ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Bytes -> Bytes -> Bool -> [Int]
forall (v :: * -> *) a.
(Vec v a, Eq a) =>
v a -> v a -> Bool -> [Int]
V.indices Bytes
pat Bytes
v Bool
False

-- | /O(n)/ Applied to a predicate and a text, 'any' determines
-- if any chars of the text satisfy the predicate.
any :: (Char -> Bool) -> Text -> Bool
{-# INLINE any #-}
any :: (Char -> Bool) -> Text -> Bool
any Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l))
    | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0    = Bool
False
    | Bool
otherwise = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
s of
                    (# Char
x0, Int
d #) -> Bool -> Int -> Bool
go (Char -> Bool
f Char
x0) (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d)
  where
    !end :: Int
end = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l
    go :: Bool -> Int -> Bool
go !Bool
acc !Int
i | Bool
acc       = Bool
True
               | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = Bool
acc
               | Bool
otherwise = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i of
                                (# Char
x, Int
d #) -> Bool -> Int -> Bool
go (Bool
acc Bool -> Bool -> Bool
|| Char -> Bool
f Char
x) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d)

-- | /O(n)/ Applied to a predicate and text, 'all' determines
-- if all chars of the text satisfy the predicate.
all :: (Char -> Bool) -> Text -> Bool
{-# INLINE all #-}
all :: (Char -> Bool) -> Text -> Bool
all Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l))
    | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0    = Bool
True
    | Bool
otherwise = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
s of
                    (# Char
x0, Int
d #) -> Bool -> Int -> Bool
go (Char -> Bool
f Char
x0) (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d)
  where
    !end :: Int
end = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l
    go :: Bool -> Int -> Bool
go !Bool
acc !Int
i | Bool -> Bool
not Bool
acc   = Bool
False
               | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = Bool
acc
               | Bool
otherwise = case PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i of
                                (# Char
x, Int
d #) -> Bool -> Int -> Bool
go (Bool
acc Bool -> Bool -> Bool
&& Char -> Bool
f Char
x) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d)

--------------------------------------------------------------------------------
--
-- Building text

-- | /O(n)/ 'replicate' char n time.
--
replicate :: Int -> Char -> Text
{-# INLINE replicate #-}
replicate :: Int -> Char -> Text
replicate Int
0 Char
_ = Text
empty
replicate Int
n Char
c = Bytes -> Text
Text (Int
-> (forall s. MArr (IArray PrimVector) s Word8 -> ST s ()) -> Bytes
forall (v :: * -> *) a.
Vec v a =>
Int -> (forall s. MArr (IArray v) s a -> ST s ()) -> v a
V.create Int
siz (Int -> MutablePrimArray s Word8 -> ST s ()
forall s. Int -> MutablePrimArray s Word8 -> ST s ()
go Int
0))
  where
    !csiz :: Int
csiz = Char -> Int
encodeCharLength Char
c
    !siz :: Int
siz = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
csiz
    go :: Int -> MutablePrimArray s Word8 -> ST s ()
    go :: Int -> MutablePrimArray s Word8 -> ST s ()
go Int
0 MutablePrimArray s Word8
marr = MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr Int
0 Char
c ST s Int -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> MutablePrimArray s Word8 -> ST s ()
forall s. Int -> MutablePrimArray s Word8 -> ST s ()
go Int
csiz MutablePrimArray s Word8
marr
    go Int
i MutablePrimArray s Word8
marr | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
siz = () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
              | Bool
otherwise = do Int
-> MutablePrimArray s Word8
-> Int
-> MutablePrimArray s Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> MutablePrimArray s Word8
-> Int
-> ST s ()
copyChar' Int
csiz MutablePrimArray s Word8
marr Int
i MutablePrimArray s Word8
marr (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
csiz)
                               Int -> MutablePrimArray s Word8 -> ST s ()
forall s. Int -> MutablePrimArray s Word8 -> ST s ()
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
csiz) MutablePrimArray s Word8
marr

-- | /O(n*m)/ 'cycleN' a text n times.
cycleN :: Int -> Text -> Text
{-# INLINE cycleN #-}
cycleN :: Int -> Text -> Text
cycleN Int
0 Text
_ = Text
empty
cycleN Int
n (Text Bytes
v) = Bytes -> Text
Text (Int -> Bytes -> Bytes
forall (v :: * -> *) a. Vec v a => Int -> v a -> v a
V.cycleN Int
n Bytes
v)

--------------------------------------------------------------------------------
-- Convert between codepoint vector and text

-- | /O(n)/ convert from a char vector.
fromVector :: V.PrimVector Char -> Text
{-# INLINE fromVector #-}
fromVector :: PrimVector Char -> Text
fromVector (V.PrimVector PrimArray Char
arr Int
s Int
l) = Bytes -> Text
Text (Int
-> (forall s. MArr (IArray PrimVector) s Word8 -> ST s Int)
-> Bytes
forall (v :: * -> *) a.
(Vec v a, HasCallStack) =>
Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
V.createN Int
l (Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go Int
s Int
0))
  where
    end :: Int
end = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go !Int
i !Int
j !MutablePrimArray s Word8
marr
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
j
        | Bool
otherwise = do
            let c :: Char
c = PrimArray Char -> Int -> Char
forall a. Prim a => PrimArray a -> Int -> a
indexPrimArray PrimArray Char
arr Int
i
            Int
j' <- MutablePrimArray s Word8 -> Int -> Char -> ST s Int
forall s. MutablePrimArray s Word8 -> Int -> Char -> ST s Int
encodeChar MutablePrimArray s Word8
marr Int
j Char
c
            Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j' MutablePrimArray s Word8
marr

-- | /O(n)/ convert to a char vector.
toVector :: Text -> V.PrimVector Char
{-# INLINE toVector #-}
toVector :: Text -> PrimVector Char
toVector (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int
-> (forall s. MArr (IArray PrimVector) s Char -> ST s Int)
-> PrimVector Char
forall (v :: * -> *) a.
(Vec v a, HasCallStack) =>
Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
V.createN (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
4) (Int -> Int -> MutablePrimArray (PrimState (ST s)) Char -> ST s Int
forall (m :: * -> *).
PrimMonad m =>
Int -> Int -> MutablePrimArray (PrimState m) Char -> m Int
go Int
s Int
0)
  where
    end :: Int
end = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l
    go :: Int -> Int -> MutablePrimArray (PrimState m) Char -> m Int
go !Int
i !Int
j !MutablePrimArray (PrimState m) Char
marr
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = Int -> m Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
j
        | Bool
otherwise = do
            let (# Char
c, Int
n #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i
            MutablePrimArray (PrimState m) Char -> Int -> Char -> m ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> a -> m ()
writePrimArray MutablePrimArray (PrimState m) Char
marr Int
j Char
c
            Int -> Int -> MutablePrimArray (PrimState m) Char -> m Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
n) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) MutablePrimArray (PrimState m) Char
marr

-- ----------------------------------------------------------------------------
-- ** Normalization
--
-- $normalization

-- | Check if a string is stable in the NFC (Normalization Form C).
isNormalized :: Text -> NormalizationResult
{-# INLINE isNormalized #-}
isNormalized :: Text -> NormalizationResult
isNormalized = NormalizeMode -> Text -> NormalizationResult
isNormalizedTo NormalizeMode
NFC

{-|
Check if a string is stable in the specified Unicode Normalization
Form.

This function can be used as a preprocessing step, before attempting to
normalize a string. Normalization is a very expensive process, it is often
cheaper to first determine if the string is unstable in the requested
normalization form.

The result of the check will be YES if the string is stable and MAYBE or NO
if it is unstable. If the result is MAYBE, the string does not necessarily
have to be normalized.

If the result is unstable, the offset parameter is set to the offset for the
first unstable code point. If the string is stable, the offset is equivalent
to the length of the string in bytes.

For more information, please review [Unicode Standard Annex #15 - Unicode
Normalization Forms](http://www.unicode.org/reports/tr15/).
-}
isNormalizedTo :: NormalizeMode -> Text -> NormalizationResult
isNormalizedTo :: NormalizeMode -> Text -> NormalizationResult
isNormalizedTo NormalizeMode
nmode (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = NormalizationResult
NormalizedYes
    | Bool
otherwise =
        let nflag :: CSize
nflag = NormalizeMode -> CSize
normalizeModeToFlag NormalizeMode
nmode
        in Int -> NormalizationResult
toNormalizationResult (ByteArray# -> Int# -> Int# -> CSize -> Int
utf8_isnormalized ByteArray#
arr# Int#
s# Int#
l# CSize
nflag)

-- | Normalize a string to NFC (Normalization Form C).
normalize :: Text -> Text
{-# INLINE normalize #-}
normalize :: Text -> Text
normalize = NormalizeMode -> Text -> Text
normalizeTo NormalizeMode
NFC

{-|
Normalize a string to the specified Unicode Normalization Form.

The Unicode standard defines two standards for equivalence between
characters: canonical and compatibility equivalence. Canonically equivalent
characters and sequence represent the same abstract character and must be
rendered with the same appearance and behavior. Compatibility equivalent
characters have a weaker equivalence and may be rendered differently.

Unicode Normalization Forms are formally defined standards that can be used
to test whether any two strings of characters are equivalent to each other.
This equivalence may be canonical or compatibility.

The algorithm puts all combining marks into a specified order and uses the
rules for decomposition and composition to transform the string into one of
four Unicode Normalization Forms. A binary comparison can then be used to
determine equivalence.
-}
normalizeTo :: NormalizeMode -> Text -> Text
normalizeTo :: NormalizeMode -> Text -> Text
normalizeTo NormalizeMode
nmode (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
    | Bool
otherwise = IO Text -> Text
forall a. IO a -> a
unsafeDupablePerformIO (IO Text -> Text) -> IO Text -> Text
forall a b. (a -> b) -> a -> b
$ do
        let nflag :: CSize
nflag = NormalizeMode -> CSize
normalizeModeToFlag NormalizeMode
nmode
            !l' :: Int
l'@(I# Int#
l'#) = ByteArray# -> Int# -> Int# -> CSize -> Int
utf8_normalize_length ByteArray#
arr# Int#
s# Int#
l# CSize
nflag
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
l' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0) (String -> IO ()
forall a. HasCallStack => String -> a
error String
"impossible happened!")
        !pa :: MutablePrimArray RealWorld Word8
pa@(MutablePrimArray MutableByteArray# RealWorld
marr#) <- Int -> IO (MArr PrimArray RealWorld Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
l'
        ByteArray#
-> Int#
-> Int#
-> MutableByteArray# RealWorld
-> Int#
-> CSize
-> IO ()
utf8_normalize ByteArray#
arr# Int#
s# Int#
l# MutableByteArray# RealWorld
marr# Int#
l'# CSize
nflag
        PrimArray Word8
arr' <- MArr PrimArray RealWorld Word8 -> IO (PrimArray Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray RealWorld Word8
MArr PrimArray RealWorld Word8
pa
        let !v :: Bytes
v = IArray PrimVector Word8 -> Int -> Int -> Bytes
forall (v :: * -> *) a. Vec v a => IArray v a -> Int -> Int -> v a
V.fromArr PrimArray Word8
IArray PrimVector Word8
arr' Int
0 Int
l'
        Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> Text
Text Bytes
v)

-- functions below will return error if the source ByteArray# is empty
--
foreign import ccall unsafe utf8_isnormalized ::
    ByteArray# -> Int# -> Int# -> CSize -> Int
foreign import ccall unsafe utf8_normalize ::
    ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> CSize -> IO ()
foreign import ccall unsafe utf8_normalize_length ::
    ByteArray# -> Int# -> Int# -> CSize -> Int

-- ----------------------------------------------------------------------------
-- ** Case conversions

-- $case

-- | Remove case distinction from UTF-8 encoded text with default locale.
caseFold :: Text -> Text
caseFold :: Text -> Text
caseFold = Locale -> Text -> Text
caseFoldWith Locale
localeDefault

{-|
Remove case distinction from UTF-8 encoded text.

Case folding is the process of eliminating differences between code points
concerning case mapping. It is most commonly used for comparing strings in a
case-insensitive manner. Conversion is fully compliant with the Unicode 7.0
standard.

Although similar to lowercasing text, there are significant differences.
For one, case folding does _not_ take locale into account when converting.
In some cases, case folding can be up to 20% faster than lowercasing the
same text, but the result cannot be treated as correct lowercased text.

Only two locale-specific exception are made when case folding text.
In Turkish, U+0049 LATIN CAPITAL LETTER I maps to U+0131 LATIN SMALL LETTER
DOTLESS I and U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE maps to U+0069
LATIN SMALL LETTER I.

Although most code points can be case folded without changing length, there are notable
exceptions. For example, U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE) maps
to "U+0069 U+0307" (LATIN SMALL LETTER I and COMBINING DOT ABOVE) when
converted to lowercase.

Only a handful of scripts make a distinction between upper- and lowercase.
In addition to modern scripts, such as Latin, Greek, Armenian and Cyrillic,
a few historic or archaic scripts have case. The vast majority of scripts
do not have case distinctions.
-}

caseFoldWith :: Locale -> Text -> Text
caseFoldWith :: Locale -> Text -> Text
caseFoldWith Locale
locale (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
    | Bool
otherwise = IO Text -> Text
forall a. IO a -> a
unsafeDupablePerformIO (IO Text -> Text) -> IO Text -> Text
forall a b. (a -> b) -> a -> b
$ do
        let !l' :: Int
l'@(I# Int#
l'#) = ByteArray# -> Int# -> Int# -> Locale -> Int
utf8_casefold_length ByteArray#
arr# Int#
s# Int#
l# Locale
locale
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
l' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0) (String -> IO ()
forall a. HasCallStack => String -> a
error String
"impossible happened!")
        !pa :: MutablePrimArray RealWorld Word8
pa@(MutablePrimArray MutableByteArray# RealWorld
marr#) <- Int -> IO (MArr PrimArray RealWorld Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
l'
        ByteArray#
-> Int#
-> Int#
-> MutableByteArray# RealWorld
-> Int#
-> Locale
-> IO ()
utf8_casefold ByteArray#
arr# Int#
s# Int#
l# MutableByteArray# RealWorld
marr# Int#
l'# Locale
locale
        PrimArray Word8
arr' <- MArr PrimArray RealWorld Word8 -> IO (PrimArray Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray RealWorld Word8
MArr PrimArray RealWorld Word8
pa
        let !v :: Bytes
v = IArray PrimVector Word8 -> Int -> Int -> Bytes
forall (v :: * -> *) a. Vec v a => IArray v a -> Int -> Int -> v a
V.fromArr PrimArray Word8
IArray PrimVector Word8
arr' Int
0 Int
l'
        Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> Text
Text Bytes
v)

-- | Convert UTF-8 encoded text to lowercase with default locale.
toLower :: Text -> Text
toLower :: Text -> Text
toLower = Locale -> Text -> Text
toLowerWith Locale
localeDefault

{-|
Convert UTF-8 encoded text to lowercase.

This function allows conversion of UTF-8 encoded strings to lowercase
without first changing the encoding to UTF-32. Conversion is fully compliant
with the Unicode 7.0 standard.

Although most code points can be converted to lowercase with changing length,
there are notable exceptions. For example, U+0130 (LATIN CAPITAL LETTER I WITH DOT
ABOVE) maps to "U+0069 U+0307" (LATIN SMALL LETTER I and COMBINING DOT
ABOVE) when converted to lowercase.

Only a handful of scripts make a distinction between upper- and lowercase.
In addition to modern scripts, such as Latin, Greek, Armenian and Cyrillic,
a few historic or archaic scripts have case. The vast majority of scripts do
not have case distinctions.

Case mapping is not reversible. That is, @toUpper(toLower(x)) != toLower(toUpper(x))@.

Certain code points (or combinations of code points) apply rules
based on the locale. For more information about these exceptional
code points, please refer to the Unicode standard:
ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
-}
toLowerWith :: Locale -> Text -> Text
toLowerWith :: Locale -> Text -> Text
toLowerWith Locale
locale (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
    | Bool
otherwise = IO Text -> Text
forall a. IO a -> a
unsafeDupablePerformIO (IO Text -> Text) -> IO Text -> Text
forall a b. (a -> b) -> a -> b
$ do
        let !l' :: Int
l'@(I# Int#
l'#) = ByteArray# -> Int# -> Int# -> Locale -> Int
utf8_tolower_length ByteArray#
arr# Int#
s# Int#
l# Locale
locale
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
l' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0) (String -> IO ()
forall a. HasCallStack => String -> a
error String
"impossible happened!")
        !pa :: MutablePrimArray RealWorld Word8
pa@(MutablePrimArray MutableByteArray# RealWorld
marr#) <- Int -> IO (MArr PrimArray RealWorld Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
l'
        ByteArray#
-> Int#
-> Int#
-> MutableByteArray# RealWorld
-> Int#
-> Locale
-> IO ()
utf8_tolower ByteArray#
arr# Int#
s# Int#
l# MutableByteArray# RealWorld
marr# Int#
l'# Locale
locale
        PrimArray Word8
arr' <- MArr PrimArray RealWorld Word8 -> IO (PrimArray Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray RealWorld Word8
MArr PrimArray RealWorld Word8
pa
        let !v :: Bytes
v = IArray PrimVector Word8 -> Int -> Int -> Bytes
forall (v :: * -> *) a. Vec v a => IArray v a -> Int -> Int -> v a
V.fromArr PrimArray Word8
IArray PrimVector Word8
arr' Int
0 Int
l'
        Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> Text
Text Bytes
v)

-- | Convert UTF-8 encoded text to uppercase with default locale.
toUpper :: Text -> Text
toUpper :: Text -> Text
toUpper = Locale -> Text -> Text
toUpperWith Locale
localeDefault

{-|
Convert UTF-8 encoded text to uppercase.

Conversion is fully compliant with the Unicode 7.0 standard.

Although most code points can be converted without changing length, there are notable
exceptions. For example, U+00DF (LATIN SMALL LETTER SHARP S) maps to
"U+0053 U+0053" (LATIN CAPITAL LETTER S and LATIN CAPITAL LETTER S) when
converted to uppercase.

Only a handful of scripts make a distinction between upper and lowercase.
In addition to modern scripts, such as Latin, Greek, Armenian and Cyrillic,
a few historic or archaic scripts have case. The vast majority of scripts
do not have case distinctions.

Case mapping is not reversible. That is, @toUpper(toLower(x)) != toLower(toUpper(x))@.

Certain code points (or combinations of code points) apply rules
based on the locale. For more information about these exceptional
code points, please refer to the Unicode standard:
ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
-}
toUpperWith :: Locale -> Text -> Text
toUpperWith :: Locale -> Text -> Text
toUpperWith Locale
locale (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
    | Bool
otherwise = IO Text -> Text
forall a. IO a -> a
unsafeDupablePerformIO (IO Text -> Text) -> IO Text -> Text
forall a b. (a -> b) -> a -> b
$ do
        let !l' :: Int
l'@(I# Int#
l'#) = ByteArray# -> Int# -> Int# -> Locale -> Int
utf8_toupper_length ByteArray#
arr# Int#
s# Int#
l# Locale
locale
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
l' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0) (String -> IO ()
forall a. HasCallStack => String -> a
error String
"impossible happened!")
        !pa :: MutablePrimArray RealWorld Word8
pa@(MutablePrimArray MutableByteArray# RealWorld
marr#) <- Int -> IO (MArr PrimArray RealWorld Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
l'
        ByteArray#
-> Int#
-> Int#
-> MutableByteArray# RealWorld
-> Int#
-> Locale
-> IO ()
utf8_toupper ByteArray#
arr# Int#
s# Int#
l# MutableByteArray# RealWorld
marr# Int#
l'# Locale
locale
        PrimArray Word8
arr' <- MArr PrimArray RealWorld Word8 -> IO (PrimArray Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray RealWorld Word8
MArr PrimArray RealWorld Word8
pa
        let !v :: Bytes
v = IArray PrimVector Word8 -> Int -> Int -> Bytes
forall (v :: * -> *) a. Vec v a => IArray v a -> Int -> Int -> v a
V.fromArr PrimArray Word8
IArray PrimVector Word8
arr' Int
0 Int
l'
        Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> Text
Text Bytes
v)

-- | Convert UTF-8 encoded text to titlecase with default locale.
toTitle :: Text -> Text
toTitle :: Text -> Text
toTitle = Locale -> Text -> Text
toTitleWith Locale
localeDefault

{-|
Convert UTF-8 encoded text to titlecase.

This function allows conversion of UTF-8 encoded strings to titlecase.
Conversion is fully compliant with the Unicode 7.0 standard.

Titlecase requires a bit more explanation than uppercase and lowercase,
because it is not a common text transformation. Titlecase uses uppercase
for the first letter of each word and lowercase for the rest. Words are
defined as "collections of code points with general category Lu, Ll, Lt, Lm
or Lo according to the Unicode database".

Effectively, any type of punctuation can break up a word, even if this is
not grammatically valid. This happens because the titlecasing algorithm
does not and cannot take grammar rules into account.

@
Text                                 | Titlecase
-------------------------------------|-------------------------------------
The running man                      | The Running Man
NATO Alliance                        | Nato Alliance
You're amazing at building libraries | You'Re Amazing At Building Libraries
@

Although most code points can be converted to titlecase without changing length,
there are notable exceptions. For example, U+00DF (LATIN SMALL LETTER SHARP S) maps to
"U+0053 U+0073" (LATIN CAPITAL LETTER S and LATIN SMALL LETTER S) when
converted to titlecase.

Certain code points (or combinations of code points) apply rules
based on the locale. For more information about these exceptional
code points, please refer to the Unicode standard:
ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
-}

toTitleWith :: Locale -> Text -> Text
toTitleWith :: Locale -> Text -> Text
toTitleWith Locale
locale (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text
empty
    | Bool
otherwise = IO Text -> Text
forall a. IO a -> a
unsafeDupablePerformIO (IO Text -> Text) -> IO Text -> Text
forall a b. (a -> b) -> a -> b
$ do
        let !l' :: Int
l'@(I# Int#
l'#) = ByteArray# -> Int# -> Int# -> Locale -> Int
utf8_totitle_length ByteArray#
arr# Int#
s# Int#
l# Locale
locale
        Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
l' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0) (String -> IO ()
forall a. HasCallStack => String -> a
error String
"impossible happened!")
        !pa :: MutablePrimArray RealWorld Word8
pa@(MutablePrimArray MutableByteArray# RealWorld
marr#) <- Int -> IO (MArr PrimArray RealWorld Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
Int -> m (MArr arr s a)
newArr Int
l'
        ByteArray#
-> Int#
-> Int#
-> MutableByteArray# RealWorld
-> Int#
-> Locale
-> IO ()
utf8_totitle ByteArray#
arr# Int#
s# Int#
l# MutableByteArray# RealWorld
marr# Int#
l'# Locale
locale
        PrimArray Word8
arr' <- MArr PrimArray RealWorld Word8 -> IO (PrimArray Word8)
forall (arr :: * -> *) a (m :: * -> *) s.
(Arr arr a, PrimMonad m, PrimState m ~ s) =>
MArr arr s a -> m (arr a)
unsafeFreezeArr MutablePrimArray RealWorld Word8
MArr PrimArray RealWorld Word8
pa
        let !v :: Bytes
v = IArray PrimVector Word8 -> Int -> Int -> Bytes
forall (v :: * -> *) a. Vec v a => IArray v a -> Int -> Int -> v a
V.fromArr PrimArray Word8
IArray PrimVector Word8
arr' Int
0 Int
l'
        Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> Text
Text Bytes
v)

-- functions below will return error if the source ByteArray# is empty
foreign import ccall unsafe utf8_casefold ::
    ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_casefold_length ::
    ByteArray# -> Int# -> Int# -> Locale -> Int

foreign import ccall unsafe utf8_tolower ::
    ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_tolower_length ::
    ByteArray# -> Int# -> Int# -> Locale -> Int

foreign import ccall unsafe utf8_toupper ::
    ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_toupper_length ::
    ByteArray# -> Int# -> Int# -> Locale -> Int

foreign import ccall unsafe utf8_totitle ::
    ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_totitle_length ::
    ByteArray# -> Int# -> Int# -> Locale -> Int

{-|
Check if the input string conforms to the category specified by the
flags.

This function can be used to check if the code points in a string are part
of a category. Valid flags are members of the "list of categories".
The category for a code point is defined as part of the entry in UnicodeData.txt,
the data file for the Unicode code point database.

By default, the function will treat grapheme clusters as a single code
point. This means that the following string:

@
Code point | Canonical combining class | General category      | Name
---------- | ------------------------- | --------------------- | ----------------------
U+0045     | 0                         | Lu (Uppercase letter) | LATIN CAPITAL LETTER E
U+0300     | 230                       | Mn (Non-spacing mark) | COMBINING GRAVE ACCENT
@

Will match with 'categoryLetterUppercase' in its entirety, because
the COMBINING GRAVE ACCENT is treated as part of the grapheme cluster. This
is useful when e.g. creating a text parser, because you do not have to
normalize the text first.

If this is undesired behavior, specify the 'UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER' flag.

In order to maintain backwards compatibility with POSIX functions
like `isdigit` and `isspace`, compatibility flags have been provided. Note,
however, that the result is only guaranteed to be correct for code points
in the Basic Latin range, between U+0000 and 0+007F. Combining a
compatibility flag with a regular category flag will result in undefined
behavior.
-}

isCategory :: Category -> Text -> Bool
isCategory :: Category -> Text -> Bool
isCategory Category
c (Text (V.PrimVector (PrimArray ByteArray#
arr#) (I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Bool
True
    | Bool
otherwise = ByteArray# -> Int# -> Int# -> Category -> Int
utf8_iscategory ByteArray#
arr# Int#
s# Int#
l# Category
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
l

{-|
Try to match as many code points with the matching category flags as possible
and return the prefix and suffix.
-}
spanCategory :: Category -> Text -> (Text, Text)
spanCategory :: Category -> Text -> (Text, Text)
spanCategory Category
c (Text (V.PrimVector arr :: PrimArray Word8
arr@(PrimArray ByteArray#
arr#) s :: Int
s@(I# Int#
s#) l :: Int
l@(I# Int#
l#)))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = (Text
empty, Text
empty)
    | Bool
otherwise =
        let i :: Int
i = ByteArray# -> Int# -> Int# -> Category -> Int
utf8_iscategory ByteArray#
arr# Int#
s# Int#
l# Category
c
        in (Bytes -> Text
Text (PrimArray Word8 -> Int -> Int -> Bytes
forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
arr Int
s Int
i), Bytes -> Text
Text (PrimArray Word8 -> Int -> Int -> Bytes
forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
arr (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i) (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i)))

-- functions below will return error if the source ByteArray# is empty
foreign import ccall utf8_iscategory :: ByteArray# -> Int# -> Int# -> Category -> Int