{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}

{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -Wno-partial-type-signatures #-}

-- |
-- Module      : Data.Text.Internal.Transformation
-- Copyright   : (c) 2008, 2009 Tom Harper,
--               (c) 2009, 2010 Bryan O'Sullivan,
--               (c) 2009 Duncan Coutts
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- This module holds functions shared between the strict and lazy implementations of @Text@ transformations.

module Data.Text.Internal.Transformation
  ( mapNonEmpty
  , toCaseFoldNonEmpty
  , toLowerNonEmpty
  , toUpperNonEmpty
  , toTitleNonEmpty
  , filter_
  ) where

import Prelude (Char, Bool(..), Int,
                Ord(..),
                Monad(..), pure,
                (+), (-), ($), (&&), (||), (==),
                not, return, otherwise, fromIntegral, (/=), const)
import Data.Bits ((.&.), shiftR, shiftL)
import Data.Char (isLetter, isSpace, ord)
import Control.Monad.ST (ST, runST)
import qualified Data.Text.Array as A
import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader, chr2, chr3, chr4)
import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping, titleMapping)
import Data.Text.Internal (Text(..), safe)
import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr8)
import qualified Prelude as P
import Data.Text.Unsafe (Iter(..), iterArray)
import Data.Word (Word8, Word)
import qualified GHC.Exts as Exts
import GHC.Int (Int64(..))

-- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
-- each element of @t@.
-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty.
--
-- The result is built in a fresh mutable array that is grown on demand
-- and shrunk to its final size before being frozen.
mapNonEmpty :: (Char -> Char) -> Text -> Text
mapNonEmpty f = go
  where
    go (Text src o l) = runST $ do
      -- Start with room for the source length plus one more character
      -- (the loop below reserves 4 bytes per write).
      marr <- A.new (l + 4)
      outer marr (l + 4) o 0
      where
        -- 'outer' fixes the current destination buffer and its capacity;
        -- it is re-entered only after the buffer has been resized.
        outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text
        outer !dst !dstLen = inner
          where
            -- 'inner' walks the source: @srcOff@ is the read offset into
            -- @src@, @dstOff@ is the write offset into @dst@.
            inner !srcOff !dstOff
              -- End of input: trim the buffer to what was written and freeze it.
              | srcOff >= l + o = do
                A.shrinkM dst dstOff
                arr <- A.unsafeFreeze dst
                return (Text arr 0 dstOff)
              -- Fewer than 4 free bytes left: grow by the number of unread
              -- source bytes plus 4, then restart with the new buffer.
              | dstOff + 4 > dstLen = do
                let !dstLen' = dstLen + (l + o) - srcOff + 4
                dst' <- A.resizeM dst dstLen'
                outer dst' dstLen' srcOff dstOff
              -- Decode one character, map it, and write the result;
              -- @d@ is the number of bytes read, @d'@ the number written.
              | otherwise = do
                let !(Iter c d) = iterArray src srcOff
                d' <- unsafeWrite dst dstOff (safe (f c))
                inner (srcOff + d) (dstOff + d')
{-# INLINE mapNonEmpty #-}

-- | Shared driver for the case conversions of a whole 'Text'.
--
-- * @ascii@ converts a single-byte (ASCII) code unit directly.
-- * @remap@ maps a multi-byte character to a packed unboxed @Int64@
--   that is decoded by 'writeMapping'; a result of @0@ means the source
--   bytes are copied to the output unchanged.
caseConvert :: (Word8 -> Word8) -> (Exts.Char# -> _ {- unboxed Int64 -}) -> Text -> Text
caseConvert ascii remap (Text src o l) = runST $ do
  -- Case conversion of a single code point may produce up to 3 code-points,
  -- each up to 4 bytes, so 12 in total.
  dst <- A.new (l + 12)
  -- Report the real capacity (l + 12) to the loop; reporting only @l@
  -- would trigger a resize even though the allocated slack is still unused.
  outer dst (l + 12) o 0
  where
    -- 'outer' fixes the current destination buffer and its capacity;
    -- it is re-entered only after the buffer has been resized.
    outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text
    outer !dst !dstLen = inner
      where
        -- @srcOff@ is the read offset into @src@, @dstOff@ the write
        -- offset into @dst@.
        inner !srcOff !dstOff
          -- End of input: trim the buffer to what was written and freeze it.
          | srcOff >= o + l = do
            A.shrinkM dst dstOff
            arr <- A.unsafeFreeze dst
            return (Text arr 0 dstOff)
          | dstOff + 12 > dstLen = do
            -- Ensure to extend the buffer by at least 12 bytes.
            let !dstLen' = dstLen + max 12 (l + o - srcOff)
            dst' <- A.resizeM dst dstLen'
            outer dst' dstLen' srcOff dstOff
          -- If a character is to remain unchanged, no need to decode Char back into UTF8,
          -- just copy bytes from input.
          | otherwise = do
            -- m1..m3 are bound lazily (no bang): each is only evaluated in
            -- the branches below that actually use it.
            let m0 = A.unsafeIndex src srcOff
                m1 = A.unsafeIndex src (srcOff + 1)
                m2 = A.unsafeIndex src (srcOff + 2)
                m3 = A.unsafeIndex src (srcOff + 3)
                -- Length of the encoded character, derived from its leading byte.
                !d = utf8LengthByLeader m0
            case d of
              1 -> do
                A.unsafeWrite dst dstOff (ascii m0)
                inner (srcOff + 1) (dstOff + 1)
              2 -> do
                let !(Exts.C# c) = chr2 m0 m1
                dstOff' <- case I64# (remap c) of
                  0 -> do
                    A.unsafeWrite dst dstOff m0
                    A.unsafeWrite dst (dstOff + 1) m1
                    pure $ dstOff + 2
                  i -> writeMapping dst i dstOff
                inner (srcOff + 2) dstOff'
              3 -> do
                let !(Exts.C# c) = chr3 m0 m1 m2
                dstOff' <- case I64# (remap c) of
                  0 -> do
                    A.unsafeWrite dst dstOff m0
                    A.unsafeWrite dst (dstOff + 1) m1
                    A.unsafeWrite dst (dstOff + 2) m2
                    pure $ dstOff + 3
                  i -> writeMapping dst i dstOff
                inner (srcOff + 3) dstOff'
              _ -> do
                let !(Exts.C# c) = chr4 m0 m1 m2 m3
                dstOff' <- case I64# (remap c) of
                  0 -> do
                    A.unsafeWrite dst dstOff m0
                    A.unsafeWrite dst (dstOff + 1) m1
                    A.unsafeWrite dst (dstOff + 2) m2
                    A.unsafeWrite dst (dstOff + 3) m3
                    pure $ dstOff + 4
                  i -> writeMapping dst i dstOff
                inner (srcOff + 4) dstOff'

{-# INLINE caseConvert #-}

-- | Write the characters packed into an 'Int64' mapping to @dst@,
-- starting at @dstOff@, and return the offset just past the last byte
-- written.
--
-- Characters are peeled off one at a time with 'chopOffChar'; a value of
-- @0@ terminates the sequence (and writes nothing).
writeMapping :: A.MArray s -> Int64 -> Int -> ST s Int
writeMapping !_ 0 !dstOff = pure dstOff
writeMapping dst i dstOff = do
  let (ch, j) = chopOffChar i
  -- @d@ is the number of bytes 'unsafeWrite' emitted for @ch@.
  d <- unsafeWrite dst dstOff ch
  writeMapping dst j (dstOff + d)

-- | Split a packed mapping into its lowest character and the remainder.
--
-- The low 21 bits are turned into a 'Char' (without any validity check,
-- via the @chr#@ primop); the second component is the input shifted
-- right by 21 bits.
chopOffChar :: Int64 -> (Char, Int64)
chopOffChar ab = (chr a, ab `shiftR` 21)
  where
    -- Unchecked conversion from a code point to a 'Char'.
    chr :: Int -> Char
    chr (Exts.I# n) = Exts.C# (Exts.chr# n)

    -- Mask selecting the low 21 bits.
    mask :: Int64
    mask = (1 `shiftL` 21) - 1

    -- Code point stored in the low 21 bits of @ab@.
    a :: Int
    a = P.fromIntegral $ ab .&. mask

-- | /O(n)/ Convert a string to folded case.
-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty.
--
-- ASCII bytes are handled by 'asciiToLower'; all other characters go
-- through 'foldMapping'.
toCaseFoldNonEmpty :: Text -> Text
toCaseFoldNonEmpty = \xs -> caseConvert asciiToLower foldMapping xs
{-# INLINE toCaseFoldNonEmpty #-}

-- | /O(n)/ Convert a string to lower case, using simple case
-- conversion.
-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty.
--
-- ASCII bytes are handled by 'asciiToLower'; all other characters go
-- through 'lowerMapping'.
toLowerNonEmpty :: Text -> Text
toLowerNonEmpty = \xs -> caseConvert asciiToLower lowerMapping xs
{-# INLINE toLowerNonEmpty #-}

-- | /O(n)/ Convert a string to upper case, using simple case
-- conversion.
-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty.
--
-- ASCII bytes are handled by 'asciiToUpper'; all other characters go
-- through 'upperMapping'.
toUpperNonEmpty :: Text -> Text
toUpperNonEmpty = \xs -> caseConvert asciiToUpper upperMapping xs
{-# INLINE toUpperNonEmpty #-}

-- | Lower-case a byte if it is an ASCII upper-case letter (@A@..@Z@,
-- 65..90); every other byte is returned unchanged.
--
-- The range test relies on 'Word8' wrap-around: @w - 65 <= 25@ holds
-- exactly when @65 <= w <= 90@.
asciiToLower :: Word8 -> Word8
asciiToLower w = if w - 65 <= 25 then w + 32 else w

-- | Upper-case a byte if it is an ASCII lower-case letter (@a@..@z@,
-- 97..122); every other byte is returned unchanged.
--
-- The range test relies on 'Word8' wrap-around: @w - 97 <= 25@ holds
-- exactly when @97 <= w <= 122@.
asciiToUpper :: Word8 -> Word8
asciiToUpper w = if w - 97 <= 25 then w - 32 else w

-- | 'True' iff the byte is an ASCII letter (@A@..@Z@ or @a@..@z@).
--
-- Both range tests rely on 'Word8' wrap-around, so a single unsigned
-- comparison covers each 26-letter range.
isAsciiLetter :: Word8 -> Bool
isAsciiLetter w = w - 65 <= 25 || w - 97 <= 25

-- | 'True' iff the byte is ASCII white space: the space character
-- (0x20) or one of the five control characters 0x09..0x0D.
--
-- The leading @w .&. 0x50 == 0@ and @w < 0x80@ tests reject most bytes
-- before the exact comparison; every byte accepted by the final clause
-- also passes them.
isAsciiSpace :: Word8 -> Bool
isAsciiSpace w = w .&. 0x50 == 0 && w < 0x80 && (w == 0x20 || w - 0x09 < 5)

-- | /O(n)/ Convert a string to title case, see 'Data.Text.toTitle' for discussion.
-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty.
--
-- The loop carries a Boolean @mode@: 'False' while looking for the first
-- letter of a word (characters go through the title\/upper mapping),
-- 'True' while inside a word (characters go through the lower mapping
-- until a space character is seen).
toTitleNonEmpty :: Text -> Text
toTitleNonEmpty (Text src o l) = runST $ do
  -- Case conversion of a single code point may produce up to 3 code-points,
  -- each up to 4 bytes, so 12 in total.
  dst <- A.new (l + 12)
  -- Report the real capacity (l + 12) to the loop; reporting only @l@
  -- would trigger a resize even though the allocated slack is still unused.
  outer dst (l + 12) o 0 False
  where
    -- 'outer' fixes the current destination buffer and its capacity;
    -- it is re-entered only after the buffer has been resized.
    outer :: forall s. A.MArray s -> Int -> Int -> Int -> Bool -> ST s Text
    outer !dst !dstLen = inner
      where
        -- @srcOff@ is the read offset into @src@, @dstOff@ the write
        -- offset into @dst@, @mode@ the in-word flag described above.
        inner !srcOff !dstOff !mode
          -- End of input: trim the buffer to what was written and freeze it.
          | srcOff >= o + l = do
            A.shrinkM dst dstOff
            arr <- A.unsafeFreeze dst
            return (Text arr 0 dstOff)
          | dstOff + 12 > dstLen = do
            -- Ensure to extend the buffer by at least 12 bytes.
            let !dstLen' = dstLen + max 12 (l + o - srcOff)
            dst' <- A.resizeM dst dstLen'
            outer dst' dstLen' srcOff dstOff mode
          -- If a character is to remain unchanged, no need to decode Char back into UTF8,
          -- just copy bytes from input.
          | otherwise = do
            -- m1..m3 are bound lazily (no bang): each is only evaluated in
            -- the branches below that actually use it.
            let m0 = A.unsafeIndex src srcOff
                m1 = A.unsafeIndex src (srcOff + 1)
                m2 = A.unsafeIndex src (srcOff + 2)
                m3 = A.unsafeIndex src (srcOff + 3)
                -- Length of the encoded character, derived from its leading byte.
                !d = utf8LengthByLeader m0

            case d of
              1 -> do
                let (mode', m0') = asciiAdvance mode m0
                A.unsafeWrite dst dstOff m0'
                inner (srcOff + 1) (dstOff + 1) mode'
              2 -> do
                let !(Exts.C# c) = chr2 m0 m1
                    -- The only two-byte sequence treated as a space is
                    -- 0xC2 0xA0 (U+00A0).
                    !(# mode', c' #) = advance (\_ -> m0 == 0xC2 && m1 == 0xA0) mode c
                dstOff' <- case I64# c' of
                  0 -> do
                    A.unsafeWrite dst dstOff m0
                    A.unsafeWrite dst (dstOff + 1) m1
                    pure $ dstOff + 2
                  i -> writeMapping dst i dstOff
                inner (srcOff + 2) dstOff' mode'
              3 -> do
                let !(Exts.C# c) = chr3 m0 m1 m2
                    -- Three-byte spaces: 0xE1 0x9A 0x80 (U+1680), anything
                    -- under the 0xE2 0x80 prefix that 'isSpace' accepts,
                    -- 0xE2 0x81 0x9F (U+205F) and 0xE3 0x80 0x80 (U+3000).
                    -- The byte tests keep 'isSpace' off the common path.
                    isSpace3 ch
                      =  m0 == 0xE1 && m1 == 0x9A && m2 == 0x80
                      || m0 == 0xE2 && (m1 == 0x80 && isSpace (Exts.C# ch) || m1 == 0x81 && m2 == 0x9F)
                      || m0 == 0xE3 && m1 == 0x80 && m2 == 0x80
                    !(# mode', c' #) = advance isSpace3 mode c
                dstOff' <- case I64# c' of
                  0 -> do
                    A.unsafeWrite dst dstOff m0
                    A.unsafeWrite dst (dstOff + 1) m1
                    A.unsafeWrite dst (dstOff + 2) m2
                    pure $ dstOff + 3
                  i -> writeMapping dst i dstOff
                inner (srcOff + 3) dstOff' mode'
              _ -> do
                let !(Exts.C# c) = chr4 m0 m1 m2 m3
                    -- No four-byte character is treated as a space.
                    !(# mode', c' #) = advance (\_ -> False) mode c
                dstOff' <- case I64# c' of
                  0 -> do
                    A.unsafeWrite dst dstOff m0
                    A.unsafeWrite dst (dstOff + 1) m1
                    A.unsafeWrite dst (dstOff + 2) m2
                    A.unsafeWrite dst (dstOff + 3) m3
                    pure $ dstOff + 4
                  i -> writeMapping dst i dstOff
                inner (srcOff + 4) dstOff' mode'

        -- State transition for an ASCII byte: returns the next mode and
        -- the byte to write.
        asciiAdvance :: Bool -> Word8 -> (Bool, Word8)
        -- Outside a word: upper-case the byte; enter a word iff it is a letter.
        asciiAdvance False w = (isAsciiLetter w, asciiToUpper w)
        -- Inside a word: lower-case the byte; leave the word on a space.
        asciiAdvance True w = (not (isAsciiSpace w), asciiToLower w)

        -- State transition for a multi-byte character: returns the next
        -- mode and the packed mapping to write (see 'writeMapping').
        -- The first argument decides whether the character is a space and
        -- is only consulted while inside a word.
        advance :: (Exts.Char# -> Bool) -> Bool -> Exts.Char# -> (# Bool, _ {- unboxed Int64 -} #)
        advance _ False c = (# isLetter (Exts.C# c), titleMapping c #)
        advance isSpaceChar True c = (# not (isSpaceChar c), lowerMapping c #)
        {-# INLINE advance #-}

-- | /O(n)/ 'filter_', applied to a continuation, a predicate and a @Text@,
-- calls the continuation with the @Text@ containing only the characters satisfying the predicate.
--
-- The continuation receives the frozen result array, a start offset of
-- @0@ and the number of bytes written.
filter_ :: forall a. (A.Array -> Int -> Int -> a) -> (Char -> Bool) -> Text -> a
filter_ mkText p = go
  where
    go (Text src o l) = runST $ do
      -- It's tempting to allocate l elements at once and avoid resizing.
      -- However, this can be unacceptable in scenarios where a huge array
      -- is filtered with a rare predicate, resulting in a much shorter buffer.
      let !dstLen = min l 64
      dst <- A.new dstLen
      outer dst dstLen o 0
      where
        -- 'outer' fixes the current destination buffer and its capacity;
        -- it is re-entered only after the buffer has been resized.
        outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s a
        outer !dst !dstLen = inner
          where
            -- @srcOff@ is the read offset into @src@, @dstOff@ the write
            -- offset into @dst@.
            inner !srcOff !dstOff
              -- End of input: trim the buffer, freeze it and hand it to
              -- the continuation.
              | srcOff >= o + l = do
                A.shrinkM dst dstOff
                arr <- A.unsafeFreeze dst
                return $ mkText arr 0 dstOff
              | dstOff + 4 > dstLen = do
                -- Double size of the buffer, unless it becomes longer than
                -- source string. Ensure to extend it by at least 4 bytes.
                let !dstLen' = dstLen + max 4 (min (l + o - srcOff) dstLen)
                dst' <- A.resizeM dst dstLen'
                outer dst' dstLen' srcOff dstOff
              -- In case of success, filter writes exactly the same character
              -- it just read (this is not a case for map, for example).
              -- We leverage this fact below: no need to decode Char back into UTF8,
              -- just copy bytes from input.
              | otherwise = do
                -- m1..m3 are bound lazily (no bang): each is only evaluated
                -- in the branches below that actually use it.
                let m0 = A.unsafeIndex src srcOff
                    m1 = A.unsafeIndex src (srcOff + 1)
                    m2 = A.unsafeIndex src (srcOff + 2)
                    m3 = A.unsafeIndex src (srcOff + 3)
                    -- Length of the encoded character, derived from its leading byte.
                    !d = utf8LengthByLeader m0
                case d of
                  1 -> do
                    let !c = unsafeChr8 m0
                    if not (p c) then inner (srcOff + 1) dstOff else do
                      A.unsafeWrite dst dstOff m0
                      inner (srcOff + 1) (dstOff + 1)
                  2 -> do
                    let !c = chr2 m0 m1
                    if not (p c) then inner (srcOff + 2) dstOff else do
                      A.unsafeWrite dst dstOff m0
                      A.unsafeWrite dst (dstOff + 1) m1
                      inner (srcOff + 2) (dstOff + 2)
                  3 -> do
                    let !c = chr3 m0 m1 m2
                    if not (p c) then inner (srcOff + 3) dstOff else do
                      A.unsafeWrite dst dstOff m0
                      A.unsafeWrite dst (dstOff + 1) m1
                      A.unsafeWrite dst (dstOff + 2) m2
                      inner (srcOff + 3) (dstOff + 3)
                  _ -> do
                    let !c = chr4 m0 m1 m2 m3
                    if not (p c) then inner (srcOff + 4) dstOff else do
                      A.unsafeWrite dst dstOff m0
                      A.unsafeWrite dst (dstOff + 1) m1
                      A.unsafeWrite dst (dstOff + 2) m2
                      A.unsafeWrite dst (dstOff + 3) m3
                      inner (srcOff + 4) (dstOff + 4)
{-# INLINE filter_ #-}