{-# LANGUAGE BangPatterns, CPP, MagicHash #-}

-- |
-- Module      : Data.Text.Internal.Fusion
-- Copyright   : (c) Tom Harper 2008-2009,
--               (c) Bryan O'Sullivan 2009-2010,
--               (c) Duncan Coutts 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Text manipulation functions represented as fusible operations over
-- streams.
module Data.Text.Internal.Fusion
    (
    -- * Types
      Stream(..)
    , Step(..)

    -- * Creation and elimination
    , stream
    , unstream
    , reverseStream

    , length

    -- * Transformations
    , reverse

    -- * Construction
    -- ** Scans
    , reverseScanr

    -- ** Accumulating maps
    , mapAccumL

    -- ** Generation and unfolding
    , unfoldrN

    -- * Indexing
    , index
    , findIndex
    , countChar
    ) where

import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
                Num(..), Ord(..), ($),
                otherwise)
import Data.Bits (shiftL, shiftR)
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Private (runText)
import Data.Text.Internal.Unsafe.Char (unsafeChr8, unsafeWrite)
import qualified Data.Text.Array as A
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Internal.Fusion.Types
import Data.Text.Internal.Fusion.Size
import qualified Data.Text.Internal as I
import qualified Data.Text.Internal.Encoding.Utf8 as U8
import GHC.Stack (HasCallStack)

default(Int)

-- | /O(n)/ Convert 'Text' into a 'Stream' 'Char'.
--
-- __Properties__
--
-- @'unstream' . 'stream' = 'Data.Function.id'@
--
-- @'stream' . 'unstream' = 'Data.Function.id' @
stream ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  Text -> Stream Char
stream :: Text -> Stream Char
stream (Text Array
arr Int
off Int
len) = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
off (Int -> Int -> Size
betweenSize (Int
len forall a. Bits a => a -> Int -> a
`shiftR` Int
2) Int
len)
    where
      !end :: Int
end = Int
offforall a. Num a => a -> a -> a
+Int
len
      next :: Int -> Step Int Char
next !Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
end  = forall s a. Step s a
Done
          | Bool
otherwise = forall s a. a -> s -> Step s a
Yield Char
chr (Int
i forall a. Num a => a -> a -> a
+ Int
l)
          where
            n0 :: Word8
n0 = Array -> Int -> Word8
A.unsafeIndex Array
arr Int
i
            n1 :: Word8
n1 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
i forall a. Num a => a -> a -> a
+ Int
1)
            n2 :: Word8
n2 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
i forall a. Num a => a -> a -> a
+ Int
2)
            n3 :: Word8
n3 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
i forall a. Num a => a -> a -> a
+ Int
3)

            l :: Int
l  = Word8 -> Int
U8.utf8LengthByLeader Word8
n0
            chr :: Char
chr = case Int
l of
              Int
1 -> Word8 -> Char
unsafeChr8 Word8
n0
              Int
2 -> Word8 -> Word8 -> Char
U8.chr2 Word8
n0 Word8
n1
              Int
3 -> Word8 -> Word8 -> Word8 -> Char
U8.chr3 Word8
n0 Word8
n1 Word8
n2
              Int
_ -> Word8 -> Word8 -> Word8 -> Word8 -> Char
U8.chr4 Word8
n0 Word8
n1 Word8
n2 Word8
n3
{-# INLINE [0] stream #-}

-- | /O(n)/ Converts 'Text' into a 'Stream' 'Char', but iterates
-- backwards through the text.
--
-- __Properties__
--
-- @'unstream' . 'reverseStream' = 'Data.Text.reverse' @
reverseStream :: Text -> Stream Char
reverseStream :: Text -> Stream Char
reverseStream (Text Array
arr Int
off Int
len) = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next (Int
offforall a. Num a => a -> a -> a
+Int
lenforall a. Num a => a -> a -> a
-Int
1) (Int -> Int -> Size
betweenSize (Int
len forall a. Bits a => a -> Int -> a
`shiftR` Int
2) Int
len)
    where
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next !Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
< Int
off    = forall s a. Step s a
Done
          | Word8
n0 forall a. Ord a => a -> a -> Bool
<  Word8
0x80 = forall s a. a -> s -> Step s a
Yield (Word8 -> Char
unsafeChr8 Word8
n0)       (Int
i forall a. Num a => a -> a -> a
- Int
1)
          | Word8
n1 forall a. Ord a => a -> a -> Bool
>= Word8
0xC0 = forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Char
U8.chr2 Word8
n1 Word8
n0)       (Int
i forall a. Num a => a -> a -> a
- Int
2)
          | Word8
n2 forall a. Ord a => a -> a -> Bool
>= Word8
0xC0 = forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Word8 -> Char
U8.chr3 Word8
n2 Word8
n1 Word8
n0)    (Int
i forall a. Num a => a -> a -> a
- Int
3)
          | Bool
otherwise  = forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Word8 -> Word8 -> Char
U8.chr4 Word8
n3 Word8
n2 Word8
n1 Word8
n0) (Int
i forall a. Num a => a -> a -> a
- Int
4)
          where
            n0 :: Word8
n0 = Array -> Int -> Word8
A.unsafeIndex Array
arr Int
i
            n1 :: Word8
n1 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
i forall a. Num a => a -> a -> a
- Int
1)
            n2 :: Word8
n2 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
i forall a. Num a => a -> a -> a
- Int
2)
            n3 :: Word8
n3 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
i forall a. Num a => a -> a -> a
- Int
3)
{-# INLINE [0] reverseStream #-}

-- | /O(n)/ Convert 'Stream' 'Char' into a 'Text'.
--
-- __Properties__
--
-- @'unstream' . 'stream' = 'Data.Function.id'@
--
-- @'stream' . 'unstream' = 'Data.Function.id' @
unstream :: Stream Char -> Text
unstream :: Stream Char -> Text
unstream (Stream s -> Step s Char
next0 s
s0 Size
len) = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText forall a b. (a -> b) -> a -> b
$ \MArray s -> Int -> ST s Text
done -> do
  -- Before encoding each char we perform a buffer realloc check assuming
  -- worst case encoding size of four 8-bit units for the char. Just add an
  -- extra space to the buffer so that we do not end up reallocating even when
  -- all the chars are encoded as single unit.
  let mlen :: Int
mlen = Int -> Size -> Int
upperBound Int
4 Size
len forall a. Num a => a -> a -> a
+ Int
3
  MArray s
arr0 <- forall s. Int -> ST s (MArray s)
A.new Int
mlen
  let outer :: MArray s -> Int -> s -> Int -> ST s Text
outer !MArray s
arr !Int
maxi = s -> Int -> ST s Text
encode
       where
        -- keep the common case loop as small as possible
        encode :: s -> Int -> ST s Text
encode !s
si !Int
di =
            case s -> Step s Char
next0 s
si of
                Step s Char
Done        -> MArray s -> Int -> ST s Text
done MArray s
arr Int
di
                Skip s
si'    -> s -> Int -> ST s Text
encode s
si' Int
di
                Yield Char
c s
si'
                    -- simply check for the worst case
                    | Int
maxi forall a. Ord a => a -> a -> Bool
< Int
di forall a. Num a => a -> a -> a
+ Int
3 -> s -> Int -> ST s Text
realloc s
si Int
di
                    | Bool
otherwise -> do
                            Int
n <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
di Char
c
                            s -> Int -> ST s Text
encode s
si' (Int
di forall a. Num a => a -> a -> a
+ Int
n)

        -- keep uncommon case separate from the common case code
        {-# NOINLINE realloc #-}
        realloc :: s -> Int -> ST s Text
realloc !s
si !Int
di = do
            let newlen :: Int
newlen = (Int
maxi forall a. Num a => a -> a -> a
+ Int
1) forall a. Num a => a -> a -> a
* Int
2
            MArray s
arr' <- forall s. MArray s -> Int -> ST s (MArray s)
A.resizeM MArray s
arr Int
newlen
            MArray s -> Int -> s -> Int -> ST s Text
outer MArray s
arr' (Int
newlen forall a. Num a => a -> a -> a
- Int
1) s
si Int
di

  MArray s -> Int -> s -> Int -> ST s Text
outer MArray s
arr0 (Int
mlen forall a. Num a => a -> a -> a
- Int
1) s
s0 Int
0
{-# INLINE [0] unstream #-}
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}


-- ----------------------------------------------------------------------------
-- * Basic stream functions

-- | /O(n)/ Returns the number of characters in a 'Stream'.
--
-- __Properties__
--
-- @'length' . 'stream' = 'Data.Text.length' @
length :: Stream Char -> Int
length :: Stream Char -> Int
length = forall a. Integral a => Stream Char -> a
S.lengthI
{-# INLINE[0] length #-}

-- | /O(n)/ Reverse the characters of a 'Stream' returning 'Text'.
--
-- __Properties__
--
-- @'reverse' . 'stream' = 'Data.Text.reverse' @
reverse ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  Stream Char -> Text
reverse :: Stream Char -> Text
reverse (Stream s -> Step s Char
next s
s Size
len0)
    | Size -> Bool
isEmpty Size
len0 = Text
I.empty
    | Bool
otherwise    = Array -> Int -> Int -> Text
I.text Array
arr Int
off' Int
len'
  where
    len0' :: Int
len0' = Int -> Size -> Int
upperBound Int
4 (Size -> Size -> Size
larger Size
len0 Size
4)
    (Array
arr, (Int
off', Int
len')) = forall a. (forall s. ST s (MArray s, a)) -> (Array, a)
A.run2 (forall s. Int -> ST s (MArray s)
A.new Int
len0' forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {s}.
s
-> Int
-> Int
-> MutableByteArray s
-> ST s (MutableByteArray s, (Int, Int))
loop s
s (Int
len0'forall a. Num a => a -> a -> a
-Int
1) Int
len0')
    loop :: s
-> Int
-> Int
-> MutableByteArray s
-> ST s (MutableByteArray s, (Int, Int))
loop !s
s0 !Int
i !Int
len MutableByteArray s
marr =
        case s -> Step s Char
next s
s0 of
          Step s Char
Done -> forall (m :: * -> *) a. Monad m => a -> m a
return (MutableByteArray s
marr, (Int
j, Int
lenforall a. Num a => a -> a -> a
-Int
j))
              where j :: Int
j = Int
i forall a. Num a => a -> a -> a
+ Int
1
          Skip s
s1    -> s
-> Int
-> Int
-> MutableByteArray s
-> ST s (MutableByteArray s, (Int, Int))
loop s
s1 Int
i Int
len MutableByteArray s
marr
          Yield Char
x s
s1 | Int
i forall a. Ord a => a -> a -> Bool
< Int
least -> {-# SCC "reverse/resize" #-} do
                       let newLen :: Int
newLen = Int
len forall a. Bits a => a -> Int -> a
`shiftL` Int
1
                       MutableByteArray s
marr' <- forall s. Int -> ST s (MArray s)
A.new Int
newLen
                       forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MutableByteArray s
marr' (Int
newLenforall a. Num a => a -> a -> a
-Int
len) MutableByteArray s
marr Int
0 Int
len
                       Int
_ <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MutableByteArray s
marr' (Int
len forall a. Num a => a -> a -> a
+ Int
i forall a. Num a => a -> a -> a
- Int
least) Char
x
                       s
-> Int
-> Int
-> MutableByteArray s
-> ST s (MutableByteArray s, (Int, Int))
loop s
s1 (Int
len forall a. Num a => a -> a -> a
+ Int
i forall a. Num a => a -> a -> a
- Int
least forall a. Num a => a -> a -> a
- Int
1) Int
newLen MutableByteArray s
marr'
                     | Bool
otherwise -> do
                       Int
_ <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MutableByteArray s
marr (Int
i forall a. Num a => a -> a -> a
- Int
least) Char
x
                       s
-> Int
-> Int
-> MutableByteArray s
-> ST s (MutableByteArray s, (Int, Int))
loop s
s1 (Int
i forall a. Num a => a -> a -> a
- Int
least forall a. Num a => a -> a -> a
- Int
1) Int
len MutableByteArray s
marr
            where least :: Int
least = Char -> Int
U8.utf8Length Char
x forall a. Num a => a -> a -> a
- Int
1
{-# INLINE [0] reverse #-}

-- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
-- the input and result reversed.
--
-- __Properties__
--
-- @'reverse' . 'reverseScanr' f c . 'reverseStream' = 'Data.Text.scanr' f c @
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr Char -> Char -> Char
f Char
z0 (Stream s -> Step s Char
next0 s
s0 Size
len) = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Scan s -> Step (Scan s) Char
next (forall s. Char -> s -> Scan s
Scan1 Char
z0 s
s0) (Size
lenforall a. Num a => a -> a -> a
+Size
1) -- HINT maybe too low
  where
    {-# INLINE next #-}
    next :: Scan s -> Step (Scan s) Char
next (Scan1 Char
z s
s) = forall s a. a -> s -> Step s a
Yield Char
z (forall s. Char -> s -> Scan s
Scan2 Char
z s
s)
    next (Scan2 Char
z s
s) = case s -> Step s Char
next0 s
s of
                         Yield Char
x s
s' -> let !x' :: Char
x' = Char -> Char -> Char
f Char
x Char
z
                                       in forall s a. a -> s -> Step s a
Yield Char
x' (forall s. Char -> s -> Scan s
Scan2 Char
x' s
s')
                         Skip s
s'    -> forall s a. s -> Step s a
Skip (forall s. Char -> s -> Scan s
Scan2 Char
z s
s')
                         Step s Char
Done       -> forall s a. Step s a
Done
{-# INLINE reverseScanr #-}

-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
-- value. However, the length of the result is limited by the
-- first argument to 'unfoldrN'. This function is more efficient than
-- 'unfoldr' when the length of the result is known.
--
-- __Properties__
--
-- @'unstream' ('unfoldrN' n f a) = 'Data.Text.unfoldrN' n f a @
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
unfoldrN :: forall a. Int -> (a -> Maybe (Char, a)) -> a -> Stream Char
unfoldrN Int
n = forall a b.
Integral a =>
a -> (b -> Maybe (Char, b)) -> b -> Stream Char
S.unfoldrNI Int
n
{-# INLINE [0] unfoldrN #-}

-------------------------------------------------------------------------------
-- ** Indexing streams

-- | /O(n)/ stream index (subscript) operator, starting from 0.
--
-- __Properties__
--
-- @'index' ('stream' t) n  = 'Data.Text.index' t n @
index :: HasCallStack => Stream Char -> Int -> Char
index :: HasCallStack => Stream Char -> Int -> Char
index = forall a. (HasCallStack, Integral a) => Stream Char -> a -> Char
S.indexI
{-# INLINE [0] index #-}

-- | The 'findIndex' function takes a predicate and a stream and
-- returns the index of the first element in the stream
-- satisfying the predicate.
--
-- __Properties__
--
-- @'findIndex' p . 'stream'  = 'Data.Text.findIndex' p @
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex = forall a. Integral a => (Char -> Bool) -> Stream Char -> Maybe a
S.findIndexI
{-# INLINE [0] findIndex #-}

-- | /O(n)/ The 'count' function returns the number of times the query
-- element appears in the given stream.
--
-- __Properties__
--
-- @'countChar' c . 'stream'  = 'Data.Text.countChar' c @
countChar :: Char -> Stream Char -> Int
countChar :: Char -> Stream Char -> Int
countChar = forall a. Integral a => Char -> Stream Char -> a
S.countCharI
{-# INLINE [0] countChar #-}

-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
-- function to each element of a 'Text', passing an accumulating
-- parameter from left to right, and returns a final 'Text'.
--
-- __Properties__
--
-- @'mapAccumL' g z0 . 'stream' = 'Data.Text.mapAccumL' g z0@
mapAccumL ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  (a -> Char -> (a,Char)) -> a -> Stream Char -> (a, Text)
mapAccumL :: forall a. (a -> Char -> (a, Char)) -> a -> Stream Char -> (a, Text)
mapAccumL a -> Char -> (a, Char)
f a
z0 (Stream s -> Step s Char
next0 s
s0 Size
len) = (a
nz, Array -> Int -> Int -> Text
I.text Array
na Int
0 Int
nl)
  where
    (Array
na,(a
nz,Int
nl)) = forall a. (forall s. ST s (MArray s, a)) -> (Array, a)
A.run2 (forall s. Int -> ST s (MArray s)
A.new Int
mlen forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \MArray s
arr -> forall {s}.
MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
outer MArray s
arr Int
mlen a
z0 s
s0 Int
0)
      where mlen :: Int
mlen = Int -> Size -> Int
upperBound Int
4 Size
len
    outer :: MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
outer MArray s
arr Int
top = a -> s -> Int -> ST s (MArray s, (a, Int))
loop
      where
        loop :: a -> s -> Int -> ST s (MArray s, (a, Int))
loop !a
z !s
s !Int
i =
            case s -> Step s Char
next0 s
s of
              Step s Char
Done          -> forall (m :: * -> *) a. Monad m => a -> m a
return (MArray s
arr, (a
z,Int
i))
              Skip s
s'       -> a -> s -> Int -> ST s (MArray s, (a, Int))
loop a
z s
s' Int
i
              Yield Char
x s
s'
                | Int
j forall a. Ord a => a -> a -> Bool
>= Int
top  -> {-# SCC "mapAccumL/resize" #-} do
                               let top' :: Int
top' = (Int
top forall a. Num a => a -> a -> a
+ Int
1) forall a. Bits a => a -> Int -> a
`shiftL` Int
1
                               MArray s
arr' <- forall s. MArray s -> Int -> ST s (MArray s)
A.resizeM MArray s
arr Int
top'
                               MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
outer MArray s
arr' Int
top' a
z s
s Int
i
                | Bool
otherwise -> do Int
d <- forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c
                                  a -> s -> Int -> ST s (MArray s, (a, Int))
loop a
z' s
s' (Int
iforall a. Num a => a -> a -> a
+Int
d)
                where (a
z',Char
c) = a -> Char -> (a, Char)
f a
z Char
x
                      j :: Int
j = Int
i forall a. Num a => a -> a -> a
+ Char -> Int
U8.utf8Length Char
c forall a. Num a => a -> a -> a
- Int
1
{-# INLINE [0] mapAccumL #-}