{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
    UnboxedTuples #-}

-- |
-- Module      :  Data.Attoparsec.Text.Buffer
-- Copyright   :  Bryan O'Sullivan 2007-2015
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  GHC
--
-- An immutable buffer that supports cheap appends.

-- A Buffer is divided into an immutable read-only zone, followed by a
-- mutable area that we've preallocated, but not yet written to.
--
-- We overallocate at the end of a Buffer so that we can cheaply
-- append.  Since a user of an existing Buffer cannot see past the end
-- of its immutable zone into the data that will change during an
-- append, this is safe.
--
-- Once we run out of space at the end of a Buffer, we do the usual
-- doubling of the buffer size.

module Data.Attoparsec.Text.Buffer
    (
      Buffer
    , buffer
    , unbuffer
    , unbufferAt
    , length
    , pappend
    , iter
    , iter_
    , substring
    , lengthCodeUnits
    , dropCodeUnits
    ) where

import Control.Exception (assert)
import Data.List (foldl1')
import Data.Monoid as Mon (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Text ()
import Data.Text.Internal (Text(..))
#if MIN_VERSION_text(2,0,0)
import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
import Data.Text.Unsafe (iterArray, lengthWord8)
#else
import Data.Bits (shiftR)
import Data.Text.Internal.Encoding.Utf16 (chr2)
import Data.Text.Internal.Unsafe.Char (unsafeChr)
import Data.Text.Unsafe (lengthWord16)
#endif
import Data.Text.Unsafe (Iter(..))
import Foreign.Storable (sizeOf)
import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
import GHC.ST (ST(..), runST)
import Prelude hiding (length)
import qualified Data.Text.Array as A

-- If _cap is zero, this buffer is empty.
data Buffer = Buf {
      Buffer -> Array
_arr :: {-# UNPACK #-} !A.Array
    , Buffer -> Int
_off :: {-# UNPACK #-} !Int
    , Buffer -> Int
_len :: {-# UNPACK #-} !Int
    , Buffer -> Int
_cap :: {-# UNPACK #-} !Int
    , Buffer -> Int
_gen :: {-# UNPACK #-} !Int
    }

instance Show Buffer where
    showsPrec :: Int -> Buffer -> ShowS
showsPrec Int
p = Int -> Text -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p (Text -> ShowS) -> (Buffer -> Text) -> Buffer -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Buffer -> Text
unbuffer

-- | The initial 'Buffer' has no mutable zone, so we can avoid all
-- copies in the (hopefully) common case of no further input being fed
-- to us.
buffer :: Text -> Buffer
buffer :: Text -> Buffer
buffer (Text Array
arr Int
off Int
len) = Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr Int
off Int
len Int
len Int
0

unbuffer :: Buffer -> Text
unbuffer :: Buffer -> Text
unbuffer (Buf Array
arr Int
off Int
len Int
_ Int
_) = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
len

unbufferAt :: Int -> Buffer -> Text
unbufferAt :: Int -> Buffer -> Text
unbufferAt Int
s (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s)

instance Semigroup Buffer where
    (Buf Array
_ Int
_ Int
_ Int
0 Int
_) <> :: Buffer -> Buffer -> Buffer
<> Buffer
b                     = Buffer
b
    Buffer
a               <> (Buf Array
_ Int
_ Int
_ Int
0 Int
_)       = Buffer
a
    Buffer
buf             <> (Buf Array
arr Int
off Int
len Int
_ Int
_) = Buffer -> Array -> Int -> Int -> Buffer
append Buffer
buf Array
arr Int
off Int
len
    {-# INLINE (<>) #-}

instance Monoid Buffer where
    mempty :: Buffer
mempty = Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
A.empty Int
0 Int
0 Int
0 Int
0
    {-# INLINE mempty #-}

    mappend :: Buffer -> Buffer -> Buffer
mappend = Buffer -> Buffer -> Buffer
forall a. Semigroup a => a -> a -> a
(<>)

    mconcat :: [Buffer] -> Buffer
mconcat [] = Buffer
forall a. Monoid a => a
Mon.mempty
    mconcat [Buffer]
xs = (Buffer -> Buffer -> Buffer) -> [Buffer] -> Buffer
forall a. (a -> a -> a) -> [a] -> a
foldl1' Buffer -> Buffer -> Buffer
forall a. Semigroup a => a -> a -> a
(<>) [Buffer]
xs

pappend :: Buffer -> Text -> Buffer
pappend :: Buffer -> Text -> Buffer
pappend (Buf Array
_ Int
_ Int
_ Int
0 Int
_) Text
t      = Text -> Buffer
buffer Text
t
pappend Buffer
buf (Text Array
arr Int
off Int
len) = Buffer -> Array -> Int -> Int -> Buffer
append Buffer
buf Array
arr Int
off Int
len

append :: Buffer -> A.Array -> Int -> Int -> Buffer
append :: Buffer -> Array -> Int -> Int -> Buffer
append (Buf Array
arr0 Int
off0 Int
len0 Int
cap0 Int
gen0) !Array
arr1 !Int
off1 !Int
len1 = (forall s. ST s Buffer) -> Buffer
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Buffer) -> Buffer)
-> (forall s. ST s Buffer) -> Buffer
forall a b. (a -> b) -> a -> b
$ do
#if MIN_VERSION_text(2,0,0)
  let woff    = sizeOf (0::Int)
#else
  let woff :: Int
woff    = Int -> Int
forall a. Storable a => a -> Int
sizeOf (Int
0::Int) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
1
#endif
      newlen :: Int
newlen  = Int
len0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len1
      !gen :: Int
gen    = if Int
gen0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int
0 else Array -> Int
readGen Array
arr0
  if Int
gen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
gen0 Bool -> Bool -> Bool
&& Int
newlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
cap0
    then do
      let newgen :: Int
newgen = Int
gen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
      MArray s
marr <- Array -> ST s (MArray s)
forall s. Array -> ST s (MArray s)
unsafeThaw Array
arr0
      MArray s -> Int -> ST s ()
forall s. MArray s -> Int -> ST s ()
writeGen MArray s
marr Int
newgen
#if MIN_VERSION_text(2,0,0)
      A.copyI len1 marr (off0+len0) arr1 off1
#else
      MArray s -> Int -> Array -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
A.copyI MArray s
marr (Int
off0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0) Array
arr1 Int
off1 (Int
off0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
newlen)
#endif
      Array
arr2 <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
      Buffer -> ST s Buffer
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr2 Int
off0 Int
newlen Int
cap0 Int
newgen)
    else do
      let newcap :: Int
newcap = Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2
          newgen :: Int
newgen = Int
1
      MArray s
marr <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new (Int
newcap Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
woff)
      MArray s -> Int -> ST s ()
forall s. MArray s -> Int -> ST s ()
writeGen MArray s
marr Int
newgen
#if MIN_VERSION_text(2,0,0)
      A.copyI len0 marr woff arr0 off0
      A.copyI len1 marr (woff+len0) arr1 off1
#else
      MArray s -> Int -> Array -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
A.copyI MArray s
marr Int
woff Array
arr0 Int
off0 (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0)
      MArray s -> Int -> Array -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
A.copyI MArray s
marr (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0) Array
arr1 Int
off1 (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
newlen)
#endif
      Array
arr2 <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
      Buffer -> ST s Buffer
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr2 Int
woff Int
newlen Int
newcap Int
newgen)

length :: Buffer -> Int
length :: Buffer -> Int
length (Buf Array
_ Int
_ Int
len Int
_ Int
_) = Int
len
{-# INLINE length #-}

substring :: Int -> Int -> Buffer -> Text
substring :: Int -> Int -> Buffer -> Text
substring Int
s Int
l (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) Int
l
{-# INLINE substring #-}

#if MIN_VERSION_text(2,0,0)

lengthCodeUnits :: Text -> Int
lengthCodeUnits = lengthWord8

dropCodeUnits :: Int -> Buffer -> Text
dropCodeUnits s (Buf arr off len _ _) =
  assert (s >= 0 && s <= len) $
  Text arr (off+s) (len-s)
{-# INLINE dropCodeUnits #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter (Buf arr off _ _ _) i = iterArray arr (off + i)
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-8 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ (Buf arr off _ _ _) i = utf8LengthByLeader $ A.unsafeIndex arr (off+i)
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
unsafeThaw (A.ByteArray a) = ST $ \s# ->
                          (# s#, A.MutableByteArray (unsafeCoerce# a) #)

readGen :: A.Array -> Int
readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r#

writeGen :: A.MArray s -> Int -> ST s ()
writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# ->
  case writeIntArray# a 0# gen# s0# of
    s1# -> (# s1#, () #)

#else

lengthCodeUnits :: Text -> Int
lengthCodeUnits :: Text -> Int
lengthCodeUnits = Text -> Int
lengthWord16

dropCodeUnits :: Int -> Buffer -> Text
dropCodeUnits :: Int -> Buffer -> Text
dropCodeUnits Int
s (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s)
{-# INLINE dropCodeUnits #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter :: Buffer -> Int -> Iter
iter (Buf Array
arr Int
off Int
_ Int
_ Int
_) Int
i
    | Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Char -> Int -> Iter
Iter (Word16 -> Char
unsafeChr Word16
m) Int
1
    | Bool
otherwise                = Char -> Int -> Iter
Iter (Word16 -> Word16 -> Char
chr2 Word16
m Word16
n) Int
2
  where m :: Word16
m = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
j
        n :: Word16
n = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
k
        j :: Int
j = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i
        k :: Int
k = Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ :: Buffer -> Int -> Int
iter_ (Buf Array
arr Int
off Int
_ Int
_ Int
_) Int
i | Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Int
1
                                | Bool
otherwise                = Int
2
  where m :: Word16
m = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
unsafeThaw :: Array -> ST s (MArray s)
unsafeThaw A.Array{ByteArray#
aBA :: Array -> ByteArray#
aBA :: ByteArray#
..} = STRep s (MArray s) -> ST s (MArray s)
forall s a. STRep s a -> ST s a
ST (STRep s (MArray s) -> ST s (MArray s))
-> STRep s (MArray s) -> ST s (MArray s)
forall a b. (a -> b) -> a -> b
$ \State# s
s# ->
                          (# State# s
s#, MutableByteArray# s -> MArray s
forall s. MutableByteArray# s -> MArray s
A.MArray (ByteArray# -> MutableByteArray# s
unsafeCoerce# ByteArray#
aBA) #)

readGen :: A.Array -> Int
readGen :: Array -> Int
readGen Array
a = case ByteArray# -> Int# -> Int#
indexIntArray# (Array -> ByteArray#
A.aBA Array
a) Int#
0# of Int#
r# -> Int# -> Int
I# Int#
r#

writeGen :: A.MArray s -> Int -> ST s ()
writeGen :: MArray s -> Int -> ST s ()
writeGen MArray s
a (I# Int#
gen#) = STRep s () -> ST s ()
forall s a. STRep s a -> ST s a
ST (STRep s () -> ST s ()) -> STRep s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ \State# s
s0# ->
  case MutableByteArray# s -> Int# -> Int# -> State# s -> State# s
forall d.
MutableByteArray# d -> Int# -> Int# -> State# d -> State# d
writeIntArray# (MArray s -> MutableByteArray# s
forall s. MArray s -> MutableByteArray# s
A.maBA MArray s
a) Int#
0# Int#
gen# State# s
s0# of
    State# s
s1# -> (# State# s
s1#, () #)

#endif