{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
    UnboxedTuples #-}

-- |
-- Module      :  Data.Attoparsec.Text.Buffer
-- Copyright   :  Bryan O'Sullivan 2007-2015
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  GHC
--
-- An immutable buffer that supports cheap appends.

-- A Buffer is divided into an immutable read-only zone, followed by a
-- mutable area that we've preallocated, but not yet written to.
--
-- We overallocate at the end of a Buffer so that we can cheaply
-- append.  Since a user of an existing Buffer cannot see past the end
-- of its immutable zone into the data that will change during an
-- append, this is safe.
--
-- Once we run out of space at the end of a Buffer, we do the usual
-- doubling of the buffer size.

module Data.Attoparsec.Text.Buffer
    (
      Buffer
    , buffer
    , unbuffer
    , unbufferAt
    , length
    , pappend
    , iter
    , iter_
    , substring
    , dropWord16
    ) where

import Control.Exception (assert)
import Data.Bits (shiftR)
import Data.List (foldl1')
import Data.Monoid as Mon (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Text ()
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Encoding.Utf16 (chr2)
import Data.Text.Internal.Unsafe.Char (unsafeChr)
import Data.Text.Unsafe (Iter(..))
import Foreign.Storable (sizeOf)
import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
import GHC.ST (ST(..), runST)
import Prelude hiding (length)
import qualified Data.Text.Array as A

-- If _cap is zero, this buffer is empty.
data Buffer = Buf {
      Buffer -> Array
_arr :: {-# UNPACK #-} !A.Array
    , Buffer -> Int
_off :: {-# UNPACK #-} !Int
    , Buffer -> Int
_len :: {-# UNPACK #-} !Int
    , Buffer -> Int
_cap :: {-# UNPACK #-} !Int
    , Buffer -> Int
_gen :: {-# UNPACK #-} !Int
    }

instance Show Buffer where
    showsPrec :: Int -> Buffer -> ShowS
showsPrec Int
p = Int -> Text -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p (Text -> ShowS) -> (Buffer -> Text) -> Buffer -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Buffer -> Text
unbuffer

-- | The initial 'Buffer' has no mutable zone, so we can avoid all
-- copies in the (hopefully) common case of no further input being fed
-- to us.
buffer :: Text -> Buffer
buffer :: Text -> Buffer
buffer (Text Array
arr Int
off Int
len) = Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr Int
off Int
len Int
len Int
0

unbuffer :: Buffer -> Text
unbuffer :: Buffer -> Text
unbuffer (Buf Array
arr Int
off Int
len Int
_ Int
_) = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
len

unbufferAt :: Int -> Buffer -> Text
unbufferAt :: Int -> Buffer -> Text
unbufferAt Int
s (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s)

instance Semigroup Buffer where
    (Buf Array
_ Int
_ Int
_ Int
0 Int
_) <> :: Buffer -> Buffer -> Buffer
<> Buffer
b                     = Buffer
b
    Buffer
a               <> (Buf Array
_ Int
_ Int
_ Int
0 Int
_)       = Buffer
a
    Buffer
buf             <> (Buf Array
arr Int
off Int
len Int
_ Int
_) = Buffer -> Array -> Int -> Int -> Buffer
append Buffer
buf Array
arr Int
off Int
len
    {-# INLINE (<>) #-}

instance Monoid Buffer where
    mempty :: Buffer
mempty = Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
A.empty Int
0 Int
0 Int
0 Int
0
    {-# INLINE mempty #-}

    mappend :: Buffer -> Buffer -> Buffer
mappend = Buffer -> Buffer -> Buffer
forall a. Semigroup a => a -> a -> a
(<>)

    mconcat :: [Buffer] -> Buffer
mconcat [] = Buffer
forall a. Monoid a => a
Mon.mempty
    mconcat [Buffer]
xs = (Buffer -> Buffer -> Buffer) -> [Buffer] -> Buffer
forall a. (a -> a -> a) -> [a] -> a
foldl1' Buffer -> Buffer -> Buffer
forall a. Semigroup a => a -> a -> a
(<>) [Buffer]
xs

pappend :: Buffer -> Text -> Buffer
pappend :: Buffer -> Text -> Buffer
pappend (Buf Array
_ Int
_ Int
_ Int
0 Int
_) Text
t      = Text -> Buffer
buffer Text
t
pappend Buffer
buf (Text Array
arr Int
off Int
len) = Buffer -> Array -> Int -> Int -> Buffer
append Buffer
buf Array
arr Int
off Int
len

append :: Buffer -> A.Array -> Int -> Int -> Buffer
append :: Buffer -> Array -> Int -> Int -> Buffer
append (Buf Array
arr0 Int
off0 Int
len0 Int
cap0 Int
gen0) !Array
arr1 !Int
off1 !Int
len1 = (forall s. ST s Buffer) -> Buffer
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Buffer) -> Buffer)
-> (forall s. ST s Buffer) -> Buffer
forall a b. (a -> b) -> a -> b
$ do
  let woff :: Int
woff    = Int -> Int
forall a. Storable a => a -> Int
sizeOf (Int
0::Int) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
1
      newlen :: Int
newlen  = Int
len0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len1
      !gen :: Int
gen    = if Int
gen0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int
0 else Array -> Int
readGen Array
arr0
  if Int
gen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
gen0 Bool -> Bool -> Bool
&& Int
newlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
cap0
    then do
      let newgen :: Int
newgen = Int
gen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
      MArray s
marr <- Array -> ST s (MArray s)
forall s. Array -> ST s (MArray s)
unsafeThaw Array
arr0
      MArray s -> Int -> ST s ()
forall s. MArray s -> Int -> ST s ()
writeGen MArray s
marr Int
newgen
      MArray s -> Int -> Array -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
A.copyI MArray s
marr (Int
off0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0) Array
arr1 Int
off1 (Int
off0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
newlen)
      Array
arr2 <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
      Buffer -> ST s Buffer
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr2 Int
off0 Int
newlen Int
cap0 Int
newgen)
    else do
      let newcap :: Int
newcap = Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2
          newgen :: Int
newgen = Int
1
      MArray s
marr <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new (Int
newcap Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
woff)
      MArray s -> Int -> ST s ()
forall s. MArray s -> Int -> ST s ()
writeGen MArray s
marr Int
newgen
      MArray s -> Int -> Array -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
A.copyI MArray s
marr Int
woff Array
arr0 Int
off0 (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0)
      MArray s -> Int -> Array -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> Array -> Int -> Int -> ST s ()
A.copyI MArray s
marr (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0) Array
arr1 Int
off1 (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
newlen)
      Array
arr2 <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
      Buffer -> ST s Buffer
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr2 Int
woff Int
newlen Int
newcap Int
newgen)

length :: Buffer -> Int
length :: Buffer -> Int
length (Buf Array
_ Int
_ Int
len Int
_ Int
_) = Int
len
{-# INLINE length #-}

substring :: Int -> Int -> Buffer -> Text
substring :: Int -> Int -> Buffer -> Text
substring Int
s Int
l (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) Int
l
{-# INLINE substring #-}

dropWord16 :: Int -> Buffer -> Text
dropWord16 :: Int -> Buffer -> Text
dropWord16 Int
s (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s)
{-# INLINE dropWord16 #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter :: Buffer -> Int -> Iter
iter (Buf Array
arr Int
off Int
_ Int
_ Int
_) Int
i
    | Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Char -> Int -> Iter
Iter (Word16 -> Char
unsafeChr Word16
m) Int
1
    | Bool
otherwise                = Char -> Int -> Iter
Iter (Word16 -> Word16 -> Char
chr2 Word16
m Word16
n) Int
2
  where m :: Word16
m = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
j
        n :: Word16
n = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
k
        j :: Int
j = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i
        k :: Int
k = Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ :: Buffer -> Int -> Int
iter_ (Buf Array
arr Int
off Int
_ Int
_ Int
_) Int
i | Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
m Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Int
1
                                | Bool
otherwise                = Int
2
  where m :: Word16
m = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
unsafeThaw :: Array -> ST s (MArray s)
unsafeThaw A.Array{ByteArray#
aBA :: Array -> ByteArray#
aBA :: ByteArray#
..} = STRep s (MArray s) -> ST s (MArray s)
forall s a. STRep s a -> ST s a
ST (STRep s (MArray s) -> ST s (MArray s))
-> STRep s (MArray s) -> ST s (MArray s)
forall a b. (a -> b) -> a -> b
$ \State# s
s# ->
                          (# State# s
s#, MutableByteArray# s -> MArray s
forall s. MutableByteArray# s -> MArray s
A.MArray (ByteArray# -> MutableByteArray# s
unsafeCoerce# ByteArray#
aBA) #)

readGen :: A.Array -> Int
readGen :: Array -> Int
readGen Array
a = case ByteArray# -> Int# -> Int#
indexIntArray# (Array -> ByteArray#
A.aBA Array
a) Int#
0# of Int#
r# -> Int# -> Int
I# Int#
r#

writeGen :: A.MArray s -> Int -> ST s ()
writeGen :: MArray s -> Int -> ST s ()
writeGen MArray s
a (I# Int#
gen#) = STRep s () -> ST s ()
forall s a. STRep s a -> ST s a
ST (STRep s () -> ST s ()) -> STRep s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ \State# s
s0# ->
  case MutableByteArray# s -> Int# -> Int# -> State# s -> State# s
forall d.
MutableByteArray# d -> Int# -> Int# -> State# d -> State# d
writeIntArray# (MArray s -> MutableByteArray# s
forall s. MArray s -> MutableByteArray# s
A.maBA MArray s
a) Int#
0# Int#
gen# State# s
s0# of
    State# s
s1# -> (# State# s
s1#, () #)