-- |
-- Module      : Streamly.Internal.Data.Serialize.Type
-- Copyright   : (c) 2023 Composewell Technologies
-- License     : BSD3-3-Clause
-- Maintainer  : streamly@composewell.com
-- Portability : GHC
--

module Streamly.Internal.Data.Serialize.Type
    (
      Serialize(..)
    ) where

--------------------------------------------------------------------------------
-- Imports
--------------------------------------------------------------------------------

import Control.Monad (when)
import Data.List (foldl')
import Data.Proxy (Proxy (..))
import Streamly.Internal.Data.Unbox (Unbox)
import Streamly.Internal.Data.MutByteArray.Type (MutByteArray(..))
import Streamly.Internal.Data.Array.Type (Array(..))
import GHC.Int (Int16(..), Int32(..), Int64(..), Int8(..))
import GHC.Word (Word16(..), Word32(..), Word64(..), Word8(..))
import GHC.Stable (StablePtr(..))

import qualified Streamly.Internal.Data.MutByteArray.Type as MBA
import qualified Streamly.Internal.Data.Unbox as Unbox
import qualified Streamly.Internal.Data.Array.Type as Array
import qualified Streamly.Internal.Data.MutArray as MutArray

import GHC.Exts

--------------------------------------------------------------------------------
-- Developer Note
--------------------------------------------------------------------------------

-- IMPORTANT
-- =========
--
-- Don't ever serialize the absolute offsets in the encoding. Serialize length
-- instead. Absolute offsets are NOT stable.
--
-- They will only work if the start offset of the Array when encoding and
-- decoding is the same. This is almost never the case.

--------------------------------------------------------------------------------
-- Types
--------------------------------------------------------------------------------

-- | The 'Serialize' type class provides operations for serialization and
-- deserialization of general Haskell data types to and from their byte stream
-- representation.
--
-- Unlike 'Unbox', 'Serialize' uses variable length encoding, therefore, it can
-- serialize recursive and variable length data types like lists, or variable
-- length sum types where the length of the value may vary depending on a
-- particular constructor. For variable length data types the length is encoded
-- along with the data.
--
-- The 'deserializeAt' operation reads bytes from the mutable byte array and
-- builds a Haskell data type from these bytes, the number of bytes it reads
-- depends on the type and the encoded value it is reading. 'serializeAt'
-- operation converts a Haskell data type to its binary representation which
-- must consist of as many bytes as added by the @addSizeTo@ operation for that
-- value and then stores these bytes into the mutable byte array. The
-- programmer is expected to use the @addSizeTo@ operation and allocate an
-- array of sufficient length before calling 'serializeAt'.
--
-- IMPORTANT: The serialized data's byte ordering remains the same as the host
-- machine's byte order. Therefore, it can not be deserialized from host
-- machines with a different byte ordering.
--
-- Instances can be derived via Template Haskell, or written manually.
--
-- Here is an example, for deriving an instance of this type class using
-- template Haskell:
--
-- >>> :{
-- data Object = Object
--     { _obj1 :: [Int]
--     , _obj2 :: Int
--     }
-- :}
--
-- @
-- import Streamly.Data.MutByteArray (deriveSerialize)
-- \$(deriveSerialize [d|instance Serialize Object|])
-- @
--
-- See 'Streamly.Data.MutByteArray.deriveSerialize' and
-- 'Streamly.Data.MutByteArray.deriveSerializeWith' for more information on
-- deriving using Template Haskell.
--
-- Here is an example of a manual instance.
--
-- >>> import Streamly.Data.MutByteArray (Serialize(..))
--
-- >>> :{
-- instance Serialize Object where
--     addSizeTo acc obj = addSizeTo (addSizeTo acc (_obj1 obj)) (_obj2 obj)
--     deserializeAt i arr len = do
--          -- Check the array bounds before reading
--         (i1, x0) <- deserializeAt i arr len
--         (i2, x1) <- deserializeAt i1 arr len
--         pure (i2, Object x0 x1)
--     serializeAt i arr (Object x0 x1) = do
--         i1 <- serializeAt i arr x0
--         i2 <- serializeAt i1 arr x1
--         pure i2
-- :}
--
class Serialize a where
    -- XXX Use (a -> Sum Int) instead, remove the Size type

    -- A left fold step to fold a generic structure to its serializable size.
    -- It is of the form @Int -> a -> Int@ because you can have tail-recursive
    -- traversal of the structures.

    -- | @addSizeTo accum value@ returns @accum@ incremented by the size of the
    -- serialized representation of @value@ in bytes. Size cannot be zero. It
    -- should be at least 1 byte.
    addSizeTo :: Int -> a -> Int

    -- We can implement the following functions without returning the `Int`
    -- offset but that may require traversing the Haskell structure again to get
    -- the size. Therefore, this is a performance optimization.

    -- | @deserializeAt byte-offset array arrayLen@ deserializes a value from
    -- the given byte-offset in the array. Returns a tuple consisting of the
    -- next byte-offset and the deserialized value.
    --
    -- The arrayLen passed is the entire length of the input buffer. It is to
    -- be used to check if we would overflow the input buffer when
    -- deserializing.
    --
    -- Throws an exception if the operation would exceed the supplied arrayLen.
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, a)

    -- | @serializeAt byte-offset array value@ writes the serialized
    -- representation of the @value@ in the array at the given byte-offset.
    -- Returns the next byte-offset.
    --
    -- This is an unsafe operation, the programmer must ensure that the array
    -- has enough space available to serialize the value as determined by the
    -- @addSizeTo@ operation.
    serializeAt :: Int -> MutByteArray -> a -> IO Int

--------------------------------------------------------------------------------
-- Instances
--------------------------------------------------------------------------------

-- _size is the length from array start to the last accessed byte.
#ifdef DEBUG
{-# INLINE checkBounds #-}
checkBounds :: String -> Int -> MutByteArray -> IO ()
checkBounds _label _size _arr = do
    sz <- MBA.sizeOfMutableByteArray _arr
    if (_size > sz)
    then error
        $ _label
            ++ ": accessing array at offset = "
            ++ show (_size - 1)
            ++ " max valid offset = " ++ show (sz - 1)
    else return ()
#endif

-- Note: Instead of passing around the size parameter, we can use
-- (sizeOfMutableByteArray arr) for checking the array bound, but that turns
-- out to be more expensive.
--
-- Another way to optimize this is to avoid the check for fixed size
-- structures. For fixed size structures we can do a check at the top level and
-- then use checkless deserialization using the Unbox type class. That will
-- require ConstSize and VarSize constructors in size. The programmer can
-- bundle all const size fields in a newtype to make serialization faster. This
-- can speed up the computation of size when serializing and checking size when
-- deserialing.
--
-- For variable size non-recursive structures a separate size validation method
-- could be used to validate the size before deserializing. "validate" can also
-- be used to collpase multiple chunks of arrays coming from network into a
-- single array for deserializing. But that can also be done by framing the
-- serialized value with a size header.
--
{-# INLINE deserializeChecked #-}
deserializeChecked :: forall a. Unbox a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeChecked :: forall a. Unbox a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeChecked Int
off MutByteArray
arr Int
sz =
    let next :: Int
next = Int
off forall a. Num a => a -> a -> a
+ forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (forall {k} (t :: k). Proxy t
Proxy :: Proxy a)
     in do
        -- Keep likely path in the straight branch.
        if (Int
next forall a. Ord a => a -> a -> Bool
<= Int
sz)
        then forall a. Unbox a => Int -> MutByteArray -> IO a
Unbox.peekAt Int
off MutByteArray
arr forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \a
val -> forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
next, a
val)
        else forall a. HasCallStack => [Char] -> a
error
            forall a b. (a -> b) -> a -> b
$ [Char]
"deserializeAt: accessing array at offset = "
                forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int
next forall a. Num a => a -> a -> a
- Int
1)
                forall a. [a] -> [a] -> [a]
++ [Char]
" max valid offset = " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int
sz forall a. Num a => a -> a -> a
- Int
1)

{-# INLINE serializeUnsafe #-}
serializeUnsafe :: forall a. Unbox a => Int -> MutByteArray -> a -> IO Int
serializeUnsafe :: forall a. Unbox a => Int -> MutByteArray -> a -> IO Int
serializeUnsafe Int
off MutByteArray
arr a
val =
    let next :: Int
next = Int
off forall a. Num a => a -> a -> a
+ forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (forall {k} (t :: k). Proxy t
Proxy :: Proxy a)
     in do
#ifdef DEBUG
        checkBounds "serializeAt" next arr
#endif
        forall a. Unbox a => Int -> MutByteArray -> a -> IO ()
Unbox.pokeAt Int
off MutByteArray
arr a
val
        forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
next

#define DERIVE_SERIALIZE_FROM_UNBOX(_type) \
instance Serialize _type where \
; {-# INLINE addSizeTo #-} \
;    addSizeTo acc _ = acc +  Unbox.sizeOf (Proxy :: Proxy _type) \
; {-# INLINE deserializeAt #-} \
;    deserializeAt off arr end = deserializeChecked off arr end :: IO (Int, _type) \
; {-# INLINE serializeAt #-} \
;    serializeAt =  \
        serializeUnsafe :: Int -> MutByteArray -> _type -> IO Int

DERIVE_SERIALIZE_FROM_UNBOX(())
DERIVE_SERIALIZE_FROM_UNBOX(Bool)
DERIVE_SERIALIZE_FROM_UNBOX(Char)
DERIVE_SERIALIZE_FROM_UNBOX(Int8)
DERIVE_SERIALIZE_FROM_UNBOX(Int16)
DERIVE_SERIALIZE_FROM_UNBOX(Int32)
DERIVE_SERIALIZE_FROM_UNBOX(Int)
DERIVE_SERIALIZE_FROM_UNBOX(Int64)
DERIVE_SERIALIZE_FROM_UNBOX(Word)
DERIVE_SERIALIZE_FROM_UNBOX(Word8)
DERIVE_SERIALIZE_FROM_UNBOX(Word16)
DERIVE_SERIALIZE_FROM_UNBOX(Word32)
DERIVE_SERIALIZE_FROM_UNBOX(Word64)
DERIVE_SERIALIZE_FROM_UNBOX(Double)
DERIVE_SERIALIZE_FROM_UNBOX(Float)
DERIVE_SERIALIZE_FROM_UNBOX((StablePtr a))
DERIVE_SERIALIZE_FROM_UNBOX((Ptr a))
DERIVE_SERIALIZE_FROM_UNBOX((FunPtr a))

instance forall a. Serialize a => Serialize [a] where

    -- {-# INLINE addSizeTo #-}
    addSizeTo :: Int -> [a] -> Int
addSizeTo Int
acc [a]
xs =
        forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall a. Serialize a => Int -> a -> Int
addSizeTo (Int
acc forall a. Num a => a -> a -> a
+ (forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (forall {k} (t :: k). Proxy t
Proxy :: Proxy Int))) [a]
xs

    -- Inlining this causes large compilation times for tests
    {-# INLINABLE deserializeAt #-}
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, [a])
deserializeAt Int
off MutByteArray
arr Int
sz = do
        (Int
off1, Int64
len64) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off MutByteArray
arr Int
sz :: IO (Int, Int64)
        let len :: Int
len = (forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Int64 -> Int) Int64
len64
            peekList :: ([a] -> b) -> Int -> t -> IO (Int, b)
peekList [a] -> b
f Int
o t
i | t
i forall a. Ord a => a -> a -> Bool
>= t
3 = do
              -- Unfold the loop three times
              (Int
o1, a
x1) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o MutByteArray
arr Int
sz
              (Int
o2, a
x2) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o1 MutByteArray
arr Int
sz
              (Int
o3, a
x3) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o2 MutByteArray
arr Int
sz
              ([a] -> b) -> Int -> t -> IO (Int, b)
peekList ([a] -> b
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\[a]
xs -> a
x1forall a. a -> [a] -> [a]
:a
x2forall a. a -> [a] -> [a]
:a
x3forall a. a -> [a] -> [a]
:[a]
xs)) Int
o3 (t
i forall a. Num a => a -> a -> a
- t
3)
            peekList [a] -> b
f Int
o t
0 = forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
o, [a] -> b
f [])
            peekList [a] -> b
f Int
o t
i = do
              (Int
o1, a
x) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
o MutByteArray
arr Int
sz
              ([a] -> b) -> Int -> t -> IO (Int, b)
peekList ([a] -> b
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a
xforall a. a -> [a] -> [a]
:)) Int
o1 (t
i forall a. Num a => a -> a -> a
- t
1)
        forall {t} {a} {b}.
(Ord t, Num t, Serialize a) =>
([a] -> b) -> Int -> t -> IO (Int, b)
peekList forall a. a -> a
id Int
off1 Int
len

    -- Inlining this causes large compilation times for tests
    {-# INLINABLE serializeAt #-}
    serializeAt :: Int -> MutByteArray -> [a] -> IO Int
serializeAt Int
off MutByteArray
arr [a]
val = do
        let off1 :: Int
off1 = Int
off forall a. Num a => a -> a -> a
+ forall a. Unbox a => Proxy a -> Int
Unbox.sizeOf (forall {k} (t :: k). Proxy t
Proxy :: Proxy Int64)
        let pokeList :: Int64 -> Int -> [a] -> IO Int
pokeList Int64
acc Int
o [] =
              forall a. Unbox a => Int -> MutByteArray -> a -> IO ()
Unbox.pokeAt Int
off MutByteArray
arr (Int64
acc :: Int64) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
o
            pokeList Int64
acc Int
o (a
x:[a]
xs) = do
              Int
o1 <- forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
o MutByteArray
arr a
x
              Int64 -> Int -> [a] -> IO Int
pokeList (Int64
acc forall a. Num a => a -> a -> a
+ Int64
1) Int
o1 [a]
xs
        forall {a}. Serialize a => Int64 -> Int -> [a] -> IO Int
pokeList Int64
0 Int
off1 [a]
val

instance
#ifdef DEVBUILD
    Unbox a =>
#endif
  Serialize (Array a) where
    {-# INLINE addSizeTo #-}
    addSizeTo :: Int -> Array a -> Int
addSizeTo Int
i (Array {Int
MutByteArray
arrEnd :: forall a. Array a -> Int
arrStart :: forall a. Array a -> Int
arrContents :: forall a. Array a -> MutByteArray
arrEnd :: Int
arrStart :: Int
arrContents :: MutByteArray
..}) = Int
i forall a. Num a => a -> a -> a
+ (Int
arrEnd forall a. Num a => a -> a -> a
- Int
arrStart) forall a. Num a => a -> a -> a
+ Int
8

    {-# INLINE deserializeAt #-}
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, Array a)
deserializeAt Int
off MutByteArray
arr Int
len = do
        (Int
off1, Int
byteLen) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off MutByteArray
arr Int
len :: IO (Int, Int)
        let off2 :: Int
off2 = Int
off1 forall a. Num a => a -> a -> a
+ Int
byteLen
        forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
off2 forall a. Ord a => a -> a -> Bool
> Int
len) forall a b. (a -> b) -> a -> b
$
            forall a. HasCallStack => [Char] -> a
error
                forall a b. (a -> b) -> a -> b
$ [Char]
"deserializeAt: accessing array at offset = "
                    forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int
off2 forall a. Num a => a -> a -> a
- Int
1)
                    forall a. [a] -> [a] -> [a]
++ [Char]
" max valid offset = " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int
len forall a. Num a => a -> a -> a
- Int
1)
        -- XXX Use MutByteArray.cloneSliceUnsafe
        let slice :: MutArray a
slice = forall a. MutByteArray -> Int -> Int -> Int -> MutArray a
MutArray.MutArray MutByteArray
arr Int
off1 Int
off2 Int
off2
        MutArray a
newArr <- forall (m :: * -> *) a. MonadIO m => MutArray a -> m (MutArray a)
MutArray.clone forall {a}. MutArray a
slice
        forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off2, forall a. MutArray a -> Array a
Array.unsafeFreeze MutArray a
newArr)

    {-# INLINE serializeAt #-}
    serializeAt :: Int -> MutByteArray -> Array a -> IO Int
serializeAt Int
off MutByteArray
arr (Array {Int
MutByteArray
arrEnd :: Int
arrStart :: Int
arrContents :: MutByteArray
arrEnd :: forall a. Array a -> Int
arrStart :: forall a. Array a -> Int
arrContents :: forall a. Array a -> MutByteArray
..}) = do
        let arrLen :: Int
arrLen = Int
arrEnd forall a. Num a => a -> a -> a
- Int
arrStart
        Int
off1 <- forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
off MutByteArray
arr Int
arrLen
        forall (m :: * -> *).
MonadIO m =>
MutByteArray -> Int -> MutByteArray -> Int -> Int -> m ()
MBA.putSliceUnsafe MutByteArray
arrContents Int
arrStart MutByteArray
arr Int
off1 Int
arrLen
        forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off1 forall a. Num a => a -> a -> a
+ Int
arrLen)

instance (Serialize a, Serialize b) => Serialize (a, b) where

    {-# INLINE addSizeTo #-}
    addSizeTo :: Int -> (a, b) -> Int
addSizeTo Int
acc (a
a, b
b) = forall a. Serialize a => Int -> a -> Int
addSizeTo (forall a. Serialize a => Int -> a -> Int
addSizeTo Int
acc a
a) b
b

    {-# INLINE serializeAt #-}
    serializeAt :: Int -> MutByteArray -> (a, b) -> IO Int
serializeAt Int
off MutByteArray
arr (a
a, b
b) = do
        Int
off1 <- forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
off MutByteArray
arr a
a
        forall a. Serialize a => Int -> MutByteArray -> a -> IO Int
serializeAt Int
off1 MutByteArray
arr b
b

    {-# INLINE deserializeAt #-}
    deserializeAt :: Int -> MutByteArray -> Int -> IO (Int, (a, b))
deserializeAt Int
off MutByteArray
arr Int
end = do
        (Int
off1, a
a) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off MutByteArray
arr Int
end
        (Int
off2, b
b) <- forall a. Serialize a => Int -> MutByteArray -> Int -> IO (Int, a)
deserializeAt Int
off1 MutByteArray
arr Int
end
        forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
off2, (a
a, b
b))