{-# language DataKinds #-}
{-# language DuplicateRecordFields #-}
{-# language NamedFieldPuns #-}
{-# language TypeApplications #-}

module Kafka.Record.Request
  ( Record(..)
  , Header(..)
  , toChunks
  , toChunksOnto
  ) where

import Data.Bytes (Bytes)
import Data.Bytes.Chunks (Chunks(ChunksCons,ChunksNil))
import Data.Int (Int32,Int64)
import Data.Primitive (SmallArray)
import Data.Text (Text)
import Kafka.Builder (Builder)

import qualified Arithmetic.Nat as Nat
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Chunks as Chunks
import qualified Data.Bytes.Text.Utf8 as Utf8
import qualified Data.Primitive as PM
import qualified Kafka.Builder as Builder
import qualified Kafka.Builder.Bounded as Bounded

-- | Information about @Record@ from Kafka documentation:
--
-- > length: varint
-- > attributes: int8
-- >     bit 0~7: unused
-- > timestampDelta: varlong
-- > offsetDelta: varint
-- > keyLength: varint
-- > key: byte[]
-- > valueLen: varint
-- > value: byte[]
-- > Headers => [Header]
data Record = Record
  { timestampDelta :: !Int64
  , offsetDelta :: !Int32
  , key :: {-# UNPACK #-} !Bytes
    -- ^ Setting the key to the empty byte sequence causes it to be treated
    -- as though it were the null key. Technically, we could do the right
    -- thing by wrapping the Bytes with Maybe, but using the empty string
    -- as the key is a terrible idea anyway.
  , value :: {-# UNPACK #-} !Bytes
    -- ^ In a data-encoding setting, it actually makes more sense for this
    -- to be Chunks, not Bytes. But in a data-decoding setting, Bytes makes
    -- more sense. It might be better to create a separate type for each
    -- setting.
  , headers :: {-# UNPACK #-} !(SmallArray Header)
    -- ^ In a data-encoding setting, it makes more sense for this to be
    -- a chunked (or builder-like) type.
  }

toChunks :: Record -> Chunks
toChunks r = toChunksOnto r ChunksNil

-- | Variant of 'toChunks' that gives the caller control over what chunks
-- come after the encoded record. For example, it is possible to improve
-- the performance of
--
-- > foldMap toChunks records
--
-- by rewriting it as
--
-- > foldr toChunksOnto ChunksNil records
toChunksOnto :: Record -> Chunks -> Chunks
toChunksOnto r c = ChunksCons
  (Bytes.fromByteArray (Bounded.run Nat.constant (Bounded.varIntNative (fromIntegral n))))
  recordChunks
  where
  (n,recordChunks) = Builder.runOntoLength 128 (encodeWithoutLength r) c

encodeWithoutLength :: Record -> Builder
encodeWithoutLength Record{timestampDelta,offsetDelta,key,value,headers} =
     Builder.word8 0x00
  <> Builder.varInt64 timestampDelta
  <> Builder.varInt32 offsetDelta
  <> Builder.varIntNative
       (case Bytes.length key of
         0 -> (-1)
         klen -> klen
       )
  <> Builder.copy key
  <> Builder.varIntNative (Bytes.length value)
  <> Builder.bytes value
  <> Builder.varIntNative (PM.sizeofSmallArray headers)
  <> foldMap encodeHeader headers
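-- A minimal usage sketch (illustrative only, not part of this module's
-- API; it assumes Data.Bytes.Text.Ascii imported qualified as Ascii).
-- Note that the empty key below is encoded as the null key, per the
-- comment on the 'key' field:
--
-- > example :: Chunks
-- > example = toChunks Record
-- >   { timestampDelta = 0
-- >   , offsetDelta = 0
-- >   , key = Bytes.empty
-- >   , value = Ascii.fromString "hello"
-- >   , headers = mempty
-- >   }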
-- | Information about @Header@ from Kafka documentation:
--
-- > headerKeyLength: varint
-- > headerKey: String
-- > headerValueLength: varint
-- > value: byte[]
data Header = Header
  { key :: {-# UNPACK #-} !Text
    -- ^ Header key. For records that we are encoding, text (rather than
    -- some kind of builder) is a reasonable choice since header keys are
    -- typically not assembled from smaller pieces.
  , value :: {-# UNPACK #-} !Bytes
    -- ^ Header value. This is currently Bytes, and I'm torn about whether
    -- or not to change it to a builder or chunks type. On one hand, it
    -- makes more sense to do it that way, but on the other hand, header
    -- values are small, so they are unlikely to be assembled from
    -- smaller pieces.
  }

encodeHeader :: Header -> Builder
encodeHeader Header{key,value} =
     Builder.varIntNative (Bytes.length keyBytes)
  <> Builder.copy keyBytes
  <> Builder.varIntNative (Bytes.length value)
  <> Builder.copy value
  where
  keyBytes = Utf8.fromText key
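-- A sketch of batch encoding, following the rewrite suggested in the
-- documentation of 'toChunksOnto'. The helper name 'encodeBatch' is
-- hypothetical; Chunks.concat comes from the qualified
-- Data.Bytes.Chunks import:
--
-- > encodeBatch :: SmallArray Record -> Bytes
-- > encodeBatch records = Chunks.concat (foldr toChunksOnto ChunksNil records)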