-- |
-- Module     : Codec.Compression.SnappyC.Framed
-- Copyright  : (c) 2024 Finley McIlwaine
-- License    : BSD-3-Clause (see LICENSE)
--
-- Maintainer : Finley McIlwaine <finley@well-typed.com>
--
-- Frame format Snappy compression/decompression.
-- See the framing format description here:
-- <https://github.com/google/snappy/blob/main/framing_format.txt>
--
-- Intended for qualified import:
--
-- > import Codec.Compression.SnappyC.Framed qualified as Snappy

module Codec.Compression.SnappyC.Framed
  ( -- * Compression
    compress

    -- ** Compression with custom parameters
  , EncodeParams(..)
  , FrameSize -- Opaque
  , Threshold(..)
  , compressWithParams
  , defaultFrameSize
  , customFrameSize
  , unFrameSize

    -- * Decompression
  , DecodeFailure(..)
  , decompress
  , decompress'

    -- ** Decompression with custom parameters
  , DecodeParams(..)
  , decompressWithParams
  , decompressWithParams'

    -- * Low-level incremental API
    -- ** Compression
  , Encoder -- Opaque
  , initializeEncoder
  , finalizeEncoder
  , compressStep

    -- ** Decompression
  , Decoder -- Opaque
  , initializeDecoder
  , finalizeDecoder
  , decompressStep
  , decompressStep'
  ) where

import Codec.Compression.SnappyC.Internal.Buffer qualified as Buffer
import Codec.Compression.SnappyC.Internal.FrameFormat
import Codec.Compression.SnappyC.Internal.Util

import Data.ByteString.Internal qualified as Strict (ByteString)
import Data.ByteString.Lazy qualified as Lazy (ByteString)
import Data.ByteString.Lazy qualified as BS.Lazy
import Data.Default
import GHC.Stack

-------------------------------------------------------------------------------
-- Compression
-------------------------------------------------------------------------------

-- | Compress the input using [Snappy](https://github.com/google/snappy/).
--
-- The output stream is in Snappy frame format.
compress :: BS.Lazy.ByteString -> BS.Lazy.ByteString
compress :: ByteString -> ByteString
compress = EncodeParams -> ByteString -> ByteString
compressWithParams EncodeParams
forall a. Default a => a
def

-- | Compress the input using [Snappy](https://github.com/google/snappy/) with
-- the given 'EncodeParams'.
--
-- The output stream is in Snappy frame format.
compressWithParams :: EncodeParams -> Lazy.ByteString -> Lazy.ByteString
compressWithParams :: EncodeParams -> ByteString -> ByteString
compressWithParams EncodeParams
ps =
      [ByteString] -> ByteString
BS.Lazy.fromChunks
    ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString
streamId ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)
    ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Encoder -> [ByteString] -> [ByteString]
go Encoder
initialEncoder
    ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
BS.Lazy.toChunks
  where
    streamId :: Strict.ByteString
    initialEncoder :: Encoder
    (ByteString
streamId, Encoder
initialEncoder) = (ByteString, Encoder)
initializeEncoder

    -- Loop invariant: The 'Encoder' has strictly less than 'chunkSize' in it.
    go ::
         Encoder
      -> [Strict.ByteString]
      -> [Strict.ByteString]
    go :: Encoder -> [ByteString] -> [ByteString]
go Encoder
encoder =
        \case
          [] ->
            EncodeParams -> Encoder -> [ByteString]
finalizeEncoder EncodeParams
ps Encoder
encoder
          (ByteString
c:[ByteString]
cs) ->
            -- The encoded chunks are available to the caller before we process
            -- the rest of the chunks.
            case EncodeParams -> Encoder -> ByteString -> ([ByteString], Encoder)
compressStep EncodeParams
ps Encoder
encoder ByteString
c of
              ([ByteString]
encodedChunks, Encoder
encoder') ->
                [ByteString]
encodedChunks [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ Encoder -> [ByteString] -> [ByteString]
go Encoder
encoder' [ByteString]
cs

-- | Append the data to the 'Encoder' buffer and do as much compression as
-- possible.
--
-- __Postconditions:__
--
-- * The resulting 'Encoder' will not have more data in its buffer than the
--   'chunkSize' in the 'EncodeParams'.
-- * Each encoded frame will hold /exactly/ one `chunkSize` worth of
--   uncompressed data.
compressStep ::
     EncodeParams
  -> Encoder
  -> Strict.ByteString
  -> ([Strict.ByteString], Encoder)
compressStep :: EncodeParams -> Encoder -> ByteString -> ([ByteString], Encoder)
compressStep EncodeParams
ps (Encoder Buffer
b) ByteString
bs =
    let
      EncodeResult{[ByteString]
Encoder
encodeResultEncoded :: [ByteString]
encodeResultEncoder :: Encoder
encodeResultEncoder :: EncodeResult -> Encoder
encodeResultEncoded :: EncodeResult -> [ByteString]
..} = EncodeParams -> Encoder -> EncodeResult
encodeBuffered EncodeParams
ps (Buffer -> Encoder
Encoder (Buffer -> Encoder) -> Buffer -> Encoder
forall a b. (a -> b) -> a -> b
$ Buffer
b Buffer -> ByteString -> Buffer
`Buffer.append` ByteString
bs)
    in
      ([ByteString]
encodeResultEncoded, Encoder
encodeResultEncoder)


-------------------------------------------------------------------------------
-- Decompression
-------------------------------------------------------------------------------

-- | Decompress the input using [Snappy](https://github.com/google/snappy/).
--
-- The input stream is expected to be in the official Snappy frame format.
--
-- __Note:__ The extra laziness of this function (compared to `decompress'`)
-- comes at the cost of potential exceptions during decompression.
decompress :: HasCallStack => Lazy.ByteString -> Lazy.ByteString
decompress :: HasCallStack => ByteString -> ByteString
decompress = HasCallStack => DecodeParams -> ByteString -> ByteString
DecodeParams -> ByteString -> ByteString
decompressWithParams DecodeParams
forall a. Default a => a
def

-- | Decompress the input using [Snappy](https://github.com/google/snappy/) with
-- the given 'DecodeParams'.
--
-- The input stream is expected to be in the official Snappy frame format.
--
-- __Note:__ The extra laziness of this function (compared to
-- `decompressWithParams'`) comes at the cost of potential exceptions during
-- decompression.
decompressWithParams ::
     HasCallStack
  => DecodeParams
  -> Lazy.ByteString
  -> Lazy.ByteString
decompressWithParams :: HasCallStack => DecodeParams -> ByteString -> ByteString
decompressWithParams DecodeParams
dps =
      [ByteString] -> ByteString
BS.Lazy.fromChunks
    ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Decoder -> [ByteString] -> [ByteString]
go Decoder
initializeDecoder
    ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
BS.Lazy.toChunks
  where
    go ::
         Decoder
      -> [Strict.ByteString]
      -> [Strict.ByteString]
    go :: Decoder -> [ByteString] -> [ByteString]
go Decoder
decoder =
        \case
          [] ->
            Either DecodeFailure () -> ()
forall e a. Exception e => Either e a -> a
throwLeft (Decoder -> Either DecodeFailure ()
finalizeDecoder Decoder
decoder) () -> [ByteString] -> [ByteString]
forall a b. a -> b -> b
`seq` []
          (ByteString
c:[ByteString]
cs) ->
            let
              ([ByteString]
decompressed, Decoder
decoder') = HasCallStack =>
DecodeParams -> Decoder -> ByteString -> ([ByteString], Decoder)
DecodeParams -> Decoder -> ByteString -> ([ByteString], Decoder)
decompressStep DecodeParams
dps Decoder
decoder ByteString
c
            in
              [ByteString]
decompressed [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ Decoder -> [ByteString] -> [ByteString]
go Decoder
decoder' [ByteString]
cs

-- | Append the data to the 'Decoder' buffer and do as much decompression as
-- possible.
--
-- Throws an exception if any 'DecodeFailure' occurs.
decompressStep ::
     HasCallStack
  => DecodeParams
  -> Decoder
  -> Strict.ByteString
  -> ([Strict.ByteString], Decoder)
decompressStep :: HasCallStack =>
DecodeParams -> Decoder -> ByteString -> ([ByteString], Decoder)
decompressStep DecodeParams
dps d :: Decoder
d@Decoder{Buffer
DecodeState
decoderBuffer :: Buffer
decoderState :: DecodeState
decoderState :: Decoder -> DecodeState
decoderBuffer :: Decoder -> Buffer
..} ByteString
bs =
    let
      DecodeResult{[ByteString]
Decoder
decodeResultDecoded :: [ByteString]
decodeResultDecoder :: Decoder
decodeResultDecoder :: DecodeResult -> Decoder
decodeResultDecoded :: DecodeResult -> [ByteString]
..} =
        Either DecodeFailure DecodeResult -> DecodeResult
forall e a. Exception e => Either e a -> a
throwLeft (Either DecodeFailure DecodeResult -> DecodeResult)
-> Either DecodeFailure DecodeResult -> DecodeResult
forall a b. (a -> b) -> a -> b
$
          DecodeParams -> Decoder -> Either DecodeFailure DecodeResult
decodeBuffered
            DecodeParams
dps
            Decoder
d { decoderBuffer = decoderBuffer `Buffer.append` bs }
    in
      ( [ByteString]
decodeResultDecoded
      , Decoder
decodeResultDecoder
      )

-- | Decompress the input using [Snappy](https://github.com/google/snappy/).
--
-- The input stream is expected to be in the official Snappy frame format.
-- Evaluates to a 'DecodeFailure' if the input stream is ill-formed.
--
-- __WARNING:__ This function is not as lazy as you might hope. To determine
-- whether the result is a 'DecodeFailure', it must load the entire source
-- 'Lazy.ByteString' into memory during decompression. Use either
-- 'decompressWithParams' or the incremental `decompressStep'` instead. If you
-- are truly okay with the extra memory overhead, you may ignore this warning.
{-# DEPRECATED decompress' "Consider using decompress or decompressStep' instead" #-}
decompress' :: Lazy.ByteString -> Either DecodeFailure Lazy.ByteString
decompress' :: ByteString -> Either DecodeFailure ByteString
decompress' = DecodeParams -> ByteString -> Either DecodeFailure ByteString
decompressWithParams' DecodeParams
forall a. Default a => a
def

-- | Decompress the input using [Snappy](https://github.com/google/snappy/) with
-- the given 'DecodeParams'.
--
-- The input stream is expected to be in the official Snappy frame format.
-- Evaluates to a 'DecodeFailure' if the input stream is ill-formed.
--
-- __WARNING:__ This function is not as lazy as you might hope. To determine
-- whether the result is a 'DecodeFailure', it must load the entire source
-- 'Lazy.ByteString' into memory during decompression. Use either
-- 'decompressWithParams' or the incremental `decompressStep'` instead. If you
-- are truly okay with the extra memory overhead, you may ignore this warning.
{-# DEPRECATED decompressWithParams' "Consider using decompressWithParams or decompressStep' instead" #-}
decompressWithParams' ::
     DecodeParams
  -> Lazy.ByteString
  -> Either DecodeFailure Lazy.ByteString
decompressWithParams' :: DecodeParams -> ByteString -> Either DecodeFailure ByteString
decompressWithParams' DecodeParams
dps ByteString
compressed = do
    [ByteString]
decompressedChunks <- Decoder -> [ByteString] -> Either DecodeFailure [ByteString]
go Decoder
initializeDecoder ([ByteString] -> Either DecodeFailure [ByteString])
-> [ByteString] -> Either DecodeFailure [ByteString]
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
BS.Lazy.toChunks ByteString
compressed
    ByteString -> Either DecodeFailure ByteString
forall a. a -> Either DecodeFailure a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Either DecodeFailure ByteString)
-> ByteString -> Either DecodeFailure ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BS.Lazy.fromChunks [ByteString]
decompressedChunks
  where
    go ::
         Decoder
      -> [Strict.ByteString]
      -> Either DecodeFailure [Strict.ByteString]
    go :: Decoder -> [ByteString] -> Either DecodeFailure [ByteString]
go Decoder
decoder =
        \case
          [] -> do
            Decoder -> Either DecodeFailure ()
finalizeDecoder Decoder
decoder
            [ByteString] -> Either DecodeFailure [ByteString]
forall a. a -> Either DecodeFailure a
forall (m :: * -> *) a. Monad m => a -> m a
return []
          (ByteString
c:[ByteString]
cs) -> do
            ([ByteString]
decompressed, Decoder
decompressor') <- DecodeParams
-> Decoder
-> ByteString
-> Either DecodeFailure ([ByteString], Decoder)
decompressStep' DecodeParams
dps Decoder
decoder ByteString
c
            ([ByteString]
decompressed [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++) ([ByteString] -> [ByteString])
-> Either DecodeFailure [ByteString]
-> Either DecodeFailure [ByteString]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Decoder -> [ByteString] -> Either DecodeFailure [ByteString]
go Decoder
decompressor' [ByteString]
cs

-- | Append the data to the 'Decoder' buffer and do as much decompression as
-- possible.
--
-- __Note:__ This function is not as lazy as 'decompressStep', since it must
-- completely decode the given chunk before providing a result.
decompressStep' ::
     DecodeParams
  -> Decoder
  -> Strict.ByteString
  -> Either DecodeFailure ([Strict.ByteString], Decoder)
decompressStep' :: DecodeParams
-> Decoder
-> ByteString
-> Either DecodeFailure ([ByteString], Decoder)
decompressStep' DecodeParams
dps d :: Decoder
d@Decoder{Buffer
DecodeState
decoderState :: Decoder -> DecodeState
decoderBuffer :: Decoder -> Buffer
decoderBuffer :: Buffer
decoderState :: DecodeState
..} ByteString
bs = do
    DecodeResult{[ByteString]
Decoder
decodeResultDecoder :: DecodeResult -> Decoder
decodeResultDecoded :: DecodeResult -> [ByteString]
decodeResultDecoded :: [ByteString]
decodeResultDecoder :: Decoder
..} <-
      DecodeParams -> Decoder -> Either DecodeFailure DecodeResult
decodeBuffered DecodeParams
dps Decoder
d { decoderBuffer = decoderBuffer `Buffer.append` bs }
    ([ByteString], Decoder)
-> Either DecodeFailure ([ByteString], Decoder)
forall a. a -> Either DecodeFailure a
forall (m :: * -> *) a. Monad m => a -> m a
return ([ByteString]
decodeResultDecoded, Decoder
decodeResultDecoder)