{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

-- |
-- Module: Data.Cuckoo
-- Copyright: Copyright © 2019 Lars Kuhtz <lakuhtz@gmail.com>
-- License: BSD3
-- Maintainer: Lars Kuhtz <lakuhtz@gmail.com>
-- Stability: experimental
--
-- Haskell implementation of Cuckoo filters as described in
--
-- <https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf B. Fan, D.G. Anderson, M. Kaminsky, M.D. Mitzenmacher. Cuckoo Filter: Practically Better Than Bloom. In Proc. CoNEXT, 2014.>
--
-- Cuckoo filters are a data structure for probabilistic set membership. They
-- support insertion, deletion, and membership queries for set elements.
--
-- Membership queries may return false positive results. But queries don't
-- return false negative results.
--
-- Unlike Bloom filters, Cuckoo filters maintain an upper bound on the false
-- positive rate that is independent of the load of the filter. However,
-- insertion of new elements in the filter can fail. For typical
-- configurations this probability is very small for load factors smaller than
-- 90 percent.
--
-- = Example
--
-- > {-# LANGUAGE DataKinds #-}
-- > {-# LANGUAGE TypeApplications #-}
-- > {-# LANGUAGE TypeFamilies #-}
-- > {-# OPTIONS_GHC -fno-warn-orphans #-}
-- >
-- > import Control.Monad (filterM)
-- > import Data.Cuckoo
-- > import Data.List ((\\))
-- >
-- > -- Define CuckooFilterHash instance (this uses the default implementation)
-- > instance CuckooFilterHash Int
-- >
-- > main :: IO ()
-- > main = do
-- >     -- Create Filter for a minimum of 500000 entries
-- >     f <- newCuckooFilter @4 @8 @Int 0 500000
-- >
-- >     -- Insert 450000 items
-- >     failed <- filterM (fmap not . insert f) [0..500000-1]
-- >
-- >     -- Query inserted items
-- >     missing <- filterM (fmap not . member f) [0..500000-1]
-- >
-- >     -- Test for false positives
-- >     false <- filterM (member f) [500000..1000000 - 1]
-- >
-- >     -- Report results
-- >     putStrLn $ "failed inserts: " <> show (length failed)
-- >     putStrLn $ "false positives: " <> show (length false)
-- >     putStrLn $ "false positive rate (%): " <> show @Double (fromIntegral (length false) * 100 / 500000)
-- >     putStrLn $ "missing (must be 0): " <> show (length $ missing \\ failed)
-- >
-- >     -- Filter properties
-- >     putStrLn $ "capacity: " <> show (capacityInItems f)
-- >     putStrLn $ "size in allocated bytes: " <> show (sizeInAllocatedBytes f)
-- >
-- >     -- computing the following is a bit slow
-- >     c <- itemCount f
-- >     putStrLn $ "item count: " <> show c
-- >     lf <- loadFactor f
-- >     putStrLn $ "load factor (%): " <> show lf
-- >     putStrLn $ "bits per item: " <> show @Double (fromIntegral (sizeInAllocatedBytes f) * 8 / fromIntegral c)
--
module Data.Cuckoo
(
-- * Hash Functions
  Salt(..)
, CuckooFilterHash(..)

-- ** Hash functions
, sip
, sip_bytes
, fnv1a
, fnv1a_bytes

-- * Cuckoo Filter
, CuckooFilter
, CuckooFilterIO
, newCuckooFilter

-- * Cuckoo Filter Operations
, insert
, member
, delete

-- * Utils
, sizeInAllocatedBytes
, capacityInItems
, itemCount
, loadFactor

-- * Debugging Utils
, showFilter
, itemHashes
) where

import Control.Applicative
import Control.Monad
import Control.Monad.Primitive

import Data.Bits
import Data.Bool
import Data.Kind
import Data.Maybe
import Data.Primitive.ByteArray

import Foreign

import GHC.TypeLits

import Numeric.Natural

import Prelude hiding (null)

import "cuckoo" System.Random.Internal

import Text.Printf

-- internal modules

import Data.Cuckoo.Internal

-- $setup
-- >>> :set -XTypeApplications -XDataKinds -XTypeFamilies
-- >>> instance CuckooFilterHash Int

-- -------------------------------------------------------------------------- --
-- Hash Functions

-- The hashable package is a bit of a kitchen sink. Instances for different data
-- types use hash functions with very different properites and of varying
-- quality. Neither of this is documented.
--
-- Primitive base types, including all number types use fast instances that
-- provided very little uniformity in the output with respect to input data and
-- salt. Don't use these!
--
-- ByteString uses a pure Haskell implementation of Sip hash.
--
-- The helper functions for low-level ptrs (Ptr, ByteArray) use a C
-- implementation of fnv.
--
-- Because of the variying quality and properties of the functions, absence of
-- any control over which function is use, and no guarantees with respect to
-- stability accross versions, we don't use that package altogether.
--

-- | Salt for hash computations.
--
newtype Salt = Salt Int
    deriving (Show, Eq, Ord, Enum, Integral, Real, Num)

-- | Choosing good hash functions is imperative for a good performance of a
-- cuckoo filter. The hash functions must be
--
-- * independent and
-- * provide good uniformity on the lower bits of the output.
--
-- The default implementations use sip hash for 'cuckooHash' and 'fnv1a' (64
-- bit) for 'cuckooFingerprint' and require an instance of 'Storable'.
--
-- >>> instance CuckooFilterHash Int
--
-- The following example uses the hash functions that are provided in this
-- module to define an instance for 'B.ByteString':
--
-- >>> import qualified Data.ByteString as B
-- >>> :{
-- instance CuckooFilterHash B.ByteString where
--     cuckooHash (Salt s) a = fnv1a_bytes s a
--     cuckooFingerprint (Salt s) a = sip_bytes s a
--     {-# INLINE cuckooHash #-}
--     {-# INLINE cuckooFingerprint #-}
-- :}
--
class CuckooFilterHash a where

    -- | This function must provide good entropy on the lower
    -- \(2^b - 1\) bits of the result, where \(b\) is the number of buckets.
    --
    cuckooHash :: Salt -> a -> Word64

    -- | This function must provide good entropy on the lower
    -- bits of the size of a fingerprint.
    --
    cuckooFingerprint :: Salt -> a -> Word64

    -- | Default implementation of 'cuckooHash' for types that are an instance
    -- of 'Storable'.
    --
    default cuckooHash :: Storable a => Salt -> a -> Word64
    cuckooHash (Salt s) a = sip s a
    {-# INLINE cuckooHash #-}

    -- | Default implementation of 'cuckooFingerprint' for types that are an
    -- instance of 'Storable'.
    --
    default cuckooFingerprint :: Storable a => Salt -> a -> Word64
    cuckooFingerprint (Salt s) a = fnv1a s a
    {-# INLINE cuckooFingerprint #-}

-- -------------------------------------------------------------------------- --
-- Cuckoo Filter

-- | Cuckoo Filter with
--
-- * State token @s :: Type@,
-- * bucket size @b :: Nat@,
-- * fingerprint size @f :: Nat@, and
-- * content type @a :: Type@.
--
-- The following constraints apply
--
-- * \(0 < f \leq 32\)
-- * \(0 < b\)
--
-- The implementation is not thread safe. For concurrent use the filter must be
-- wrapped in a read-write lock.
--
data CuckooFilter s (b :: Nat) (f :: Nat) (a :: Type)
    = CuckooFilter
        { _cfBucketCount :: {-# UNPACK #-} !Int
        , _cfSalt :: {-# UNPACK #-} !Salt
        , _cfRng :: {-# UNPACK #-} !(Gen s)
        , _cfData :: {-# UNPACK #-} !(MutableByteArray s)
        }

-- | Cuckoo filter that can be used in the `IO` monad.
--
type CuckooFilterIO b f a = CuckooFilter RealWorld b f a

-- | Create a new Cuckoo filter that has at least the given capacity.
--
-- Enabling the @TypeApplications@ language extension provides a convenient way
-- for passing the type parameters to the function.
--
-- >>> :set -XTypeApplications -XDataKinds -XTypeFamilies
-- >>> newCuckooFilter @4 @10 @Int 0 1000
--
-- The type parameters are
--
-- * bucket size @b :: Nat@,
-- * fingerprint size @f :: Nat@,
-- * content type @a :: Type@, and
-- * Monad @m :: Type -> Type@,
--
-- The following constraints apply:
--
-- * \(0 < f \leq 32\),
-- * \(0 < b\), and
-- * \(64 \leq n\), where \(n\) is the requested size.
--
-- The false positive rate depends mostly on the value of @f@. It is bounded
-- from above by \(\frac{2b}{2^f}\). In most cases @4@ is a good choice for @b@.
--
-- Actual performance depends on the choice of good hash functions that provide
-- high uniformity on the lower bits.
--
-- The actual capacity may be much larger than what is requested, because the
-- actual bucket count is a power of two.
--
-- >>> f <- newCuckooFilter @4 @10 @Int 0 600
-- >>> capacityInItems f
-- 1024
-- >>> sizeInAllocatedBytes f
-- 1284
--
newCuckooFilter
    :: forall b f a m
    . KnownNat b
    => KnownNat f
    => PrimMonad m
    => Salt
        -- ^ Salt for the hash functions.
    -> Natural
        -- ^ Size. Must be at least 64.
    -> m (CuckooFilter (PrimState m) b f a)
newCuckooFilter salt n = do
    check
    arr <- newByteArray bytes
    fillByteArray arr 0 bytes 0
    CuckooFilter buckets salt
        <$> initialize (int salt)
        <*> pure arr
  where
    minBuckets = intFit n (w @b) -- minimum number of buckets match requested capacity
    buckets = intNextPowerOfTwo (int minBuckets) -- actual number of buckets
    items = w @b * buckets -- actual capacity (in number of items)
    bytes = intFit @_ @Int (w @f * items) 8 + 4 -- total number of allocated bytes

        -- we add 4 extra bytes to avoid having to deal with corner cases when
        -- reading and writing fingerprints that are not aligned to Word32 at
        -- the end of the filter.

    check
        | not (0 < w @f) = error "Fingerprint size must be positive"
        | not (w @f <= 32) = error "Fingerprint size must not be larger than 32"
        | not (0 < w @b) = error "Bucket size (items per bucket) must be positive"
        | not (0 < n) = error "The size (number of items) of the filter must be positive"
        | not (64 <= n) = error "Seriously? Are you kidding me? If you need to represent such a tiny set, you'll have to pick another data structure for that"
        | otherwise = return ()

-- -------------------------------------------------------------------------- --
-- Insert

-- TODO: reduce number of reads, in checkBucket and don't re-read value
-- during setFingerprint. Similarly, don't recompute hashes.
--
-- TODO: is the RNG really important here (for security or performance) or is
-- the hash function sufficient? (For preventing attacks, is the salt
-- sufficient). Could we also compute the relocation slot from the hash?

-- | Insert an item into the filter and return whether the operation was
-- successful. If insertion fails, the filter is unchanged. An item can be
-- inserted more than once. The return value indicates whether insertion was
-- successful. The operation can fail when the filter doesn't have enough space
-- for the item.
--
-- This function is not thread safe. No concurrent writes or reads should occur
-- while this function is executed. If this is needed a lock must be used.
--
-- This function is not exception safe. The filter must not be used any more
-- after an asynchronous exception has been thrown during the computation of this
-- function. If this function is used in the presence of asynchronous exceptions
-- it should be apprioriately masked.
--
-- >>> f <- newCuckooFilter @4 @10 @Int 0 1000
-- >>> insert f 0
-- True
-- >>> insert f 0
-- True
-- >>> itemCount f
-- 2
--
insert
    :: forall b f a m
    . KnownNat f
    => KnownNat b
    => PrimMonad m
    => CuckooFilterHash a
    => CuckooFilter (PrimState m) b f a
    -> a
    -> m Bool
insert f a = do
  (b1, b2, fp) <- getBucketsRandom f a
  checkBucket f b1 null >>= \case
      Just i -> True <$ setFingerprint f b1 i fp
      Nothing -> kick 500 b2 fp
  where

    -- TODO make this exception safe? Do we need that?
    --
    kick 0 _ _ = return False
    kick c b k = checkBucket f b null >>= \case
        Just i -> True <$ setFingerprint f b i k
        Nothing -> do
            i <- randomSlot
            k' <- swapFingerprint b i k
            kick (pred @Int c) (otherBucket f b k') k' >>= \case
                False -> False <$ setFingerprint f b i k'
                x -> return x
    {-# INLINE kick #-}

    randomSlot = Slot <$> uniformR (0, w @b - 1) (_cfRng f)
    {-# INLINE randomSlot #-}

    swapFingerprint b i k = do
        k' <- readFingerprint f b i
        setFingerprint f b i k
        return k'
    {-# INLINE swapFingerprint #-}

-- -------------------------------------------------------------------------- --
-- Member Test

-- | Test whether an item is in the set that is represented by the Cuckoo
-- filter.
--
-- A negative result means that the item is definitively not in the set. A
-- positive result means that the item is most likely in the set. The rate of
-- false positives is bounded from above by \(\frac{2b}{2^f}\) where @b@ is the number
-- of items per bucket and @f@ is the size of a fingerprint in bits.
--
-- >>> f <- newCuckooFilter @4 @10 @Int 0 1000
-- >>> insert f 0
-- True
-- >>> member f 0
-- True
-- >>> member f 1
-- False
--
member
    :: CuckooFilterHash a
    => PrimMonad m
    => KnownNat f
    => KnownNat b
    => CuckooFilter (PrimState m) b f a
    -> a
    -> m Bool
member f a = checkBucket f b1 fp >>= \case
    Just _ -> return True
    Nothing -> checkBucket f b2 fp >>= \case
        Just _ -> return True
        Nothing -> return False
  where
    salt = _cfSalt f
    b1 = bucket1 f a
    b2 = otherBucket f b1 fp
    fp = mkFingerprint salt a
{-# INLINE member #-}

-- -------------------------------------------------------------------------- --
-- Delete

-- | Delete an items from the filter. An item that was inserted more than once
-- can also be deleted more than once.
--
-- /IMPORTANT/ An item must only be deleted if it was successfully added to the
-- filter before (and hasn't been deleted since then).
--
-- Deleting an item that isn't in the filter can result in the filter returning
-- false negative results.
--
-- This function is not thread safe. No concurrent writes must occur while this
-- function is executed. If this is needed a lock must be used. Concurrent reads
-- are fine.
--
-- >>> f <- newCuckooFilter @4 @10 @Int 0 1000
-- >>> insert f 0
-- True
-- >>> insert f 0
-- True
-- >>> itemCount f
-- 2
-- >>> delete f 0
-- True
-- >>> itemCount f
-- 1
-- >>> member f 0
-- True
-- >>> delete f 0
-- True
-- >>> itemCount f
-- 0
-- >>> member f 0
-- False
--
delete
    :: CuckooFilterHash a
    => PrimMonad m
    => KnownNat f
    => KnownNat b
    => CuckooFilter (PrimState m) b f a
    -> a
    -> m Bool
delete f a = do
    (b1, b2, fp) <- getBucketsRandom f a
    checkBucket f b1 fp >>= \case
        Just i -> True <$ setFingerprint f b1 i null
        Nothing -> checkBucket f b2 fp >>= \case
            Just i -> True <$ setFingerprint f b2 i null
            Nothing -> return False

-- -------------------------------------------------------------------------- --
-- Internal
-- -------------------------------------------------------------------------- --

newtype Fingerprint (f :: Nat) = Fingerprint Word64
    deriving (Show, Eq, Ord)

newtype Bucket = Bucket Int
    deriving (Show, Eq, Ord, Enum, Integral, Real, Num)

newtype Slot = Slot Int
    deriving (Show, Eq, Ord, Enum, Integral, Real, Num)

-- TODO: Should we expose this function, too, in 'CuckooFilterHash'? By hiding
-- it here there is some chance that a user accidentally picks a function that
-- isn't independent from this one.
--
hashFingerprint :: Salt -> Fingerprint f -> Int
hashFingerprint (Salt s) (Fingerprint a) = int $! sip2 s a
{-# INLINE hashFingerprint #-}

mkFingerprint
    :: forall f a
    . KnownNat f
    => CuckooFilterHash a
    => Salt
    -> a
    -> Fingerprint f
mkFingerprint salt a = Fingerprint $! max 1 $!
    cuckooFingerprint salt a .&. (2 ^ w @f - 1)
{-# INLINE mkFingerprint #-}

bucket1 :: CuckooFilterHash a => CuckooFilter s b f a -> a -> Bucket
bucket1 f a = Bucket $! int $! cuckooHash (_cfSalt f) a .&. (int $ _cfBucketCount f - 1)
{-# INLINE bucket1 #-}

otherBucket :: CuckooFilter s b f a -> Bucket -> Fingerprint f -> Bucket
otherBucket f (Bucket b) fp = Bucket $!
    (b `xor` hashFingerprint (_cfSalt f) fp) .&. (int $ _cfBucketCount f - 1)
{-# INLINE otherBucket #-}

ix :: Bucket -> Int
ix (Bucket i) = i
{-# INLINE ix #-}

-- | Fingerprints must be of at most 32 bits. Yet we represent them as Word64,
-- this is compromise to work with an reasonably efficient 32bit alignment,
-- while guaranteeing that the for each fingerprint there is an alignment such
-- that the fingerprint fits into the returned value with respect to the
-- alignment.
--
readFingerprint
    :: forall b f a m
    . KnownNat f
    => KnownNat b
    => PrimMonad m
    => CuckooFilter (PrimState m) b f a
        -- ^ Filter
    -> Bucket
        -- ^ bucket number
    -> Slot
        -- ^ slot number
    -> m (Fingerprint f)
readFingerprint f n (Slot i) = do
    v <- get dat pos
    return $ Fingerprint $! (v `shiftR` off) .&. mask
  where
    dat = _cfData f
    mask = (2 ^ (w @f)) - 1
    (pos, off) = (w @b * w @f * ix n + w @f * i) `divMod` 32
{-# INLINE readFingerprint #-}

setFingerprint
    :: forall b f a m
    . KnownNat f
    => KnownNat b
    => PrimMonad m
    => CuckooFilter (PrimState m) b f a
        -- ^ Filter
    -> Bucket
        -- ^ bucket number
    -> Slot
        -- ^ slot number
    -> Fingerprint f
    -> m ()
setFingerprint f n (Slot i) (Fingerprint fp) = do
    v <- get dat pos
    set dat pos $ (fp `shiftL` off) .|. (v .&. complement mask)
  where
    dat = _cfData f
    mask = (2 ^ (w @f) - 1) `shiftL` off
    (pos, off) = (w @b * w @f * ix n + w @f * i) `divMod` 32
{-# INLINE setFingerprint #-}

-- -------------------------------------------------------------------------- --
-- Utils

null :: Fingerprint f
null = Fingerprint 0
{-# INLINE null #-}

getBucketsRandom
    :: CuckooFilterHash a
    => PrimMonad m
    => KnownNat f
    => KnownNat b
    => CuckooFilter (PrimState m) b f a
    -> a
    -> m (Bucket, Bucket, Fingerprint f)
getBucketsRandom f a = bool (b1, b2, fp) (b2, b1, fp) <$> uniform rng
  where
    salt = _cfSalt f
    rng = _cfRng f
    b1 = bucket1 f a
    b2 = otherBucket f b1 fp
    fp = mkFingerprint salt a
{-# INLINE getBucketsRandom #-}

checkBucket
    :: forall b f a m
    . PrimMonad m
    => KnownNat f
    => KnownNat b
    => CuckooFilter (PrimState m) b f a
    -> Bucket
    -> Fingerprint f
    -> m (Maybe Slot)
checkBucket f b fp = go (w @b - 1)
  where
    go :: Int -> m (Maybe Slot)
    go (-1) = return Nothing
    go i = readFingerprint f b (Slot i) >>= \x -> if x == fp
        then return $ Just (Slot i)
        else go (pred i)
    -- TODO: can we teach GHC to unroll this loop statically without
    -- defining a class. Maybe GHC does already unroll it?

-- | Total number of items that the filter can hold. In practice a load factor
-- of ~95% of this number can be reached.
--
capacityInItems :: forall b f a s . KnownNat b => CuckooFilter s b f a -> Int
capacityInItems f = _cfBucketCount f * w @b
{-# INLINE capacityInItems #-}

-- | The total number of bytes allocated for storing items in the filter.
--
sizeInAllocatedBytes :: forall b f a s . KnownNat f => KnownNat b => CuckooFilter s b f a -> Int
sizeInAllocatedBytes f = intFit @_ @Int (capacityInItems f * w @f) 8 + 4
{-# INLINE sizeInAllocatedBytes #-}

-- | Number of items currently stored in the filter.
--
-- /Note/ that computing this number is expensive \(\mathcal{O}(n)\).
--
itemCount
    :: forall b f a m
    . PrimMonad m
    => KnownNat b
    => KnownNat f
    => CuckooFilter (PrimState m) b f a
    -> m Int
itemCount f = foldM (\x i -> foldM (\x' j -> go x' (Bucket i) (Slot j)) x [0.. w @b - 1]) 0 [0.._cfBucketCount f - 1]
  where
    go x b s = readFingerprint f b s >>= \fp -> case fp == Fingerprint 0 of
        True -> return x
        False -> return (succ x)

-- | The current load factor of the filter in percent.
--
-- @
-- loadFactor f = 100 * itemCount f / capacityInItems
-- @
--
-- /Note/ that computing this number is expensive \(\mathcal{O}(n)\).
--
loadFactor
    :: forall b f a m
    . PrimMonad m
    => KnownNat b
    => KnownNat f
    => CuckooFilter (PrimState m) b f a
    -> m Double
loadFactor f = do
    i <- itemCount f
    return $! 100 * int i / int (capacityInItems f)

-- -------------------------------------------------------------------------- --
-- Debugging Tools

-- | Show the contents of the filter as a list of buckets with values show in
-- hex. Used for debugging purposes.
--
showFilter
    :: forall b f a
    . KnownNat f
    => KnownNat b
    => CuckooFilter RealWorld b f a
    -> IO [[String]]
showFilter f = forM [0.. _cfBucketCount f - 1] $ \(i :: Int) -> do
        forM [0 .. w @b - 1] $ \(j :: Int) -> do
            Fingerprint fp <- readFingerprint f (Bucket i) (Slot j)
            return $ printf ("%0" <> show (intFit @_ @Int (w @f) 8) <> "x") fp

-- | Returns the different hashes that are associated with an item in the
-- filter. Used for debugging purposes.
--
itemHashes
    :: forall b f a s
    . KnownNat f
    => CuckooFilterHash a
    => CuckooFilter s b f a
    -> a
    -> (Int, Int, Word64)
itemHashes f a = (b1_, b2_, fp_)
  where
    fp@(Fingerprint fp_) = mkFingerprint @f (_cfSalt f) a
    b1@(Bucket b1_) = bucket1 f a
    Bucket b2_ = otherBucket f b1 fp