{-# LANGUAGE FlexibleInstances, BangPatterns #-}
{-|
Module      : Data.Digest.Murmur64
Copyright   : (c) Thomas Schilling 2010
License     : BSD-style

Maintainer  : nominolo@gmail.com
Stability   : experimental
Portability : portable

Type class and primitives for constructing 64 bit hashes using the
MurmurHash2 algorithm.  See <http://murmurhash.googlepages.com> for
details on MurmurHash2.
-}
module Data.Digest.Murmur64
  ( Hash64, asWord64,
    Hashable64(..),
    hash64AddWord64, hash64AddInt, hash64, hash64WithSeed, combine,
  )
where

import Data.Word ( Word64 )
import Numeric ( showHex )
import Data.Bits ( Bits(xor, shiftR) )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Data.Char ( ord )
import Data.Foldable ( Foldable(foldl') )
import Data.List ( unfoldr )

-- | A 64 bit hash.
newtype Hash64 = Hash64 Word64
  deriving (Hash64 -> Hash64 -> Bool
(Hash64 -> Hash64 -> Bool)
-> (Hash64 -> Hash64 -> Bool) -> Eq Hash64
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Hash64 -> Hash64 -> Bool
== :: Hash64 -> Hash64 -> Bool
$c/= :: Hash64 -> Hash64 -> Bool
/= :: Hash64 -> Hash64 -> Bool
Eq, Eq Hash64
Eq Hash64 =>
(Hash64 -> Hash64 -> Ordering)
-> (Hash64 -> Hash64 -> Bool)
-> (Hash64 -> Hash64 -> Bool)
-> (Hash64 -> Hash64 -> Bool)
-> (Hash64 -> Hash64 -> Bool)
-> (Hash64 -> Hash64 -> Hash64)
-> (Hash64 -> Hash64 -> Hash64)
-> Ord Hash64
Hash64 -> Hash64 -> Bool
Hash64 -> Hash64 -> Ordering
Hash64 -> Hash64 -> Hash64
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Hash64 -> Hash64 -> Ordering
compare :: Hash64 -> Hash64 -> Ordering
$c< :: Hash64 -> Hash64 -> Bool
< :: Hash64 -> Hash64 -> Bool
$c<= :: Hash64 -> Hash64 -> Bool
<= :: Hash64 -> Hash64 -> Bool
$c> :: Hash64 -> Hash64 -> Bool
> :: Hash64 -> Hash64 -> Bool
$c>= :: Hash64 -> Hash64 -> Bool
>= :: Hash64 -> Hash64 -> Bool
$cmax :: Hash64 -> Hash64 -> Hash64
max :: Hash64 -> Hash64 -> Hash64
$cmin :: Hash64 -> Hash64 -> Hash64
min :: Hash64 -> Hash64 -> Hash64
Ord, Hash64
Hash64 -> Hash64 -> Bounded Hash64
forall a. a -> a -> Bounded a
$cminBound :: Hash64
minBound :: Hash64
$cmaxBound :: Hash64
maxBound :: Hash64
Bounded)

instance Show Hash64 where
  showsPrec :: Int -> Hash64 -> ShowS
showsPrec Int
_ (Hash64 Word64
w) = String -> ShowS
showString String
"Hash64 0x" ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word64 -> ShowS
forall a. Integral a => a -> ShowS
showHex Word64
w

-- | Extract 64 bit word from hash.
asWord64 :: Hash64 -> Word64
asWord64 :: Hash64 -> Word64
asWord64 (Hash64 Word64
w) = Word64
w

class Hashable64 a where
  hash64Add :: a -> Hash64 -> Hash64

murmur_m :: Word64
murmur_m :: Word64
murmur_m = Word64
0xc6a4a7935bd1e995

murmur_r :: Int
murmur_r :: Int
murmur_r = Int
47

-- | Add a 64 bit word to the hash.
hash64AddWord64 :: Word64 -> Hash64 -> Hash64
hash64AddWord64 :: Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
k (Hash64 Word64
h) =
  let k1 :: Word64
k1 = Word64
k Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
murmur_m
      k2 :: Word64
k2 = Word64
k1 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
k1 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
murmur_r)
      k3 :: Word64
k3 = Word64
k2 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
murmur_m
      h1 :: Word64
h1 = Word64
h Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
murmur_m
      h2 :: Word64
h2 = Word64
h1 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` Word64
k3
  in Word64 -> Hash64
Hash64 Word64
h2

hash64AddInt :: Int -> Hash64 -> Hash64
hash64AddInt :: Int -> Hash64 -> Hash64
hash64AddInt !Int
k0 = Word64 -> Hash64 -> Hash64
hash64AddWord64 (Int -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k0)

hash64AddFoldable :: (Hashable64 a, Foldable c) => c a -> Hash64 -> Hash64
hash64AddFoldable :: forall a (c :: * -> *).
(Hashable64 a, Foldable c) =>
c a -> Hash64 -> Hash64
hash64AddFoldable c a
c !Hash64
h0 = (Hash64 -> a -> Hash64) -> Hash64 -> c a -> Hash64
forall b a. (b -> a -> b) -> b -> c a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Hash64 -> a -> Hash64
forall {a}. Hashable64 a => Hash64 -> a -> Hash64
f Hash64
h0 c a
c
  where f :: Hash64 -> a -> Hash64
f Hash64
h a
a = a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a Hash64
h

-- | Create a hash using a custom seed.
--
-- The seed should be non-zero, but other than that can be an
-- arbitrary number.  Different seeds will give different hashes, and
-- thus (most likely) different hash collisions.
hash64WithSeed :: Hashable64 a => Word64 -> a -> Hash64
hash64WithSeed :: forall a. Hashable64 a => Word64 -> a -> Hash64
hash64WithSeed Word64
seed a
a = Hash64 -> Hash64
hash64End (a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a (Word64 -> Hash64
Hash64 Word64
seed))

-- | Create a hash using the default seed.
hash64 :: Hashable64 a => a -> Hash64
hash64 :: forall a. Hashable64 a => a -> Hash64
hash64 = Word64 -> a -> Hash64
forall a. Hashable64 a => Word64 -> a -> Hash64
hash64WithSeed Word64
defaultSeed

-- | Combine two hash generators.  E.g.,
--
-- @
--   hashFoo (Foo a) = hash64AddInt 1 `combine` hash64Add a
-- @
combine :: (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> (Hash64 -> Hash64)
combine :: (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
combine Hash64 -> Hash64
x Hash64 -> Hash64
y = Hash64 -> Hash64
y (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Hash64 -> Hash64
x

hash64End :: Hash64 -> Hash64
hash64End :: Hash64 -> Hash64
hash64End (Hash64 Word64
h) =
  let h1 :: Word64
h1 = Word64
h Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
h Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
murmur_r)
      h2 :: Word64
h2 = Word64
h1 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
murmur_m
      h3 :: Word64
h3 = Word64
h2 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
h2 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
murmur_r)
  in Word64 -> Hash64
Hash64 Word64
h3

defaultSeed :: Word64
defaultSeed :: Word64
defaultSeed = Word64
0xdeadbeef -- not 0, otherwise hash64 [0] == hash64 []

{-

On a CPU with two multipliers and two ALUs, Murmur2 can process one
word in every two cycles + set up (2 cycles) and finish (3 cycles).

Here's the data flow graph:

@
     h    k1   k2  k3  ...
     |    |     |   |
     |    * m   |   |
     |    |\    |   |
     |    | >> r    |
     |    | /   |   |
     |   xor    *   |
     |    |     |\  |
     * m  * m   | >> r
      \  /      |/  |
       xor     xor  * m
         \     /    |\
          * m * m   | >> r
           \ /      |/
           xor     xor
             \     /
              * m * m
               \ /
               xor
                 \
                  ...
@
-}

-- -------------------------------------------------------------------
-- Instances

instance Hashable64 Char where
  hash64Add :: Char -> Hash64 -> Hash64
hash64Add Char
c = Int -> Hash64 -> Hash64
hash64AddInt (Char -> Int
ord Char
c)

instance Hashable64 Int where
  hash64Add :: Int -> Hash64 -> Hash64
hash64Add = Int -> Hash64 -> Hash64
hash64AddInt

instance Hashable64 Word64 where
  hash64Add :: Word64 -> Hash64 -> Hash64
hash64Add = Word64 -> Hash64 -> Hash64
hash64AddWord64

instance Hashable64 a => Hashable64 [a] where
  hash64Add :: [a] -> Hash64 -> Hash64
hash64Add = [a] -> Hash64 -> Hash64
forall a (c :: * -> *).
(Hashable64 a, Foldable c) =>
c a -> Hash64 -> Hash64
hash64AddFoldable

instance Hashable64 Integer where
  -- Within Int range, make sure they hash to exactly the same value
  hash64Add :: Integer -> Hash64 -> Hash64
hash64Add Integer
i0
   | Integer
i0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
forall a. Bounded a => a
minBound :: Int) Bool -> Bool -> Bool
&&
      Integer
i0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
forall a. Bounded a => a
maxBound :: Int)
   = Int -> Hash64 -> Hash64
hash64AddInt (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
i0)
   | Bool
otherwise
   -- Prefix by sign, then hash the raw data words, starting with LSB
   = Bool -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add (Integer -> Integer
forall a. Num a => a -> a
signum Integer
i0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
0) (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine`
     [Word64] -> Hash64 -> Hash64
forall a (c :: * -> *).
(Hashable64 a, Foldable c) =>
c a -> Hash64 -> Hash64
hash64AddFoldable ((Integer -> Maybe (Word64, Integer)) -> Integer -> [Word64]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Integer -> Maybe (Word64, Integer)
forall {a}. Num a => Integer -> Maybe (a, Integer)
f (Integer -> Integer
forall a. Num a => a -> a
abs Integer
i0) :: [Word64])
    where
      f :: Integer -> Maybe (a, Integer)
f Integer
i | Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = Maybe (a, Integer)
forall a. Maybe a
Nothing
      f Integer
i =
        let (Integer
i', Integer
a) = Integer -> Integer -> (Integer, Integer)
forall a. Integral a => a -> a -> (a, a)
quotRem Integer
i Integer
maxWord in
        (a, Integer) -> Maybe (a, Integer)
forall a. a -> Maybe a
Just (Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
a, Integer
i')
      maxWord :: Integer
maxWord = Word64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
forall a. Bounded a => a
maxBound :: Word64) Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
1 :: Integer

instance Hashable64 Bool where
  hash64Add :: Bool -> Hash64 -> Hash64
hash64Add Bool
False = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
1
  hash64Add Bool
True = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
2

instance Hashable64 a => Hashable64 (Maybe a) where
  hash64Add :: Maybe a -> Hash64 -> Hash64
hash64Add Maybe a
Nothing = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
3
  hash64Add (Just a
a) = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
4 (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a

instance (Hashable64 a, Hashable64 b) => Hashable64 (Either a b) where
  hash64Add :: Either a b -> Hash64 -> Hash64
hash64Add (Left a
a) = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
5 (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a
  hash64Add (Right b
b) = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
6 (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` b -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add b
b

instance Hashable64 () where
  hash64Add :: () -> Hash64 -> Hash64
hash64Add () = Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
7

instance (Hashable64 a, Hashable64 b) => Hashable64 (a, b) where
  hash64Add :: (a, b) -> Hash64 -> Hash64
hash64Add (a
a, b
b) = a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` b -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add b
b

instance (Hashable64 a, Hashable64 b, Hashable64 c)
    => Hashable64 (a, b, c) where
  hash64Add :: (a, b, c) -> Hash64 -> Hash64
hash64Add (a
a, b
b, c
c) =
    a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` b -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add b
b (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` c -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add c
c

instance (Hashable64 a, Hashable64 b, Hashable64 c, Hashable64 d)
    => Hashable64 (a, b, c, d) where
  hash64Add :: (a, b, c, d) -> Hash64 -> Hash64
hash64Add (a
a, b
b, c
c, d
d) =
    a -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add a
a (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` b -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add b
b (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine`
    c -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add c
c (Hash64 -> Hash64) -> (Hash64 -> Hash64) -> Hash64 -> Hash64
`combine` d -> Hash64 -> Hash64
forall a. Hashable64 a => a -> Hash64 -> Hash64
hash64Add d
d

instance Hashable64 B.ByteString where
  hash64Add :: ByteString -> Hash64 -> Hash64
hash64Add ByteString
bs Hash64
h = (Hash64 -> Word8 -> Hash64) -> Hash64 -> ByteString -> Hash64
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl' Hash64 -> Word8 -> Hash64
forall {a}. Integral a => Hash64 -> a -> Hash64
go (Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
8 Hash64
h) ByteString
bs
    where go :: Hash64 -> a -> Hash64
go Hash64
acc a
b = Word64 -> Hash64 -> Hash64
hash64AddWord64 (a -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
b) Hash64
acc

instance Hashable64 L.ByteString where
  hash64Add :: ByteString -> Hash64 -> Hash64
hash64Add ByteString
bs Hash64
h = (Hash64 -> Word8 -> Hash64) -> Hash64 -> ByteString -> Hash64
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
L.foldl' Hash64 -> Word8 -> Hash64
forall {a}. Integral a => Hash64 -> a -> Hash64
go (Word64 -> Hash64 -> Hash64
hash64AddWord64 Word64
9 Hash64
h) ByteString
bs
    where go :: Hash64 -> a -> Hash64
go Hash64
acc a
b = Word64 -> Hash64 -> Hash64
hash64AddWord64 (a -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
b) Hash64
acc