{-# LANGUAGE FlexibleInstances, BangPatterns #-}
{-|
Module      : Data.Digest.Murmur32
Copyright   : (c) Thomas Schilling 2010
License     : BSD-style

Maintainer  : nominolo@gmail.com
Stability   : experimental
Portability : portable

Type class and primitives for constructing 32 bit hashes using the
MurmurHash2 algorithm.  See <http://murmurhash.googlepages.com> for
details on MurmurHash2.
-}
module Data.Digest.Murmur32
  ( Hash32, asWord32,
    Hashable32(..),
    hash32AddWord32, hash32AddInt, hash32, hash32WithSeed
  )
where

import Data.Word ( Word32 )
import Numeric ( showHex )
import Data.Bits ( Bits(xor, shiftR), FiniteBits(finiteBitSize) )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Data.Char ( ord )
import Data.Foldable ( Foldable(foldl') )
import Data.List ( unfoldr )

-- | A 32 bit hash.
newtype Hash32 = Hash32 Word32
  deriving (Hash32 -> Hash32 -> Bool
(Hash32 -> Hash32 -> Bool)
-> (Hash32 -> Hash32 -> Bool) -> Eq Hash32
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Hash32 -> Hash32 -> Bool
== :: Hash32 -> Hash32 -> Bool
$c/= :: Hash32 -> Hash32 -> Bool
/= :: Hash32 -> Hash32 -> Bool
Eq, Eq Hash32
Eq Hash32 =>
(Hash32 -> Hash32 -> Ordering)
-> (Hash32 -> Hash32 -> Bool)
-> (Hash32 -> Hash32 -> Bool)
-> (Hash32 -> Hash32 -> Bool)
-> (Hash32 -> Hash32 -> Bool)
-> (Hash32 -> Hash32 -> Hash32)
-> (Hash32 -> Hash32 -> Hash32)
-> Ord Hash32
Hash32 -> Hash32 -> Bool
Hash32 -> Hash32 -> Ordering
Hash32 -> Hash32 -> Hash32
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Hash32 -> Hash32 -> Ordering
compare :: Hash32 -> Hash32 -> Ordering
$c< :: Hash32 -> Hash32 -> Bool
< :: Hash32 -> Hash32 -> Bool
$c<= :: Hash32 -> Hash32 -> Bool
<= :: Hash32 -> Hash32 -> Bool
$c> :: Hash32 -> Hash32 -> Bool
> :: Hash32 -> Hash32 -> Bool
$c>= :: Hash32 -> Hash32 -> Bool
>= :: Hash32 -> Hash32 -> Bool
$cmax :: Hash32 -> Hash32 -> Hash32
max :: Hash32 -> Hash32 -> Hash32
$cmin :: Hash32 -> Hash32 -> Hash32
min :: Hash32 -> Hash32 -> Hash32
Ord, Hash32
Hash32 -> Hash32 -> Bounded Hash32
forall a. a -> a -> Bounded a
$cminBound :: Hash32
minBound :: Hash32
$cmaxBound :: Hash32
maxBound :: Hash32
Bounded)

instance Show Hash32 where
  showsPrec :: Int -> Hash32 -> ShowS
showsPrec Int
_ (Hash32 Word32
w) = String -> ShowS
showString String
"Hash32 0x" ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word32 -> ShowS
forall a. Integral a => a -> ShowS
showHex Word32
w

-- | Extract 32 bit word from hash.
asWord32 :: Hash32 -> Word32
asWord32 :: Hash32 -> Word32
asWord32 (Hash32 Word32
w) = Word32
w

class Hashable32 a where
  hash32Add :: a -> Hash32 -> Hash32

murmur_m :: Word32
murmur_m :: Word32
murmur_m = Word32
0x5bd1e995

murmur_r :: Int
murmur_r :: Int
murmur_r = Int
24

hash32AddWord32 :: Word32 -> Hash32 -> Hash32
hash32AddWord32 :: Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
k (Hash32 Word32
h) =
  let k1 :: Word32
k1 = Word32
k Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
murmur_m
      k2 :: Word32
k2 = Word32
k1 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
`xor` (Word32
k1 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
murmur_r)
      k3 :: Word32
k3 = Word32
k2 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
murmur_m
      h1 :: Word32
h1 = Word32
h Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
murmur_m
      h2 :: Word32
h2 = Word32
h1 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
`xor` Word32
k3
  in Word32 -> Hash32
Hash32 Word32
h2

hash32AddInt :: Int -> Hash32 -> Hash32
hash32AddInt :: Int -> Hash32 -> Hash32
hash32AddInt !Int
k0
  | Int -> Int
forall b. FiniteBits b => b -> Int
finiteBitSize (Int
forall a. HasCallStack => a
undefined :: Int) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
32     -- Int is 32 bits
    = Word32 -> Hash32 -> Hash32
hash32AddWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k0)
  | Bool
otherwise                            -- Int is 64 bits
    = Word32 -> Hash32 -> Hash32
hash32AddWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k0) (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine`
      Word32 -> Hash32 -> Hash32
hash32AddWord32 (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
k0 Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
32))

hash32AddFoldable :: (Hashable32 a, Foldable c) => c a -> Hash32 -> Hash32
hash32AddFoldable :: forall a (c :: * -> *).
(Hashable32 a, Foldable c) =>
c a -> Hash32 -> Hash32
hash32AddFoldable c a
c !Hash32
h0 = (Hash32 -> a -> Hash32) -> Hash32 -> c a -> Hash32
forall b a. (b -> a -> b) -> b -> c a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Hash32 -> a -> Hash32
forall {a}. Hashable32 a => Hash32 -> a -> Hash32
f Hash32
h0 c a
c
  where f :: Hash32 -> a -> Hash32
f Hash32
h a
a = a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a Hash32
h

-- | Create a hash using a custom seed.
--
-- The seed should be non-zero, but other than that can be an
-- arbitrary number.  Different seeds will give different hashes, and
-- thus (most likely) different hash collisions.
hash32WithSeed :: Hashable32 a => Word32 -> a -> Hash32
hash32WithSeed :: forall a. Hashable32 a => Word32 -> a -> Hash32
hash32WithSeed Word32
seed a
a = Hash32 -> Hash32
hash32End (a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a (Word32 -> Hash32
Hash32 Word32
seed))

-- | Create a hash using the default seed.
hash32 :: Hashable32 a => a -> Hash32
hash32 :: forall a. Hashable32 a => a -> Hash32
hash32 = Word32 -> a -> Hash32
forall a. Hashable32 a => Word32 -> a -> Hash32
hash32WithSeed Word32
defaultSeed

-- | Combine two hash generators.  E.g.,
--
-- @
--   hashFoo (Foo a) = hash32AddInt 1 `combine` hash32Add a
-- @
combine :: (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> (Hash32 -> Hash32)
combine :: (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
combine Hash32 -> Hash32
x Hash32 -> Hash32
y = Hash32 -> Hash32
y (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Hash32 -> Hash32
x

hash32End :: Hash32 -> Hash32
hash32End :: Hash32 -> Hash32
hash32End (Hash32 Word32
h) =
  let h1 :: Word32
h1 = Word32
h Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
`xor` (Word32
h Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
13)
      h2 :: Word32
h2 = Word32
h1 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
murmur_m
      h3 :: Word32
h3 = Word32
h2 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
`xor` (Word32
h2 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
15)
  in Word32 -> Hash32
Hash32 Word32
h3

defaultSeed :: Word32
defaultSeed :: Word32
defaultSeed = Word32
0xdeadbeef -- not 0, otherwise hash32 [0] == hash32 []

{-

On a CPU with two multipliers and two ALUs, Murmur2 can process one
word in every two cycles + set up (2 cycles) and finish (3 cycles).

Here's the data flow graph:

@
     h    k1   k2  k3  ...
     |    |     |   |
     |    * m   |   |
     |    |\    |   |
     |    | >> r    |
     |    | /   |   |
     |   xor    *   |
     |    |     |\  |
     * m  * m   | >> r
      \  /      |/  |
       xor     xor  * m
         \     /    |\
          * m * m   | >> r
           \ /      |/
           xor     xor
             \     /
              * m * m
               \ /
               xor
                 \
                  ...
@
-}

-- -------------------------------------------------------------------
-- Instances

instance Hashable32 Char where
  hash32Add :: Char -> Hash32 -> Hash32
hash32Add Char
c = Int -> Hash32 -> Hash32
hash32AddInt (Char -> Int
ord Char
c)

instance Hashable32 Int where
  hash32Add :: Int -> Hash32 -> Hash32
hash32Add = Int -> Hash32 -> Hash32
hash32AddInt

instance Hashable32 Word32 where
  hash32Add :: Word32 -> Hash32 -> Hash32
hash32Add = Word32 -> Hash32 -> Hash32
hash32AddWord32

instance Hashable32 a => Hashable32 [a] where
  hash32Add :: [a] -> Hash32 -> Hash32
hash32Add = [a] -> Hash32 -> Hash32
forall a (c :: * -> *).
(Hashable32 a, Foldable c) =>
c a -> Hash32 -> Hash32
hash32AddFoldable

instance Hashable32 Integer where
  -- Within Int range, make sure they hash to exactly the same value
  hash32Add :: Integer -> Hash32 -> Hash32
hash32Add Integer
i0
   | Integer
i0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
forall a. Bounded a => a
minBound :: Int) Bool -> Bool -> Bool
&&
      Integer
i0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
forall a. Bounded a => a
maxBound :: Int)
   = Int -> Hash32 -> Hash32
hash32AddInt (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
i0)
   | Bool
otherwise
   -- Prefix by sign, then hash the raw data words, starting with LSB
   = Bool -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add (Integer -> Integer
forall a. Num a => a -> a
signum Integer
i0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
0) (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine`
     [Word32] -> Hash32 -> Hash32
forall a (c :: * -> *).
(Hashable32 a, Foldable c) =>
c a -> Hash32 -> Hash32
hash32AddFoldable ((Integer -> Maybe (Word32, Integer)) -> Integer -> [Word32]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Integer -> Maybe (Word32, Integer)
forall {a}. Num a => Integer -> Maybe (a, Integer)
f (Integer -> Integer
forall a. Num a => a -> a
abs Integer
i0) :: [Word32])
    where
      f :: Integer -> Maybe (a, Integer)
f Integer
i | Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0 = Maybe (a, Integer)
forall a. Maybe a
Nothing
      f Integer
i =
        let (Integer
i', Integer
a) = Integer -> Integer -> (Integer, Integer)
forall a. Integral a => a -> a -> (a, a)
quotRem Integer
i Integer
maxWord in
        (a, Integer) -> Maybe (a, Integer)
forall a. a -> Maybe a
Just (Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
a, Integer
i')
      maxWord :: Integer
maxWord = Word32 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
forall a. Bounded a => a
maxBound :: Word32) Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
1 :: Integer

instance Hashable32 Bool where
  hash32Add :: Bool -> Hash32 -> Hash32
hash32Add Bool
False = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
1
  hash32Add Bool
True = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
2

instance Hashable32 a => Hashable32 (Maybe a) where
  hash32Add :: Maybe a -> Hash32 -> Hash32
hash32Add Maybe a
Nothing = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
3
  hash32Add (Just a
a) = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
4 (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a

instance (Hashable32 a, Hashable32 b) => Hashable32 (Either a b) where
  hash32Add :: Either a b -> Hash32 -> Hash32
hash32Add (Left a
a) = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
5 (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a
  hash32Add (Right b
b) = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
6 (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` b -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add b
b

instance Hashable32 () where
  hash32Add :: () -> Hash32 -> Hash32
hash32Add () = Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
7

instance (Hashable32 a, Hashable32 b) => Hashable32 (a, b) where
  hash32Add :: (a, b) -> Hash32 -> Hash32
hash32Add (a
a, b
b) = a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` b -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add b
b

instance (Hashable32 a, Hashable32 b, Hashable32 c)
    => Hashable32 (a, b, c) where
  hash32Add :: (a, b, c) -> Hash32 -> Hash32
hash32Add (a
a, b
b, c
c) =
    a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` b -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add b
b (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` c -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add c
c

instance (Hashable32 a, Hashable32 b, Hashable32 c, Hashable32 d)
    => Hashable32 (a, b, c, d) where
  hash32Add :: (a, b, c, d) -> Hash32 -> Hash32
hash32Add (a
a, b
b, c
c, d
d) =
    a -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add a
a (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` b -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add b
b (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine`
    c -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add c
c (Hash32 -> Hash32) -> (Hash32 -> Hash32) -> Hash32 -> Hash32
`combine` d -> Hash32 -> Hash32
forall a. Hashable32 a => a -> Hash32 -> Hash32
hash32Add d
d

instance Hashable32 B.ByteString where
  hash32Add :: ByteString -> Hash32 -> Hash32
hash32Add ByteString
bs Hash32
h = (Hash32 -> Word8 -> Hash32) -> Hash32 -> ByteString -> Hash32
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl' Hash32 -> Word8 -> Hash32
forall {a}. Integral a => Hash32 -> a -> Hash32
go (Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
8 Hash32
h) ByteString
bs
    where go :: Hash32 -> a -> Hash32
go Hash32
acc a
b = Word32 -> Hash32 -> Hash32
hash32AddWord32 (a -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
b) Hash32
acc

instance Hashable32 L.ByteString where
  hash32Add :: ByteString -> Hash32 -> Hash32
hash32Add ByteString
bs Hash32
h = (Hash32 -> Word8 -> Hash32) -> Hash32 -> ByteString -> Hash32
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
L.foldl' Hash32 -> Word8 -> Hash32
forall {a}. Integral a => Hash32 -> a -> Hash32
go (Word32 -> Hash32 -> Hash32
hash32AddWord32 Word32
9 Hash32
h) ByteString
bs
    where go :: Hash32 -> a -> Hash32
go Hash32
acc a
b = Word32 -> Hash32 -> Hash32
hash32AddWord32 (a -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
b) Hash32
acc