{-# LANGUAGE Trustworthy, DeriveDataTypeable, DeriveGeneric #-} -- | An approximate vector clock implementation in terms of -- "Data.VectorClock.Simple". module Data.VectorClock.Approximate ( -- * Usage example -- $example -- * Vector clock type VectorClock, -- * Construction empty, singleton, fromList, toList, -- * Query null, size, member, lookup, -- * Insertion insert, inc, incWithDefault, -- * Deletion delete, -- * Merges combine, max, diff, -- * Relations Relation(..), relation, causes, -- * Debugging valid ) where import Prelude hiding ( null, lookup, max ) import qualified Prelude import Control.Applicative ( (<$>) ) import Data.Binary ( Binary(..) ) import Data.Data ( Data, Typeable ) import Data.Foldable ( Foldable(foldMap), foldl' ) import Data.Hashable ( Hashable, hash ) import Data.Traversable ( Traversable(..) ) import GHC.Generics ( Generic ) import Data.VectorClock.Simple ( Relation(..) ) import qualified Data.VectorClock.Simple as VC -- $example -- -- See "Data.VectorClock.Simple" for a more detailed example of using -- vector clock. -- -- An approximate vector clock, is like a normal one, but maps -- multiple keys to the same entry. Concretely, this is done by first -- hashing the keys, then using them modulo the vector clock's size. -- So, an approximate vector clock of size 1 would map all the keys to -- the same entry; an approximate vector clock of size 2 would map -- roughly half its keys to the first entry, and half to the second -- entry. -- -- To create an approximate vector clock, start from 'empty' and -- 'insert' elements into it. As a shortcut, 'fromList' just inserts -- all the elements in a list, in order. You must also specify the -- vector clock's maximum size when creating it. Experimental results -- suggest that small maximum sizes (e.g. 3 or 4) will yield good -- resulsts in practice. Higher maximum sizes will have no adverse -- effects, and will effectively turn approximate vector clocks into -- normal ones. -- -- > let vc = empty 4 in -- > let vc' = insert 'a' 1 vc in -- > let vc'' = insert 'b' 2 vc in -- > vc'' == fromList 4 [('a', 1), ('b', 2)] -- | An approximate vector clock is a normal vector clock, but several -- keys are mapped to the same value. This can lead to /false/ -- /positive/ 'relation's. In other words, the fact that one vector -- clock causes another is no longer enough information to say that -- one message causes the other. That said, experimental results show -- that approximate vector clocks have good results in practice; see -- the paper by R. Baldoni and M. Raynal for details. data VectorClock a b = VectorClock { vcClock :: VC.VectorClock Int b , vcSize :: Int } deriving ( Data, Typeable, Generic ) instance (Eq b) => Eq (VectorClock a b) where vc1 == vc2 = vcClock vc1 == vcClock vc2 instance (Show b) => Show (VectorClock a b) where show = show . vcClock instance (Binary b) => Binary (VectorClock a b) where put vc = do put (vcClock vc) put (vcSize vc) get = do xys <- get k <- get return (VectorClock { vcClock = xys, vcSize = k }) instance Foldable (VectorClock a) where foldMap f = foldMap f . vcClock instance Functor (VectorClock a) where fmap f vc = vc { vcClock = fmap f (vcClock vc) } instance Traversable (VectorClock a) where traverse f vc = (\xys -> vc { vcClock = xys }) <$> traverse f (vcClock vc) -- | /O(1)/. The empty vector clock. empty :: Int -- ^ /size/: the maximum number of -- entries in the vector clock -> VectorClock a b empty k = VectorClock { vcClock = VC.empty, vcSize = k } -- | /O(N)/. Insert each entry in the list one at a time. fromList :: (Hashable a) => Int -- ^ /size/: the maximum number of -- entries in the vector clock -> [(a, b)] -- ^ /entries/: the entries to insert -- in the newly created vector clock -> VectorClock a b fromList k = foldl' (\vc (x, y) -> insert x y vc) (empty k) -- | /O(1)/. All the entries in the vector clock. Note that this is -- /not/ the inverse of 'fromList'. Note that the keys are returned -- /hashed/. toList :: VectorClock a b -> [(Int, b)] toList = VC.toList . vcClock -- | /O(1)/. A vector clock with a single element. singleton :: (Hashable a) => Int -- ^ /size/: the maximum number of -- entries in the vector clock -> a -- ^ /key/: the key for the entry -> b -- ^ /value/: the value for the entry -> VectorClock a b singleton k x y = fromList k [(x, y)] -- | /O(1)/. Is the vector clock empty? null :: VectorClock a b -> Bool null = VC.null . vcClock -- | /O(N)/. The number of entries in the vector clock. Note that -- this may be less than the /size/ at construction. size :: VectorClock a b -> Int size = VC.size . vcClock -- | /O(N)/. Is the given key a key in an entry of the vector clock? member :: (Hashable a) => a -> VectorClock a b -> Bool member x (VectorClock { vcClock = xys, vcSize = k }) = VC.member (mapKey x k) xys -- | /O(N)/. Lookup the value for a key in the vector clock. lookup :: (Hashable a) => a -> VectorClock a b -> Maybe b lookup x (VectorClock { vcClock = xys, vcSize = k }) = VC.lookup (mapKey x k) xys -- | /O(N)/. Insert or replace the entry for a key. insert :: (Hashable a) => a -> b -> VectorClock a b -> VectorClock a b insert x y vc@(VectorClock { vcClock = xys, vcSize = k }) = let xys' = VC.insert (mapKey x k) y xys in vc { vcClock = xys' } -- | /O(N)/. Increment the entry for a key. inc :: (Hashable a, Num b) => a -> VectorClock a b -> Maybe (VectorClock a b) inc x vc@(VectorClock { vcClock = xys, vcSize = k }) = do xys' <- VC.inc (mapKey x k) xys return (vc { vcClock = xys' }) -- | /O(N)/. Increment the entry for a key. If the key does not -- exist, assume it was the default. incWithDefault :: (Hashable a, Num b) => a -- ^ /key/: the key of the entry -> VectorClock a b -- ^ /vc/: the vector clock -> b -- ^ /default/: if the key is not -- found, assume its value was the -- /default/ and increment that -> VectorClock a b incWithDefault x vc@(VectorClock { vcClock = xys, vcSize = k }) y' = let xys' = VC.incWithDefault (mapKey x k) xys y' in vc { vcClock = xys' } -- | /O(N)/. Delete an entry from the vector clock. If the requested -- entry does not exist, does nothing. delete :: (Hashable a) => a -> VectorClock a b -> VectorClock a b delete x vc@(VectorClock { vcClock = xys, vcSize = k }) = let xys' = VC.delete (mapKey x k) xys in vc { vcClock = xys' } -- | /O(max(N, M))/. Combine two vector clocks entry-by-entry. The -- size of the resulting vector clock is the maximum of the sizes of -- the given ones. combine :: (Ord b) => (Int -> Maybe b -> Maybe b -> Maybe b) -- ^ a function that takes the hashed /key/, the value of the -- entry in the left hand vector clock, if it exists, the value in -- the right hand vector clock, if it exists, and, if it wishes to -- keep a value for this /key/ in the resulting vector clock, -- returns it. -> VectorClock a b -- ^ /lhs/: the left hand vector clock -> VectorClock a b -- ^ /rhs/: the right hand vector clock -> VectorClock a b combine f (VectorClock { vcClock = xys1, vcSize = k1 }) (VectorClock { vcClock = xys2, vcSize = k2 }) = let xys' = VC.combine f xys1 xys2 in VectorClock { vcClock = xys', vcSize = Prelude.max k1 k2 } -- | /O(max(N, M))/. The maximum of the two vector clocks. max :: (Ord b) => VectorClock a b -> VectorClock a b -> VectorClock a b max = combine maxEntry where maxEntry _ Nothing Nothing = Nothing maxEntry _ x@(Just _) Nothing = x maxEntry _ Nothing y@(Just _) = y maxEntry _ (Just x) (Just y) = Just (Prelude.max x y) -- | /O(min(N, M))/. The relation between the two vector clocks. relation :: (Ord b) => VectorClock a b -> VectorClock a b -> Relation relation (VectorClock { vcClock = xys1 }) (VectorClock { vcClock = xys2 }) = VC.relation xys1 xys2 -- | /O(min(N, M))/. Short-hand for @relation vc1 vc2 == Causes@. causes :: (Ord a, Ord b) => VectorClock a b -> VectorClock a b -> Bool causes vc1 vc2 = relation vc1 vc2 == Causes -- | /O(M)/. If @vc2 `causes` vc1@, compute the smallest @vc3@ -- s.t. @max vc3 vc2 == vc1@. Note that the /first/ parameter is the -- newer vector clock. diff :: (Ord a, Ord b) => VectorClock a b -> VectorClock a b -> Maybe (VectorClock a b) diff vc1 vc2 = do xys <- VC.diff (vcClock vc1) (vcClock vc2) return (vc1 { vcClock = xys }) -- | Map a key into the domain of approximate keys. mapKey :: (Hashable a) => a -> Int -> Int mapKey x k = hash x `mod` k -- | /O(N)/. Check whether the vector clock is valid or not. valid :: (Ord b) => VectorClock a b -> Bool valid vc@(VectorClock { vcClock = xys, vcSize = k }) = size vc <= k && VC.valid xys