{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE TupleSections #-} -- | This module defines the 'Distribution' monad and its operations. -- -- A @'Distribution' a@ is a discrete probability distribution over values of -- type @a@. -- -- You can define distributions in several ways: -- -- * Choosing from the common distributions exported by this module, such as -- a 'categorical', 'uniform', 'geometric', 'bernoulli', 'binomial', -- 'negativeBinomial', 'hypergeometric', or 'poisson' distribution. -- * Operating on existing distributions using the 'Functor', 'Applicative', and -- 'Monad' instances, or by conditioning on events using 'conditional' or -- 'finiteConditional'. -- -- Once you have a distribution, you can sample from it using 'sample', list its -- outcomes and their probabilities using 'probabilities' or 'possibilities', -- and compute various statistics using 'probability', 'approxProbability', -- 'expectation', 'variance', 'stddev', 'entropy', 'relativeEntropy', or -- 'mutualInformation'. -- -- It's important to make a distinction between *finite* and *infinite* -- distributions. An infinite distribution is one whose list of 'possibilities' -- is infinite. Note that this *includes* distributions for which there are -- only finitely many distinct outcomes, but still an infinite number of paths -- to reach these outcomes. Infinite distributions typically arise from -- recursive expressions. Certain functions only work on finite distributions, -- and will hang or OOM if given an infinite distribution. -- -- For example, if you express the process of rolling a six-sided die, but -- always rerolling if the result is one, then there are five distinct outcomes: -- 2, 3, 4, 5, or 6. Nevertheless, this is an infinite distribution, because -- it's possible to roll any number of ones prior to the final result. module Probability.Distribution ( -- * Types Distribution, Event, RandVar, EventView (..), -- * Basic operations possibilities, probabilities, simplify, sample, viewEvent, fromEventView, finitize, finitizeMaybe, conditional, finiteConditional, bayesian, finiteBayesian, -- * Common distributions categorical, uniform, geometric, bernoulli, binomial, negativeBinomial, hypergeometric, poisson, -- * Analysis probability, probabilityBounds, approxProbability, expectation, variance, stddev, entropy, relativeEntropy, mutualInformation, ) where import Control.Applicative (liftA2) import Control.Monad (ap) import Data.Bifunctor (Bifunctor (..)) import Data.Bool (bool) import Data.Map (Map) import qualified Data.Map as Map import qualified Data.Sequence as Seq import qualified Data.Set as Set import Data.Tuple (swap) import System.Random (randomRIO) -- | A probability distribution of values. data Distribution prob a = Certainly a | Choice prob (Distribution prob a) (Distribution prob a) deriving (Functor) instance Bifunctor Distribution where bimap _ g (Certainly a) = Certainly (g a) bimap f g (Choice p a b) = Choice (f p) (bimap f g a) (bimap f g b) instance Applicative (Distribution prob) where pure = Certainly (<*>) = ap instance Monad (Distribution prob) where Certainly x >>= f = f x Choice p a b >>= f = Choice p (a >>= f) (b >>= f) instance Num a => Num (Distribution prob a) where (+) = liftA2 (+) (-) = liftA2 (-) (*) = liftA2 (*) abs = fmap abs signum = fmap signum negate = fmap negate fromInteger = pure . fromInteger instance Fractional a => Fractional (Distribution prob a) where (/) = liftA2 (/) recip = fmap recip fromRational = pure . fromRational -- | An event is a predicate on values from a sample space. type Event s = s -> Bool -- | A random variable is a function mapping each element of a sample space to -- the corresponding value of the random variable. type RandVar s a = s -> a -- | Gives the list of all possible values of a given probability distribution. -- The list will often contain multiple entries for the same outcome, in which -- case the true probability for that outcome is the sum of probabilities of all -- entries. -- -- In the finite case, multiple entries can be combined by using 'simplify' on -- the 'Distribution' first. possibilities :: Num prob => Distribution prob a -> [(prob, a)] possibilities dist = go (Seq.singleton (1, dist)) where go Seq.Empty = [] go ((p, Certainly x) Seq.:<| queue') = (p, x) : go queue' go ((p, Choice q a b) Seq.:<| queue') = go (queue' Seq.:|> (p * q, a) Seq.:|> (p * (1 - q), b)) -- | Truncates an infinite distribution to make it finite. The epsilon -- parameter is the amount of tail probability that you're willing to ignore -- and assign to an arbitrary outcome. finitize :: (Fractional prob, Ord prob) => prob -> Distribution prob a -> Distribution prob a finitize epsilon = categorical . go 1 . possibilities where go q ((p, x) : poss) | q - p < epsilon = [(q, x)] | otherwise = (p, x) : go (q - p) poss go _ [] = [] -- | Truncates an infinite distribution to make it finite. This is equivalent -- to the original distribution, except with some arbitrary set of outcomes with -- probability less than epsilon replaced by Nothing. finitizeMaybe :: (Fractional prob, Ord prob) => prob -> Distribution prob a -> Distribution prob (Maybe a) finitizeMaybe epsilon = categorical . go 1 . possibilities where go q ((p, x) : poss) | q - p < epsilon = [(q, Nothing)] | otherwise = (p, Just x) : go (q - p) poss go _ [] = [] -- | Gives a map from outcomes to their probabilities in the given distribution. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. probabilities :: (Num prob, Ord a) => Distribution prob a -> Map a prob probabilities = Map.fromListWith (+) . fmap swap . possibilities -- | Simplifies a finite distribution. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. simplify :: (Fractional prob, Ord a) => Distribution prob a -> Distribution prob a simplify = categorical . fmap swap . Map.toList . probabilities -- | Samples the probability distribution to produce a value. sample :: Real prob => Distribution prob a -> IO a sample (Certainly x) = return x sample (Choice p a b) = bool (sample b) (sample a) . (< realToFrac p) =<< randomRIO (0 :: Double, 1) -- | A view of a probability distribution from the point of view of a given -- event. The event either always happens, never happens, or happens sometimes -- with some probability. In the latter case, there are posterior distributions -- for when the event does or does not happen. data EventView prob s = Always (Distribution prob s) | Never (Distribution prob s) | Sometimes prob (Distribution prob s) (Distribution prob s) -- | Gives a view on a probability distribution relative to some event. -- -- The following are guaranteed. -- 1. @'fromEventView' . 'viewEvent' ev = id@ -- 2. If @'viewEvent' ev dist = 'Always' dist'@, then @dist = dist'@ and -- @'probability' ev dist = 1@. -- 3. If @'viewEvent' ev dist = 'Never' dist'@, then @dist = dist'@ and -- @'probability' ev dist = 0@. -- 4. If @'viewEvent' ev dist = 'Sometimes' p a b@, then -- @'probability' ev dist = p@ and: -- * @dist = 'bernoulli' p >>= bool a b@ -- * @'probability' ev a = 1@ -- * @'probability' ev b = 0@ -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. viewEvent :: Fractional prob => Event s -> Distribution prob s -> EventView prob s viewEvent event dist@(Certainly x) | event x = Always dist | otherwise = Never dist viewEvent event dist@(Choice p aa bb) = case (viewEvent event aa, viewEvent event bb) of (Never _, Never _) -> Never dist (Always _, Always _) -> Always dist (Always a, Never b) -> Sometimes p a b (Never a, Always b) -> Sometimes (1 - p) b a (Sometimes q a1 a2, Never b) -> let (p', _, p2) = blend q 0 in Sometimes p' a1 (Choice p2 a2 b) (Sometimes q a1 a2, Always b) -> let (p', p1, _) = blend q 1 in Sometimes p' (Choice p1 a1 b) a2 (Never a, Sometimes r b1 b2) -> let (p', _, p2) = blend 0 r in Sometimes p' b1 (Choice p2 a b2) (Always a, Sometimes r b1 b2) -> let (p', p1, _) = blend 1 r in Sometimes p' (Choice p1 a b1) b2 (Sometimes q a1 a2, Sometimes r b1 b2) -> let (p', p1, p2) = blend q r in Sometimes p' (Choice p1 a1 b1) (Choice p2 a2 b2) where blend q r = let p' = p * q + (1 - p) * r in (p', p * q / p', p * (1 - q) / (1 - p')) -- | Converts from 'EventView' back to a 'Distribution'. The resulting -- distribution is equivalent to the source distribution. fromEventView :: EventView prob s -> Distribution prob s fromEventView (Always dist) = dist fromEventView (Never dist) = dist fromEventView (Sometimes p a b) = Choice p a b -- | Produces the conditional probability distribution, assuming some event. -- This function works for all distributions, but always produces an infinite -- distribution for non-trivial events. conditional :: Event s -> Distribution prob s -> Distribution prob s conditional event dist = cdist where cdist = do x <- dist if event x then return x else cdist -- | Produces the conditional probability distribution, assuming some event. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. finiteConditional :: Fractional prob => Event s -> Distribution prob s -> Distribution prob s finiteConditional event dist = categorical (map (first (/ p_event)) filtered) where filtered = filter (event . snd) (possibilities dist) p_event = sum (map fst filtered) -- | Updates a prior distribution of parameters for a model, based on an -- observed event. This implements Bayes' Law for distributions. -- -- This function works for all distributions, but always produces an infinite -- distribution for non-trivial events. bayesian :: (param -> Distribution prob s) -> Event s -> Distribution prob param -> Distribution prob param bayesian model event prior = posterior where posterior = do param <- prior x <- model param if event x then return param else posterior -- | Updates a prior distribution of parameters for a model, based on an -- observed event. This implements Bayes' Law for distributions. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. finiteBayesian :: Fractional prob => (param -> Distribution prob s) -> Event s -> Distribution prob param -> Distribution prob param finiteBayesian model event prior = case viewEvent (event . snd) withParam of Always dist -> fst <$> dist Never _ -> error "Posterior is undefined for an impossible event" Sometimes _ dist _ -> fst <$> dist where withParam = do param <- prior obs <- model param return (param, obs) -- | A distribution with a fixed probability for each outcome. The -- probabilities should add to 1, but this is not checked. categorical :: Fractional prob => [(prob, a)] -> Distribution prob a categorical = go 1 where go _ [] = error "Empty distribution is not allowed" go _ [(_, x)] = Certainly x go p ((q, x) : xs) = Choice (q / p) (Certainly x) (go (p - q) xs) -- | A uniform distribution over a list of values. uniform :: Fractional prob => [a] -> Distribution prob a uniform xs = categorical $ (recip n,) <$> xs where n = realToFrac (length xs) -- | Geometric distribution over a list of possibilities. geometric :: prob -> [a] -> Distribution prob a geometric _ [] = error "geometric: Empty distribution is not allowed" geometric _ [x] = Certainly x geometric p (x : xs) = Choice p (Certainly x) (geometric p xs) -- | A Bernoulli distribution. This gives True with probability @p@, and False -- otherwise. bernoulli :: prob -> Distribution prob Bool bernoulli p = Choice p (Certainly True) (Certainly False) -- | Computes nCk. This is a building block for several well-known discrete -- distributions. choose :: Integral t => t -> t -> t n `choose` k | k > n `div` 2 = n `choose` (n - k) | otherwise = product [n - k + 1 .. n] `div` product [1 .. k] -- | A binomial distribution. This gives the distribution of number of -- successes in @n@ trials with probability @p@ of success. binomial :: (Fractional prob, Integral n) => prob -> n -> Distribution prob n binomial p n = categorical [ (fromIntegral (n `choose` k) * p ^ k * (1 - p) ^ (n - k), k) | k <- [0 .. n] ] -- | Negative binomial distribution. This gives the distribution of number of -- failures before @r@ successes with probability @p@ of success. negativeBinomial :: (Fractional prob, Integral n) => prob -> n -> Distribution prob n negativeBinomial _ 0 = pure 0 negativeBinomial p r = categorical [ (fromIntegral ((k + r - 1) `choose` (r - 1)) * p ^ r * (1 - p) ^ k, k) | k <- [0 ..] ] -- | Hypergeometric distribution. This gives the distribution of number of -- successful draws out of @n@ attempts without replacement, when @k@ -- possibilities are successful. hypergeometric :: (Fractional prob, Integral n) => n -> n -> n -> Distribution prob n hypergeometric pop k n = categorical [ ( fromIntegral ((k `choose` m) * ((pop - k) `choose` (n - m))) / fromIntegral (pop `choose` n), m ) | m <- [lo .. hi] ] where lo = max 0 (n + k - pop) hi = min n k -- | Poisson distribution. Gives the number of independent events occurring in -- a fixed time interval, if events are occurring at the given expected rate per -- time interval. poisson :: (Floating prob, Integral n) => prob -> Distribution prob n poisson lambda = categorical [ (lambda ^ k * exp (-lambda) / fromIntegral (product [1 .. k]), k) | k <- [0 ..] ] -- | Computes the probability of an event, represented by a predicate on values. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. probability :: Num prob => Event s -> Distribution prob s -> prob probability event (Certainly x) = if event x then 1 else 0 probability event (Choice p a b) = p * probability event a + (1 - p) * probability event b -- | Like probability, but produces a lazy list of ever-improving bounds on the -- probability. This can be used on infinite distributions, for which the -- exact probability cannot be calculated. probabilityBounds :: Num prob => Event s -> Distribution prob s -> [(prob, prob)] probabilityBounds event dist = go 0 1 (possibilities dist) where go p _ [] = [(p, p)] go p q ((q', x) : xs) | event x = (p, p + q) : go (p + q') (q - q') xs | otherwise = (p, p + q) : go p (q - q') xs -- | Like probability, but produces a value that differs from the true -- probability by at most epsilon. This can be used on infinite distributions, -- for which the exact probability cannot be calculated. approxProbability :: (Ord prob, Fractional prob) => prob -> Event s -> Distribution prob s -> prob approxProbability epsilon event dist = (/ 2) . uncurry (+) . head . dropWhile ((> epsilon) . abs . uncurry (-)) $ probabilityBounds event dist -- | Computes the expected value of a finite distribution. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. expectation :: Num a => Distribution a a -> a expectation (Certainly x) = x expectation (Choice p a b) = p * expectation a + (1 - p) * expectation b -- | Computes the variance of a finite distribution. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. variance :: Num a => Distribution a a -> a variance dist = expectation ((^ (2 :: Int)) . subtract mean <$> dist) where mean = expectation dist -- | Computes the standard deviation of a finite distribution. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. stddev :: Floating a => Distribution a a -> a stddev dist = sqrt (variance dist) -- | Computes the entropy of a distribution in bits. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. entropy :: (Floating prob, Ord a) => Distribution prob a -> prob entropy dist = sum [ -p * logBase 2 p | p <- map fst (possibilities (simplify dist)) ] -- | Computes the relative entropy, also known as Kullback-Leibler divergence, -- between two distributions in bits. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. relativeEntropy :: (Eq prob, Floating prob, Ord a) => Distribution prob a -> Distribution prob a -> prob relativeEntropy post prior = sum (term <$> Set.toList vals) where prob_post = probabilities post prob_prior = probabilities prior vals = Map.keysSet prob_post `Set.union` Map.keysSet prob_prior term x = let p = Map.findWithDefault 0 x prob_post q = Map.findWithDefault 0 x prob_prior in if p == 0 then 0 else p * logBase 2 (p / q) -- | Computes the mutual information between two random variables, in bits. The -- given distribution is taken as a definition of a probability space, and the -- random variables are represented as functions from the sample space to values -- taken by the random variable. -- -- This only works for finite distributions. Infinite distributions (including -- even distributions with finitely many outcomes, but infinitely many paths to -- reach those outcomes) will hang. mutualInformation :: (Eq prob, Floating prob, Ord a, Ord b) => RandVar s a -> RandVar s b -> Distribution prob s -> prob mutualInformation f g dist = sum (term <$> Map.keys f_probs <*> Map.keys g_probs) where joint_probs = probabilities ((\x -> (f x, g x)) <$> dist) f_probs = probabilities (f <$> dist) g_probs = probabilities (g <$> dist) term x y = let p_x = Map.findWithDefault 0 x f_probs p_y = Map.findWithDefault 0 y g_probs p_xy = Map.findWithDefault 0 (x, y) joint_probs in if p_xy == 0 then 0 else p_xy * logBase 2 (p_xy / (p_x * p_y))