{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE TupleSections #-}

-- | This module defines the 'Distribution' monad and its operations.
--
-- A @'Distribution' a@ is a discrete probability distribution over values of
-- type @a@.
--
-- You can define distributions in several ways:
--
-- * Choosing from the common distributions exported by this module, such as
--   a 'categorical', 'uniform', 'geometric', 'bernoulli', 'binomial',
--   'negativeBinomial', 'hypergeometric', or 'poisson' distribution.
-- * Operating on existing distributions using the 'Functor', 'Applicative', and
--   'Monad' instances, or by conditioning on events using 'conditional' or
--   'finiteConditional'.
--
-- Once you have a distribution, you can sample from it using 'sample', list its
-- outcomes and their probabilities using 'probabilities' or 'possibilities',
-- and compute various statistics using 'probability', 'approxProbability',
-- 'expectation', 'variance', 'stddev', 'entropy', 'relativeEntropy', or
-- 'mutualInformation'.
--
-- It's important to make a distinction between *finite* and *infinite*
-- distributions.  An infinite distribution is one whose list of 'possibilities'
-- is infinite.  Note that this *includes* distributions for which there are
-- only finitely many distinct outcomes, but still an infinite number of paths
-- to reach these outcomes.  Infinite distributions typically arise from
-- recursive expressions.  Certain functions only work on finite distributions,
-- and will hang or OOM if given an infinite distribution.
--
-- For example, if you express the process of rolling a six-sided die, but
-- always rerolling if the result is one, then there are five distinct outcomes:
-- 2, 3, 4, 5, or 6.  Nevertheless, this is an infinite distribution, because
-- it's possible to roll any number of ones prior to the final result.
module Probability.Distribution
  ( -- * Types
    Distribution,
    Event,
    RandVar,
    EventView (..),

    -- * Basic operations
    possibilities,
    probabilities,
    simplify,
    sample,
    viewEvent,
    fromEventView,
    finitize,
    finitizeMaybe,
    conditional,
    finiteConditional,
    bayesian,
    finiteBayesian,

    -- * Common distributions
    categorical,
    uniform,
    geometric,
    bernoulli,
    binomial,
    negativeBinomial,
    hypergeometric,
    poisson,

    -- * Analysis
    probability,
    probabilityBounds,
    approxProbability,
    expectation,
    variance,
    stddev,
    entropy,
    relativeEntropy,
    mutualInformation,
  )
where

import Control.Applicative (liftA2)
import Control.Monad (ap)
import Data.Bifunctor (Bifunctor (..))
import Data.Bool (bool)
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Data.Tuple (swap)
import System.Random (randomRIO)

-- | A probability distribution of values.
data Distribution prob a
  = Certainly a
  | Choice prob (Distribution prob a) (Distribution prob a)
  deriving (forall a b. a -> Distribution prob b -> Distribution prob a
forall a b. (a -> b) -> Distribution prob a -> Distribution prob b
forall prob a b. a -> Distribution prob b -> Distribution prob a
forall prob a b.
(a -> b) -> Distribution prob a -> Distribution prob b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> Distribution prob b -> Distribution prob a
$c<$ :: forall prob a b. a -> Distribution prob b -> Distribution prob a
fmap :: forall a b. (a -> b) -> Distribution prob a -> Distribution prob b
$cfmap :: forall prob a b.
(a -> b) -> Distribution prob a -> Distribution prob b
Functor)

instance Bifunctor Distribution where
  bimap :: forall a b c d.
(a -> b) -> (c -> d) -> Distribution a c -> Distribution b d
bimap a -> b
_ c -> d
g (Certainly c
a) = forall prob a. a -> Distribution prob a
Certainly (c -> d
g c
a)
  bimap a -> b
f c -> d
g (Choice a
p Distribution a c
a Distribution a c
b) = forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice (a -> b
f a
p) (forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap a -> b
f c -> d
g Distribution a c
a) (forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap a -> b
f c -> d
g Distribution a c
b)

instance Applicative (Distribution prob) where
  pure :: forall a. a -> Distribution prob a
pure = forall prob a. a -> Distribution prob a
Certainly
  <*> :: forall a b.
Distribution prob (a -> b)
-> Distribution prob a -> Distribution prob b
(<*>) = forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap

instance Monad (Distribution prob) where
  Certainly a
x >>= :: forall a b.
Distribution prob a
-> (a -> Distribution prob b) -> Distribution prob b
>>= a -> Distribution prob b
f = a -> Distribution prob b
f a
x
  Choice prob
p Distribution prob a
a Distribution prob a
b >>= a -> Distribution prob b
f = forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p (Distribution prob a
a forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= a -> Distribution prob b
f) (Distribution prob a
b forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= a -> Distribution prob b
f)

instance Num a => Num (Distribution prob a) where
  + :: Distribution prob a -> Distribution prob a -> Distribution prob a
(+) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 forall a. Num a => a -> a -> a
(+)
  (-) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 (-)
  * :: Distribution prob a -> Distribution prob a -> Distribution prob a
(*) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 forall a. Num a => a -> a -> a
(*)
  abs :: Distribution prob a -> Distribution prob a
abs = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Num a => a -> a
abs
  signum :: Distribution prob a -> Distribution prob a
signum = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Num a => a -> a
signum
  negate :: Distribution prob a -> Distribution prob a
negate = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Num a => a -> a
negate
  fromInteger :: Integer -> Distribution prob a
fromInteger = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Num a => Integer -> a
fromInteger

instance Fractional a => Fractional (Distribution prob a) where
  / :: Distribution prob a -> Distribution prob a -> Distribution prob a
(/) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 forall a. Fractional a => a -> a -> a
(/)
  recip :: Distribution prob a -> Distribution prob a
recip = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Fractional a => a -> a
recip
  fromRational :: Rational -> Distribution prob a
fromRational = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Fractional a => Rational -> a
fromRational

-- | An event is a predicate on values from a sample space.
type Event s = s -> Bool

-- | A random variable is a function mapping each element of a sample space to
-- the corresponding value of the random variable.
type RandVar s a = s -> a

-- | Gives the list of all possible values of a given probability distribution.
-- The list will often contain multiple entries for the same outcome, in which
-- case the true probability for that outcome is the sum of probabilities of all
-- entries.
--
-- In the finite case, multiple entries can be combined by using 'simplify' on
-- the 'Distribution' first.
possibilities :: Num prob => Distribution prob a -> [(prob, a)]
possibilities :: forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities Distribution prob a
dist = forall {a} {b}. Num a => Seq (a, Distribution a b) -> [(a, b)]
go (forall a. a -> Seq a
Seq.singleton (prob
1, Distribution prob a
dist))
  where
    go :: Seq (a, Distribution a b) -> [(a, b)]
go Seq (a, Distribution a b)
Seq.Empty = []
    go ((a
p, Certainly b
x) Seq.:<| Seq (a, Distribution a b)
queue') = (a
p, b
x) forall a. a -> [a] -> [a]
: Seq (a, Distribution a b) -> [(a, b)]
go Seq (a, Distribution a b)
queue'
    go ((a
p, Choice a
q Distribution a b
a Distribution a b
b) Seq.:<| Seq (a, Distribution a b)
queue') =
      Seq (a, Distribution a b) -> [(a, b)]
go (Seq (a, Distribution a b)
queue' forall a. Seq a -> a -> Seq a
Seq.:|> (a
p forall a. Num a => a -> a -> a
* a
q, Distribution a b
a) forall a. Seq a -> a -> Seq a
Seq.:|> (a
p forall a. Num a => a -> a -> a
* (a
1 forall a. Num a => a -> a -> a
- a
q), Distribution a b
b))

-- | Truncates an infinite distribution to make it finite.  The epsilon
-- parameter is the amount of tail probability that you're willing to ignore
-- and assign to an arbitrary outcome.
finitize ::
  (Fractional prob, Ord prob) =>
  prob ->
  Distribution prob a ->
  Distribution prob a
finitize :: forall prob a.
(Fractional prob, Ord prob) =>
prob -> Distribution prob a -> Distribution prob a
finitize prob
epsilon = forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {b}. prob -> [(prob, b)] -> [(prob, b)]
go prob
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities
  where
    go :: prob -> [(prob, b)] -> [(prob, b)]
go prob
q ((prob
p, b
x) : [(prob, b)]
poss)
      | prob
q forall a. Num a => a -> a -> a
- prob
p forall a. Ord a => a -> a -> Bool
< prob
epsilon = [(prob
q, b
x)]
      | Bool
otherwise = (prob
p, b
x) forall a. a -> [a] -> [a]
: prob -> [(prob, b)] -> [(prob, b)]
go (prob
q forall a. Num a => a -> a -> a
- prob
p) [(prob, b)]
poss
    go prob
_ [] = []

-- | Truncates an infinite distribution to make it finite.  This is equivalent
-- to the original distribution, except with some arbitrary set of outcomes with
-- probability less than epsilon replaced by Nothing.
finitizeMaybe ::
  (Fractional prob, Ord prob) =>
  prob ->
  Distribution prob a ->
  Distribution prob (Maybe a)
finitizeMaybe :: forall prob a.
(Fractional prob, Ord prob) =>
prob -> Distribution prob a -> Distribution prob (Maybe a)
finitizeMaybe prob
epsilon = forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. prob -> [(prob, a)] -> [(prob, Maybe a)]
go prob
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities
  where
    go :: prob -> [(prob, a)] -> [(prob, Maybe a)]
go prob
q ((prob
p, a
x) : [(prob, a)]
poss)
      | prob
q forall a. Num a => a -> a -> a
- prob
p forall a. Ord a => a -> a -> Bool
< prob
epsilon = [(prob
q, forall a. Maybe a
Nothing)]
      | Bool
otherwise = (prob
p, forall a. a -> Maybe a
Just a
x) forall a. a -> [a] -> [a]
: prob -> [(prob, a)] -> [(prob, Maybe a)]
go (prob
q forall a. Num a => a -> a -> a
- prob
p) [(prob, a)]
poss
    go prob
_ [] = []

-- | Gives a map from outcomes to their probabilities in the given distribution.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
probabilities :: (Num prob, Ord a) => Distribution prob a -> Map a prob
probabilities :: forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith forall a. Num a => a -> a -> a
(+) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities

-- | Simplifies a finite distribution.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
simplify ::
  (Fractional prob, Ord a) => Distribution prob a -> Distribution prob a
simplify :: forall prob a.
(Fractional prob, Ord a) =>
Distribution prob a -> Distribution prob a
simplify = forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Map k a -> [(k, a)]
Map.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities

-- | Samples the probability distribution to produce a value.
sample :: Real prob => Distribution prob a -> IO a
sample :: forall prob a. Real prob => Distribution prob a -> IO a
sample (Certainly a
x) = forall (m :: * -> *) a. Monad m => a -> m a
return a
x
sample (Choice prob
p Distribution prob a
a Distribution prob a
b) =
  forall a. a -> a -> Bool -> a
bool (forall prob a. Real prob => Distribution prob a -> IO a
sample Distribution prob a
b) (forall prob a. Real prob => Distribution prob a -> IO a
sample Distribution prob a
a) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Ord a => a -> a -> Bool
< forall a b. (Real a, Fractional b) => a -> b
realToFrac prob
p) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a (m :: * -> *). (Random a, MonadIO m) => (a, a) -> m a
randomRIO (Double
0 :: Double, Double
1)

-- | A view of a probability distribution from the point of view of a given
-- event.  The event either always happens, never happens, or happens sometimes
-- with some probability.  In the latter case, there are posterior distributions
-- for when the event does or does not happen.
data EventView prob s
  = Always (Distribution prob s)
  | Never (Distribution prob s)
  | Sometimes prob (Distribution prob s) (Distribution prob s)

-- | Gives a view on a probability distribution relative to some event.
--
-- The following are guaranteed.
-- 1. @'fromEventView' . 'viewEvent' ev = id@
-- 2. If @'viewEvent' ev dist = 'Always' dist'@, then @dist = dist'@ and
--    @'probability' ev dist = 1@.
-- 3. If @'viewEvent' ev dist = 'Never' dist'@, then @dist = dist'@ and
--    @'probability' ev dist = 0@.
-- 4. If @'viewEvent' ev dist = 'Sometimes' p a b@, then
--    @'probability' ev dist = p@ and:
--    * @dist = 'bernoulli' p >>= bool a b@
--    * @'probability' ev a = 1@
--    * @'probability' ev b = 0@
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
viewEvent ::
  Fractional prob =>
  Event s ->
  Distribution prob s ->
  EventView prob s
viewEvent :: forall prob s.
Fractional prob =>
Event s -> Distribution prob s -> EventView prob s
viewEvent Event s
event dist :: Distribution prob s
dist@(Certainly s
x)
  | Event s
event s
x = forall prob s. Distribution prob s -> EventView prob s
Always Distribution prob s
dist
  | Bool
otherwise = forall prob s. Distribution prob s -> EventView prob s
Never Distribution prob s
dist
viewEvent Event s
event dist :: Distribution prob s
dist@(Choice prob
p Distribution prob s
aa Distribution prob s
bb) =
  case (forall prob s.
Fractional prob =>
Event s -> Distribution prob s -> EventView prob s
viewEvent Event s
event Distribution prob s
aa, forall prob s.
Fractional prob =>
Event s -> Distribution prob s -> EventView prob s
viewEvent Event s
event Distribution prob s
bb) of
    (Never Distribution prob s
_, Never Distribution prob s
_) -> forall prob s. Distribution prob s -> EventView prob s
Never Distribution prob s
dist
    (Always Distribution prob s
_, Always Distribution prob s
_) -> forall prob s. Distribution prob s -> EventView prob s
Always Distribution prob s
dist
    (Always Distribution prob s
a, Never Distribution prob s
b) -> forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes prob
p Distribution prob s
a Distribution prob s
b
    (Never Distribution prob s
a, Always Distribution prob s
b) -> forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes (prob
1 forall a. Num a => a -> a -> a
- prob
p) Distribution prob s
b Distribution prob s
a
    (Sometimes prob
q Distribution prob s
a1 Distribution prob s
a2, Never Distribution prob s
b) ->
      let (prob
p', prob
_, prob
p2) = prob -> prob -> (prob, prob, prob)
blend prob
q prob
0 in forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes prob
p' Distribution prob s
a1 (forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p2 Distribution prob s
a2 Distribution prob s
b)
    (Sometimes prob
q Distribution prob s
a1 Distribution prob s
a2, Always Distribution prob s
b) ->
      let (prob
p', prob
p1, prob
_) = prob -> prob -> (prob, prob, prob)
blend prob
q prob
1 in forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes prob
p' (forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p1 Distribution prob s
a1 Distribution prob s
b) Distribution prob s
a2
    (Never Distribution prob s
a, Sometimes prob
r Distribution prob s
b1 Distribution prob s
b2) ->
      let (prob
p', prob
_, prob
p2) = prob -> prob -> (prob, prob, prob)
blend prob
0 prob
r in forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes prob
p' Distribution prob s
b1 (forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p2 Distribution prob s
a Distribution prob s
b2)
    (Always Distribution prob s
a, Sometimes prob
r Distribution prob s
b1 Distribution prob s
b2) ->
      let (prob
p', prob
p1, prob
_) = prob -> prob -> (prob, prob, prob)
blend prob
1 prob
r in forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes prob
p' (forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p1 Distribution prob s
a Distribution prob s
b1) Distribution prob s
b2
    (Sometimes prob
q Distribution prob s
a1 Distribution prob s
a2, Sometimes prob
r Distribution prob s
b1 Distribution prob s
b2) ->
      let (prob
p', prob
p1, prob
p2) = prob -> prob -> (prob, prob, prob)
blend prob
q prob
r
       in forall prob s.
prob
-> Distribution prob s -> Distribution prob s -> EventView prob s
Sometimes prob
p' (forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p1 Distribution prob s
a1 Distribution prob s
b1) (forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p2 Distribution prob s
a2 Distribution prob s
b2)
  where
    blend :: prob -> prob -> (prob, prob, prob)
blend prob
q prob
r =
      let p' :: prob
p' = prob
p forall a. Num a => a -> a -> a
* prob
q forall a. Num a => a -> a -> a
+ (prob
1 forall a. Num a => a -> a -> a
- prob
p) forall a. Num a => a -> a -> a
* prob
r
       in (prob
p', prob
p forall a. Num a => a -> a -> a
* prob
q forall a. Fractional a => a -> a -> a
/ prob
p', prob
p forall a. Num a => a -> a -> a
* (prob
1 forall a. Num a => a -> a -> a
- prob
q) forall a. Fractional a => a -> a -> a
/ (prob
1 forall a. Num a => a -> a -> a
- prob
p'))

-- | Converts from 'EventView' back to a 'Distribution'.  The resulting
-- distribution is equivalent to the source distribution.
fromEventView :: EventView prob s -> Distribution prob s
fromEventView :: forall prob s. EventView prob s -> Distribution prob s
fromEventView (Always Distribution prob s
dist) = Distribution prob s
dist
fromEventView (Never Distribution prob s
dist) = Distribution prob s
dist
fromEventView (Sometimes prob
p Distribution prob s
a Distribution prob s
b) = forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p Distribution prob s
a Distribution prob s
b

-- | Produces the conditional probability distribution, assuming some event.
-- This function works for all distributions, but always produces an infinite
-- distribution for non-trivial events.
conditional :: Event s -> Distribution prob s -> Distribution prob s
conditional :: forall s prob.
Event s -> Distribution prob s -> Distribution prob s
conditional Event s
event Distribution prob s
dist = Distribution prob s
cdist
  where
    cdist :: Distribution prob s
cdist = do
      s
x <- Distribution prob s
dist
      if Event s
event s
x
        then forall (m :: * -> *) a. Monad m => a -> m a
return s
x
        else Distribution prob s
cdist

-- | Produces the conditional probability distribution, assuming some event.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
finiteConditional ::
  Fractional prob => Event s -> Distribution prob s -> Distribution prob s
finiteConditional :: forall prob s.
Fractional prob =>
Event s -> Distribution prob s -> Distribution prob s
finiteConditional Event s
event Distribution prob s
dist = forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical (forall a b. (a -> b) -> [a] -> [b]
map (forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (forall a. Fractional a => a -> a -> a
/ prob
p_event)) [(prob, s)]
filtered)
  where
    filtered :: [(prob, s)]
filtered = forall a. (a -> Bool) -> [a] -> [a]
filter (Event s
event forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) (forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities Distribution prob s
dist)
    p_event :: prob
p_event = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(prob, s)]
filtered)

-- | Updates a prior distribution of parameters for a model, based on an
-- observed event.  This implements Bayes' Law for distributions.
--
-- This function works for all distributions, but always produces an infinite
-- distribution for non-trivial events.
bayesian ::
  (param -> Distribution prob s) ->
  Event s ->
  Distribution prob param ->
  Distribution prob param
bayesian :: forall param prob s.
(param -> Distribution prob s)
-> Event s -> Distribution prob param -> Distribution prob param
bayesian param -> Distribution prob s
model Event s
event Distribution prob param
prior = Distribution prob param
posterior
  where
    posterior :: Distribution prob param
posterior = do
      param
param <- Distribution prob param
prior
      s
x <- param -> Distribution prob s
model param
param
      if Event s
event s
x then forall (m :: * -> *) a. Monad m => a -> m a
return param
param else Distribution prob param
posterior

-- | Updates a prior distribution of parameters for a model, based on an
-- observed event.  This implements Bayes' Law for distributions.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
finiteBayesian ::
  Fractional prob =>
  (param -> Distribution prob s) ->
  Event s ->
  Distribution prob param ->
  Distribution prob param
finiteBayesian :: forall prob param s.
Fractional prob =>
(param -> Distribution prob s)
-> Event s -> Distribution prob param -> Distribution prob param
finiteBayesian param -> Distribution prob s
model Event s
event Distribution prob param
prior = case forall prob s.
Fractional prob =>
Event s -> Distribution prob s -> EventView prob s
viewEvent (Event s
event forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) Distribution prob (param, s)
withParam of
  Always Distribution prob (param, s)
dist -> forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Distribution prob (param, s)
dist
  Never Distribution prob (param, s)
_ -> forall a. HasCallStack => [Char] -> a
error [Char]
"Posterior is undefined for an impossible event"
  Sometimes prob
_ Distribution prob (param, s)
dist Distribution prob (param, s)
_ -> forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Distribution prob (param, s)
dist
  where
    withParam :: Distribution prob (param, s)
withParam = do
      param
param <- Distribution prob param
prior
      s
obs <- param -> Distribution prob s
model param
param
      forall (m :: * -> *) a. Monad m => a -> m a
return (param
param, s
obs)

-- | A distribution with a fixed probability for each outcome.  The
-- probabilities should add to 1, but this is not checked.
categorical :: Fractional prob => [(prob, a)] -> Distribution prob a
categorical :: forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical = forall {t} {a}. Fractional t => t -> [(t, a)] -> Distribution t a
go prob
1
  where
    go :: t -> [(t, a)] -> Distribution t a
go t
_ [] = forall a. HasCallStack => [Char] -> a
error [Char]
"Empty distribution is not allowed"
    go t
_ [(t
_, a
x)] = forall prob a. a -> Distribution prob a
Certainly a
x
    go t
p ((t
q, a
x) : [(t, a)]
xs) = forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice (t
q forall a. Fractional a => a -> a -> a
/ t
p) (forall prob a. a -> Distribution prob a
Certainly a
x) (t -> [(t, a)] -> Distribution t a
go (t
p forall a. Num a => a -> a -> a
- t
q) [(t, a)]
xs)

-- | A uniform distribution over a list of values.
uniform :: Fractional prob => [a] -> Distribution prob a
uniform :: forall prob a. Fractional prob => [a] -> Distribution prob a
uniform [a]
xs = forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical forall a b. (a -> b) -> a -> b
$ (forall a. Fractional a => a -> a
recip prob
n,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [a]
xs where n :: prob
n = forall a b. (Real a, Fractional b) => a -> b
realToFrac (forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs)

-- | Geometric distribution over a list of possibilities.
geometric :: prob -> [a] -> Distribution prob a
geometric :: forall prob a. prob -> [a] -> Distribution prob a
geometric prob
_ [] = forall a. HasCallStack => [Char] -> a
error [Char]
"geometric: Empty distribution is not allowed"
geometric prob
_ [a
x] = forall prob a. a -> Distribution prob a
Certainly a
x
geometric prob
p (a
x : [a]
xs) = forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p (forall prob a. a -> Distribution prob a
Certainly a
x) (forall prob a. prob -> [a] -> Distribution prob a
geometric prob
p [a]
xs)

-- | A Bernoulli distribution.  This gives True with probability @p@, and False
-- otherwise.
bernoulli :: prob -> Distribution prob Bool
bernoulli :: forall prob. prob -> Distribution prob Bool
bernoulli prob
p = forall prob a.
prob
-> Distribution prob a
-> Distribution prob a
-> Distribution prob a
Choice prob
p (forall prob a. a -> Distribution prob a
Certainly Bool
True) (forall prob a. a -> Distribution prob a
Certainly Bool
False)

-- | Computes nCk.  This is a building block for several well-known discrete
-- distributions.
choose :: Integral t => t -> t -> t
t
n choose :: forall t. Integral t => t -> t -> t
`choose` t
k
  | t
k forall a. Ord a => a -> a -> Bool
> t
n forall t. Integral t => t -> t -> t
`div` t
2 = t
n forall t. Integral t => t -> t -> t
`choose` (t
n forall a. Num a => a -> a -> a
- t
k)
  | Bool
otherwise = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [t
n forall a. Num a => a -> a -> a
- t
k forall a. Num a => a -> a -> a
+ t
1 .. t
n] forall t. Integral t => t -> t -> t
`div` forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [t
1 .. t
k]

-- | A binomial distribution.  This gives the distribution of number of
-- successes in @n@ trials with probability @p@ of success.
binomial :: (Fractional prob, Integral n) => prob -> n -> Distribution prob n
binomial :: forall prob n.
(Fractional prob, Integral n) =>
prob -> n -> Distribution prob n
binomial prob
p n
n =
  forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical
    [ (forall a b. (Integral a, Num b) => a -> b
fromIntegral (n
n forall t. Integral t => t -> t -> t
`choose` n
k) forall a. Num a => a -> a -> a
* prob
p forall a b. (Num a, Integral b) => a -> b -> a
^ n
k forall a. Num a => a -> a -> a
* (prob
1 forall a. Num a => a -> a -> a
- prob
p) forall a b. (Num a, Integral b) => a -> b -> a
^ (n
n forall a. Num a => a -> a -> a
- n
k), n
k)
      | n
k <- [n
0 .. n
n]
    ]

-- | Negative binomial distribution.  This gives the distribution of number of
-- failures before @r@ successes with probability @p@ of success.
negativeBinomial ::
  (Fractional prob, Integral n) => prob -> n -> Distribution prob n
negativeBinomial :: forall prob n.
(Fractional prob, Integral n) =>
prob -> n -> Distribution prob n
negativeBinomial prob
_ n
0 = forall (f :: * -> *) a. Applicative f => a -> f a
pure n
0
negativeBinomial prob
p n
r =
  forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical
    [ (forall a b. (Integral a, Num b) => a -> b
fromIntegral ((n
k forall a. Num a => a -> a -> a
+ n
r forall a. Num a => a -> a -> a
- n
1) forall t. Integral t => t -> t -> t
`choose` (n
r forall a. Num a => a -> a -> a
- n
1)) forall a. Num a => a -> a -> a
* prob
p forall a b. (Num a, Integral b) => a -> b -> a
^ n
r forall a. Num a => a -> a -> a
* (prob
1 forall a. Num a => a -> a -> a
- prob
p) forall a b. (Num a, Integral b) => a -> b -> a
^ n
k, n
k)
      | n
k <- [n
0 ..]
    ]

-- | Hypergeometric distribution.  This gives the distribution of number of
-- successful draws out of @n@ attempts without replacement, when @k@
-- possibilities are successful.
hypergeometric ::
  (Fractional prob, Integral n) => n -> n -> n -> Distribution prob n
hypergeometric :: forall prob n.
(Fractional prob, Integral n) =>
n -> n -> n -> Distribution prob n
hypergeometric n
pop n
k n
n =
  forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical
    [ ( forall a b. (Integral a, Num b) => a -> b
fromIntegral ((n
k forall t. Integral t => t -> t -> t
`choose` n
m) forall a. Num a => a -> a -> a
* ((n
pop forall a. Num a => a -> a -> a
- n
k) forall t. Integral t => t -> t -> t
`choose` (n
n forall a. Num a => a -> a -> a
- n
m)))
          forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (n
pop forall t. Integral t => t -> t -> t
`choose` n
n),
        n
m
      )
      | n
m <- [n
lo .. n
hi]
    ]
  where
    lo :: n
lo = forall a. Ord a => a -> a -> a
max n
0 (n
n forall a. Num a => a -> a -> a
+ n
k forall a. Num a => a -> a -> a
- n
pop)
    hi :: n
hi = forall a. Ord a => a -> a -> a
min n
n n
k

-- | Poisson distribution.  Gives the number of independent events occurring in
-- a fixed time interval, if events are occurring at the given expected rate per
-- time interval.
poisson :: (Floating prob, Integral n) => prob -> Distribution prob n
poisson :: forall prob n.
(Floating prob, Integral n) =>
prob -> Distribution prob n
poisson prob
lambda =
  forall prob a.
Fractional prob =>
[(prob, a)] -> Distribution prob a
categorical
    [ (prob
lambda forall a b. (Num a, Integral b) => a -> b -> a
^ n
k forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a
exp (-prob
lambda) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
product [n
1 .. n
k]), n
k)
      | n
k <- [n
0 ..]
    ]

-- | Computes the probability of an event, represented by a predicate on values.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
probability :: Num prob => Event s -> Distribution prob s -> prob
probability :: forall prob s. Num prob => Event s -> Distribution prob s -> prob
probability Event s
event (Certainly s
x) = if Event s
event s
x then prob
1 else prob
0
probability Event s
event (Choice prob
p Distribution prob s
a Distribution prob s
b) =
  prob
p forall a. Num a => a -> a -> a
* forall prob s. Num prob => Event s -> Distribution prob s -> prob
probability Event s
event Distribution prob s
a forall a. Num a => a -> a -> a
+ (prob
1 forall a. Num a => a -> a -> a
- prob
p) forall a. Num a => a -> a -> a
* forall prob s. Num prob => Event s -> Distribution prob s -> prob
probability Event s
event Distribution prob s
b

-- | Like probability, but produces a lazy list of ever-improving bounds on the
-- probability.  This can be used on infinite distributions, for which the
-- exact probability cannot be calculated.
probabilityBounds ::
  Num prob => Event s -> Distribution prob s -> [(prob, prob)]
probabilityBounds :: forall prob s.
Num prob =>
Event s -> Distribution prob s -> [(prob, prob)]
probabilityBounds Event s
event Distribution prob s
dist = forall {t}. Num t => t -> t -> [(t, s)] -> [(t, t)]
go prob
0 prob
1 (forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities Distribution prob s
dist)
  where
    go :: t -> t -> [(t, s)] -> [(t, t)]
go t
p t
_ [] = [(t
p, t
p)]
    go t
p t
q ((t
q', s
x) : [(t, s)]
xs)
      | Event s
event s
x = (t
p, t
p forall a. Num a => a -> a -> a
+ t
q) forall a. a -> [a] -> [a]
: t -> t -> [(t, s)] -> [(t, t)]
go (t
p forall a. Num a => a -> a -> a
+ t
q') (t
q forall a. Num a => a -> a -> a
- t
q') [(t, s)]
xs
      | Bool
otherwise = (t
p, t
p forall a. Num a => a -> a -> a
+ t
q) forall a. a -> [a] -> [a]
: t -> t -> [(t, s)] -> [(t, t)]
go t
p (t
q forall a. Num a => a -> a -> a
- t
q') [(t, s)]
xs

-- | Like probability, but produces a value that differs from the true
-- probability by at most epsilon. This can be used on infinite distributions,
-- for which the exact probability cannot be calculated.
approxProbability ::
  (Ord prob, Fractional prob) =>
  prob ->
  Event s ->
  Distribution prob s ->
  prob
approxProbability :: forall prob s.
(Ord prob, Fractional prob) =>
prob -> Event s -> Distribution prob s -> prob
approxProbability prob
epsilon Event s
event Distribution prob s
dist =
  (forall a. Fractional a => a -> a -> a
/ prob
2) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall a. Num a => a -> a -> a
(+) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
dropWhile ((forall a. Ord a => a -> a -> Bool
> prob
epsilon) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Num a => a -> a
abs forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (-)) forall a b. (a -> b) -> a -> b
$
    forall prob s.
Num prob =>
Event s -> Distribution prob s -> [(prob, prob)]
probabilityBounds Event s
event Distribution prob s
dist

-- | Computes the expected value of a finite distribution.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
expectation :: Num a => Distribution a a -> a
expectation :: forall a. Num a => Distribution a a -> a
expectation (Certainly a
x) = a
x
expectation (Choice a
p Distribution a a
a Distribution a a
b) =
  a
p forall a. Num a => a -> a -> a
* forall a. Num a => Distribution a a -> a
expectation Distribution a a
a forall a. Num a => a -> a -> a
+ (a
1 forall a. Num a => a -> a -> a
- a
p) forall a. Num a => a -> a -> a
* forall a. Num a => Distribution a a -> a
expectation Distribution a a
b

-- | Computes the variance of a finite distribution.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
variance :: Num a => Distribution a a -> a
variance :: forall a. Num a => Distribution a a -> a
variance Distribution a a
dist = forall a. Num a => Distribution a a -> a
expectation ((forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Num a => a -> a -> a
subtract a
mean forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Distribution a a
dist)
  where
    mean :: a
mean = forall a. Num a => Distribution a a -> a
expectation Distribution a a
dist

-- | Computes the standard deviation of a finite distribution.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
stddev :: Floating a => Distribution a a -> a
stddev :: forall a. Floating a => Distribution a a -> a
stddev Distribution a a
dist = forall a. Floating a => a -> a
sqrt (forall a. Num a => Distribution a a -> a
variance Distribution a a
dist)

-- | Computes the entropy of a distribution in bits.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
entropy :: (Floating prob, Ord a) => Distribution prob a -> prob
entropy :: forall prob a.
(Floating prob, Ord a) =>
Distribution prob a -> prob
entropy Distribution prob a
dist =
  forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum
    [ -prob
p forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a -> a
logBase prob
2 prob
p
      | prob
p <- forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst (forall prob a. Num prob => Distribution prob a -> [(prob, a)]
possibilities (forall prob a.
(Fractional prob, Ord a) =>
Distribution prob a -> Distribution prob a
simplify Distribution prob a
dist))
    ]

-- | Computes the relative entropy, also known as Kullback-Leibler divergence,
-- between two distributions in bits.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
relativeEntropy ::
  (Eq prob, Floating prob, Ord a) =>
  Distribution prob a ->
  Distribution prob a ->
  prob
relativeEntropy :: forall prob a.
(Eq prob, Floating prob, Ord a) =>
Distribution prob a -> Distribution prob a -> prob
relativeEntropy Distribution prob a
post Distribution prob a
prior = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (a -> prob
term forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Set a -> [a]
Set.toList Set a
vals)
  where
    prob_post :: Map a prob
prob_post = forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities Distribution prob a
post
    prob_prior :: Map a prob
prob_prior = forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities Distribution prob a
prior
    vals :: Set a
vals = forall k a. Map k a -> Set k
Map.keysSet Map a prob
prob_post forall a. Ord a => Set a -> Set a -> Set a
`Set.union` forall k a. Map k a -> Set k
Map.keysSet Map a prob
prob_prior
    term :: a -> prob
term a
x =
      let p :: prob
p = forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault prob
0 a
x Map a prob
prob_post
          q :: prob
q = forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault prob
0 a
x Map a prob
prob_prior
       in if prob
p forall a. Eq a => a -> a -> Bool
== prob
0 then prob
0 else prob
p forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a -> a
logBase prob
2 (prob
p forall a. Fractional a => a -> a -> a
/ prob
q)

-- | Computes the mutual information between two random variables, in bits.  The
-- given distribution is taken as a definition of a probability space, and the
-- random variables are represented as functions from the sample space to values
-- taken by the random variable.
--
-- This only works for finite distributions.  Infinite distributions (including
-- even distributions with finitely many outcomes, but infinitely many paths to
-- reach those outcomes) will hang.
mutualInformation ::
  (Eq prob, Floating prob, Ord a, Ord b) =>
  RandVar s a ->
  RandVar s b ->
  Distribution prob s ->
  prob
mutualInformation :: forall prob a b s.
(Eq prob, Floating prob, Ord a, Ord b) =>
RandVar s a -> RandVar s b -> Distribution prob s -> prob
mutualInformation RandVar s a
f RandVar s b
g Distribution prob s
dist =
  forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (a -> b -> prob
term forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall k a. Map k a -> [k]
Map.keys Map a prob
f_probs forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall k a. Map k a -> [k]
Map.keys Map b prob
g_probs)
  where
    joint_probs :: Map (a, b) prob
joint_probs = forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities ((\s
x -> (RandVar s a
f s
x, RandVar s b
g s
x)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Distribution prob s
dist)
    f_probs :: Map a prob
f_probs = forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities (RandVar s a
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Distribution prob s
dist)
    g_probs :: Map b prob
g_probs = forall prob a.
(Num prob, Ord a) =>
Distribution prob a -> Map a prob
probabilities (RandVar s b
g forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Distribution prob s
dist)
    term :: a -> b -> prob
term a
x b
y =
      let p_x :: prob
p_x = forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault prob
0 a
x Map a prob
f_probs
          p_y :: prob
p_y = forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault prob
0 b
y Map b prob
g_probs
          p_xy :: prob
p_xy = forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault prob
0 (a
x, b
y) Map (a, b) prob
joint_probs
       in if prob
p_xy forall a. Eq a => a -> a -> Bool
== prob
0 then prob
0 else prob
p_xy forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a -> a
logBase prob
2 (prob
p_xy forall a. Fractional a => a -> a -> a
/ (prob
p_x forall a. Num a => a -> a -> a
* prob
p_y))