{-# LANGUAGE ScopedTypeVariables #-}

module Main where

import Control.Monad (replicateM, replicateM_)
import Data.List (sort)
import Probability.Distribution
import Test.Hspec

-- | A probability distribution with Rational probabilities. This is convenient
-- in testing so that we can check for exact values.
type ExactDist a = Distribution Rational a

-- | A probability distribution with Double probabilities and values. This is
-- inexact, but allows the maximum flexibility.
type DoubleDist = Distribution Double Double

epsilon :: Fractional a => a
epsilon = 1e-7

approxEq :: (Ord a, Fractional a) => a -> a -> Bool
approxEq x y = abs (x - y) < 1e-5

shouldApprox :: (Show a, Ord a, Fractional a) => a -> a -> Expectation
x `shouldApprox` y = x `shouldSatisfy` approxEq y

main :: IO ()
main = hspec $ do
  describe "standard distributions" $ do
    describe "categorical" $ do
      it "assigns the right probabilities" $ do
        let dist = categorical [(1 / 2, 1), (1 / 3, 2), (1 / 6, 3)] :: ExactDist Int
        probability (== 1) dist `shouldBe` 1 / 2
        probability (== 2) dist `shouldBe` 1 / 3
        probability (== 3) dist `shouldBe` 1 / 6

    describe "uniform" $ do
      it "assigns the right probabilities" $ do
        let dist = uniform [1, 2, 3] :: ExactDist Int
        probability (== 1) dist `shouldBe` 1 / 3
        probability (== 2) dist `shouldBe` 1 / 3
        probability (== 3) dist `shouldBe` 1 / 3

    describe "geometric" $ do
      it "assigns the right probabilities" $ do
        let dist = geometric (1 / 3) [1 .. 10] :: ExactDist Int
        probability (== 1) dist `shouldBe` 1 / 3
        probability (== 2) dist `shouldBe` 2 / 9
        probability (== 3) dist `shouldBe` 4 / 27

    describe "bernoulli" $ do
      it "assigns the right probabilities" $ do
        let dist = bernoulli (1 / 3) :: ExactDist Bool
        probability (== True) dist `shouldBe` 1 / 3
        probability (== False) dist `shouldBe` 2 / 3

    describe "binomial" $ do
      it "assigns the right probabilities" $ do
        let dist = binomial (1 / 3) 3 :: ExactDist Int
        probability (== 0) dist `shouldBe` 08 / 27
        probability (== 1) dist `shouldBe` 12 / 27
        probability (== 2) dist `shouldBe` 06 / 27
        probability (== 3) dist `shouldBe` 01 / 27

    describe "negativeBinomial" $ do
      it "assigns the right probabilities" $ do
        let dist = negativeBinomial (1 / 3) 3 :: ExactDist Int
        approxProbability epsilon (== 0) dist `shouldApprox` (1 / 27)
        approxProbability epsilon (== 1) dist `shouldApprox` (2 / 27)
        approxProbability epsilon (== 2) dist `shouldApprox` (8 / 81)
        approxProbability epsilon (== 3) dist `shouldApprox` (80 / 729)

    describe "hypergeometric" $ do
      it "assigns the right probabilities" $ do
        let dist = hypergeometric 10 5 3 :: ExactDist Int
        probability (== 0) dist `shouldBe` 6 / 72
        probability (== 1) dist `shouldBe` 30 / 72
        probability (== 2) dist `shouldBe` 30 / 72
        probability (== 3) dist `shouldBe` 6 / 72

    describe "poisson" $ do
      it "assigns the right probabilities" $ do
        let dist = poisson 3 :: Distribution Double Int
        approxProbability epsilon (== 0) dist `shouldApprox` (1 / exp 3)
        approxProbability epsilon (== 1) dist `shouldApprox` (3 / exp 3)
        approxProbability epsilon (== 2) dist `shouldApprox` (4.5 / exp 3)
        approxProbability epsilon (== 3) dist `shouldApprox` (4.5 / exp 3)
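
    -- A minimal additional sanity check (a sketch, relying only on the
    -- `probability` behaviour exercised above): for a finite exact
    -- distribution, the probability of the always-true event is exactly 1.
    describe "total probability" $ do
      it "is 1 for a finite distribution" $ do
        let dist = binomial (1 / 3) 3 :: ExactDist Int
        probability (const True) dist `shouldBe` 1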

  describe "conditionals and splits" $ do
    it "splits a distribution" $ do
      let dist = uniform [1 .. 10] :: ExactDist Int
      case viewEvent (> 7) dist of
        Sometimes p a b -> do
          p `shouldBe` 3 / 10
          probability even a `shouldBe` 2 / 3
          probability even b `shouldBe` 3 / 7
        _ -> error "viewEvent gave the wrong answer"

  describe "Dist" $ do
    it "can describe a stochastic process" $ do
      let dist :: ExactDist [Int] = do
            -- Roll a d4, then roll that number of d6.
            n <- uniform [1 .. 4]
            replicateM n (uniform [1 .. 6])
      replicateM_ 1000 $ do
        xs <- sample dist
        length xs `shouldSatisfy` (>= 1)
        length xs `shouldSatisfy` (<= 4)
        xs `shouldSatisfy` all (>= 1)
        xs `shouldSatisfy` all (<= 6)

    it "can describe an unbounded stochastic process" $ do
      -- Reroll all 1s. This is a left-recursive infinite process, and naive
      -- implementations tend to hang when trying to analyze it.
      let dist :: ExactDist Int = do
            n <- uniform [1 .. 6]
            if n == 1 then dist else return n
      approxProbability epsilon (== 1) dist `shouldApprox` 0
      approxProbability epsilon (== 2) dist `shouldApprox` 0.2
      approxProbability epsilon (== 3) dist `shouldApprox` 0.2
      approxProbability epsilon (== 4) dist `shouldApprox` 0.2
      approxProbability epsilon (== 5) dist `shouldApprox` 0.2
      approxProbability epsilon (== 6) dist `shouldApprox` 0.2

  describe "stats" $ do
    it "computes the expected value of a distribution" $ do
      expectation (uniform [1 .. 5] :: ExactDist Rational) `shouldBe` 3
      let dist = finitize epsilon (geometric 0.5 [1 ..]) :: DoubleDist
      expectation dist `shouldApprox` 2

    it "computes the variance of a distribution" $ do
      let dist = finitize epsilon (fromInteger <$> poisson 4) :: DoubleDist
      expectation dist `shouldApprox` 4
      variance dist `shouldApprox` 4
      stddev dist `shouldApprox` 2

  describe "info theory" $ do
    it "example" $ do
      -- Our fundamental unit of information theory will be the bit, and the
      -- *entropy* of a probability distribution tells us how many bits of
      -- information you would learn by knowing a value chosen from that
      -- distribution.
      --
      -- A choice between two options can be represented with one bit.
      entropy (uniform [0, 1] :: Distribution Double Int) `shouldApprox` 1

      -- On the other hand, choosing between 32 options requires 5 bits.
      entropy (uniform [1 .. 32] :: Distribution Double Int) `shouldApprox` 5

      -- The picture gets messier if the distribution is not uniform. If
      -- you flip a biased coin that almost always comes up heads, then you
      -- nearly know the outcome before the coin flip even happens. The entropy
      -- of this distribution is low. You can also think of entropy as a
      -- measure of uncertainty.
      entropy (bernoulli 0.999 :: Distribution Double Bool) `shouldApprox` 0.011407757737461145

      -- We can explore more about information theory with a more advanced
      -- example. We'll use the stochastic process from earlier, where we roll
      -- a d4, then roll that number of d6.
      let dist :: Distribution Double [Int] = do
            n <- uniform [1 .. 4]
            replicateM n (uniform [1 .. 6])

      -- There are about 8.5 bits of entropy contained in the distribution
      -- of results for this process. That's the information that you would
      -- learn by knowing the exact outcome.
      entropy dist `shouldApprox` 8.462406251802479

      -- The values of this distribution tell us a lot about the result: how
      -- many d6 dice were rolled, and what the resulting numbers were, in what
      -- order. We can use the Functor instance to consider only a subset of
      -- that information.
      --
      -- For example, suppose we don't care about the order of the dice. We can
      -- remove that information from the distribution by sorting the dice, so
      -- that rolls with different orders are indistinguishable.
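      -- (For instance, the rolls [3, 1] and [1, 3] both sort to [1, 3], so
      -- they collapse into a single outcome with their probabilities combined.)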
      entropy (sort <$> dist) `shouldApprox` 6.8224643388490005

      -- Often we don't even care about the individual dice at all, and just
      -- want to add them up and get a total. This eliminates even more
      -- information.
      --
      -- In this distribution, the possible values range from 1 to 24. You
      -- might expect the entropy to be log_2 24. It's lower than that, though,
      -- because the outcomes don't have equal probability.
      entropy (sum <$> dist) `shouldApprox` 4.192258630390927

      -- We can also look at only how many of the d6 were rolled. This entropy
      -- comes out to a remarkably round number! The reason is simple: what
      -- we're effectively looking at is the result of the first d4, which is
      -- unbiased. There are 4 possibilities, so that's 2 bits of info.
      entropy (length <$> dist) `shouldApprox` 2

      -- There's a complex relationship between the number of d6 dice rolled
      -- and their sum. If we learn the length, then we learn a bit about the
      -- sum. Suppose we learn that the length is 1. Perhaps we can hear the
      -- dice being rolled, but just cannot see the results on each die. We
      -- might ask what the probability distribution is *now*. This is a
      -- conditional distribution.
      let dist_1 = finiteConditional ((== 1) . length) dist

      -- One might ask how much information we learned by learning the length,
      -- relative to the original distribution. This quantity is known as
      -- relative entropy (also called KL divergence).
      --
      -- In this case, we learned 2 bits of information (the result of the d4).
      relativeEntropy dist_1 dist `shouldApprox` 2

      -- But not all of that information is relevant to the sum! We can also
      -- ask how much information we learned about the sum in particular.
      --
      -- We can determine that learning that only one die is rolled would give
      -- us about 1.44 bits of information about the sum.
      relativeEntropy (sum <$> dist_1) (sum <$> dist) `shouldApprox` 1.4445630236465163

      -- How much information are we still missing about the sum? Well, if we
      -- know there was only one die rolled, then the information we're missing
      -- is what number came up on that die, and 100% of that info is relevant
      -- to the sum. That's log_2 6 bits of information, or about 2.58.
      entropy (sum <$> dist_1) `shouldApprox` logBase 2 6
      entropy (sum <$> dist_1) `shouldApprox` 2.5849625007211565

      -- On the other hand, suppose we learn that there were 2, 3, or 4 d6 dice
      -- rolled. Then these results come out different, because knowing that
      -- more dice were rolled leaves a lot more room for uncertainty about the
      -- sum! So knowing the length tells you less about the sum, and there is
      -- more entropy remaining in the conditional distribution of sums.
      --
      -- Interestingly, it is actually the two- and three-roll cases that tell
      -- you the least about the sum. They are near the middle, so their sums
      -- overlap the most with possible sums from other lengths, and knowing
      -- the number of rolls does less to narrow down the possibilities. On the
      -- other hand, the remaining entropy is still higher in the four-roll
      -- case: although knowing that there are four rolls gives you more
      -- information, there's even more information to know!
      let dist_2 = finiteConditional ((== 2) . length) dist
      relativeEntropy dist_2 dist `shouldApprox` 2
      relativeEntropy (sum <$> dist_2) (sum <$> dist) `shouldApprox` 0.594873350537772
      entropy (sum <$> dist_2) `shouldApprox` 3.274401919288769
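
      -- (An illustrative aside: in this conditional distribution the length is
      -- fully determined, so there should be no entropy left in the length
      -- itself.)
      entropy (length <$> dist_2) `shouldApprox` 0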

      let dist_3 = finiteConditional ((== 3) . length) dist
      relativeEntropy dist_3 dist `shouldApprox` 2
      relativeEntropy (sum <$> dist_3) (sum <$> dist) `shouldApprox` 0.48020133600720066
      entropy (sum <$> dist_3) `shouldApprox` 3.599291802435438

      let dist_4 = finiteConditional ((== 4) . length) dist
      relativeEntropy dist_4 dist `shouldApprox` 2
      relativeEntropy (sum <$> dist_4) (sum <$> dist) `shouldApprox` 0.9766227667617642
      entropy (sum <$> dist_4) `shouldApprox` 3.814117822165162

      -- A reasonable question to ask is this: without knowing the specific
      -- length, how many bits of extra information might we *expect* to learn
      -- about the sum from knowing the length? This is known as mutual
      -- information: the number of bits of *shared* information between the
      -- length and the sum.
      --
      -- Imagine a Venn-style diagram with two overlapping circles. The entire
      -- diagram represents the information you'd gain from knowing an exact
      -- outcome. One circle represents the information you'd gain from knowing
      -- the length, and the other represents the information you'd gain from
      -- knowing the sum. The two circles overlap, and the overlap represents
      -- the mutual information between the two.
      --
      -- So it answers the question we asked: "If I know the length, how many
      -- bits of info does that tell me about the sum?" It also answers the
      -- converse question: "If I know the sum, how many bits of info does that
      -- tell me about the length?"
      mutualInformation length sum dist `shouldApprox` 0.8740651192382936
      mutualInformation sum length dist `shouldApprox` 0.8740651192382936

      -- An observant reader might notice that the mutual information given
      -- above is the average of the relative entropies for the four possible
      -- lengths. This is because all four lengths are equally likely. In
      -- general, the mutual information is the *expected* relative entropy
      -- of the conditional distribution versus the original. That is:
      let expectedRelativeEntropy = expectation $ do
            x <- length <$> dist
            let cond = finiteConditional ((== x) . length) dist
            return $ relativeEntropy (sum <$> cond) (sum <$> dist)
      mutualInformation length sum dist `shouldApprox` expectedRelativeEntropy

  describe "bayesian" $ do
    -- We have a coin that might be fair (equal chance of heads or tails), or
    -- might be biased (0.75 chance of heads, 0.25 chance of tails). As a
    -- prior, each is equally likely.
    let model = bernoulli :: Rational -> ExactDist Bool
        prior = uniform [0.75, 0.5]

    it "works in general" $ do
      -- We will flip the coin once, and update our estimate of the chance the
      -- coin is fair.
      let ifHeads = bayesian model (== True) prior
          ifTails = bayesian model (== False) prior

      -- If the coin comes up heads, we estimate it is the biased one with a
      -- 3/5 probability.
      --
      -- By Bayes' Law, P(B|H) = P(H|B) * P(B) / P(H)
      --                       = (3/4) * (1/2) / (1/2 * 3/4 + 1/2 * 1/2)
      --                       = 3/5
      approxProbability epsilon (> 0.5) ifHeads `shouldApprox` (3 / 5)

      -- If the coin comes up tails, we estimate it is the biased one with a
      -- 1/3 probability. This is slow, so we choose a larger epsilon.
      approxProbability 0.01 (> 0.5) ifTails `shouldSatisfy` \x -> abs (x - 1 / 3) < 0.01

    it "works exactly with finite version" $ do
      let ifHeads = finiteBayesian model (== True) prior
          ifTails = finiteBayesian model (== False) prior
      probability (> 0.5) ifHeads `shouldBe` (3 / 5)
      probability (> 0.5) ifTails `shouldBe` (1 / 3)
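
      -- A further illustrative check (a sketch, assuming the posterior can be
      -- fed back in as the prior for the next observation): seeing heads twice
      -- in a row makes the biased coin even more likely.
      --
      -- P(B|HH) = (3/4) * (3/5) / ((3/4) * (3/5) + (1/2) * (2/5)) = 9/13
      probability (> 0.5) (finiteBayesian model (== True) ifHeads) `shouldBe` (9 / 13)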