----------------------------------------------------------------------------- -- | -- Module : Control.Monad.MC.Summary -- Copyright : Copyright (c) , Patrick Perry -- License : BSD3 -- Maintainer : Patrick Perry -- Stability : experimental -- module Control.Monad.MC.Summary ( -- * Summary statistics -- ** The @Summary@ data type Summary, summary, update, -- ** @Summary@ properties sampleSize, sampleMean, sampleVar, sampleSD, sampleSE, sampleCI, sampleMin, sampleMax, ) where import GSL.Random.Dist( ugaussianPInv ) -- | A type for storing summary statistics for a data set including -- sample size, min and max values, and first and second moments. data Summary = S {-# UNPACK #-} !Int -- sample size {-# UNPACK #-} !Double -- sample mean {-# UNPACK #-} !Double -- sum of squares {-# UNPACK #-} !Double -- sample min {-# UNPACK #-} !Double -- sample max -- | Get an empty summary. summary :: Summary summary = S 0 0 0 (1/0) (-1/0) -- | Update the summary with a data point. -- Running mean and variance computed as in Knuth, Vol 2, page 232, -- 3rd edition, see http://www.johndcook.com/standard_deviation.html for -- a description. update :: Summary -> Double -> Summary update (S n m s l h) x = let n' = n+1 delta = x - m m' = m + delta / fromIntegral n' s' = s + delta*(x - m') l' = if x < l then x else l h' = if x > h then x else h in S n' m' s' l' h' -- | Get the sample size. sampleSize :: Summary -> Int sampleSize (S n _ _ _ _) = n -- | Get the sample mean. sampleMean :: Summary -> Double sampleMean (S _ m _ _ _) = m -- | Get the sample variance. sampleVar :: Summary -> Double sampleVar (S n _ s _ _) = s / fromIntegral (n - 1) -- | Get the sample standard deviation. sampleSD :: Summary -> Double sampleSD s = sqrt (sampleVar s) -- | Get the sample standard error. sampleSE :: Summary -> Double sampleSE s = sqrt (sampleVar s / fromIntegral (sampleSize s)) -- | Get a Central Limit Theorem-based confidence interval for the mean -- with the specified coverage level. The level must be in the range @(0,1)@. sampleCI :: Double -> Summary -> (Double,Double) sampleCI level s | not (level > 0 && level < 1) = error "level must be between 0 and 1" | otherwise = let alpha = (0.5 - level) + 0.5 z = -(ugaussianPInv (0.5*alpha)) se = sampleSE s delta = z*se xbar = sampleMean s in (xbar-delta, xbar+delta) -- | Get the minimum of the sample. sampleMin :: Summary -> Double sampleMin (S _ _ _ l _) = l -- | Get the maximum of the sample. sampleMax :: Summary -> Double sampleMax (S _ _ _ _ h) = h