{-# LANGUAGE FlexibleContexts #-}
module Statistics.Sample
(
Sample
, WeightedSample
, range
, mean
, welfordMean
, meanWeighted
, harmonicMean
, geometricMean
, centralMoment
, centralMoments
, skewness
, kurtosis
, variance
, varianceUnbiased
, meanVariance
, meanVarianceUnb
, stdDev
, varianceWeighted
, stdErrMean
, fastVariance
, fastVarianceUnbiased
, fastStdDev
, covariance
, correlation
, pair
) where
import Statistics.Function (minMax)
import Statistics.Sample.Internal (robustSumVar, sum)
import Statistics.Types.Internal (Sample,WeightedSample)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import Prelude hiding ((^), sum)
range :: (G.Vector v Double) => v Double -> Double
range s = hi - lo
where (lo , hi) = minMax s
{-# INLINE range #-}
mean :: (G.Vector v Double) => v Double -> Double
mean xs = sum xs / fromIntegral (G.length xs)
{-# SPECIALIZE mean :: U.Vector Double -> Double #-}
{-# SPECIALIZE mean :: V.Vector Double -> Double #-}
welfordMean :: (G.Vector v Double) => v Double -> Double
welfordMean = fini . G.foldl' go (T 0 0)
where
fini (T a _) = a
go (T m n) x = T m' n'
where m' = m + (x - m) / fromIntegral n'
n' = n + 1
{-# SPECIALIZE welfordMean :: U.Vector Double -> Double #-}
{-# SPECIALIZE welfordMean :: V.Vector Double -> Double #-}
meanWeighted :: (G.Vector v (Double,Double)) => v (Double,Double) -> Double
meanWeighted = fini . G.foldl' go (V 0 0)
where
fini (V a _) = a
go (V m w) (x,xw) = V m' w'
where m' | w' == 0 = 0
| otherwise = m + xw * (x - m) / w'
w' = w + xw
{-# INLINE meanWeighted #-}
harmonicMean :: (G.Vector v Double) => v Double -> Double
harmonicMean = fini . G.foldl' go (T 0 0)
where
fini (T b a) = fromIntegral a / b
go (T x y) n = T (x + (1/n)) (y+1)
{-# INLINE harmonicMean #-}
geometricMean :: (G.Vector v Double) => v Double -> Double
geometricMean = exp . mean . G.map log
{-# INLINE geometricMean #-}
centralMoment :: (G.Vector v Double) => Int -> v Double -> Double
centralMoment a xs
| a < 0 = error "Statistics.Sample.centralMoment: negative input"
| a == 0 = 1
| a == 1 = 0
| otherwise = sum (G.map go xs) / fromIntegral (G.length xs)
where
go x = (x-m) ^ a
m = mean xs
{-# SPECIALIZE centralMoment :: Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE centralMoment :: Int -> V.Vector Double -> Double #-}
centralMoments :: (G.Vector v Double) => Int -> Int -> v Double -> (Double, Double)
centralMoments a b xs
| a < 2 || b < 2 = (centralMoment a xs , centralMoment b xs)
| otherwise = fini . G.foldl' go (V 0 0) $ xs
where go (V i j) x = V (i + d^a) (j + d^b)
where d = x - m
fini (V i j) = (i / n , j / n)
m = mean xs
n = fromIntegral (G.length xs)
{-# SPECIALIZE
centralMoments :: Int -> Int -> U.Vector Double -> (Double, Double) #-}
{-# SPECIALIZE
centralMoments :: Int -> Int -> V.Vector Double -> (Double, Double) #-}
skewness :: (G.Vector v Double) => v Double -> Double
skewness xs = c3 * c2 ** (-1.5)
where (c3 , c2) = centralMoments 3 2 xs
{-# SPECIALIZE skewness :: U.Vector Double -> Double #-}
{-# SPECIALIZE skewness :: V.Vector Double -> Double #-}
kurtosis :: (G.Vector v Double) => v Double -> Double
kurtosis xs = c4 / (c2 * c2) - 3
where (c4 , c2) = centralMoments 4 2 xs
{-# SPECIALIZE kurtosis :: U.Vector Double -> Double #-}
{-# SPECIALIZE kurtosis :: V.Vector Double -> Double #-}
data V = V {-# UNPACK #-} !Double {-# UNPACK #-} !Double
variance :: (G.Vector v Double) => v Double -> Double
variance samp
| n > 1 = robustSumVar (mean samp) samp / fromIntegral n
| otherwise = 0
where
n = G.length samp
{-# SPECIALIZE variance :: U.Vector Double -> Double #-}
{-# SPECIALIZE variance :: V.Vector Double -> Double #-}
varianceUnbiased :: (G.Vector v Double) => v Double -> Double
varianceUnbiased samp
| n > 1 = robustSumVar (mean samp) samp / fromIntegral (n-1)
| otherwise = 0
where
n = G.length samp
{-# SPECIALIZE varianceUnbiased :: U.Vector Double -> Double #-}
{-# SPECIALIZE varianceUnbiased :: V.Vector Double -> Double #-}
meanVariance :: (G.Vector v Double) => v Double -> (Double,Double)
meanVariance samp
| n > 1 = (m, robustSumVar m samp / fromIntegral n)
| otherwise = (m, 0)
where
n = G.length samp
m = mean samp
{-# SPECIALIZE meanVariance :: U.Vector Double -> (Double,Double) #-}
{-# SPECIALIZE meanVariance :: V.Vector Double -> (Double,Double) #-}
meanVarianceUnb :: (G.Vector v Double) => v Double -> (Double,Double)
meanVarianceUnb samp
| n > 1 = (m, robustSumVar m samp / fromIntegral (n-1))
| otherwise = (m, 0)
where
n = G.length samp
m = mean samp
{-# SPECIALIZE meanVarianceUnb :: U.Vector Double -> (Double,Double) #-}
{-# SPECIALIZE meanVarianceUnb :: V.Vector Double -> (Double,Double) #-}
stdDev :: (G.Vector v Double) => v Double -> Double
stdDev = sqrt . varianceUnbiased
{-# SPECIALIZE stdDev :: U.Vector Double -> Double #-}
{-# SPECIALIZE stdDev :: V.Vector Double -> Double #-}
stdErrMean :: (G.Vector v Double) => v Double -> Double
stdErrMean samp = stdDev samp / (sqrt . fromIntegral . G.length) samp
{-# SPECIALIZE stdErrMean :: U.Vector Double -> Double #-}
{-# SPECIALIZE stdErrMean :: V.Vector Double -> Double #-}
robustSumVarWeighted :: (G.Vector v (Double,Double)) => v (Double,Double) -> V
robustSumVarWeighted samp = G.foldl' go (V 0 0) samp
where
go (V s w) (x,xw) = V (s + xw*d*d) (w + xw)
where d = x - m
m = meanWeighted samp
{-# INLINE robustSumVarWeighted #-}
varianceWeighted :: (G.Vector v (Double,Double)) => v (Double,Double) -> Double
varianceWeighted samp
| G.length samp > 1 = fini $ robustSumVarWeighted samp
| otherwise = 0
where
fini (V s w) = s / w
{-# SPECIALIZE varianceWeighted :: U.Vector (Double,Double) -> Double #-}
{-# SPECIALIZE varianceWeighted :: V.Vector (Double,Double) -> Double #-}
fastVar :: (G.Vector v Double) => v Double -> T1
fastVar = G.foldl' go (T1 0 0 0)
where
go (T1 n m s) x = T1 n' m' s'
where n' = n + 1
m' = m + d / fromIntegral n'
s' = s + d * (x - m')
d = x - m
fastVariance :: (G.Vector v Double) => v Double -> Double
fastVariance = fini . fastVar
where fini (T1 n _m s)
| n > 1 = s / fromIntegral n
| otherwise = 0
{-# INLINE fastVariance #-}
fastVarianceUnbiased :: (G.Vector v Double) => v Double -> Double
fastVarianceUnbiased = fini . fastVar
where fini (T1 n _m s)
| n > 1 = s / fromIntegral (n - 1)
| otherwise = 0
{-# INLINE fastVarianceUnbiased #-}
fastStdDev :: (G.Vector v Double) => v Double -> Double
fastStdDev = sqrt . fastVariance
{-# INLINE fastStdDev #-}
covariance :: (G.Vector v (Double,Double), G.Vector v Double)
=> v (Double,Double)
-> Double
covariance xy
| n == 0 = 0
| otherwise = mean $ G.zipWith (*)
(G.map (\x -> x - muX) xs)
(G.map (\y -> y - muY) ys)
where
n = G.length xy
(xs,ys) = G.unzip xy
muX = mean xs
muY = mean ys
{-# SPECIALIZE covariance :: U.Vector (Double,Double) -> Double #-}
{-# SPECIALIZE covariance :: V.Vector (Double,Double) -> Double #-}
correlation :: (G.Vector v (Double,Double), G.Vector v Double)
=> v (Double,Double)
-> Double
correlation xy
| n == 0 = 0
| otherwise = cov / sqrt (varX * varY)
where
n = G.length xy
(xs,ys) = G.unzip xy
(muX,varX) = meanVariance xs
(muY,varY) = meanVariance ys
cov = mean $ G.zipWith (*)
(G.map (\x -> x - muX) xs)
(G.map (\y -> y - muY) ys)
{-# SPECIALIZE correlation :: U.Vector (Double,Double) -> Double #-}
{-# SPECIALIZE correlation :: V.Vector (Double,Double) -> Double #-}
pair :: (G.Vector v a, G.Vector v b, G.Vector v (a,b)) => v a -> v b -> v (a,b)
pair va vb
| G.length va == G.length vb = G.zip va vb
| otherwise = error "Statistics.Sample.pair: vector must have same length"
{-# INLINE pair #-}
(^) :: Double -> Int -> Double
x ^ 1 = x
x ^ n = x * (x ^ (n-1))
{-# INLINE (^) #-}
data T = T {-# UNPACK #-}!Double {-# UNPACK #-}!Int
data T1 = T1 {-# UNPACK #-}!Int {-# UNPACK #-}!Double {-# UNPACK #-}!Double