-----------------------------------------------------------------------------
-- Module      : Math.Statistics
-- Copyright   : (c) 2007 SFTank
-- License     : BSD3
--
-- Maintainer  : mbeddoe@<nospam>sftank.net
-- Stability   : experimental
-- Portability : portable
--
-- Description :
--   A collection of commonly used statistical functions.
-----------------------------------------------------------------------------

module Math.Statistics where

import List

-- Numerically stable mean
mean :: Floating a => [a] -> a
mean = fst.foldr (\x (m,n) -> (m+(x-m) / (fromIntegral $ n + 1), n + 1)) (0,0)

-- Harmonic mean
hmean :: (Floating a) => [a] -> a
hmean xs = fromIntegral (length xs) / (sum $ map (1/) xs)

-- Geometric mean
gmean :: (Floating a) => [a] -> a
gmean xs = (foldr1 (*) xs)**(1 / fromIntegral (length xs))

-- Median
median :: (Floating a, Ord a) => [a] -> a
median x | odd n  = head  $ drop (n `div` 2) x'
         | even n = mean $ take 2 $ drop i x'
                  where i = (length x' `div` 2) - 1
                        x' = sort x
                        n  = length x

-- Modes
-- Returns a sorted list of modes in descending order
modes :: (Ord a) => [a] -> [(Int, a)]
modes xs = sortOn (negate.fst) $ map (\x->(length x, head x)) $ (group.sort) xs
    where
      sortOn :: Ord b => (a -> b) -> [a] -> [a]
      sortOn f = sortBy (\x y -> compare (f x) (f y))

-- Central moments
centralMoment xs 1 = 0
centralMoment xs r = (sum (map (\x -> (x-m)^r) xs)) / n
    where
      m = mean xs
      n = fromIntegral $ length xs

-- Range
range :: (Num a, Ord a) => [a] -> a
range xs = maximum xs - minimum xs

-- Average deviation
avgdev :: (Floating a) => [a] -> a
avgdev xs = mean $ map (\x -> abs(x - m)) xs
    where
      m = mean xs

-- Standard deviation
stddev :: (Floating a) => [a] -> a
stddev xs = sqrt $ var xs

-- Population variance
pvar :: (Floating a) => [a] -> a
pvar xs = centralMoment xs 2

-- Numerically stable sample variance
var xs = (var' 0 0 0 xs) / (fromIntegral $ length xs - 1)
    where
      var' _ _ s [] = s
      var' m n s (x:xs) = var' nm (n + 1) (s + delta * (x - nm)) xs
         where
           delta = x - m
           nm = m + delta/(fromIntegral $ n + 1)

-- Interquartile range
-- XXX: Add case that takes into account even vs odd length
iqr xs = take (length xs - 2*q) $ drop q xs
    where
      q = ((length xs) + 1) `div` 4

-- Kurtosis
kurtosis xs = ((centralMoment xs 4) / (centralMoment xs 2)^2)-3

-- Skew
skew xs = (centralMoment xs 3) / (centralMoment xs 2)**(3/2)

pearsonSkew1 xs = 3 * (mean xs - mo) / stddev xs
    where
      mo = snd $ head $ modes xs

pearsonSkew2 xs = 3 * (mean xs - median xs) / stddev xs

-- Covariance
cov :: (Floating a) => [a] -> [a] -> a
cov xs ys = sum (zipWith (*) (map f1 xs) (map f2 ys)) / (n - 1)
    where
      n = fromIntegral $ length $ xs
      m1 = mean xs
      m2 = mean ys
      f1 = \x -> (x - m1)
      f2 = \x -> (x - m2)

-- Covariance matrix
covMatrix :: (Floating a) => [[a]] -> [[a]]
covMatrix xs =  split' (length xs) cs
    where
      cs = [ cov a b | a <- xs, b <- xs]
      split' n = unfoldr (\y -> if null y then Nothing else Just $ splitAt n y)

-- Pearson's product-moment correlation coefficient
corr :: (Floating a) => [a] -> [a] -> a
corr x y = cov x y / (stddev x * stddev y)