module Math.Statistics (
mean
, meanWgh
, average
, harmean
, geomean
, stddev
, stddevp
, var
, pvar
, centralMoment
, devsq
, skew
, pearsonSkew1
, pearsonSkew2
, kurt
, median
, modes
, mode
, iqr
, quantile
, quantileAsc
, range
, avgdev
, covar
, covMatrix
, pearson
, correl
, linreg
) where
import Data.List
import Data.Ord (comparing)
mean :: Fractional a => [a] -> a
mean x = fst $ foldl' addElement (0,0) x
where
addElement (!m,!n) x = (m + (xm)/(n+1), n+1)
meanWgh :: Floating a => [(a,a)] -> a
meanWgh xs = (sum . map (uncurry (*)) $ xs) / (sum . map snd $ xs)
average :: Fractional a => [a] -> a
average = mean
harmean :: (Fractional a) => [a] -> a
harmean xs = fromIntegral (length xs) / (sum $ map (1/) xs)
geomean :: (Floating a) => [a] -> a
geomean xs = (foldr1 (*) xs)**(1 / fromIntegral (length xs))
median :: (Fractional a, Ord a) => [a] -> a
median x | odd n = head $ drop (n `div` 2) x'
| even n = mean $ take 2 $ drop i x'
where i = (length x' `div` 2) 1
x' = sort x
n = length x
modes :: (Ord a) => [a] -> [(Int, a)]
modes xs = sortBy (comparing $ negate.fst) $ map (\x->(length x, head x)) $ (group.sort) xs
mode :: (Ord a) => [a] -> Maybe a
mode xs = case m of
[] -> Nothing
_ -> Just . snd $ head m
where m = filter (\(a,b) -> a > 1) (modes xs)
centralMoment :: (Fractional b, Integral t) => [b] -> t -> b
centralMoment xs 1 = 0
centralMoment xs r = (sum (map (\x -> (xm)^r) xs)) / n
where
m = mean xs
n = fromIntegral $ length xs
range :: (Num a, Ord a) => [a] -> a
range xs = maximum xs minimum xs
avgdev :: (Floating a) => [a] -> a
avgdev xs = mean $ map (\x -> abs(x m)) xs
where
m = mean xs
stddev :: (Floating a) => [a] -> a
stddev xs = sqrt $ var xs
stddevp :: (Floating a) => [a] -> a
stddevp xs = sqrt $ pvar xs
pvar :: (Fractional a) => [a] -> a
pvar xs = centralMoment xs 2
var :: (Fractional b) => [b] -> b
var xs = (var' 0 0 0 xs) / (fromIntegral $ length xs 1)
where
var' _ _ s [] = s
var' m n s (x:xs) = var' nm (n + 1) (s + delta * (x nm)) xs
where
delta = x m
nm = m + delta/(fromIntegral $ n + 1)
iqr :: [a] -> [a]
iqr xs = take (length xs 2*q) $ drop q xs
where
q = ((length xs) + 1) `div` 4
kurt :: (Floating b) => [b] -> b
kurt xs = ((centralMoment xs 4) / (centralMoment xs 2)^2)3
quantile :: (Fractional b, Ord b) => Double -> [b] -> b
quantile q = quantileAsc q . sort
quantileAsc :: (Fractional b, Ord b) => Double -> [b] -> b
quantileAsc _ [] = error "quantile on empty list"
quantileAsc q xs
| q < 0 || q > 1 = error "quantile out of range"
| otherwise = xs !! (quantIndex (length xs) q)
where quantIndex :: Int -> Double -> Int
quantIndex len q = case round $ q * (fromIntegral len 1) of
idx | idx < 0 -> error "Quantile index too small"
| idx >= len -> error "Quantile index too large"
| otherwise -> idx
skew :: (Floating b) => [b] -> b
skew xs = (centralMoment xs 3) / (centralMoment xs 2)**(3/2)
pearsonSkew1 :: (Ord a, Floating a) => [a] -> a
pearsonSkew1 xs = 3 * (mean xs mo) / stddev xs
where
mo = snd $ head $ modes xs
pearsonSkew2 :: (Ord a, Floating a) => [a] -> a
pearsonSkew2 xs = 3 * (mean xs median xs) / stddev xs
covar :: (Floating a) => [a] -> [a] -> a
covar xs ys = sum (zipWith (*) (map f1 xs) (map f2 ys)) / (n1)
where
n = fromIntegral $ length $ xs
m1 = mean xs
m2 = mean ys
f1 x = x m1
f2 x = x m2
covMatrix :: (Floating a) => [[a]] -> [[a]]
covMatrix xs = split' (length xs) cs
where
cs = [ covar a b | a <- xs, b <- xs]
split' n = unfoldr (\y -> if null y then Nothing else Just $ splitAt n y)
pearson :: (Floating a) => [a] -> [a] -> a
pearson x y = covar x y / (stddev x * stddev y)
correl :: (Floating a) => [a] -> [a] -> a
correl = pearson
linreg :: (Floating b) => [(b, b)] -> (b, b, b)
linreg xys = let !xs = map fst xys
!ys = map snd xys
!n = fromIntegral $ length xys
!sX = sum xs
!sY = sum ys
!sXX = sum $ map (^ 2) xs
!sXY = sum $ map (uncurry (*)) xys
!sYY = sum $ map (^ 2) ys
!alpha = (sY beta * sX) / n
!beta = (n * sXY sX * sY) / (n * sXX sX * sX)
!r = (n * sXY sX * sY) / (sqrt $ (n * sXX sX^2) * (n * sYY sY ^ 2))
in (alpha, beta, r)
devsq :: (Floating a) => [a] -> a
devsq xs = sum $ map (\x->(xm)**2) xs
where m = mean xs