module Statistics.Test.KruskalWallis
( kruskalWallisRank
, kruskalWallis
, kruskalWallisSignificant
, kruskalWallisTest
) where
import Data.Ord (comparing)
import Data.Foldable (foldMap)
import qualified Data.Vector.Unboxed as U
import Statistics.Function (sort, sortBy, square)
import Statistics.Distribution (quantile)
import Statistics.Distribution.ChiSquared (chiSquared)
import Statistics.Test.Types (TestResult(..), significant)
import Statistics.Test.Internal (rank)
import Statistics.Sample
import qualified Statistics.Sample.Internal as Sample(sum)
kruskalWallisRank :: [Sample] -> [Sample]
kruskalWallisRank samples = groupByTags
. sortBy (comparing fst)
. U.zip tags
$ rank (==) joinSample
where
(tags,joinSample) = U.unzip
. sortBy (comparing snd)
$ foldMap (uncurry tagSample) $ zip [(1::Int)..] samples
tagSample t = U.map (\x -> (t,x))
groupByTags xs
| U.null xs = []
| otherwise = sort (U.map snd ys) : groupByTags zs
where
(ys,zs) = U.span ((==) (fst $ U.head xs) . fst) xs
kruskalWallis :: [Sample] -> Double
kruskalWallis samples = (nTot - 1) * numerator / denominator
where
nTot = fromIntegral $ sumWith rsamples U.length
avgRank = (nTot + 1) / 2
numerator = sumWith rsamples $ \sample ->
let n = fromIntegral $ U.length sample
in n * square (mean sample - avgRank)
denominator = sumWith rsamples $ \sample ->
Sample.sum $ U.map (\r -> square (r - avgRank)) sample
rsamples = kruskalWallisRank samples
kruskalWallisSignificant ::
[Int]
-> Double
-> Double
-> Maybe TestResult
kruskalWallisSignificant ns p k
| all (>4) ns = Just . significant $ k > x
| otherwise = Nothing
where
x = quantile (chiSquared (length ns - 1)) (1 - p)
kruskalWallisTest :: Double -> [Sample] -> Maybe TestResult
kruskalWallisTest p samples =
kruskalWallisSignificant (map U.length samples) p $ kruskalWallis samples
sumWith :: Num a => [Sample] -> (Sample -> a) -> a
sumWith samples f = Prelude.sum $ fmap f samples
{-# INLINE sumWith #-}