{-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-}
-- | Student's T-test is for assessing whether two samples have
--   different mean. This module contain several variations of
--   T-test. It's a parametric tests and assumes that samples are
--   normally distributed.
module Statistics.Test.StudentT
    (
      studentTTest
    , welchTTest
    , pairedTTest
    , module Statistics.Test.Types
    ) where

import Statistics.Distribution hiding (mean)
import Statistics.Distribution.StudentT
import Statistics.Sample (mean, varianceUnbiased)
import Statistics.Test.Types
import Statistics.Types    (mkPValue,PValue)
import Statistics.Function (square)
import qualified Data.Vector.Generic  as G
import qualified Data.Vector.Unboxed  as U
import qualified Data.Vector.Storable as S
import qualified Data.Vector          as V



-- | Two-sample Student's t-test. It assumes that both samples are
--   normally distributed and have same variance. Returns @Nothing@ if
--   sample sizes are not sufficient.
studentTTest :: (G.Vector v Double)
             => PositionTest  -- ^ one- or two-tailed test
             -> v Double      -- ^ Sample A
             -> v Double      -- ^ Sample B
             -> Maybe (Test StudentT)
studentTTest :: forall (v :: * -> *).
Vector v Double =>
PositionTest -> v Double -> v Double -> Maybe (Test StudentT)
studentTTest PositionTest
test v Double
sample1 v Double
sample2
  | forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample1 forall a. Ord a => a -> a -> Bool
< Int
2 Bool -> Bool -> Bool
|| forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample2 forall a. Ord a => a -> a -> Bool
< Int
2 = forall a. Maybe a
Nothing
  | Bool
otherwise                                    = forall a. a -> Maybe a
Just Test
      { testSignificance :: PValue Double
testSignificance = PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
ndf
      , testStatistics :: Double
testStatistics   = Double
t
      , testDistribution :: StudentT
testDistribution = Double -> StudentT
studentT Double
ndf
      }
  where
    (Double
t, Double
ndf) = forall (v :: * -> *).
Vector v Double =>
Bool -> v Double -> v Double -> (Double, Double)
tStatistics Bool
True v Double
sample1 v Double
sample2
{-# INLINABLE  studentTTest #-}
{-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}

-- | Two-sample Welch's t-test. It assumes that both samples are
--   normally distributed but doesn't assume that they have same
--   variance. Returns @Nothing@ if sample sizes are not sufficient.
welchTTest :: (G.Vector v Double)
           => PositionTest  -- ^ one- or two-tailed test
           -> v Double      -- ^ Sample A
           -> v Double      -- ^ Sample B
           -> Maybe (Test StudentT)
welchTTest :: forall (v :: * -> *).
Vector v Double =>
PositionTest -> v Double -> v Double -> Maybe (Test StudentT)
welchTTest PositionTest
test v Double
sample1 v Double
sample2
  | forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample1 forall a. Ord a => a -> a -> Bool
< Int
2 Bool -> Bool -> Bool
|| forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample2 forall a. Ord a => a -> a -> Bool
< Int
2 = forall a. Maybe a
Nothing
  | Bool
otherwise                                    = forall a. a -> Maybe a
Just Test
      { testSignificance :: PValue Double
testSignificance = PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
ndf
      , testStatistics :: Double
testStatistics   = Double
t
      , testDistribution :: StudentT
testDistribution = Double -> StudentT
studentT Double
ndf
      }
  where
    (Double
t, Double
ndf) = forall (v :: * -> *).
Vector v Double =>
Bool -> v Double -> v Double -> (Double, Double)
tStatistics Bool
False v Double
sample1 v Double
sample2
{-# INLINABLE  welchTTest #-}
{-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-}
{-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-}

-- | Paired two-sample t-test. Two samples are paired in a
-- within-subject design. Returns @Nothing@ if sample size is not
-- sufficient.
pairedTTest :: forall v. (G.Vector v (Double, Double), G.Vector v Double)
            => PositionTest          -- ^ one- or two-tailed test
            -> v (Double, Double)    -- ^ paired samples
            -> Maybe (Test StudentT)
pairedTTest :: forall (v :: * -> *).
(Vector v (Double, Double), Vector v Double) =>
PositionTest -> v (Double, Double) -> Maybe (Test StudentT)
pairedTTest PositionTest
test v (Double, Double)
sample
  | forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v (Double, Double)
sample forall a. Ord a => a -> a -> Bool
< Int
2 = forall a. Maybe a
Nothing
  | Bool
otherwise           = forall a. a -> Maybe a
Just Test
      { testSignificance :: PValue Double
testSignificance = PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
ndf
      , testStatistics :: Double
testStatistics   = Double
t
      , testDistribution :: StudentT
testDistribution = Double -> StudentT
studentT Double
ndf
      }
  where
    (Double
t, Double
ndf) = forall (v :: * -> *).
(Vector v (Double, Double), Vector v Double) =>
v (Double, Double) -> (Double, Double)
tStatisticsPaired v (Double, Double)
sample
{-# INLINABLE  pairedTTest #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-}
{-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-}


-------------------------------------------------------------------------------

significance :: PositionTest    -- ^ one- or two-tailed
             -> Double          -- ^ t statistics
             -> Double          -- ^ degree of freedom
             -> PValue Double   -- ^ p-value
significance :: PositionTest -> Double -> Double -> PValue Double
significance PositionTest
test Double
t Double
df =
  case PositionTest
test of
    -- Here we exploit symmetry of T-distribution and calculate small tail
    PositionTest
SamplesDiffer -> forall a. (Ord a, Num a) => a -> PValue a
mkPValue forall a b. (a -> b) -> a -> b
$ Double
2 forall a. Num a => a -> a -> a
* Double -> Double
tailArea (forall a. Num a => a -> a
negate (forall a. Num a => a -> a
abs Double
t))
    PositionTest
AGreater      -> forall a. (Ord a, Num a) => a -> PValue a
mkPValue forall a b. (a -> b) -> a -> b
$ Double -> Double
tailArea (forall a. Num a => a -> a
negate Double
t)
    PositionTest
BGreater      -> forall a. (Ord a, Num a) => a -> PValue a
mkPValue forall a b. (a -> b) -> a -> b
$ Double -> Double
tailArea  Double
t
  where
    tailArea :: Double -> Double
tailArea = forall d. Distribution d => d -> Double -> Double
cumulative (Double -> StudentT
studentT Double
df)


-- Calculate T statistics for two samples
tStatistics :: (G.Vector v Double)
            => Bool               -- variance equality
            -> v Double
            -> v Double
            -> (Double, Double)
{-# INLINE tStatistics #-}
tStatistics :: forall (v :: * -> *).
Vector v Double =>
Bool -> v Double -> v Double -> (Double, Double)
tStatistics Bool
varequal v Double
sample1 v Double
sample2 = (Double
t, Double
ndf)
  where
    -- t-statistics
    t :: Double
t = (Double
m1 forall a. Num a => a -> a -> a
- Double
m2) forall a. Fractional a => a -> a -> a
/ forall a. Floating a => a -> a
sqrt (
      if Bool
varequal
        then ((Double
n1 forall a. Num a => a -> a -> a
- Double
1) forall a. Num a => a -> a -> a
* Double
s1 forall a. Num a => a -> a -> a
+ (Double
n2 forall a. Num a => a -> a -> a
- Double
1) forall a. Num a => a -> a -> a
* Double
s2) forall a. Fractional a => a -> a -> a
/ (Double
n1 forall a. Num a => a -> a -> a
+ Double
n2 forall a. Num a => a -> a -> a
- Double
2) forall a. Num a => a -> a -> a
* (Double
1 forall a. Fractional a => a -> a -> a
/ Double
n1 forall a. Num a => a -> a -> a
+ Double
1 forall a. Fractional a => a -> a -> a
/ Double
n2)
        else Double
s1 forall a. Fractional a => a -> a -> a
/ Double
n1 forall a. Num a => a -> a -> a
+ Double
s2 forall a. Fractional a => a -> a -> a
/ Double
n2)

    -- degree of freedom
    ndf :: Double
ndf | Bool
varequal  = Double
n1 forall a. Num a => a -> a -> a
+ Double
n2 forall a. Num a => a -> a -> a
- Double
2
        | Bool
otherwise = Double -> Double
square (Double
s1 forall a. Fractional a => a -> a -> a
/ Double
n1 forall a. Num a => a -> a -> a
+ Double
s2 forall a. Fractional a => a -> a -> a
/ Double
n2)
                    forall a. Fractional a => a -> a -> a
/ (Double -> Double
square Double
s1 forall a. Fractional a => a -> a -> a
/ (Double -> Double
square Double
n1 forall a. Num a => a -> a -> a
* (Double
n1 forall a. Num a => a -> a -> a
- Double
1)) forall a. Num a => a -> a -> a
+ Double -> Double
square Double
s2 forall a. Fractional a => a -> a -> a
/ (Double -> Double
square Double
n2 forall a. Num a => a -> a -> a
* (Double
n2 forall a. Num a => a -> a -> a
- Double
1)))
    -- statistics of two samples
    n1 :: Double
n1 = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample1
    n2 :: Double
n2 = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
sample2
    m1 :: Double
m1 = forall (v :: * -> *). Vector v Double => v Double -> Double
mean v Double
sample1
    m2 :: Double
m2 = forall (v :: * -> *). Vector v Double => v Double -> Double
mean v Double
sample2
    s1 :: Double
s1 = forall (v :: * -> *). Vector v Double => v Double -> Double
varianceUnbiased v Double
sample1
    s2 :: Double
s2 = forall (v :: * -> *). Vector v Double => v Double -> Double
varianceUnbiased v Double
sample2


-- Calculate T-statistics for paired sample
tStatisticsPaired :: (G.Vector v (Double, Double), G.Vector v Double)
                  => v (Double, Double)
                  -> (Double, Double)
{-# INLINE tStatisticsPaired #-}
tStatisticsPaired :: forall (v :: * -> *).
(Vector v (Double, Double), Vector v Double) =>
v (Double, Double) -> (Double, Double)
tStatisticsPaired v (Double, Double)
sample = (Double
t, Double
ndf)
  where
    -- t-statistics
    t :: Double
t = let d :: v Double
d    = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (-)) v (Double, Double)
sample
            sumd :: Double
sumd = forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
G.sum v Double
d
        in Double
sumd forall a. Fractional a => a -> a -> a
/ forall a. Floating a => a -> a
sqrt ((Double
n forall a. Num a => a -> a -> a
* forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
G.sum (forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map Double -> Double
square v Double
d) forall a. Num a => a -> a -> a
- Double -> Double
square Double
sumd) forall a. Fractional a => a -> a -> a
/ Double
ndf)
    -- degree of freedom
    ndf :: Double
ndf = Double
n forall a. Num a => a -> a -> a
- Double
1
    n :: Double
n   = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v (Double, Double)
sample