-- |
-- Module    : Statistics.Regression
-- Copyright : 2014 Bryan O'Sullivan
-- License   : BSD3
--
-- Functions for regression analysis.

module Statistics.Regression
    (
      olsRegress
    , ols
    , rSquare
    , bootstrapRegress
    ) where

import Control.Concurrent.Async (forConcurrently)
import Control.DeepSeq (rnf)
import Control.Monad (when)
import Data.List (nub)
import GHC.Conc (getNumCapabilities)
import Prelude hiding (pred, sum)
import Statistics.Function as F
import Statistics.Matrix hiding (map)
import Statistics.Matrix.Algorithms (qr)
import Statistics.Resampling (splitGen)
import Statistics.Types      (Estimate(..),ConfInt,CL,estimateFromInterval,significanceLevel)
import Statistics.Sample (mean)
import Statistics.Sample.Internal (sum)
import System.Random.MWC (GenIO, uniformR)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M

-- | Perform an ordinary least-squares regression on a set of
-- predictors, and calculate the goodness-of-fit of the regression.
--
-- The returned pair consists of:
--
-- * A vector of regression coefficients.  This vector has /one more/
--   element than the list of predictors; the last element is the
--   /y/-intercept value.
--
-- * /R²/, the coefficient of determination (see 'rSquare' for
--   details).
olsRegress :: [Vector]
              -- ^ Non-empty list of predictor vectors.  Must all have
              -- the same length.  These will become the columns of
              -- the matrix /A/ solved by 'ols'.
           -> Vector
              -- ^ Responder vector.  Must have the same length as the
              -- predictor vectors.
           -> (Vector, Double)
olsRegress :: [Vector] -> Vector -> (Vector, Double)
olsRegress preds :: [Vector]
preds@(Vector
_:[Vector]
_) Vector
resps
  | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a. Eq a => a -> a -> Bool
/=Int
n) [Int]
ls        = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"predictor vector length mismatch " forall a. [a] -> [a] -> [a]
++
                                  forall a. Show a => a -> [Char]
show [Int]
lss
  | forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector
resps forall a. Eq a => a -> a -> Bool
/= Int
n = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"responder/predictor length mismatch " forall a. [a] -> [a] -> [a]
++
                                  forall a. Show a => a -> [Char]
show (forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector
resps, Int
n)
  | Bool
otherwise           = (Vector
coeffs, Matrix -> Vector -> Vector -> Double
rSquare Matrix
mxpreds Vector
resps Vector
coeffs)
  where
    coeffs :: Vector
coeffs    = Matrix -> Vector -> Vector
ols Matrix
mxpreds Vector
resps
    mxpreds :: Matrix
mxpreds   = Matrix -> Matrix
transpose forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                Int -> Int -> Vector -> Matrix
fromVector (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
lss forall a. Num a => a -> a -> a
+ Int
1) Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                forall (v :: * -> *) a. Vector v a => [v a] -> v a
G.concat forall a b. (a -> b) -> a -> b
$ [Vector]
preds forall a. [a] -> [a] -> [a]
++ [forall (v :: * -> *) a. Vector v a => Int -> a -> v a
G.replicate Int
n Double
1]
    lss :: [Int]
lss@(Int
n:[Int]
ls) = forall a b. (a -> b) -> [a] -> [b]
map forall (v :: * -> *) a. Vector v a => v a -> Int
G.length [Vector]
preds
olsRegress [Vector]
_ Vector
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"no predictors given"

-- | Compute the ordinary least-squares solution to /A x = b/.
ols :: Matrix     -- ^ /A/ has at least as many rows as columns.
    -> Vector     -- ^ /b/ has the same length as columns in /A/.
    -> Vector
ols :: Matrix -> Vector -> Vector
ols Matrix
a Vector
b
  | Int
rs forall a. Ord a => a -> a -> Bool
< Int
cs   = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"fewer rows than columns " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int, Int)
d
  | Bool
otherwise = Matrix -> Vector -> Vector
solve Matrix
r (Matrix -> Matrix
transpose Matrix
q Matrix -> Vector -> Vector
`multiplyV` Vector
b)
  where
    d :: (Int, Int)
d@(Int
rs,Int
cs) = Matrix -> (Int, Int)
dimension Matrix
a
    (Matrix
q,Matrix
r)     = Matrix -> (Matrix, Matrix)
qr Matrix
a

-- | Solve the equation /R x = b/.
solve :: Matrix     -- ^ /R/ is an upper-triangular square matrix.
      -> Vector     -- ^ /b/ is of the same length as rows\/columns in /R/.
      -> Vector
solve :: Matrix -> Vector -> Vector
solve Matrix
r Vector
b
  | Int
n forall a. Eq a => a -> a -> Bool
/= Int
l    = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"row/vector mismatch " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int
n,Int
l)
  | Bool
otherwise = forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
U.create forall a b. (a -> b) -> a -> b
$ do
  MVector s Double
s <- forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
Vector a -> m (MVector (PrimState m) a)
U.thaw Vector
b
  forall (m :: * -> *).
Monad m =>
Int -> Int -> (Int -> m ()) -> m ()
rfor Int
n Int
0 forall a b. (a -> b) -> a -> b
$ \Int
i -> do
    Double
si <- (forall a. Fractional a => a -> a -> a
/ Matrix -> Int -> Int -> Double
unsafeIndex Matrix
r Int
i Int
i) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
M.unsafeRead MVector s Double
s Int
i
    forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s Double
s Int
i Double
si
    forall (m :: * -> *).
Monad m =>
Int -> Int -> (Int -> m ()) -> m ()
F.for Int
0 Int
i forall a b. (a -> b) -> a -> b
$ \Int
j -> forall s. MVector s Double -> Int -> (Double -> Double) -> ST s ()
F.unsafeModify MVector s Double
s Int
j forall a b. (a -> b) -> a -> b
$ forall a. Num a => a -> a -> a
subtract (Matrix -> Int -> Int -> Double
unsafeIndex Matrix
r Int
j Int
i forall a. Num a => a -> a -> a
* Double
si)
  forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Double
s
  where n :: Int
n = Matrix -> Int
rows Matrix
r
        l :: Int
l = forall a. Unbox a => Vector a -> Int
U.length Vector
b

-- | Compute /R&#0178;/, the coefficient of determination that
-- indicates goodness-of-fit of a regression.
--
-- This value will be 1 if the predictors fit perfectly, dropping to 0
-- if they have no explanatory power.
rSquare :: Matrix               -- ^ Predictors (regressors).
        -> Vector               -- ^ Responders.
        -> Vector               -- ^ Regression coefficients.
        -> Double
rSquare :: Matrix -> Vector -> Vector -> Double
rSquare Matrix
pred Vector
resp Vector
coeff = Double
1 forall a. Num a => a -> a -> a
- Double
r forall a. Fractional a => a -> a -> a
/ Double
t
  where
    r :: Double
r   = forall (v :: * -> *). Vector v Double => v Double -> Double
sum forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap Vector
resp forall a b. (a -> b) -> a -> b
$ \Int
i Double
x -> Double -> Double
square (Double
x forall a. Num a => a -> a -> a
- Int -> Double
p Int
i)
    t :: Double
t   = forall (v :: * -> *). Vector v Double => v Double -> Double
sum forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map Vector
resp forall a b. (a -> b) -> a -> b
$ \Double
x -> Double -> Double
square (Double
x forall a. Num a => a -> a -> a
- forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector
resp)
    p :: Int -> Double
p Int
i = forall (v :: * -> *). Vector v Double => v Double -> Double
sum forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap Vector
coeff forall a b. (a -> b) -> a -> b
$ \Int
j -> (forall a. Num a => a -> a -> a
* Matrix -> Int -> Int -> Double
unsafeIndex Matrix
pred Int
i Int
j)

-- | Bootstrap a regression function.  Returns both the results of the
-- regression and the requested confidence interval values.
bootstrapRegress
  :: GenIO
  -> Int         -- ^ Number of resamples to compute.
  -> CL Double   -- ^ Confidence level.
  -> ([Vector] -> Vector -> (Vector, Double))
     -- ^ Regression function.
  -> [Vector]    -- ^ Predictor vectors.
  -> Vector      -- ^ Responder vector.
  -> IO (V.Vector (Estimate ConfInt Double), Estimate ConfInt Double)
bootstrapRegress :: GenIO
-> Int
-> CL Double
-> ([Vector] -> Vector -> (Vector, Double))
-> [Vector]
-> Vector
-> IO (Vector (Estimate ConfInt Double), Estimate ConfInt Double)
bootstrapRegress GenIO
gen0 Int
numResamples CL Double
cl [Vector] -> Vector -> (Vector, Double)
rgrss [Vector]
preds0 Vector
resp0
  | Int
numResamples forall a. Ord a => a -> a -> Bool
< Int
1   = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"bootstrapRegress: number of resamples " forall a. [a] -> [a] -> [a]
++
                                 [Char]
"must be positive"
  | Bool
otherwise = do

  -- some error checks so that we do not run into vector index out of bounds.
  case forall a. Eq a => [a] -> [a]
nub (forall a b. (a -> b) -> [a] -> [b]
map forall a. Unbox a => Vector a -> Int
U.length [Vector]
preds0) of
    [] -> forall a. HasCallStack => [Char] -> a
error [Char]
"bootstrapRegress: predictor vectors must not be empty"
    [Int
plen] -> do
        let rlen :: Int
rlen = forall a. Unbox a => Vector a -> Int
U.length Vector
resp0
        forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
plen forall a. Eq a => a -> a -> Bool
/= Int
rlen) forall a b. (a -> b) -> a -> b
$
            forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"bootstrapRegress: responder vector length ["
                forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
rlen
                forall a. [a] -> [a] -> [a]
++ [Char]
"] must be the same as predictor vectors' length ["
                forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
plen forall a. [a] -> [a] -> [a]
++ [Char]
"]"
    [Int]
xs -> forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"bootstrapRegress: all predictor vectors must be of the same \
        \length, lengths provided are: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show [Int]
xs

  Int
caps <- IO Int
getNumCapabilities
  [Gen RealWorld]
gens <- Int -> GenIO -> IO [GenIO]
splitGen Int
caps GenIO
gen0
  [Vector (Vector, Double)]
vs <- forall (t :: * -> *) a b.
Traversable t =>
t a -> (a -> IO b) -> IO (t b)
forConcurrently (forall a b. [a] -> [b] -> [(a, b)]
zip [Gen RealWorld]
gens (Int -> Int -> [Int]
balance Int
caps Int
numResamples)) forall a b. (a -> b) -> a -> b
$ \(Gen RealWorld
gen,Int
count) -> do
      Vector (Vector, Double)
v <- forall (m :: * -> *) a. Monad m => Int -> m a -> m (Vector a)
V.replicateM Int
count forall a b. (a -> b) -> a -> b
$ do
           let n :: Int
n = forall a. Unbox a => Vector a -> Int
U.length Vector
resp0
           Vector Int
ixs <- forall (m :: * -> *) a.
(Monad m, Unbox a) =>
Int -> m a -> m (Vector a)
U.replicateM Int
n forall a b. (a -> b) -> a -> b
$ forall a (m :: * -> *).
(Variate a, PrimMonad m) =>
(a, a) -> Gen (PrimState m) -> m a
uniformR (Int
0,Int
nforall a. Num a => a -> a -> a
-Int
1) Gen RealWorld
gen
           let resp :: Vector
resp  = forall a. Unbox a => Vector a -> Vector Int -> Vector a
U.backpermute Vector
resp0 Vector Int
ixs
               preds :: [Vector]
preds = forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Unbox a => Vector a -> Vector Int -> Vector a
U.backpermute Vector Int
ixs) [Vector]
preds0
           forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Vector] -> Vector -> (Vector, Double)
rgrss [Vector]
preds Vector
resp
      forall a. NFData a => a -> ()
rnf Vector (Vector, Double)
v seq :: forall a b. a -> b -> b
`seq` forall (m :: * -> *) a. Monad m => a -> m a
return Vector (Vector, Double)
v
  let (Vector Vector
coeffsv, Vector Double
r2v) = forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v (a, b) -> (v a, v b)
G.unzip (forall a. [Vector a] -> Vector a
V.concat [Vector (Vector, Double)]
vs)
  let coeffs :: Vector (Estimate ConfInt Double)
coeffs  = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(Int -> a -> b) -> v a -> v b
G.imap (forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector
coeffss) forall a b. (a -> b) -> a -> b
$ \Int
i Double
x ->
                Double -> Vector -> Estimate ConfInt Double
est Double
x forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Int -> (Int -> a) -> Vector a
U.generate Int
numResamples forall a b. (a -> b) -> a -> b
$ \Int
k -> (Vector Vector
coeffsv forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
k) forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
i
      r2 :: Estimate ConfInt Double
r2      = Double -> Vector -> Estimate ConfInt Double
est Double
r2s (forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
r2v)
      (Vector
coeffss, Double
r2s) = [Vector] -> Vector -> (Vector, Double)
rgrss [Vector]
preds0 Vector
resp0
      est :: Double -> Vector -> Estimate ConfInt Double
est Double
s Vector
v = forall a. Num a => a -> (a, a) -> CL Double -> Estimate ConfInt a
estimateFromInterval Double
s (Vector
w forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
lo, Vector
w forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
hi) CL Double
cl
        where w :: Vector
w  = Vector -> Vector
F.sort Vector
v
              bounded :: Int -> Int
bounded Int
i = forall a. Ord a => a -> a -> a
min (forall a. Unbox a => Vector a -> Int
U.length Vector
w forall a. Num a => a -> a -> a
- Int
1) (forall a. Ord a => a -> a -> a
max Int
0 Int
i)
              lo :: Int
lo = Int -> Int
bounded forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
round Double
c
              hi :: Int
hi = Int -> Int
bounded forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
truncate (Double
n forall a. Num a => a -> a -> a
- Double
c)
              n :: Double
n  = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numResamples
              c :: Double
c  = Double
n forall a. Num a => a -> a -> a
* (forall a. CL a -> a
significanceLevel CL Double
cl forall a. Fractional a => a -> a -> a
/ Double
2)
  forall (m :: * -> *) a. Monad m => a -> m a
return (Vector (Estimate ConfInt Double)
coeffs, Estimate ConfInt Double
r2)

-- | Balance units of work across workers.
balance :: Int -> Int -> [Int]
balance :: Int -> Int -> [Int]
balance Int
numSlices Int
numItems = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall a. Num a => a -> a -> a
(+) (forall a. Int -> a -> [a]
replicate Int
numSlices Int
q)
                                         (forall a. Int -> a -> [a]
replicate Int
r Int
1 forall a. [a] -> [a] -> [a]
++ forall a. a -> [a]
repeat Int
0)
 where (Int
q,Int
r) = Int
numItems forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
numSlices