module Statistics.Regression
(
olsRegress
, ols
, rSquare
, bootstrapRegress
) where
import Control.Concurrent.Async (forConcurrently)
import Control.DeepSeq (rnf)
import Control.Monad (when)
import Data.List (nub)
import GHC.Conc (getNumCapabilities)
import Prelude hiding (pred, sum)
import Statistics.Function as F
import Statistics.Matrix hiding (map)
import Statistics.Matrix.Algorithms (qr)
import Statistics.Resampling (splitGen)
import Statistics.Types (Estimate(..),ConfInt,CL,estimateFromInterval,significanceLevel)
import Statistics.Sample (mean)
import Statistics.Sample.Internal (sum)
import System.Random.MWC (GenIO, uniformR)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
olsRegress :: [Vector]
-> Vector
-> (Vector, Double)
olsRegress :: [Vector] -> Vector -> (Vector, Double)
olsRegress preds :: [Vector]
preds@(Vector
_:[Vector]
_) Vector
resps
| forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a. Eq a => a -> a -> Bool
/=Int
n) [Int]
ls = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"predictor vector length mismatch " forall a. [a] -> [a] -> [a]
++
forall a. Show a => a -> [Char]
show [Int]
lss
| forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector
resps forall a. Eq a => a -> a -> Bool
/= Int
n = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"responder/predictor length mismatch " forall a. [a] -> [a] -> [a]
++
forall a. Show a => a -> [Char]
show (forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector
resps, Int
n)
| Bool
otherwise = (Vector
coeffs, Matrix -> Vector -> Vector -> Double
rSquare Matrix
mxpreds Vector
resps Vector
coeffs)
where
coeffs :: Vector
coeffs = Matrix -> Vector -> Vector
ols Matrix
mxpreds Vector
resps
mxpreds :: Matrix
mxpreds = Matrix -> Matrix
transpose forall b c a. (b -> c) -> (a -> b) -> a -> c
.
Int -> Int -> Vector -> Matrix
fromVector (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
lss forall a. Num a => a -> a -> a
+ Int
1) Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall (v :: * -> *) a. Vector v a => [v a] -> v a
G.concat forall a b. (a -> b) -> a -> b
$ [Vector]
preds forall a. [a] -> [a] -> [a]
++ [forall (v :: * -> *) a. Vector v a => Int -> a -> v a
G.replicate Int
n Double
1]
lss :: [Int]
lss@(Int
n:[Int]
ls) = forall a b. (a -> b) -> [a] -> [b]
map forall (v :: * -> *) a. Vector v a => v a -> Int
G.length [Vector]
preds
olsRegress [Vector]
_ Vector
_ = forall a. HasCallStack => [Char] -> a
error [Char]
"no predictors given"
ols :: Matrix
-> Vector
-> Vector
ols :: Matrix -> Vector -> Vector
ols Matrix
a Vector
b
| Int
rs forall a. Ord a => a -> a -> Bool
< Int
cs = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"fewer rows than columns " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int, Int)
d
| Bool
otherwise = Matrix -> Vector -> Vector
solve Matrix
r (Matrix -> Matrix
transpose Matrix
q Matrix -> Vector -> Vector
`multiplyV` Vector
b)
where
d :: (Int, Int)
d@(Int
rs,Int
cs) = Matrix -> (Int, Int)
dimension Matrix
a
(Matrix
q,Matrix
r) = Matrix -> (Matrix, Matrix)
qr Matrix
a
solve :: Matrix
-> Vector
-> Vector
solve :: Matrix -> Vector -> Vector
solve Matrix
r Vector
b
| Int
n forall a. Eq a => a -> a -> Bool
/= Int
l = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"row/vector mismatch " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (Int
n,Int
l)
| Bool
otherwise = forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
U.create forall a b. (a -> b) -> a -> b
$ do
MVector s Double
s <- forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
Vector a -> m (MVector (PrimState m) a)
U.thaw Vector
b
forall (m :: * -> *).
Monad m =>
Int -> Int -> (Int -> m ()) -> m ()
rfor Int
n Int
0 forall a b. (a -> b) -> a -> b
$ \Int
i -> do
Double
si <- (forall a. Fractional a => a -> a -> a
/ Matrix -> Int -> Int -> Double
unsafeIndex Matrix
r Int
i Int
i) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
M.unsafeRead MVector s Double
s Int
i
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s Double
s Int
i Double
si
forall (m :: * -> *).
Monad m =>
Int -> Int -> (Int -> m ()) -> m ()
F.for Int
0 Int
i forall a b. (a -> b) -> a -> b
$ \Int
j -> forall s. MVector s Double -> Int -> (Double -> Double) -> ST s ()
F.unsafeModify MVector s Double
s Int
j forall a b. (a -> b) -> a -> b
$ forall a. Num a => a -> a -> a
subtract (Matrix -> Int -> Int -> Double
unsafeIndex Matrix
r Int
j Int
i forall a. Num a => a -> a -> a
* Double
si)
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Double
s
where n :: Int
n = Matrix -> Int
rows Matrix
r
l :: Int
l = forall a. Unbox a => Vector a -> Int
U.length Vector
b
rSquare :: Matrix
-> Vector
-> Vector
-> Double
rSquare :: Matrix -> Vector -> Vector -> Double
rSquare Matrix
pred Vector
resp Vector
coeff = Double
1 forall a. Num a => a -> a -> a
- Double
r forall a. Fractional a => a -> a -> a
/ Double
t
where
r :: Double
r = forall (v :: * -> *). Vector v Double => v Double -> Double
sum forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap Vector
resp forall a b. (a -> b) -> a -> b
$ \Int
i Double
x -> Double -> Double
square (Double
x forall a. Num a => a -> a -> a
- Int -> Double
p Int
i)
t :: Double
t = forall (v :: * -> *). Vector v Double => v Double -> Double
sum forall a b. (a -> b) -> a -> b
$ forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map Vector
resp forall a b. (a -> b) -> a -> b
$ \Double
x -> Double -> Double
square (Double
x forall a. Num a => a -> a -> a
- forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector
resp)
p :: Int -> Double
p Int
i = forall (v :: * -> *). Vector v Double => v Double -> Double
sum forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap Vector
coeff forall a b. (a -> b) -> a -> b
$ \Int
j -> (forall a. Num a => a -> a -> a
* Matrix -> Int -> Int -> Double
unsafeIndex Matrix
pred Int
i Int
j)
bootstrapRegress
:: GenIO
-> Int
-> CL Double
-> ([Vector] -> Vector -> (Vector, Double))
-> [Vector]
-> Vector
-> IO (V.Vector (Estimate ConfInt Double), Estimate ConfInt Double)
bootstrapRegress :: GenIO
-> Int
-> CL Double
-> ([Vector] -> Vector -> (Vector, Double))
-> [Vector]
-> Vector
-> IO (Vector (Estimate ConfInt Double), Estimate ConfInt Double)
bootstrapRegress GenIO
gen0 Int
numResamples CL Double
cl [Vector] -> Vector -> (Vector, Double)
rgrss [Vector]
preds0 Vector
resp0
| Int
numResamples forall a. Ord a => a -> a -> Bool
< Int
1 = forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"bootstrapRegress: number of resamples " forall a. [a] -> [a] -> [a]
++
[Char]
"must be positive"
| Bool
otherwise = do
case forall a. Eq a => [a] -> [a]
nub (forall a b. (a -> b) -> [a] -> [b]
map forall a. Unbox a => Vector a -> Int
U.length [Vector]
preds0) of
[] -> forall a. HasCallStack => [Char] -> a
error [Char]
"bootstrapRegress: predictor vectors must not be empty"
[Int
plen] -> do
let rlen :: Int
rlen = forall a. Unbox a => Vector a -> Int
U.length Vector
resp0
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
plen forall a. Eq a => a -> a -> Bool
/= Int
rlen) forall a b. (a -> b) -> a -> b
$
forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"bootstrapRegress: responder vector length ["
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
rlen
forall a. [a] -> [a] -> [a]
++ [Char]
"] must be the same as predictor vectors' length ["
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
plen forall a. [a] -> [a] -> [a]
++ [Char]
"]"
[Int]
xs -> forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"bootstrapRegress: all predictor vectors must be of the same \
\length, lengths provided are: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show [Int]
xs
Int
caps <- IO Int
getNumCapabilities
[Gen RealWorld]
gens <- Int -> GenIO -> IO [GenIO]
splitGen Int
caps GenIO
gen0
[Vector (Vector, Double)]
vs <- forall (t :: * -> *) a b.
Traversable t =>
t a -> (a -> IO b) -> IO (t b)
forConcurrently (forall a b. [a] -> [b] -> [(a, b)]
zip [Gen RealWorld]
gens (Int -> Int -> [Int]
balance Int
caps Int
numResamples)) forall a b. (a -> b) -> a -> b
$ \(Gen RealWorld
gen,Int
count) -> do
Vector (Vector, Double)
v <- forall (m :: * -> *) a. Monad m => Int -> m a -> m (Vector a)
V.replicateM Int
count forall a b. (a -> b) -> a -> b
$ do
let n :: Int
n = forall a. Unbox a => Vector a -> Int
U.length Vector
resp0
Vector Int
ixs <- forall (m :: * -> *) a.
(Monad m, Unbox a) =>
Int -> m a -> m (Vector a)
U.replicateM Int
n forall a b. (a -> b) -> a -> b
$ forall a (m :: * -> *).
(Variate a, PrimMonad m) =>
(a, a) -> Gen (PrimState m) -> m a
uniformR (Int
0,Int
nforall a. Num a => a -> a -> a
-Int
1) Gen RealWorld
gen
let resp :: Vector
resp = forall a. Unbox a => Vector a -> Vector Int -> Vector a
U.backpermute Vector
resp0 Vector Int
ixs
preds :: [Vector]
preds = forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Unbox a => Vector a -> Vector Int -> Vector a
U.backpermute Vector Int
ixs) [Vector]
preds0
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Vector] -> Vector -> (Vector, Double)
rgrss [Vector]
preds Vector
resp
forall a. NFData a => a -> ()
rnf Vector (Vector, Double)
v seq :: forall a b. a -> b -> b
`seq` forall (m :: * -> *) a. Monad m => a -> m a
return Vector (Vector, Double)
v
let (Vector Vector
coeffsv, Vector Double
r2v) = forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v (a, b) -> (v a, v b)
G.unzip (forall a. [Vector a] -> Vector a
V.concat [Vector (Vector, Double)]
vs)
let coeffs :: Vector (Estimate ConfInt Double)
coeffs = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(Int -> a -> b) -> v a -> v b
G.imap (forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector
coeffss) forall a b. (a -> b) -> a -> b
$ \Int
i Double
x ->
Double -> Vector -> Estimate ConfInt Double
est Double
x forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Int -> (Int -> a) -> Vector a
U.generate Int
numResamples forall a b. (a -> b) -> a -> b
$ \Int
k -> (Vector Vector
coeffsv forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
k) forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
i
r2 :: Estimate ConfInt Double
r2 = Double -> Vector -> Estimate ConfInt Double
est Double
r2s (forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
r2v)
(Vector
coeffss, Double
r2s) = [Vector] -> Vector -> (Vector, Double)
rgrss [Vector]
preds0 Vector
resp0
est :: Double -> Vector -> Estimate ConfInt Double
est Double
s Vector
v = forall a. Num a => a -> (a, a) -> CL Double -> Estimate ConfInt a
estimateFromInterval Double
s (Vector
w forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
lo, Vector
w forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
G.! Int
hi) CL Double
cl
where w :: Vector
w = Vector -> Vector
F.sort Vector
v
bounded :: Int -> Int
bounded Int
i = forall a. Ord a => a -> a -> a
min (forall a. Unbox a => Vector a -> Int
U.length Vector
w forall a. Num a => a -> a -> a
- Int
1) (forall a. Ord a => a -> a -> a
max Int
0 Int
i)
lo :: Int
lo = Int -> Int
bounded forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
round Double
c
hi :: Int
hi = Int -> Int
bounded forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
truncate (Double
n forall a. Num a => a -> a -> a
- Double
c)
n :: Double
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numResamples
c :: Double
c = Double
n forall a. Num a => a -> a -> a
* (forall a. CL a -> a
significanceLevel CL Double
cl forall a. Fractional a => a -> a -> a
/ Double
2)
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector (Estimate ConfInt Double)
coeffs, Estimate ConfInt Double
r2)
balance :: Int -> Int -> [Int]
balance :: Int -> Int -> [Int]
balance Int
numSlices Int
numItems = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall a. Num a => a -> a -> a
(+) (forall a. Int -> a -> [a]
replicate Int
numSlices Int
q)
(forall a. Int -> a -> [a]
replicate Int
r Int
1 forall a. [a] -> [a] -> [a]
++ forall a. a -> [a]
repeat Int
0)
where (Int
q,Int
r) = Int
numItems forall a. Integral a => a -> a -> (a, a)
`quotRem` Int
numSlices