{-# LANGUAGE BangPatterns, DeriveDataTypeable, DeriveGeneric,
    FlexibleContexts #-}
-- |
-- Module    : Statistics.Sample.Powers
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Very fast statistics over simple powers of a sample.  These can all
-- be computed efficiently in just a single pass over a sample, with
-- that pass subject to stream fusion.
--
-- The tradeoff is that some of these functions are less numerically
-- robust than their counterparts in the 'Statistics.Sample' module.
-- Where this is the case, the alternatives are noted.

module Statistics.Sample.Powers
    (
    -- * Types
      Powers

    -- * Constructor
    , powers

    -- * Descriptive functions
    , order
    , count
    , sum

    -- * Statistics of location
    , mean

    -- * Statistics of dispersion
    , variance
    , stdDev
    , varianceUnbiased

    -- * Functions over central moments
    , centralMoment
    , skewness
    , kurtosis

    -- * References
    -- $references
    ) where

import Control.Monad.ST
import Data.Aeson            (FromJSON, ToJSON)
import Data.Binary           (Binary(..))
import Data.Data             (Data, Typeable)
import Data.Vector.Binary    ()
import Data.Vector.Unboxed   ((!))
import GHC.Generics          (Generic)
import Numeric.SpecFunctions (choose)
import Prelude hiding (sum)
import Statistics.Function   (indexed)
import qualified Data.Vector          as V
import qualified Data.Vector.Generic  as G
import qualified Data.Vector.Storable as SV
import qualified Data.Vector.Unboxed  as U
import qualified Data.Vector.Unboxed.Mutable as MU
import qualified Statistics.Sample.Internal  as S

newtype Powers = Powers (U.Vector Double)
    deriving (Powers -> Powers -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Powers -> Powers -> Bool
$c/= :: Powers -> Powers -> Bool
== :: Powers -> Powers -> Bool
$c== :: Powers -> Powers -> Bool
Eq, ReadPrec [Powers]
ReadPrec Powers
Int -> ReadS Powers
ReadS [Powers]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Powers]
$creadListPrec :: ReadPrec [Powers]
readPrec :: ReadPrec Powers
$creadPrec :: ReadPrec Powers
readList :: ReadS [Powers]
$creadList :: ReadS [Powers]
readsPrec :: Int -> ReadS Powers
$creadsPrec :: Int -> ReadS Powers
Read, Int -> Powers -> ShowS
[Powers] -> ShowS
Powers -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Powers] -> ShowS
$cshowList :: [Powers] -> ShowS
show :: Powers -> String
$cshow :: Powers -> String
showsPrec :: Int -> Powers -> ShowS
$cshowsPrec :: Int -> Powers -> ShowS
Show, Typeable, Typeable Powers
Powers -> DataType
Powers -> Constr
(forall b. Data b => b -> b) -> Powers -> Powers
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> Powers -> u
forall u. (forall d. Data d => d -> u) -> Powers -> [u]
forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Powers -> r
forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Powers -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Powers
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Powers -> c Powers
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Powers)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Powers)
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Powers -> m Powers
gmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> Powers -> u
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> Powers -> u
gmapQ :: forall u. (forall d. Data d => d -> u) -> Powers -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> Powers -> [u]
gmapQr :: forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Powers -> r
$cgmapQr :: forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Powers -> r
gmapQl :: forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Powers -> r
$cgmapQl :: forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Powers -> r
gmapT :: (forall b. Data b => b -> b) -> Powers -> Powers
$cgmapT :: (forall b. Data b => b -> b) -> Powers -> Powers
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Powers)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Powers)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Powers)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Powers)
dataTypeOf :: Powers -> DataType
$cdataTypeOf :: Powers -> DataType
toConstr :: Powers -> Constr
$ctoConstr :: Powers -> Constr
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Powers
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Powers
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Powers -> c Powers
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Powers -> c Powers
Data, forall x. Rep Powers x -> Powers
forall x. Powers -> Rep Powers x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Powers x -> Powers
$cfrom :: forall x. Powers -> Rep Powers x
Generic)

instance FromJSON Powers
instance ToJSON Powers

instance Binary Powers where
    put :: Powers -> Put
put (Powers Vector Double
v) = forall t. Binary t => t -> Put
put Vector Double
v
    get :: Get Powers
get = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Vector Double -> Powers
Powers forall t. Binary t => Get t
get

-- | O(/n/) Collect the /n/ simple powers of a sample.
--
-- Functions computed over a sample's simple powers require at least a
-- certain number (or /order/) of powers to be collected.
--
-- * To compute the /k/th 'centralMoment', at least /k/ simple powers
--   must be collected.
--
-- * For the 'variance', at least 2 simple powers are needed.
--
-- * For 'skewness', we need at least 3 simple powers.
--
-- * For 'kurtosis', at least 4 simple powers are required.
--
-- This function is subject to stream fusion.
powers :: G.Vector v Double =>
          Int                   -- ^ /n/, the number of powers, where /n/ >= 2.
       -> v Double
       -> Powers
powers :: forall (v :: * -> *). Vector v Double => Int -> v Double -> Powers
powers Int
k v Double
sample
  | Int
k forall a. Ord a => a -> a -> Bool
< Int
2     = forall a. HasCallStack => String -> a
error String
"Statistics.Sample.powers: too few powers"
  | Bool
otherwise = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
      MVector (PrimState (ST s)) Double
acc <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
MU.replicate Int
l Double
0
      forall (m :: * -> *) (v :: * -> *) a b.
(Monad m, Vector v a) =>
v a -> (a -> m b) -> m ()
G.forM_ v Double
sample forall a b. (a -> b) -> a -> b
$ \Double
x ->
        let loop :: Int -> Double -> ST s ()
loop !Int
i !Double
xk
              | Int
i forall a. Eq a => a -> a -> Bool
== Int
l    = forall (m :: * -> *) a. Monad m => a -> m a
return ()
              | Bool
otherwise = do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MU.write MVector (PrimState (ST s)) Double
acc Int
i forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Num a => a -> a -> a
+ Double
xk) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
MU.read MVector (PrimState (ST s)) Double
acc Int
i
                               Int -> Double -> ST s ()
loop (Int
iforall a. Num a => a -> a -> a
+Int
1) (Double
xk forall a. Num a => a -> a -> a
* Double
x)
        in Int -> Double -> ST s ()
loop Int
0 Double
1
      forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Vector Double -> Powers
Powers forall a b. (a -> b) -> a -> b
$ forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze MVector (PrimState (ST s)) Double
acc
  where
    l :: Int
l = Int
k forall a. Num a => a -> a -> a
+ Int
1
{-# SPECIALIZE powers :: Int -> U.Vector  Double -> Powers #-}
{-# SPECIALIZE powers :: Int -> V.Vector  Double -> Powers #-}
{-# SPECIALIZE powers :: Int -> SV.Vector Double -> Powers #-}

-- | The order (number) of simple powers collected from a 'sample'.
order :: Powers -> Int
order :: Powers -> Int
order (Powers Vector Double
pa) = forall a. Unbox a => Vector a -> Int
U.length Vector Double
pa forall a. Num a => a -> a -> a
- Int
1

-- | Compute the /k/th central moment of a sample.  The central
-- moment is also known as the moment about the mean.
centralMoment :: Int -> Powers -> Double
centralMoment :: Int -> Powers -> Double
centralMoment Int
k p :: Powers
p@(Powers Vector Double
pa)
    | Int
k forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Int
k forall a. Ord a => a -> a -> Bool
> Powers -> Int
order Powers
p =
                  forall a. HasCallStack => String -> a
error (String
"Statistics.Sample.Powers.centralMoment: "
                         forall a. [a] -> [a] -> [a]
++ String
"invalid argument")
    | Int
k forall a. Eq a => a -> a -> Bool
== Int
0    = Double
1
    | Bool
otherwise = (forall a. Fractional a => a -> a -> a
/Double
n) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (v :: * -> *). Vector v Double => v Double -> Double
S.sum forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (Int, Double) -> Double
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (v :: * -> *) e.
(Vector v e, Vector v Int, Vector v (Int, e)) =>
v e -> v (Int, e)
indexed forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Int -> Vector a -> Vector a
U.take (Int
kforall a. Num a => a -> a -> a
+Int
1) forall a b. (a -> b) -> a -> b
$ Vector Double
pa
  where
    go :: (Int, Double) -> Double
go (Int
i , Double
e) = (Int
k Int -> Int -> Double
`choose` Int
i) forall a. Num a => a -> a -> a
* ((-Double
m) forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
kforall a. Num a => a -> a -> a
-Int
i)) forall a. Num a => a -> a -> a
* Double
e
    n :: Double
n = forall a. Unbox a => Vector a -> a
U.head Vector Double
pa
    m :: Double
m = Powers -> Double
mean Powers
p

-- | Maximum likelihood estimate of a sample's variance.  Also known
-- as the population variance, where the denominator is /n/.  This is
-- the second central moment of the sample.
--
-- This is less numerically robust than the variance function in the
-- 'Statistics.Sample' module, but the number is essentially free to
-- compute if you have already collected a sample's simple powers.
--
-- Requires 'Powers' with 'order' at least 2.
variance :: Powers -> Double
variance :: Powers -> Double
variance = Int -> Powers -> Double
centralMoment Int
2

-- | Standard deviation.  This is simply the square root of the
-- maximum likelihood estimate of the variance.
stdDev :: Powers -> Double
stdDev :: Powers -> Double
stdDev = forall a. Floating a => a -> a
sqrt forall b c a. (b -> c) -> (a -> b) -> a -> c
. Powers -> Double
variance

-- | Unbiased estimate of a sample's variance.  Also known as the
-- sample variance, where the denominator is /n/-1.
--
-- Requires 'Powers' with 'order' at least 2.
varianceUnbiased :: Powers -> Double
varianceUnbiased :: Powers -> Double
varianceUnbiased p :: Powers
p@(Powers Vector Double
pa)
    | Double
n forall a. Ord a => a -> a -> Bool
> Double
1     = Powers -> Double
variance Powers
p forall a. Num a => a -> a -> a
* Double
n forall a. Fractional a => a -> a -> a
/ (Double
nforall a. Num a => a -> a -> a
-Double
1)
    | Bool
otherwise = Double
0
  where n :: Double
n = forall a. Unbox a => Vector a -> a
U.head Vector Double
pa

-- | Compute the skewness of a sample. This is a measure of the
-- asymmetry of its distribution.
--
-- A sample with negative skew is said to be /left-skewed/.  Most of
-- its mass is on the right of the distribution, with the tail on the
-- left.
--
-- > skewness . powers 3 $ U.to [1,100,101,102,103]
-- > ==> -1.497681449918257
--
-- A sample with positive skew is said to be /right-skewed/.
--
-- > skewness . powers 3 $ U.to [1,2,3,4,100]
-- > ==> 1.4975367033335198
--
-- A sample's skewness is not defined if its 'variance' is zero.
--
-- Requires 'Powers' with 'order' at least 3.
skewness :: Powers -> Double
skewness :: Powers -> Double
skewness Powers
p = Int -> Powers -> Double
centralMoment Int
3 Powers
p forall a. Num a => a -> a -> a
* Powers -> Double
variance Powers
p forall a. Floating a => a -> a -> a
** (-Double
1.5)

-- | Compute the excess kurtosis of a sample.  This is a measure of
-- the \"peakedness\" of its distribution.  A high kurtosis indicates
-- that the sample's variance is due more to infrequent severe
-- deviations than to frequent modest deviations.
--
-- A sample's excess kurtosis is not defined if its 'variance' is
-- zero.
--
-- Requires 'Powers' with 'order' at least 4.
kurtosis :: Powers -> Double
kurtosis :: Powers -> Double
kurtosis Powers
p = Int -> Powers -> Double
centralMoment Int
4 Powers
p forall a. Fractional a => a -> a -> a
/ (Double
v forall a. Num a => a -> a -> a
* Double
v) forall a. Num a => a -> a -> a
- Double
3
    where v :: Double
v = Powers -> Double
variance Powers
p

-- | The number of elements in the original 'Sample'.  This is the
-- sample's zeroth simple power.
count :: Powers -> Int
count :: Powers -> Int
count (Powers Vector Double
pa) = forall a b. (RealFrac a, Integral b) => a -> b
floor forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Vector a -> a
U.head Vector Double
pa

-- | The sum of elements in the original 'Sample'.  This is the
-- sample's first simple power.
sum :: Powers -> Double
sum :: Powers -> Double
sum (Powers Vector Double
pa) = Vector Double
pa forall a. Unbox a => Vector a -> Int -> a
! Int
1

-- | The arithmetic mean of elements in the original 'Sample'.
--
-- This is less numerically robust than the mean function in the
-- 'Statistics.Sample' module, but the number is essentially free to
-- compute if you have already collected a sample's simple powers.
mean :: Powers -> Double
mean :: Powers -> Double
mean p :: Powers
p@(Powers Vector Double
pa)
    | Double
n forall a. Eq a => a -> a -> Bool
== Double
0    = Double
0
    | Bool
otherwise = Powers -> Double
sum Powers
p forall a. Fractional a => a -> a -> a
/ Double
n
    where n :: Double
n     = forall a. Unbox a => Vector a -> a
U.head Vector Double
pa

-- $references
--
-- * Besset, D.H. (2000) Elements of statistics.
--   /Object-oriented implementation of numerical methods/
--   ch. 9, pp. 311&#8211;331.
--   <http://www.elsevier.com/wps/product/cws_home/677916>
--
-- * Anderson, G. (2009) Compute /k/th central moments in one
--   pass. /quantblog/. <http://quantblog.wordpress.com/2009/02/07/compute-kth-central-moments-in-one-pass/>