{-# LANGUAGE BangPatterns, FlexibleContexts, UnboxedTuples #-}
-- |
-- Module    : Statistics.Sample.KernelDensity
-- Copyright : (c) 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Kernel density estimation.  This module provides a fast, robust,
-- non-parametric way to estimate the probability density function of
-- a sample.
--
-- This estimator does not use the commonly employed \"Gaussian rule
-- of thumb\".  As a result, it outperforms many plug-in methods on
-- multimodal samples with widely separated modes.

module Statistics.Sample.KernelDensity
    (
    -- * Estimation functions
      kde
    , kde_
    -- * References
    -- $references
    ) where

import Data.Default.Class
import Numeric.MathFunctions.Constants (m_sqrt_2_pi)
import Numeric.RootFinding             (fromRoot, ridders, RiddersParam(..), Tolerance(..))
import Prelude hiding (const, min, max, sum)
import Statistics.Function (minMax, nextHighestPowerOfTwo)
import Statistics.Sample.Histogram (histogram_)
import Statistics.Sample.Internal (sum)
import Statistics.Transform (CD, dct, idct)
import qualified Data.Vector.Generic  as G
import qualified Data.Vector.Unboxed  as U
import qualified Data.Vector          as V


-- | Gaussian kernel density estimator for one-dimensional data, using
-- the method of Botev et al.
--
-- The result is a pair of vectors, containing:
--
-- * The coordinates of each mesh point.  The mesh interval is chosen
--   to be 20% larger than the range of the sample.  (To specify the
--   mesh interval, use 'kde_'.)
--
-- * Density estimates at each mesh point.
kde :: (G.Vector v CD, G.Vector v Double, G.Vector v Int)
    => Int
    -- ^ The number of mesh points to use in the uniform discretization
    -- of the interval @(min,max)@.  If this value is not a power of
    -- two, then it is rounded up to the next power of two.
    -> v Double -> (v Double, v Double)
kde :: forall (v :: * -> *).
(Vector v CD, Vector v Double, Vector v Int) =>
Int -> v Double -> (v Double, v Double)
kde Int
n0 v Double
xs = forall (v :: * -> *).
(Vector v CD, Vector v Double, Vector v Int) =>
Int -> Double -> Double -> v Double -> (v Double, v Double)
kde_ Int
n0 (Double
lo forall a. Num a => a -> a -> a
- Double
range forall a. Fractional a => a -> a -> a
/ Double
10) (Double
hi forall a. Num a => a -> a -> a
+ Double
range forall a. Fractional a => a -> a -> a
/ Double
10) v Double
xs
  where
    (Double
lo,Double
hi) = forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
minMax v Double
xs
    range :: Double
range   | forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs forall a. Ord a => a -> a -> Bool
<= Int
1 = Double
1       -- Unreasonable guess
            | Double
lo forall a. Eq a => a -> a -> Bool
== Double
hi         = Double
1       -- All elements are equal
            | Bool
otherwise        = Double
hi forall a. Num a => a -> a -> a
- Double
lo
{-# INLINABLE  kde #-}
{-# SPECIAlIZE kde :: Int -> U.Vector Double -> (U.Vector Double, U.Vector Double) #-}
{-# SPECIAlIZE kde :: Int -> V.Vector Double -> (V.Vector Double, V.Vector Double) #-}


-- | Gaussian kernel density estimator for one-dimensional data, using
-- the method of Botev et al.
--
-- The result is a pair of vectors, containing:
--
-- * The coordinates of each mesh point.
--
-- * Density estimates at each mesh point.
kde_ :: (G.Vector v CD, G.Vector v Double, G.Vector v Int)
     => Int
     -- ^ The number of mesh points to use in the uniform discretization
     -- of the interval @(min,max)@.  If this value is not a power of
     -- two, then it is rounded up to the next power of two.
     -> Double
     -- ^ Lower bound (@min@) of the mesh range.
     -> Double
     -- ^ Upper bound (@max@) of the mesh range.
     -> v Double
     -> (v Double, v Double)
kde_ :: forall (v :: * -> *).
(Vector v CD, Vector v Double, Vector v Int) =>
Int -> Double -> Double -> v Double -> (v Double, v Double)
kde_ Int
n0 Double
min Double
max v Double
xs
  | forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Double
xs = forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.KernelDensity.kde: empty sample"
  | Int
n0 forall a. Ord a => a -> a -> Bool
<= Int
1   = forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.KernelDensity.kde: invalid number of points"
  | Bool
otherwise = (v Double
mesh, v Double
density)
  where
    mesh :: v Double
mesh = forall (v :: * -> *) a. Vector v a => Int -> (Int -> a) -> v a
G.generate Int
ni forall a b. (a -> b) -> a -> b
$ \Int
z -> Double
min forall a. Num a => a -> a -> a
+ (Double
d forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
z)
        where d :: Double
d = Double
r forall a. Fractional a => a -> a -> a
/ (Double
nforall a. Num a => a -> a -> a
-Double
1)
    density :: v Double
density = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall a. Fractional a => a -> a -> a
/(Double
2 forall a. Num a => a -> a -> a
* Double
r)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (v :: * -> *).
(Vector v CD, Vector v Double) =>
v Double -> v Double
idct forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a b c.
(Vector v a, Vector v b, Vector v c) =>
(a -> b -> c) -> v a -> v b -> v c
G.zipWith Double -> Double -> Double
f v Double
a (forall (v :: * -> *) a. (Vector v a, Enum a) => a -> a -> v a
G.enumFromTo Double
0 (Double
nforall a. Num a => a -> a -> a
-Double
1))
      where f :: Double -> Double -> Double
f Double
b Double
z = Double
b forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a
exp (forall {a}. Num a => a -> a
sqr Double
z forall a. Num a => a -> a -> a
* forall {a}. Num a => a -> a
sqr forall a. Floating a => a
pi forall a. Num a => a -> a -> a
* Double
t_star forall a. Num a => a -> a -> a
* (-Double
0.5))
    !n :: Double
n  = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
ni
    !ni :: Int
ni = Int -> Int
nextHighestPowerOfTwo Int
n0
    !r :: Double
r  = Double
max forall a. Num a => a -> a -> a
- Double
min
    a :: v Double
a   = forall (v :: * -> *).
(Vector v CD, Vector v Double, Vector v Int) =>
v Double -> v Double
dct forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall a. Fractional a => a -> a -> a
/ forall (v :: * -> *). Vector v Double => v Double -> Double
sum v Double
h) forall a b. (a -> b) -> a -> b
$ v Double
h
        where h :: v Double
h = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall a. Fractional a => a -> a -> a
/ Double
len) forall a b. (a -> b) -> a -> b
$ forall b a (v0 :: * -> *) (v1 :: * -> *).
(Num b, RealFrac a, Vector v0 a, Vector v1 b) =>
Int -> a -> a -> v0 a -> v1 b
histogram_ Int
ni Double
min Double
max v Double
xs
    !len :: Double
len    = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs)
    !t_star :: Double
t_star = forall a. a -> Root a -> a
fromRoot (Double
0.28 forall a. Num a => a -> a -> a
* Double
len forall a. Floating a => a -> a -> a
** (-Double
0.4)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. RiddersParam
-> (Double, Double) -> (Double -> Double) -> Root Double
ridders forall a. Default a => a
def{ riddersTol :: Tolerance
riddersTol = Double -> Tolerance
AbsTol Double
1e-14 } (Double
0,Double
0.1)
            forall a b. (a -> b) -> a -> b
$ \Double
x -> Double
x forall a. Num a => a -> a -> a
- (Double
len forall a. Num a => a -> a -> a
* (Double
2 forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a
sqrt forall a. Floating a => a
pi) forall a. Num a => a -> a -> a
* Double -> Double -> Double
go Double
6 (Double -> Double -> Double
f Double
7 Double
x)) forall a. Floating a => a -> a -> a
** (-Double
0.4)
      where
        f :: Double -> Double -> Double
f Double
q Double
t = Double
2 forall a. Num a => a -> a -> a
* forall a. Floating a => a
pi forall a. Floating a => a -> a -> a
** (Double
qforall a. Num a => a -> a -> a
*Double
2) forall a. Num a => a -> a -> a
* forall (v :: * -> *). Vector v Double => v Double -> Double
sum (forall (v :: * -> *) a b c.
(Vector v a, Vector v b, Vector v c) =>
(a -> b -> c) -> v a -> v b -> v c
G.zipWith Double -> Double -> Double
g v Double
iv v Double
a2v)
          where g :: Double -> Double -> Double
g Double
i Double
a2 = Double
i forall a. Floating a => a -> a -> a
** Double
q forall a. Num a => a -> a -> a
* Double
a2 forall a. Num a => a -> a -> a
* forall a. Floating a => a -> a
exp ((-Double
i) forall a. Num a => a -> a -> a
* forall {a}. Num a => a -> a
sqr forall a. Floating a => a
pi forall a. Num a => a -> a -> a
* Double
t)
                a2v :: v Double
a2v = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (forall {a}. Num a => a -> a
sqr forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Num a => a -> a -> a
*Double
0.5)) forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a. Vector v a => v a -> v a
G.tail v Double
a
                iv :: v Double
iv = forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map forall {a}. Num a => a -> a
sqr forall a b. (a -> b) -> a -> b
$ forall (v :: * -> *) a. (Vector v a, Enum a) => a -> a -> v a
G.enumFromTo Double
1 (Double
nforall a. Num a => a -> a -> a
-Double
1)
        go :: Double -> Double -> Double
go Double
s !Double
h | Double
s forall a. Eq a => a -> a -> Bool
== Double
1    = Double
h
                | Bool
otherwise = Double -> Double -> Double
go (Double
sforall a. Num a => a -> a -> a
-Double
1) (Double -> Double -> Double
f Double
s Double
time)
          where time :: Double
time  = (Double
2 forall a. Num a => a -> a -> a
* Double
const forall a. Num a => a -> a -> a
* Double
k0 forall a. Fractional a => a -> a -> a
/ Double
len forall a. Fractional a => a -> a -> a
/ Double
h) forall a. Floating a => a -> a -> a
** (Double
2 forall a. Fractional a => a -> a -> a
/ (Double
3 forall a. Num a => a -> a -> a
+ Double
2 forall a. Num a => a -> a -> a
* Double
s))
                const :: Double
const = (Double
1 forall a. Num a => a -> a -> a
+ Double
0.5 forall a. Floating a => a -> a -> a
** (Double
sforall a. Num a => a -> a -> a
+Double
0.5)) forall a. Fractional a => a -> a -> a
/ Double
3
                k0 :: Double
k0    = forall a. (Unbox a, Num a) => Vector a -> a
U.product (forall (v :: * -> *) a. (Vector v a, Enum a) => a -> a -> a -> v a
G.enumFromThenTo Double
1 Double
3 (Double
2forall a. Num a => a -> a -> a
*Double
sforall a. Num a => a -> a -> a
-Double
1)) forall a. Fractional a => a -> a -> a
/ Double
m_sqrt_2_pi
    sqr :: a -> a
sqr a
x = a
x forall a. Num a => a -> a -> a
* a
x
{-# INLINABLE  kde_ #-}
{-# SPECIAlIZE kde_ :: Int -> Double -> Double -> U.Vector Double -> (U.Vector Double, U.Vector Double) #-}
{-# SPECIAlIZE kde_ :: Int -> Double -> Double -> V.Vector Double -> (V.Vector Double, V.Vector Double) #-}


-- $references
--
-- Botev. Z.I., Grotowski J.F., Kroese D.P. (2010). Kernel density
-- estimation via diffusion. /Annals of Statistics/
-- 38(5):2916&#8211;2957. <http://arxiv.org/pdf/1011.2602>