{-# LANGUAGE FlexibleContexts, BangPatterns, ScopedTypeVariables #-}

-- |
-- Module    : Statistics.Sample.Histogram
-- Copyright : (c) 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Functions for computing histograms of sample data.

module Statistics.Sample.Histogram
    (
      histogram
    -- * Building blocks
    , histogram_
    , range
    ) where

import Control.Monad.ST
import Numeric.MathFunctions.Constants (m_epsilon,m_tiny)
import Statistics.Function (minMax)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as GM

-- | /O(n)/ Compute a histogram over a data set.
--
-- The result consists of a pair of vectors:
--
-- * The lower bound of each interval.
--
-- * The number of samples within the interval.
--
-- Interval (bin) sizes are uniform, and the upper and lower bounds
-- are chosen automatically using the 'range' function.  To specify
-- these parameters directly, use the 'histogram_' function.
histogram :: (G.Vector v0 Double, G.Vector v1 Double, Num b, G.Vector v1 b) =>
             Int                -- ^ Number of bins (must be positive).
          -> v0 Double          -- ^ Sample data (cannot be empty).
          -> (v1 Double, v1 b)
histogram :: forall (v0 :: * -> *) (v1 :: * -> *) b.
(Vector v0 Double, Vector v1 Double, Num b, Vector v1 b) =>
Int -> v0 Double -> (v1 Double, v1 b)
histogram Int
numBins v0 Double
xs = (forall (v :: * -> *) a. Vector v a => Int -> (Int -> a) -> v a
G.generate Int
numBins forall {a}. Integral a => a -> Double
step, forall b a (v0 :: * -> *) (v1 :: * -> *).
(Num b, RealFrac a, Vector v0 a, Vector v1 b) =>
Int -> a -> a -> v0 a -> v1 b
histogram_ Int
numBins Double
lo Double
hi v0 Double
xs)
    where (Double
lo,Double
hi)    = forall (v :: * -> *).
Vector v Double =>
Int -> v Double -> (Double, Double)
range Int
numBins v0 Double
xs
          step :: a -> Double
step a
i     = Double
lo forall a. Num a => a -> a -> a
+ Double
d forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral a
i
          d :: Double
d          = (Double
hi forall a. Num a => a -> a -> a
- Double
lo) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numBins
{-# INLINE histogram #-}

-- | /O(n)/ Compute a histogram over a data set.
--
-- Interval (bin) sizes are uniform, based on the supplied upper
-- and lower bounds.
histogram_ :: forall b a v0 v1. (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) =>
              Int
           -- ^ Number of bins.  This value must be positive.  A zero
           -- or negative value will cause an error.
           -> a
           -- ^ Lower bound on interval range.  Sample data less than
           -- this will cause an error.
           -> a
           -- ^ Upper bound on interval range.  This value must not be
           -- less than the lower bound.  Sample data that falls above
           -- the upper bound will cause an error.
           -> v0 a
           -- ^ Sample data.
           -> v1 b
histogram_ :: forall b a (v0 :: * -> *) (v1 :: * -> *).
(Num b, RealFrac a, Vector v0 a, Vector v1 b) =>
Int -> a -> a -> v0 a -> v1 b
histogram_ Int
numBins a
lo a
hi v0 a
xs0 = forall (v :: * -> *) a.
Vector v a =>
(forall s. ST s (Mutable v s a)) -> v a
G.create (forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
numBins b
0 forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall s. v0 a -> Mutable v1 s b -> ST s (Mutable v1 s b)
bin v0 a
xs0)
  where
    bin :: forall s. v0 a -> G.Mutable v1 s b -> ST s (G.Mutable v1 s b)
    bin :: forall s. v0 a -> Mutable v1 s b -> ST s (Mutable v1 s b)
bin v0 a
xs Mutable v1 s b
bins = Int -> ST s (Mutable v1 s b)
go Int
0
     where
       go :: Int -> ST s (Mutable v1 s b)
go Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return Mutable v1 s b
bins
            | Bool
otherwise = do
         let x :: a
x = v0 a
xs forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` Int
i
             b :: Int
b = forall a b. (RealFrac a, Integral b) => a -> b
truncate forall a b. (a -> b) -> a -> b
$ (a
x forall a. Num a => a -> a -> a
- a
lo) forall a. Fractional a => a -> a -> a
/ a
d
         forall {m :: * -> *} {v :: * -> * -> *} {a}.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
write' Mutable v1 s b
bins Int
b forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Num a => a -> a -> a
+b
1) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall (m :: * -> *) (v :: * -> * -> *) a.
(HasCallStack, PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.read Mutable v1 s b
bins Int
b
         Int -> ST s (Mutable v1 s b)
go (Int
iforall a. Num a => a -> a -> a
+Int
1)
       write' :: v (PrimState m) a -> Int -> a -> m ()
write' v (PrimState m) a
bins' Int
b !a
e = forall (m :: * -> *) (v :: * -> * -> *) a.
(HasCallStack, PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.write v (PrimState m) a
bins' Int
b a
e
       len :: Int
len = forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v0 a
xs
       d :: a
d = ((a
hi forall a. Num a => a -> a -> a
- a
lo) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numBins) forall a. Num a => a -> a -> a
* (a
1 forall a. Num a => a -> a -> a
+ forall a b. (Real a, Fractional b) => a -> b
realToFrac Double
m_epsilon)
{-# INLINE histogram_ #-}

-- | /O(n)/ Compute decent defaults for the lower and upper bounds of
-- a histogram, based on the desired number of bins and the range of
-- the sample data.
--
-- The upper and lower bounds used are @(lo-d, hi+d)@, where
--
-- @d = (maximum sample - minimum sample) / ((bins - 1) * 2)@
--
-- If all elements in the sample are the same and equal to @x@ range
-- is set to @(x - |x|/10, x + |x|/10)@. And if @x@ is equal to 0 range
-- is set to @(-1,1)@. This is needed to avoid creating histogram with
-- zero bin size.
range :: (G.Vector v Double) =>
         Int                    -- ^ Number of bins (must be positive).
      -> v Double               -- ^ Sample data (cannot be empty).
      -> (Double, Double)
range :: forall (v :: * -> *).
Vector v Double =>
Int -> v Double -> (Double, Double)
range Int
numBins v Double
xs
    | Int
numBins forall a. Ord a => a -> a -> Bool
< Int
1 = forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Histogram.range: invalid bin count"
    | forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Double
xs   = forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Histogram.range: empty sample"
    | Double
lo forall a. Eq a => a -> a -> Bool
== Double
hi    = case forall a. Num a => a -> a
abs Double
lo forall a. Fractional a => a -> a -> a
/ Double
10 of
                      Double
a | Double
a forall a. Ord a => a -> a -> Bool
< Double
m_tiny -> (-Double
1,Double
1)
                        | Bool
otherwise  -> (Double
lo forall a. Num a => a -> a -> a
- Double
a, Double
lo forall a. Num a => a -> a -> a
+ Double
a)
    | Bool
otherwise   = (Double
loforall a. Num a => a -> a -> a
-Double
d, Double
hiforall a. Num a => a -> a -> a
+Double
d)
  where
    d :: Double
d | Int
numBins forall a. Eq a => a -> a -> Bool
== Int
1 = Double
0
      | Bool
otherwise    = (Double
hi forall a. Num a => a -> a -> a
- Double
lo) forall a. Fractional a => a -> a -> a
/ ((forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numBins forall a. Num a => a -> a -> a
- Double
1) forall a. Num a => a -> a -> a
* Double
2)
    (Double
lo,Double
hi)          = forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
minMax v Double
xs
{-# INLINE range #-}