-- |
-- Module     : Simulation.Aivika.Experiment.Histogram
-- Copyright  : Copyright (c) 2012-2017, David Sorokin <david.sorokin@gmail.com>
-- License    : BSD3
-- Maintainer : David Sorokin <david.sorokin@gmail.com>
-- Stability  : experimental
-- Tested with: GHC 8.0.1
--
-- This module computes the histogram by the 
-- specified data and strategy applied for such computing.
--
-- The code in this module is essentially based on the 
-- <http://hackage.haskell.org/package/Histogram> package
-- by Mike Izbicki, who kindly agreed to re-license 
-- his library under BSD3, which allowed me to use his 
-- code and comments with some modifications. 

module Simulation.Aivika.Experiment.Histogram 
    (-- * Creating Histograms
     Histogram(..), 
     histogram, 
     histogramBinSize, 
     histogramNumBins,
     -- * Binning Strategies
     BinningStrategy(..),
     binSturges, 
     binDoane, 
     binSqrt, 
     binScott) where

import Data.List
import Data.Monoid
import qualified Data.Map as Map

import Numeric

-------------------------------------------------------------------------------

-- | Holds all the information needed to plot the histogram 
-- for a list of different series. Each series produces its 
-- own item in the resuling @[Int]@ list that may contain
-- zeros.
type Histogram = [(Double, [Int])]

-------------------------------------------------------------------------------
-- Bin counters; check out http://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width

-- | The strategy applied to calculate the histogram bins.
type BinningStrategy = [Double] -> Int

-- | This is only a helper function to convert strategies that 
-- specify bin width into strategies that specify the number of bins.
stratFromBinWidth :: [Double] -> Double -> Int
stratFromBinWidth :: [Double] -> Double -> Int
stratFromBinWidth [Double]
xs Double
h = forall a b. (RealFrac a, Integral b) => a -> b
ceiling forall a b. (a -> b) -> a -> b
$ (forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
xs forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [Double]
xs) forall a. Fractional a => a -> a -> a
/ Double
h

-- | Sturges' binning strategy is the least computational work, 
-- but recommended for only normal data.
binSturges :: BinningStrategy
binSturges :: BinningStrategy
binSturges [Double]
xs = forall a b. (RealFrac a, Integral b) => a -> b
ceiling forall a b. (a -> b) -> a -> b
$ forall a. Floating a => a -> a -> a
logBase Double
2 Double
n forall a. Num a => a -> a -> a
+ Double
1
    where n :: Double
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
xs
          
-- | Doane's binning strategy extends Sturges' for non-normal data.  
-- It takes a little more time because it must calculate the kurtosis 
-- (peakkiness) of the distribution.
binDoane :: BinningStrategy
binDoane :: BinningStrategy
binDoane [Double]
xs = forall a b. (RealFrac a, Integral b) => a -> b
ceiling forall a b. (a -> b) -> a -> b
$ Double
1 forall a. Num a => a -> a -> a
+ forall a. Floating a => a -> a
log Double
n forall a. Num a => a -> a -> a
+ forall a. Floating a => a -> a
log (Double
1 forall a. Num a => a -> a -> a
+ Double
a forall a. Num a => a -> a -> a
* ((Double
nforall a. Fractional a => a -> a -> a
/Double
6)forall a. Floating a => a -> a -> a
**(Double
1forall a. Fractional a => a -> a -> a
/Double
2)))
    where 
        n :: Double
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
xs
        a :: Double
a = forall a. Floating a => [a] -> a
kurtosis [Double]
xs
        
-- | Using the sqrt of the number of samples is not supported by any 
-- theory, but is commonly used by excel and other histogram making software.
binSqrt :: BinningStrategy
binSqrt :: BinningStrategy
binSqrt [Double]
xs = forall a b. (RealFrac a, Integral b) => a -> b
round forall a b. (a -> b) -> a -> b
$ forall a. Floating a => a -> a
sqrt Double
n
    where 
        n :: Double
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
xs

-- | Scott's rule is the optimal solution for normal data, but requires 
-- more computation than Sturges'.
binScott :: BinningStrategy
binScott :: BinningStrategy
binScott [Double]
xs = [Double] -> Double -> Int
stratFromBinWidth [Double]
xs forall a b. (a -> b) -> a -> b
$ Double
3.5 forall a. Num a => a -> a -> a
* forall a. Floating a => [a] -> a
stddev [Double]
xs forall a. Fractional a => a -> a -> a
/ (Double
nforall a. Floating a => a -> a -> a
**(Double
1forall a. Fractional a => a -> a -> a
/Double
3))
    where 
        n :: Double
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
xs
        
-------------------------------------------------------------------------------
-- create the histogram data

-- | Creates a histogram by specifying the list of series.  Call it with one of 
-- the binning strategies that is appropriate to the type of data you have.  
-- If you don't know, then try using 'binSturges'.
histogram :: BinningStrategy -> [[Double]] -> Histogram
histogram :: BinningStrategy -> [[Double]] -> Histogram
histogram BinningStrategy
strat [[Double]]
xs = Int -> [[Double]] -> Histogram
histogramNumBins (BinningStrategy
strat forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Double]]
xs) [[Double]]
xs

-- | Create a histogram by specifying the exact bin size. 
-- You probably don't want to use this function, and should use histogram 
-- with an appropriate binning strategy.
histogramBinSize :: Double -> [[Double]] -> Histogram
histogramBinSize :: Double -> [[Double]] -> Histogram
histogramBinSize Double
size = [[(Double, Int)]] -> Histogram
combineBins forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (Double -> [Double] -> [(Double, Int)]
histBins Double
size forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> [Double] -> [Double]
bin Double
size)

-- | Create a histogram by the specified approximated number of bins.
-- You probably don't want to use this function, and should use 
-- histogram with an appropriate binning strategy.
histogramNumBins :: Int -> [[Double]] -> Histogram
histogramNumBins :: Int -> [[Double]] -> Histogram
histogramNumBins Int
n [[Double]]
xs =
  Double -> [[Double]] -> Histogram
histogramBinSize Double
size [[Double]]
xs
    where
        size :: Double
size = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall {a} {b}. (RealFrac a, Floating a, Integral b) => a -> b
firstdigit Double
diff) forall a. Num a => a -> a -> a
* (Double
10 forall a. Floating a => a -> a -> a
** forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall {a} {b}. (RealFrac a, Integral b, Floating a) => a -> b
exponent10 Double
diff))
        diff :: Double
diff = if Double
diff_test forall a. Ord a => a -> a -> Bool
> Double
0
               then Double
diff_test
               else Double
1
        diff_test :: Double
diff_test = (forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum (forall a b. (a -> b) -> [a] -> [b]
map forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [[Double]]
xs) forall a. Num a => a -> a -> a
- 
                     forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum (forall a b. (a -> b) -> [a] -> [b]
map forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [[Double]]
xs)) forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Ord a => a -> a -> a
max Int
1 Int
n)

        firstdigit :: a -> b
firstdigit a
dbl = forall a b. (RealFrac a, Integral b) => a -> b
floor forall a b. (a -> b) -> a -> b
$ a
dbl forall a. Fractional a => a -> a -> a
/ (a
10 forall a. Floating a => a -> a -> a
** forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall {a} {b}. (RealFrac a, Integral b, Floating a) => a -> b
exponent10 a
dbl))
        exponent10 :: a -> b
exponent10 a
dbl = forall a b. (RealFrac a, Integral b) => a -> b
floor forall a b. (a -> b) -> a -> b
$ forall a. Floating a => a -> a -> a
logBase a
10 a
dbl

-- | It does all the binning for the histogram.
histBins :: Double -> [Double] -> [(Double, Int)]
histBins :: Double -> [Double] -> [(Double, Int)]
histBins Double
size [Double]
xs = [ (forall a. [a] -> a
head [Double]
l, forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
l) | [Double]
l <- forall a. Eq a => [a] -> [[a]]
group (forall a. Ord a => [a] -> [a]
sort [Double]
xs) ]

-- | It "rounds" every number into the closest number below it 
-- that is divisible by size.
bin :: Double -> [Double] -> [Double]
bin :: Double -> [Double] -> [Double]
bin Double
size xs :: [Double]
xs@[Double
_] = [Double]
xs
bin Double
size [Double]
xs = forall a b. (a -> b) -> [a] -> [b]
map (\Double
x -> Double
size forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a b. (RealFrac a, Integral b) => a -> b
floor (Double
x forall a. Fractional a => a -> a -> a
/ Double
size)) forall a. Num a => a -> a -> a
+ Double
size forall a. Fractional a => a -> a -> a
/ Double
2) [Double]
xs

-- | Combine bins from different histograms (optimized version).
combineBins :: [[(Double, Int)]] -> [(Double, [Int])]
combineBins :: [[(Double, Int)]] -> Histogram
combineBins [[(Double, Int)]
xs] = forall a b. (a -> b) -> [a] -> [b]
map (\(Double
t, Int
n) -> (Double
t, [Int
n])) [(Double, Int)]
xs
combineBins [[(Double, Int)]]
xss  = [[(Double, Int)]] -> Histogram
combineBins' [[(Double, Int)]]
xss

-- | Combine bins from different histograms (generic version).
combineBins' :: [[(Double, Int)]] -> [(Double, [Int])]
combineBins' :: [[(Double, Int)]] -> Histogram
combineBins' [[(Double, Int)]]
xs = forall a b. (a -> b) -> [a] -> [b]
map (forall {a} {a}. Num a => [(Int, a, a)] -> (a, [a])
merging forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {b} {c}. [(Int, b, c)] -> [(Int, b, c)]
sorting) forall a b. (a -> b) -> a -> b
$ 
                  forall {a} {c}. [(a, Double, c)] -> [[(a, Double, c)]]
groupping forall a b. (a -> b) -> a -> b
$ forall {a} {c}. [(a, Double, c)] -> [(a, Double, c)]
ordering forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ forall {b} {c}. [[(b, c)]] -> [[(Int, b, c)]]
numbering [[(Double, Int)]]
xs
  where numbering :: [[(b, c)]] -> [[(Int, b, c)]]
numbering       = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (forall a b c. ((a, b) -> c) -> a -> b -> c
curry forall {a} {b} {c}. (a, [(b, c)]) -> [(a, b, c)]
indexing) [Int
1..]
        indexing :: (a, [(b, c)]) -> [(a, b, c)]
indexing (a
i, [(b, c)]
x) = forall a b. (a -> b) -> [a] -> [b]
map (\(b
t, c
n) -> (a
i, b
t, c
n)) [(b, c)]
x
        ordering :: [(a, Double, c)] -> [(a, Double, c)]
ordering  = forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy  forall a b. (a -> b) -> a -> b
$ \(a
_, Double
t1, c
_) (a
_, Double
t2, c
_) -> forall a. Ord a => a -> a -> Ordering
compare Double
t1 Double
t2
        groupping :: [(a, Double, c)] -> [[(a, Double, c)]]
groupping = forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy forall a b. (a -> b) -> a -> b
$ \(a
_, Double
t1, c
_) (a
_, Double
t2, c
_) -> Double
t1 forall a. Eq a => a -> a -> Bool
== Double
t2
        sorting :: [(Int, b, c)] -> [(Int, b, c)]
sorting   = forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy  forall a b. (a -> b) -> a -> b
$ \(Int
i1, b
_, c
_) (Int
i2, b
_, c
_) -> forall a. Ord a => a -> a -> Ordering
compare Int
i1 Int
i2
        merging :: [(Int, a, a)] -> (a, [a])
merging zs :: [(Int, a, a)]
zs@((Int
_, a
t, a
_) : [(Int, a, a)]
_) = forall {a} {b} {a}.
Num a =>
[(Int, b, a)] -> a -> Int -> [a] -> (a, [a])
merging' [(Int, a, a)]
zs a
t Int
1 []
        merging' :: [(Int, b, a)] -> a -> Int -> [a] -> (a, [a])
merging' [] a
t Int
k [a]
acc
          | Int
k forall a. Ord a => a -> a -> Bool
<= Int
count = [(Int, b, a)] -> a -> Int -> [a] -> (a, [a])
merging' [] a
t (Int
k forall a. Num a => a -> a -> a
+ Int
1) (a
0 forall a. a -> [a] -> [a]
: [a]
acc)
          | Int
k forall a. Ord a => a -> a -> Bool
> Int
count  = (a
t, forall a. [a] -> [a]
reverse [a]
acc)
        merging' zs :: [(Int, b, a)]
zs@((Int
i, b
_, a
n) : [(Int, b, a)]
xs) a
t Int
k [a]
acc
          | Int
i forall a. Eq a => a -> a -> Bool
== Int
k = [(Int, b, a)] -> a -> Int -> [a] -> (a, [a])
merging' [(Int, b, a)]
xs a
t (Int
k forall a. Num a => a -> a -> a
+ Int
1) (a
n forall a. a -> [a] -> [a]
: [a]
acc)
          | Int
i forall a. Ord a => a -> a -> Bool
> Int
k  = [(Int, b, a)] -> a -> Int -> [a] -> (a, [a])
merging' [(Int, b, a)]
zs a
t (Int
k forall a. Num a => a -> a -> a
+ Int
1) (a
0 forall a. a -> [a] -> [a]
: [a]
acc)
        count :: Int
count = forall (t :: * -> *) a. Foldable t => t a -> Int
length [[(Double, Int)]]
xs

------------------------------------------------------------------------------
-- simple math functions
-- taken from package hstats, which wouldn't fully compile

-- | Numerically stable mean.
mean :: Floating a => [a] -> a
mean :: forall a. Floating a => [a] -> a
mean [a]
x = forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\(a
m, a
n) a
x -> (a
mforall a. Num a => a -> a -> a
+(a
xforall a. Num a => a -> a -> a
-a
m)forall a. Fractional a => a -> a -> a
/(a
nforall a. Num a => a -> a -> a
+a
1),a
nforall a. Num a => a -> a -> a
+a
1)) (a
0,a
0) [a]
x

-- | Standard deviation of sample.
stddev :: (Floating a) => [a] -> a
stddev :: forall a. Floating a => [a] -> a
stddev [a]
xs = forall a. Floating a => a -> a
sqrt forall a b. (a -> b) -> a -> b
$ forall a. Floating a => [a] -> a
var [a]
xs

-- | Sample variance.
var :: (Floating a) => [a] -> a
var :: forall a. Floating a => [a] -> a
var [a]
xs = forall {t} {t}.
(Fractional t, Integral t) =>
t -> t -> t -> [t] -> t
var' a
0 Integer
0 a
0 [a]
xs forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs forall a. Num a => a -> a -> a
- Int
1)
    where
      var' :: t -> t -> t -> [t] -> t
var' t
_ t
_ t
s [] = t
s
      var' t
m t
n t
s (t
x:[t]
xs) = t -> t -> t -> [t] -> t
var' t
nm (t
n forall a. Num a => a -> a -> a
+ t
1) (t
s forall a. Num a => a -> a -> a
+ t
delta forall a. Num a => a -> a -> a
* (t
x forall a. Num a => a -> a -> a
- t
nm)) [t]
xs
         where
           delta :: t
delta = t
x forall a. Num a => a -> a -> a
- t
m
           nm :: t
nm = t
m forall a. Num a => a -> a -> a
+ t
delta forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (t
n forall a. Num a => a -> a -> a
+ t
1)

-- | kurtosis is taken from wikipedia's definition.
kurtosis :: (Floating a) => [a] -> a
kurtosis :: forall a. Floating a => [a] -> a
kurtosis [a]
xs = ((a
1forall a. Fractional a => a -> a -> a
/a
n) forall a. Num a => a -> a -> a
* forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
xforall a. Num a => a -> a -> a
-a
x_bar)forall a b. (Num a, Integral b) => a -> b -> a
^Integer
4 | a
x <- [a]
xs])
            forall a. Fractional a => a -> a -> a
/ ((a
1forall a. Fractional a => a -> a -> a
/a
n) forall a. Num a => a -> a -> a
* forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
xforall a. Num a => a -> a -> a
-a
x_bar)forall a b. (Num a, Integral b) => a -> b -> a
^Integer
2 | a
x <- [a]
xs])forall a b. (Num a, Integral b) => a -> b -> a
^Integer
2 forall a. Num a => a -> a -> a
-a
3
    where 
        n :: a
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs
        x_bar :: a
x_bar = forall a. Floating a => [a] -> a
mean [a]
xs