{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}

-- |
-- Module      : BenchShow.Internal.Analysis
-- Copyright   : (c) 2009-2014 Bryan O'Sullivan
--               (c) 2018 Composewell Technologies
--
-- License     : BSD-style
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
-- Portability : GHC

module BenchShow.Internal.Analysis
    ( OutlierEffect(..)
    , OutlierVariance(..)
    , countOutliers
    , Estimator(..)
    , AnalyzedField(..)
    , getAnalyzedValue
    , BenchmarkMatrix(..)
    , BenchmarkIterMatrix(..)
    , foldBenchmark
    , filterSamples
    , isMaxField
    ) where

import Control.Applicative
import Data.Char (toLower)
import Data.Data (Data, Typeable)
import Data.Int (Int64)
import Data.List (elemIndex, transpose)
import Data.Maybe (fromMaybe)
import Data.Traversable
import GHC.Generics (Generic)
import Statistics.Function (sort)
import Statistics.Quantile (weightedAvg)
import Statistics.Regression (bootstrapRegress, olsRegress)
import Statistics.Resampling (resample)
import Statistics.Resampling.Bootstrap (bootstrapBCA)
import Statistics.Sample (mean, stdDev)
import Statistics.Sample.KernelDensity (kde)
import Statistics.Types (Sample, Estimate(..), ConfInt(..), cl95, CL)
import System.Random.MWC (GenIO, createSystemRandom)
import Prelude hiding (sequence, mapM)

import qualified Statistics.Resampling as St
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U

-------------------------------------------------------------------------------
-- Outliers
-------------------------------------------------------------------------------

-- | Outliers from sample data, calculated using the boxplot
-- technique.
data Outliers = Outliers {
      Outliers -> Int64
samplesSeen :: !Int64
    , Outliers -> Int64
lowSevere   :: !Int64
    -- ^ More than 3 times the interquartile range (IQR) below the
    -- first quartile.
    , Outliers -> Int64
lowMild     :: !Int64
    -- ^ Between 1.5 and 3 times the IQR below the first quartile.
    , Outliers -> Int64
highMild    :: !Int64
    -- ^ Between 1.5 and 3 times the IQR above the third quartile.
    , Outliers -> Int64
highSevere  :: !Int64
    -- ^ More than 3 times the IQR above the third quartile.
    } deriving (Outliers -> Outliers -> Bool
(Outliers -> Outliers -> Bool)
-> (Outliers -> Outliers -> Bool) -> Eq Outliers
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Outliers -> Outliers -> Bool
$c/= :: Outliers -> Outliers -> Bool
== :: Outliers -> Outliers -> Bool
$c== :: Outliers -> Outliers -> Bool
Eq, Int -> Outliers -> ShowS
[Outliers] -> ShowS
Outliers -> String
(Int -> Outliers -> ShowS)
-> (Outliers -> String) -> ([Outliers] -> ShowS) -> Show Outliers
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Outliers] -> ShowS
$cshowList :: [Outliers] -> ShowS
show :: Outliers -> String
$cshow :: Outliers -> String
showsPrec :: Int -> Outliers -> ShowS
$cshowsPrec :: Int -> Outliers -> ShowS
Show, Typeable, Typeable Outliers
DataType
Constr
Typeable Outliers
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> Outliers -> c Outliers)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c Outliers)
-> (Outliers -> Constr)
-> (Outliers -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c Outliers))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Outliers))
-> ((forall b. Data b => b -> b) -> Outliers -> Outliers)
-> (forall r r'.
    (r -> r' -> r)
    -> r -> (forall d. Data d => d -> r') -> Outliers -> r)
-> (forall r r'.
    (r' -> r -> r)
    -> r -> (forall d. Data d => d -> r') -> Outliers -> r)
-> (forall u. (forall d. Data d => d -> u) -> Outliers -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> Outliers -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> Outliers -> m Outliers)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> Outliers -> m Outliers)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> Outliers -> m Outliers)
-> Data Outliers
Outliers -> DataType
Outliers -> Constr
(forall b. Data b => b -> b) -> Outliers -> Outliers
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Outliers -> c Outliers
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Outliers
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> Outliers -> u
forall u. (forall d. Data d => d -> u) -> Outliers -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Outliers -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Outliers -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Outliers -> m Outliers
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Outliers -> m Outliers
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Outliers
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Outliers -> c Outliers
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Outliers)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Outliers)
$cOutliers :: Constr
$tOutliers :: DataType
gmapMo :: (forall d. Data d => d -> m d) -> Outliers -> m Outliers
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Outliers -> m Outliers
gmapMp :: (forall d. Data d => d -> m d) -> Outliers -> m Outliers
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> Outliers -> m Outliers
gmapM :: (forall d. Data d => d -> m d) -> Outliers -> m Outliers
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> Outliers -> m Outliers
gmapQi :: Int -> (forall d. Data d => d -> u) -> Outliers -> u
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> Outliers -> u
gmapQ :: (forall d. Data d => d -> u) -> Outliers -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> Outliers -> [u]
gmapQr :: (r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Outliers -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> Outliers -> r
gmapQl :: (r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Outliers -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> Outliers -> r
gmapT :: (forall b. Data b => b -> b) -> Outliers -> Outliers
$cgmapT :: (forall b. Data b => b -> b) -> Outliers -> Outliers
dataCast2 :: (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Outliers)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Outliers)
dataCast1 :: (forall d. Data d => c (t d)) -> Maybe (c Outliers)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c Outliers)
dataTypeOf :: Outliers -> DataType
$cdataTypeOf :: Outliers -> DataType
toConstr :: Outliers -> Constr
$ctoConstr :: Outliers -> Constr
gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Outliers
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c Outliers
gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Outliers -> c Outliers
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> Outliers -> c Outliers
$cp1Data :: Typeable Outliers
Data, (forall x. Outliers -> Rep Outliers x)
-> (forall x. Rep Outliers x -> Outliers) -> Generic Outliers
forall x. Rep Outliers x -> Outliers
forall x. Outliers -> Rep Outliers x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Outliers x -> Outliers
$cfrom :: forall x. Outliers -> Rep Outliers x
Generic)

-- | A description of the extent to which outliers in the sample data
-- affect the sample mean and standard deviation.
data OutlierEffect = Unaffected -- ^ Less than 1% effect.
                   | Slight     -- ^ Between 1% and 10%.
                   | Moderate   -- ^ Between 10% and 50%.
                   | Severe     -- ^ Above 50% (i.e. measurements
                                -- are useless).
                     deriving (OutlierEffect -> OutlierEffect -> Bool
(OutlierEffect -> OutlierEffect -> Bool)
-> (OutlierEffect -> OutlierEffect -> Bool) -> Eq OutlierEffect
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: OutlierEffect -> OutlierEffect -> Bool
$c/= :: OutlierEffect -> OutlierEffect -> Bool
== :: OutlierEffect -> OutlierEffect -> Bool
$c== :: OutlierEffect -> OutlierEffect -> Bool
Eq, Eq OutlierEffect
Eq OutlierEffect
-> (OutlierEffect -> OutlierEffect -> Ordering)
-> (OutlierEffect -> OutlierEffect -> Bool)
-> (OutlierEffect -> OutlierEffect -> Bool)
-> (OutlierEffect -> OutlierEffect -> Bool)
-> (OutlierEffect -> OutlierEffect -> Bool)
-> (OutlierEffect -> OutlierEffect -> OutlierEffect)
-> (OutlierEffect -> OutlierEffect -> OutlierEffect)
-> Ord OutlierEffect
OutlierEffect -> OutlierEffect -> Bool
OutlierEffect -> OutlierEffect -> Ordering
OutlierEffect -> OutlierEffect -> OutlierEffect
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: OutlierEffect -> OutlierEffect -> OutlierEffect
$cmin :: OutlierEffect -> OutlierEffect -> OutlierEffect
max :: OutlierEffect -> OutlierEffect -> OutlierEffect
$cmax :: OutlierEffect -> OutlierEffect -> OutlierEffect
>= :: OutlierEffect -> OutlierEffect -> Bool
$c>= :: OutlierEffect -> OutlierEffect -> Bool
> :: OutlierEffect -> OutlierEffect -> Bool
$c> :: OutlierEffect -> OutlierEffect -> Bool
<= :: OutlierEffect -> OutlierEffect -> Bool
$c<= :: OutlierEffect -> OutlierEffect -> Bool
< :: OutlierEffect -> OutlierEffect -> Bool
$c< :: OutlierEffect -> OutlierEffect -> Bool
compare :: OutlierEffect -> OutlierEffect -> Ordering
$ccompare :: OutlierEffect -> OutlierEffect -> Ordering
$cp1Ord :: Eq OutlierEffect
Ord, Int -> OutlierEffect -> ShowS
[OutlierEffect] -> ShowS
OutlierEffect -> String
(Int -> OutlierEffect -> ShowS)
-> (OutlierEffect -> String)
-> ([OutlierEffect] -> ShowS)
-> Show OutlierEffect
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OutlierEffect] -> ShowS
$cshowList :: [OutlierEffect] -> ShowS
show :: OutlierEffect -> String
$cshow :: OutlierEffect -> String
showsPrec :: Int -> OutlierEffect -> ShowS
$cshowsPrec :: Int -> OutlierEffect -> ShowS
Show, Typeable, Typeable OutlierEffect
DataType
Constr
Typeable OutlierEffect
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> OutlierEffect -> c OutlierEffect)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c OutlierEffect)
-> (OutlierEffect -> Constr)
-> (OutlierEffect -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c OutlierEffect))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e))
    -> Maybe (c OutlierEffect))
-> ((forall b. Data b => b -> b) -> OutlierEffect -> OutlierEffect)
-> (forall r r'.
    (r -> r' -> r)
    -> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r)
-> (forall r r'.
    (r' -> r -> r)
    -> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r)
-> (forall u. (forall d. Data d => d -> u) -> OutlierEffect -> [u])
-> (forall u.
    Int -> (forall d. Data d => d -> u) -> OutlierEffect -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect)
-> Data OutlierEffect
OutlierEffect -> DataType
OutlierEffect -> Constr
(forall b. Data b => b -> b) -> OutlierEffect -> OutlierEffect
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierEffect -> c OutlierEffect
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierEffect
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> OutlierEffect -> u
forall u. (forall d. Data d => d -> u) -> OutlierEffect -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierEffect
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierEffect -> c OutlierEffect
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c OutlierEffect)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c OutlierEffect)
$cSevere :: Constr
$cModerate :: Constr
$cSlight :: Constr
$cUnaffected :: Constr
$tOutlierEffect :: DataType
gmapMo :: (forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
gmapMp :: (forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
gmapM :: (forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> OutlierEffect -> m OutlierEffect
gmapQi :: Int -> (forall d. Data d => d -> u) -> OutlierEffect -> u
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> OutlierEffect -> u
gmapQ :: (forall d. Data d => d -> u) -> OutlierEffect -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> OutlierEffect -> [u]
gmapQr :: (r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r
gmapQl :: (r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierEffect -> r
gmapT :: (forall b. Data b => b -> b) -> OutlierEffect -> OutlierEffect
$cgmapT :: (forall b. Data b => b -> b) -> OutlierEffect -> OutlierEffect
dataCast2 :: (forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c OutlierEffect)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c OutlierEffect)
dataCast1 :: (forall d. Data d => c (t d)) -> Maybe (c OutlierEffect)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c OutlierEffect)
dataTypeOf :: OutlierEffect -> DataType
$cdataTypeOf :: OutlierEffect -> DataType
toConstr :: OutlierEffect -> Constr
$ctoConstr :: OutlierEffect -> Constr
gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierEffect
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierEffect
gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierEffect -> c OutlierEffect
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierEffect -> c OutlierEffect
$cp1Data :: Typeable OutlierEffect
Data, (forall x. OutlierEffect -> Rep OutlierEffect x)
-> (forall x. Rep OutlierEffect x -> OutlierEffect)
-> Generic OutlierEffect
forall x. Rep OutlierEffect x -> OutlierEffect
forall x. OutlierEffect -> Rep OutlierEffect x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep OutlierEffect x -> OutlierEffect
$cfrom :: forall x. OutlierEffect -> Rep OutlierEffect x
Generic)

outliersEmpty :: Outliers
outliersEmpty :: Outliers
outliersEmpty = Int64 -> Int64 -> Int64 -> Int64 -> Int64 -> Outliers
Outliers Int64
0 Int64
0 Int64
0 Int64
0 Int64
0

addOutliers :: Outliers -> Outliers -> Outliers
addOutliers :: Outliers -> Outliers -> Outliers
addOutliers (Outliers Int64
s Int64
a Int64
b Int64
c Int64
d) (Outliers Int64
t Int64
w Int64
x Int64
y Int64
z) =
    Int64 -> Int64 -> Int64 -> Int64 -> Int64 -> Outliers
Outliers (Int64
sInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
t) (Int64
aInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
w) (Int64
bInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
x) (Int64
cInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
y) (Int64
dInt64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+Int64
z)
{-# INLINE addOutliers #-}

-- | Analysis of the extent to which outliers in a sample affect its
-- standard deviation (and to some extent, its mean).
data OutlierVariance = OutlierVariance {
      OutlierVariance -> OutlierEffect
ovEffect   :: !OutlierEffect
    -- ^ Qualitative description of effect.
    , OutlierVariance -> String
ovDesc     :: !String
    -- ^ Brief textual description of effect.
    , OutlierVariance -> Double
ovFraction :: !Double
    -- ^ Quantitative description of effect (a fraction between 0 and 1).
    } deriving (OutlierVariance -> OutlierVariance -> Bool
(OutlierVariance -> OutlierVariance -> Bool)
-> (OutlierVariance -> OutlierVariance -> Bool)
-> Eq OutlierVariance
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: OutlierVariance -> OutlierVariance -> Bool
$c/= :: OutlierVariance -> OutlierVariance -> Bool
== :: OutlierVariance -> OutlierVariance -> Bool
$c== :: OutlierVariance -> OutlierVariance -> Bool
Eq, Int -> OutlierVariance -> ShowS
[OutlierVariance] -> ShowS
OutlierVariance -> String
(Int -> OutlierVariance -> ShowS)
-> (OutlierVariance -> String)
-> ([OutlierVariance] -> ShowS)
-> Show OutlierVariance
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [OutlierVariance] -> ShowS
$cshowList :: [OutlierVariance] -> ShowS
show :: OutlierVariance -> String
$cshow :: OutlierVariance -> String
showsPrec :: Int -> OutlierVariance -> ShowS
$cshowsPrec :: Int -> OutlierVariance -> ShowS
Show, Typeable, Typeable OutlierVariance
DataType
Constr
Typeable OutlierVariance
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> OutlierVariance -> c OutlierVariance)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c OutlierVariance)
-> (OutlierVariance -> Constr)
-> (OutlierVariance -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c OutlierVariance))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e))
    -> Maybe (c OutlierVariance))
-> ((forall b. Data b => b -> b)
    -> OutlierVariance -> OutlierVariance)
-> (forall r r'.
    (r -> r' -> r)
    -> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r)
-> (forall r r'.
    (r' -> r -> r)
    -> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r)
-> (forall u.
    (forall d. Data d => d -> u) -> OutlierVariance -> [u])
-> (forall u.
    Int -> (forall d. Data d => d -> u) -> OutlierVariance -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d)
    -> OutlierVariance -> m OutlierVariance)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d)
    -> OutlierVariance -> m OutlierVariance)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d)
    -> OutlierVariance -> m OutlierVariance)
-> Data OutlierVariance
OutlierVariance -> DataType
OutlierVariance -> Constr
(forall b. Data b => b -> b) -> OutlierVariance -> OutlierVariance
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierVariance -> c OutlierVariance
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierVariance
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u.
Int -> (forall d. Data d => d -> u) -> OutlierVariance -> u
forall u. (forall d. Data d => d -> u) -> OutlierVariance -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierVariance
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierVariance -> c OutlierVariance
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c OutlierVariance)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c OutlierVariance)
$cOutlierVariance :: Constr
$tOutlierVariance :: DataType
gmapMo :: (forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
gmapMp :: (forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
gmapM :: (forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> OutlierVariance -> m OutlierVariance
gmapQi :: Int -> (forall d. Data d => d -> u) -> OutlierVariance -> u
$cgmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> OutlierVariance -> u
gmapQ :: (forall d. Data d => d -> u) -> OutlierVariance -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> OutlierVariance -> [u]
gmapQr :: (r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r
gmapQl :: (r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> OutlierVariance -> r
gmapT :: (forall b. Data b => b -> b) -> OutlierVariance -> OutlierVariance
$cgmapT :: (forall b. Data b => b -> b) -> OutlierVariance -> OutlierVariance
dataCast2 :: (forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c OutlierVariance)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c OutlierVariance)
dataCast1 :: (forall d. Data d => c (t d)) -> Maybe (c OutlierVariance)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c OutlierVariance)
dataTypeOf :: OutlierVariance -> DataType
$cdataTypeOf :: OutlierVariance -> DataType
toConstr :: OutlierVariance -> Constr
$ctoConstr :: OutlierVariance -> Constr
gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierVariance
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c OutlierVariance
gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierVariance -> c OutlierVariance
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> OutlierVariance -> c OutlierVariance
$cp1Data :: Typeable OutlierVariance
Data, (forall x. OutlierVariance -> Rep OutlierVariance x)
-> (forall x. Rep OutlierVariance x -> OutlierVariance)
-> Generic OutlierVariance
forall x. Rep OutlierVariance x -> OutlierVariance
forall x. OutlierVariance -> Rep OutlierVariance x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep OutlierVariance x -> OutlierVariance
$cfrom :: forall x. OutlierVariance -> Rep OutlierVariance x
Generic)

-- | Classify outliers in a data set, using the boxplot technique.
classifyOutliers :: Sample -> Outliers
classifyOutliers :: Sample -> Outliers
classifyOutliers Sample
sa = (Outliers -> Double -> Outliers) -> Outliers -> Sample -> Outliers
forall b a. Unbox b => (a -> b -> a) -> a -> Vector b -> a
U.foldl' (((Outliers -> Outliers)
-> (Double -> Outliers) -> Double -> Outliers
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Outliers
outlier) ((Outliers -> Outliers) -> Double -> Outliers)
-> (Outliers -> Outliers -> Outliers)
-> Outliers
-> Double
-> Outliers
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Outliers -> Outliers -> Outliers
addOutliers) Outliers
outliersEmpty Sample
ssa
    where outlier :: Double -> Outliers
outlier Double
e = Outliers :: Int64 -> Int64 -> Int64 -> Int64 -> Int64 -> Outliers
Outliers
                { samplesSeen :: Int64
samplesSeen = Int64
1
                , lowSevere :: Int64
lowSevere = if Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
loS Bool -> Bool -> Bool
&& Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
hiM then Int64
1 else Int64
0
                , lowMild :: Int64
lowMild = if Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
loS Bool -> Bool -> Bool
&& Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
loM then Int64
1 else Int64
0
                , highMild :: Int64
highMild = if Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
hiM Bool -> Bool -> Bool
&& Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
hiS then Int64
1 else Int64
0
                , highSevere :: Int64
highSevere = if Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
hiS Bool -> Bool -> Bool
&& Double
e Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
loM then Int64
1 else Int64
0
                }
          !loS :: Double
loS = Double
q1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- (Double
iqr Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
3)
          !loM :: Double
loM = Double
q1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- (Double
iqr Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
1.5)
          !hiM :: Double
hiM = Double
q3 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ (Double
iqr Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
1.5)
          !hiS :: Double
hiS = Double
q3 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ (Double
iqr Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
3)
          q1 :: Double
q1   = Int -> Int -> Sample -> Double
forall (v :: * -> *).
Vector v Double =>
Int -> Int -> v Double -> Double
weightedAvg Int
1 Int
4 Sample
ssa
          q3 :: Double
q3   = Int -> Int -> Sample -> Double
forall (v :: * -> *).
Vector v Double =>
Int -> Int -> v Double -> Double
weightedAvg Int
3 Int
4 Sample
ssa
          ssa :: Sample
ssa  = Sample -> Sample
sort Sample
sa
          iqr :: Double
iqr  = Double
q3 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
q1

-- | Compute the extent to which outliers in the sample data affect
-- the sample mean and standard deviation.
outlierVariance
  :: Double -- ^ mean
  -> Double -- ^ standard deviation.
  -> Double -- ^ Number of original iterations.
  -> OutlierVariance
outlierVariance :: Double -> Double -> Double -> OutlierVariance
outlierVariance Double
µ Double
σ Double
a = OutlierEffect -> String -> Double -> OutlierVariance
OutlierVariance OutlierEffect
effect String
desc Double
varOutMin
    where
    µa :: Double
µa    = Double
µ Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
a
    µgMin :: Double
µgMin = Double
µa Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2
    σg2 :: Double
σg2   = Double
σg Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
σg where σg :: Double
σg = Double -> Double -> Double
forall a. Ord a => a -> a -> a
min (Double
µgMin Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
4) (Double
σ Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
sqrt Double
a)
    σ2 :: Double
σ2    = Double
σ Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
σ
    varOut :: Double -> Double
varOut Double
c  = (Double
ac Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
a) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
σ2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
ac Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
σg2) where ac :: Double
ac = Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
c
    cMax :: Double -> Double
cMax Double
x    = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
floor (-Double
2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
k0 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
k1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double -> Double
forall a. Floating a => a -> a
sqrt Double
det)) :: Int)
        where
        ad :: Double
ad = Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
d
            where
            d :: Double
d = Double
k Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
k
            k :: Double
k = Double
µa Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
x
        k0 :: Double
k0    = -Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
ad
        k1 :: Double
k1    = Double
σ2 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
σg2 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
ad
        det :: Double
det   = Double
k1 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
k1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
4 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
σg2 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
k0

    minBy :: (t -> a) -> t -> t -> a
minBy t -> a
f t
q t
r = a -> a -> a
forall a. Ord a => a -> a -> a
min (t -> a
f t
q) (t -> a
f t
r)
    varOutMin :: Double
varOutMin = if Double
σ2 Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0
                then Double
0
                else ((Double -> Double) -> Double -> Double -> Double
forall a t. Ord a => (t -> a) -> t -> t -> a
minBy Double -> Double
varOut Double
1 ((Double -> Double) -> Double -> Double -> Double
forall a t. Ord a => (t -> a) -> t -> t -> a
minBy Double -> Double
cMax Double
0 Double
µgMin)) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
σ2

    (OutlierEffect
effect, String
desc) | Double
varOutMin Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0.01 = (OutlierEffect
Unaffected, String
"no")
                   | Double
varOutMin Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0.1  = (OutlierEffect
Slight,     String
"slight")
                   | Double
varOutMin Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0.5  = (OutlierEffect
Moderate,   String
"moderate")
                   | Bool
otherwise        = (OutlierEffect
Severe,     String
"severe")

-- | Count the total number of outliers in a sample.
countOutliers :: Outliers -> Int64
countOutliers :: Outliers -> Int64
countOutliers (Outliers Int64
_ Int64
a Int64
b Int64
c Int64
d) = Int64
a Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
b Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
c Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
d
{-# INLINE countOutliers #-}

-------------------------------------------------------------------------------
-- Linear regression
-------------------------------------------------------------------------------

useRegression :: Bool
useRegression :: Bool
useRegression = Bool
True

useBootstrap :: Bool
useBootstrap :: Bool
useBootstrap = Bool
True

resampleCount :: Int
resampleCount :: Int
resampleCount = Int
1000

confidence :: CL Double
confidence :: CL Double
confidence = CL Double
forall a. Fractional a => CL a
cl95

regress
    :: GenIO
    -> Int  -- index of the iters field, we return the coefficient of only the
            -- iters field
    -> [String] -- responder column names
    -> [([Double], [Double])]
    -> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
regress :: GenIO
-> Int
-> [String]
-> [([Double], [Double])]
-> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
regress GenIO
randGen Int
i [String]
rcols [([Double], [Double])]
samples = do
    -- perform ordinary least squares regression for each field
    -- the main predictor is the number of iterations
    let predVectors :: [Sample]
predVectors = ([Double] -> Sample) -> [[Double]] -> [Sample]
forall a b. (a -> b) -> [a] -> [b]
map [Double] -> Sample
forall a. Unbox a => [a] -> Vector a
U.fromList ([[Double]] -> [Sample]) -> [[Double]] -> [Sample]
forall a b. (a -> b) -> a -> b
$ [[Double]] -> [[Double]]
forall a. [[a]] -> [[a]]
transpose ([[Double]] -> [[Double]]) -> [[Double]] -> [[Double]]
forall a b. (a -> b) -> a -> b
$ (([Double], [Double]) -> [Double])
-> [([Double], [Double])] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ([Double], [Double]) -> [Double]
forall a b. (a, b) -> a
fst [([Double], [Double])]
samples
        regressWithIters :: Maybe Sample
-> IO
     (Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double))
regressWithIters = (Sample
 -> IO (Vector (Estimate ConfInt Double), Estimate ConfInt Double))
-> Maybe Sample
-> IO
     (Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double))
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (GenIO
-> Int
-> CL Double
-> ([Sample] -> Sample -> (Sample, Double))
-> [Sample]
-> Sample
-> IO (Vector (Estimate ConfInt Double), Estimate ConfInt Double)
bootstrapRegress GenIO
randGen Int
resampleCount
                                CL Double
confidence [Sample] -> Sample -> (Sample, Double)
olsRegress [Sample]
predVectors)

    let avoidMaxFields :: String -> a -> Maybe a
avoidMaxFields String
name a
vec =
            if String -> Bool
isMaxField String
name
            then Maybe a
forall a. Maybe a
Nothing
            else a -> Maybe a
forall a. a -> Maybe a
Just a
vec
    let respVectors :: [Sample]
respVectors = ([Double] -> Sample) -> [[Double]] -> [Sample]
forall a b. (a -> b) -> [a] -> [b]
map [Double] -> Sample
forall a. Unbox a => [a] -> Vector a
U.fromList ([[Double]] -> [Sample]) -> [[Double]] -> [Sample]
forall a b. (a -> b) -> a -> b
$ [[Double]] -> [[Double]]
forall a. [[a]] -> [[a]]
transpose ([[Double]] -> [[Double]]) -> [[Double]] -> [[Double]]
forall a b. (a -> b) -> a -> b
$ (([Double], [Double]) -> [Double])
-> [([Double], [Double])] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ([Double], [Double]) -> [Double]
forall a b. (a, b) -> b
snd [([Double], [Double])]
samples
    [Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double)]
res <- (Maybe Sample
 -> IO
      (Maybe
         (Vector (Estimate ConfInt Double), Estimate ConfInt Double)))
-> [Maybe Sample]
-> IO
     [Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Maybe Sample
-> IO
     (Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double))
regressWithIters ((String -> Sample -> Maybe Sample)
-> [String] -> [Sample] -> [Maybe Sample]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith String -> Sample -> Maybe Sample
forall a. String -> a -> Maybe a
avoidMaxFields [String]
rcols [Sample]
respVectors)
    [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
 -> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)])
-> [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
forall a b. (a -> b) -> a -> b
$ (Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double)
 -> Maybe (Estimate ConfInt Double, Estimate ConfInt Double))
-> [Maybe
      (Vector (Estimate ConfInt Double), Estimate ConfInt Double)]
-> [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
forall a b. (a -> b) -> [a] -> [b]
map (((Vector (Estimate ConfInt Double), Estimate ConfInt Double)
 -> (Estimate ConfInt Double, Estimate ConfInt Double))
-> Maybe
     (Vector (Estimate ConfInt Double), Estimate ConfInt Double)
-> Maybe (Estimate ConfInt Double, Estimate ConfInt Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Vector (Estimate ConfInt Double)
v,Estimate ConfInt Double
r2) -> ((Vector (Estimate ConfInt Double) -> [Estimate ConfInt Double]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList Vector (Estimate ConfInt Double)
v) [Estimate ConfInt Double] -> Int -> Estimate ConfInt Double
forall a. [a] -> Int -> a
!! Int
i, Estimate ConfInt Double
r2))) [Maybe (Vector (Estimate ConfInt Double), Estimate ConfInt Double)]
res

-------------------------------------------------------------------------------
-- Mean and std deviation by boostrap resampling
-------------------------------------------------------------------------------

estimateMeanAndStdDev
    :: GenIO
    -> [U.Vector Double]
    -> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
estimateMeanAndStdDev :: GenIO
-> [Sample]
-> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
estimateMeanAndStdDev GenIO
randGen [Sample]
vectors = do
    let resamp :: Sample -> IO [(Estimator, Bootstrap Vector Double)]
resamp = GenIO
-> [Estimator]
-> Int
-> Sample
-> IO [(Estimator, Bootstrap Vector Double)]
resample GenIO
randGen [Estimator
St.Mean, Estimator
St.StdDev] Int
resampleCount
    [[(Estimator, Bootstrap Vector Double)]]
res <- (Sample -> IO [(Estimator, Bootstrap Vector Double)])
-> [Sample] -> IO [[(Estimator, Bootstrap Vector Double)]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Sample -> IO [(Estimator, Bootstrap Vector Double)]
resamp [Sample]
vectors
    [(Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Estimate ConfInt Double, Estimate ConfInt Double)]
 -> IO [(Estimate ConfInt Double, Estimate ConfInt Double)])
-> [(Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
forall a b. (a -> b) -> a -> b
$ ([Estimate ConfInt Double]
 -> (Estimate ConfInt Double, Estimate ConfInt Double))
-> [[Estimate ConfInt Double]]
-> [(Estimate ConfInt Double, Estimate ConfInt Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\[Estimate ConfInt Double
mn,Estimate ConfInt Double
dev] -> (Estimate ConfInt Double
mn, Estimate ConfInt Double
dev))
        ([[Estimate ConfInt Double]]
 -> [(Estimate ConfInt Double, Estimate ConfInt Double)])
-> [[Estimate ConfInt Double]]
-> [(Estimate ConfInt Double, Estimate ConfInt Double)]
forall a b. (a -> b) -> a -> b
$ ZipList [Estimate ConfInt Double] -> [[Estimate ConfInt Double]]
forall a. ZipList a -> [a]
getZipList
        (ZipList [Estimate ConfInt Double] -> [[Estimate ConfInt Double]])
-> ZipList [Estimate ConfInt Double] -> [[Estimate ConfInt Double]]
forall a b. (a -> b) -> a -> b
$ CL Double
-> Sample
-> [(Estimator, Bootstrap Vector Double)]
-> [Estimate ConfInt Double]
bootstrapBCA CL Double
confidence
            (Sample
 -> [(Estimator, Bootstrap Vector Double)]
 -> [Estimate ConfInt Double])
-> ZipList Sample
-> ZipList
     ([(Estimator, Bootstrap Vector Double)]
      -> [Estimate ConfInt Double])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Sample] -> ZipList Sample
forall a. [a] -> ZipList a
ZipList [Sample]
vectors
            ZipList
  ([(Estimator, Bootstrap Vector Double)]
   -> [Estimate ConfInt Double])
-> ZipList [(Estimator, Bootstrap Vector Double)]
-> ZipList [Estimate ConfInt Double]
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [[(Estimator, Bootstrap Vector Double)]]
-> ZipList [(Estimator, Bootstrap Vector Double)]
forall a. [a] -> ZipList a
ZipList [[(Estimator, Bootstrap Vector Double)]]
res

-------------------------------------------------------------------------------
-- Statistical analysis of benchmark iterations
-------------------------------------------------------------------------------

-- By default the fields are considered "scaled" fields that is
-- they scale by iterations. However in case of maxrss field it is
-- a max value across the experiment and does not scale by
-- iterations, in this case we just need to take a mean or max
-- without scaling.
isMaxField :: String -> Bool
isMaxField :: String -> Bool
isMaxField String
fieldName = (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
fieldName String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"maxrss"

rescaleIteration :: Int -> [String] -> ([Double], [Double]) -> [Double]
rescaleIteration :: Int -> [String] -> ([Double], [Double]) -> [Double]
rescaleIteration Int
idx [String]
rcols ([Double]
pvals, [Double]
vals) =
    let iter :: Double
iter = [Double]
pvals [Double] -> Int -> Double
forall a. [a] -> Int -> a
!! Int
idx
    in ((Double -> Double) -> Double -> Double)
-> [Double -> Double] -> [Double] -> [Double]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
($) (((Double -> Double -> Double) -> Double -> Double)
-> [Double -> Double -> Double] -> [Double -> Double]
forall a b. (a -> b) -> [a] -> [b]
map ((Double -> Double -> Double) -> Double -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
iter) [Double -> Double -> Double]
foldFields) [Double]
vals

    where

    getMeanOrMax :: String -> a -> a -> a
getMeanOrMax String
fname a
i a
val =
        if String -> Bool
isMaxField String
fname
        then a
val
        else a
val a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
i

    foldFields :: [Double -> Double -> Double]
foldFields = (String -> Double -> Double -> Double)
-> [String] -> [Double -> Double -> Double]
forall a b. (a -> b) -> [a] -> [b]
map String -> Double -> Double -> Double
forall a. Fractional a => String -> a -> a -> a
getMeanOrMax [String]
rcols

data AnalyzedField = AnalyzedField
    { AnalyzedField -> Double
analyzedMean       :: !Double
    , AnalyzedField -> Double
analyzedStdDev     :: !Double

    , AnalyzedField -> Double
analyzedMedian     :: !Double
    , AnalyzedField -> Outliers
analyzedOutliers   :: !Outliers
    , AnalyzedField -> OutlierVariance
analyzedOutlierVar :: !OutlierVariance
    , AnalyzedField -> (Sample, Sample)
analyzedKDE        :: !(U.Vector Double, U.Vector Double)

    , AnalyzedField -> Maybe (Estimate ConfInt Double)
analyzedRegCoeff   :: Maybe (Estimate ConfInt Double)
    , AnalyzedField -> Maybe (Estimate ConfInt Double)
analyzedRegRSq     :: Maybe (Estimate ConfInt Double)
    } deriving Int -> AnalyzedField -> ShowS
[AnalyzedField] -> ShowS
AnalyzedField -> String
(Int -> AnalyzedField -> ShowS)
-> (AnalyzedField -> String)
-> ([AnalyzedField] -> ShowS)
-> Show AnalyzedField
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AnalyzedField] -> ShowS
$cshowList :: [AnalyzedField] -> ShowS
show :: AnalyzedField -> String
$cshow :: AnalyzedField -> String
showsPrec :: Int -> AnalyzedField -> ShowS
$cshowsPrec :: Int -> AnalyzedField -> ShowS
Show

-- | The statistical estimator used to arrive at a single value for a
-- benchmark when samples from multiple experiments are available.
--
-- @since 0.2.0
data Estimator =
      Median        -- ^ Report the median, outliers and outlier variance using
                    -- box-plot method. This is the most robust indicator
                    -- with respect to outliers when successive runs of
                    -- benchmarks are compared.
    | Mean          -- ^ Report the mean and the standard deviation from the
                    -- mean. This is less robust than median but more precise.
    | Regression    -- ^ Report the coefficient of regression, discarding the
                    -- constant factor, arrived at by linear regression using
                    -- ordinary least square method.  The R-square
                    -- goodness-of-fit estimate is also reported.  It works
                    -- better when larger number of samples are taken.  This
                    -- cannot be used when the number of samples is less than
                    -- 2, in that case a mean value is reported instead.
    deriving (Estimator -> Estimator -> Bool
(Estimator -> Estimator -> Bool)
-> (Estimator -> Estimator -> Bool) -> Eq Estimator
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Estimator -> Estimator -> Bool
$c/= :: Estimator -> Estimator -> Bool
== :: Estimator -> Estimator -> Bool
$c== :: Estimator -> Estimator -> Bool
Eq, Int -> Estimator -> ShowS
[Estimator] -> ShowS
Estimator -> String
(Int -> Estimator -> ShowS)
-> (Estimator -> String)
-> ([Estimator] -> ShowS)
-> Show Estimator
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Estimator] -> ShowS
$cshowList :: [Estimator] -> ShowS
show :: Estimator -> String
$cshow :: Estimator -> String
showsPrec :: Int -> Estimator -> ShowS
$cshowsPrec :: Int -> Estimator -> ShowS
Show, ReadPrec [Estimator]
ReadPrec Estimator
Int -> ReadS Estimator
ReadS [Estimator]
(Int -> ReadS Estimator)
-> ReadS [Estimator]
-> ReadPrec Estimator
-> ReadPrec [Estimator]
-> Read Estimator
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Estimator]
$creadListPrec :: ReadPrec [Estimator]
readPrec :: ReadPrec Estimator
$creadPrec :: ReadPrec Estimator
readList :: ReadS [Estimator]
$creadList :: ReadS [Estimator]
readsPrec :: Int -> ReadS Estimator
$creadsPrec :: Int -> ReadS Estimator
Read)

getAnalyzedValue :: Estimator -> AnalyzedField -> Double
getAnalyzedValue :: Estimator -> AnalyzedField -> Double
getAnalyzedValue Estimator
estimator AnalyzedField{Double
Maybe (Estimate ConfInt Double)
(Sample, Sample)
OutlierVariance
Outliers
analyzedRegRSq :: Maybe (Estimate ConfInt Double)
analyzedRegCoeff :: Maybe (Estimate ConfInt Double)
analyzedKDE :: (Sample, Sample)
analyzedOutlierVar :: OutlierVariance
analyzedOutliers :: Outliers
analyzedMedian :: Double
analyzedStdDev :: Double
analyzedMean :: Double
analyzedRegRSq :: AnalyzedField -> Maybe (Estimate ConfInt Double)
analyzedRegCoeff :: AnalyzedField -> Maybe (Estimate ConfInt Double)
analyzedKDE :: AnalyzedField -> (Sample, Sample)
analyzedOutlierVar :: AnalyzedField -> OutlierVariance
analyzedOutliers :: AnalyzedField -> Outliers
analyzedMedian :: AnalyzedField -> Double
analyzedStdDev :: AnalyzedField -> Double
analyzedMean :: AnalyzedField -> Double
..} =
    case Estimator
estimator of
        Estimator
Median -> Double
analyzedMedian
        Estimator
Mean -> Double
analyzedMean
        Estimator
Regression ->
            case Maybe (Estimate ConfInt Double)
analyzedRegCoeff of
                Maybe (Estimate ConfInt Double)
Nothing -> Double
analyzedMean
                Just Estimate ConfInt Double
x -> Estimate ConfInt Double -> Double
forall (e :: * -> *) a. Estimate e a -> a
estPoint Estimate ConfInt Double
x

-- | Perform an analysis of a measurement.
analyzeBenchmark :: GenIO
                 -> [String]
                 -> [String]
                 -> [([Double], [Double])]
                 -> IO [AnalyzedField]
analyzeBenchmark :: GenIO
-> [String]
-> [String]
-> [([Double], [Double])]
-> IO [AnalyzedField]
analyzeBenchmark GenIO
randGen [String]
pcols [String]
rcols [([Double], [Double])]
samples = do
    let sampleCnt :: Int
sampleCnt = [([Double], [Double])] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [([Double], [Double])]
samples
        i :: Int
i = Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe (String -> Int
forall a. HasCallStack => String -> a
error String
"bug") (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ String -> [String] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
elemIndex String
"iters" [String]
pcols
        vectors :: [Sample]
vectors = ([Double] -> Sample) -> [[Double]] -> [Sample]
forall a b. (a -> b) -> [a] -> [b]
map [Double] -> Sample
forall a. Unbox a => [a] -> Vector a
U.fromList
            ([[Double]] -> [Sample]) -> [[Double]] -> [Sample]
forall a b. (a -> b) -> a -> b
$ [[Double]] -> [[Double]]
forall a. [[a]] -> [[a]]
transpose
            ([[Double]] -> [[Double]]) -> [[Double]] -> [[Double]]
forall a b. (a -> b) -> a -> b
$ (([Double], [Double]) -> [Double])
-> [([Double], [Double])] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> [String] -> ([Double], [Double]) -> [Double]
rescaleIteration Int
i [String]
rcols) [([Double], [Double])]
samples

    ([Maybe (Estimate ConfInt Double)]
coeffs, [Maybe (Estimate ConfInt Double)]
r2s) <-
        -- olsRegress fails if there are fewer samples than predictors
        if Bool
useRegression Bool -> Bool -> Bool
&& [([Double], [Double])] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [([Double], [Double])]
samples Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= ([String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
pcols Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        then do
            let f :: Maybe (a, a) -> (Maybe a, Maybe a)
f (Just (a
x, a
y)) = (a -> Maybe a
forall a. a -> Maybe a
Just a
x, a -> Maybe a
forall a. a -> Maybe a
Just a
y)
                f Maybe (a, a)
Nothing = (Maybe a
forall a. Maybe a
Nothing, Maybe a
forall a. Maybe a
Nothing)
            ([Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
 -> ([Maybe (Estimate ConfInt Double)],
     [Maybe (Estimate ConfInt Double)]))
-> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO
     ([Maybe (Estimate ConfInt Double)],
      [Maybe (Estimate ConfInt Double)])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([(Maybe (Estimate ConfInt Double),
  Maybe (Estimate ConfInt Double))]
-> ([Maybe (Estimate ConfInt Double)],
    [Maybe (Estimate ConfInt Double)])
forall a b. [(a, b)] -> ([a], [b])
unzip ([(Maybe (Estimate ConfInt Double),
   Maybe (Estimate ConfInt Double))]
 -> ([Maybe (Estimate ConfInt Double)],
     [Maybe (Estimate ConfInt Double)]))
-> ([Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
    -> [(Maybe (Estimate ConfInt Double),
         Maybe (Estimate ConfInt Double))])
-> [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
-> ([Maybe (Estimate ConfInt Double)],
    [Maybe (Estimate ConfInt Double)])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe (Estimate ConfInt Double, Estimate ConfInt Double)
 -> (Maybe (Estimate ConfInt Double),
     Maybe (Estimate ConfInt Double)))
-> [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
-> [(Maybe (Estimate ConfInt Double),
     Maybe (Estimate ConfInt Double))]
forall a b. (a -> b) -> [a] -> [b]
map Maybe (Estimate ConfInt Double, Estimate ConfInt Double)
-> (Maybe (Estimate ConfInt Double),
    Maybe (Estimate ConfInt Double))
forall a a. Maybe (a, a) -> (Maybe a, Maybe a)
f) (IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
 -> IO
      ([Maybe (Estimate ConfInt Double)],
       [Maybe (Estimate ConfInt Double)]))
-> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO
     ([Maybe (Estimate ConfInt Double)],
      [Maybe (Estimate ConfInt Double)])
forall a b. (a -> b) -> a -> b
$ GenIO
-> Int
-> [String]
-> [([Double], [Double])]
-> IO [Maybe (Estimate ConfInt Double, Estimate ConfInt Double)]
regress GenIO
randGen Int
i [String]
rcols [([Double], [Double])]
samples
        else do
            let n :: Int
n = [String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
rcols
            ([Maybe (Estimate ConfInt Double)],
 [Maybe (Estimate ConfInt Double)])
-> IO
     ([Maybe (Estimate ConfInt Double)],
      [Maybe (Estimate ConfInt Double)])
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
-> Maybe (Estimate ConfInt Double)
-> [Maybe (Estimate ConfInt Double)]
forall a. Int -> a -> [a]
replicate Int
n Maybe (Estimate ConfInt Double)
forall a. Maybe a
Nothing, Int
-> Maybe (Estimate ConfInt Double)
-> [Maybe (Estimate ConfInt Double)]
forall a. Int -> a -> [a]
replicate Int
n Maybe (Estimate ConfInt Double)
forall a. Maybe a
Nothing)

    ([Double]
means, [Double]
devs) <-
        if Bool
useBootstrap Bool -> Bool -> Bool
&& [([Double], [Double])] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [([Double], [Double])]
samples Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
3
        then do
            ([Estimate ConfInt Double]
ms, [Estimate ConfInt Double]
ds) <- ([(Estimate ConfInt Double, Estimate ConfInt Double)]
 -> ([Estimate ConfInt Double], [Estimate ConfInt Double]))
-> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO ([Estimate ConfInt Double], [Estimate ConfInt Double])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [(Estimate ConfInt Double, Estimate ConfInt Double)]
-> ([Estimate ConfInt Double], [Estimate ConfInt Double])
forall a b. [(a, b)] -> ([a], [b])
unzip (IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
 -> IO ([Estimate ConfInt Double], [Estimate ConfInt Double]))
-> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
-> IO ([Estimate ConfInt Double], [Estimate ConfInt Double])
forall a b. (a -> b) -> a -> b
$ GenIO
-> [Sample]
-> IO [(Estimate ConfInt Double, Estimate ConfInt Double)]
estimateMeanAndStdDev GenIO
randGen [Sample]
vectors
            ([Double], [Double]) -> IO ([Double], [Double])
forall (m :: * -> *) a. Monad m => a -> m a
return ((Estimate ConfInt Double -> Double)
-> [Estimate ConfInt Double] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Estimate ConfInt Double -> Double
forall (e :: * -> *) a. Estimate e a -> a
estPoint [Estimate ConfInt Double]
ms, (Estimate ConfInt Double -> Double)
-> [Estimate ConfInt Double] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Estimate ConfInt Double -> Double
forall (e :: * -> *) a. Estimate e a -> a
estPoint [Estimate ConfInt Double]
ds)
        else do
            -- Even for max fields (e.g. maxrss) we take the mean
            let ms :: [Double]
ms = (Sample -> Double) -> [Sample] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Sample -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean [Sample]
vectors
                ds :: [Double]
ds = (Sample -> Double) -> [Sample] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Sample -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
stdDev [Sample]
vectors
            ([Double], [Double]) -> IO ([Double], [Double])
forall (m :: * -> *) a. Monad m => a -> m a
return ([Double]
ms, [Double]
ds)

    let len :: Int
len = Sample -> Int
forall a. Unbox a => Vector a -> Int
U.length (Sample -> Int) -> Sample -> Int
forall a b. (a -> b) -> a -> b
$ [Sample] -> Sample
forall a. [a] -> a
head [Sample]
vectors
        median :: Sample -> Double
median Sample
v = (Sample -> Sample
sort Sample
v) Sample -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
len Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2)
        medians :: [Double]
medians = (Sample -> Double) -> [Sample] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Sample -> Double
median [Sample]
vectors
        outliers :: [Outliers]
outliers = ZipList Outliers -> [Outliers]
forall a. ZipList a -> [a]
getZipList (ZipList Outliers -> [Outliers]) -> ZipList Outliers -> [Outliers]
forall a b. (a -> b) -> a -> b
$ Sample -> Outliers
classifyOutliers (Sample -> Outliers) -> ZipList Sample -> ZipList Outliers
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Sample] -> ZipList Sample
forall a. [a] -> ZipList a
ZipList [Sample]
vectors
        outlierVars :: [OutlierVariance]
outlierVars = ZipList OutlierVariance -> [OutlierVariance]
forall a. ZipList a -> [a]
getZipList
                (ZipList OutlierVariance -> [OutlierVariance])
-> ZipList OutlierVariance -> [OutlierVariance]
forall a b. (a -> b) -> a -> b
$ Double -> Double -> Double -> OutlierVariance
outlierVariance
                    (Double -> Double -> Double -> OutlierVariance)
-> ZipList Double -> ZipList (Double -> Double -> OutlierVariance)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Double] -> ZipList Double
forall a. [a] -> ZipList a
ZipList [Double]
means
                    ZipList (Double -> Double -> OutlierVariance)
-> ZipList Double -> ZipList (Double -> OutlierVariance)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Double] -> ZipList Double
forall a. [a] -> ZipList a
ZipList [Double]
devs
                    ZipList (Double -> OutlierVariance)
-> ZipList Double -> ZipList OutlierVariance
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Double -> ZipList Double
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sampleCnt)
        kdes :: [(Sample, Sample)]
kdes = (Sample -> (Sample, Sample)) -> [Sample] -> [(Sample, Sample)]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Sample -> (Sample, Sample)
forall (v :: * -> *).
(Vector v CD, Vector v Double, Vector v Int) =>
Int -> v Double -> (v Double, v Double)
kde Int
128) [Sample]
vectors

    [AnalyzedField] -> IO [AnalyzedField]
forall (m :: * -> *) a. Monad m => a -> m a
return ([AnalyzedField] -> IO [AnalyzedField])
-> [AnalyzedField] -> IO [AnalyzedField]
forall a b. (a -> b) -> a -> b
$ ZipList AnalyzedField -> [AnalyzedField]
forall a. ZipList a -> [a]
getZipList (ZipList AnalyzedField -> [AnalyzedField])
-> ZipList AnalyzedField -> [AnalyzedField]
forall a b. (a -> b) -> a -> b
$ Double
-> Double
-> Double
-> Outliers
-> OutlierVariance
-> (Sample, Sample)
-> Maybe (Estimate ConfInt Double)
-> Maybe (Estimate ConfInt Double)
-> AnalyzedField
AnalyzedField
        (Double
 -> Double
 -> Double
 -> Outliers
 -> OutlierVariance
 -> (Sample, Sample)
 -> Maybe (Estimate ConfInt Double)
 -> Maybe (Estimate ConfInt Double)
 -> AnalyzedField)
-> ZipList Double
-> ZipList
     (Double
      -> Double
      -> Outliers
      -> OutlierVariance
      -> (Sample, Sample)
      -> Maybe (Estimate ConfInt Double)
      -> Maybe (Estimate ConfInt Double)
      -> AnalyzedField)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Double] -> ZipList Double
forall a. [a] -> ZipList a
ZipList [Double]
means
        ZipList
  (Double
   -> Double
   -> Outliers
   -> OutlierVariance
   -> (Sample, Sample)
   -> Maybe (Estimate ConfInt Double)
   -> Maybe (Estimate ConfInt Double)
   -> AnalyzedField)
-> ZipList Double
-> ZipList
     (Double
      -> Outliers
      -> OutlierVariance
      -> (Sample, Sample)
      -> Maybe (Estimate ConfInt Double)
      -> Maybe (Estimate ConfInt Double)
      -> AnalyzedField)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Double] -> ZipList Double
forall a. [a] -> ZipList a
ZipList [Double]
devs

        ZipList
  (Double
   -> Outliers
   -> OutlierVariance
   -> (Sample, Sample)
   -> Maybe (Estimate ConfInt Double)
   -> Maybe (Estimate ConfInt Double)
   -> AnalyzedField)
-> ZipList Double
-> ZipList
     (Outliers
      -> OutlierVariance
      -> (Sample, Sample)
      -> Maybe (Estimate ConfInt Double)
      -> Maybe (Estimate ConfInt Double)
      -> AnalyzedField)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Double] -> ZipList Double
forall a. [a] -> ZipList a
ZipList [Double]
medians
        ZipList
  (Outliers
   -> OutlierVariance
   -> (Sample, Sample)
   -> Maybe (Estimate ConfInt Double)
   -> Maybe (Estimate ConfInt Double)
   -> AnalyzedField)
-> ZipList Outliers
-> ZipList
     (OutlierVariance
      -> (Sample, Sample)
      -> Maybe (Estimate ConfInt Double)
      -> Maybe (Estimate ConfInt Double)
      -> AnalyzedField)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Outliers] -> ZipList Outliers
forall a. [a] -> ZipList a
ZipList [Outliers]
outliers
        ZipList
  (OutlierVariance
   -> (Sample, Sample)
   -> Maybe (Estimate ConfInt Double)
   -> Maybe (Estimate ConfInt Double)
   -> AnalyzedField)
-> ZipList OutlierVariance
-> ZipList
     ((Sample, Sample)
      -> Maybe (Estimate ConfInt Double)
      -> Maybe (Estimate ConfInt Double)
      -> AnalyzedField)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [OutlierVariance] -> ZipList OutlierVariance
forall a. [a] -> ZipList a
ZipList [OutlierVariance]
outlierVars
        ZipList
  ((Sample, Sample)
   -> Maybe (Estimate ConfInt Double)
   -> Maybe (Estimate ConfInt Double)
   -> AnalyzedField)
-> ZipList (Sample, Sample)
-> ZipList
     (Maybe (Estimate ConfInt Double)
      -> Maybe (Estimate ConfInt Double) -> AnalyzedField)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [(Sample, Sample)] -> ZipList (Sample, Sample)
forall a. [a] -> ZipList a
ZipList [(Sample, Sample)]
kdes

        ZipList
  (Maybe (Estimate ConfInt Double)
   -> Maybe (Estimate ConfInt Double) -> AnalyzedField)
-> ZipList (Maybe (Estimate ConfInt Double))
-> ZipList (Maybe (Estimate ConfInt Double) -> AnalyzedField)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Maybe (Estimate ConfInt Double)]
-> ZipList (Maybe (Estimate ConfInt Double))
forall a. [a] -> ZipList a
ZipList [Maybe (Estimate ConfInt Double)]
coeffs
        ZipList (Maybe (Estimate ConfInt Double) -> AnalyzedField)
-> ZipList (Maybe (Estimate ConfInt Double))
-> ZipList AnalyzedField
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Maybe (Estimate ConfInt Double)]
-> ZipList (Maybe (Estimate ConfInt Double))
forall a. [a] -> ZipList a
ZipList [Maybe (Estimate ConfInt Double)]
r2s

-- predictor matrix
data BenchmarkIterMatrix = BenchmarkIterMatrix
    { BenchmarkIterMatrix -> [String]
iterPredColNames  :: ![String]  -- predictor column names
    , BenchmarkIterMatrix -> [String]
iterRespColNames  :: ![String]  -- responder column names
    -- (Benchmark, [(predictor columns, responder columns)])
    , BenchmarkIterMatrix -> [(String, [([Double], [Double])])]
iterRowValues :: ![(String, [([Double], [Double])])]
    } deriving Int -> BenchmarkIterMatrix -> ShowS
[BenchmarkIterMatrix] -> ShowS
BenchmarkIterMatrix -> String
(Int -> BenchmarkIterMatrix -> ShowS)
-> (BenchmarkIterMatrix -> String)
-> ([BenchmarkIterMatrix] -> ShowS)
-> Show BenchmarkIterMatrix
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BenchmarkIterMatrix] -> ShowS
$cshowList :: [BenchmarkIterMatrix] -> ShowS
show :: BenchmarkIterMatrix -> String
$cshow :: BenchmarkIterMatrix -> String
showsPrec :: Int -> BenchmarkIterMatrix -> ShowS
$cshowsPrec :: Int -> BenchmarkIterMatrix -> ShowS
Show

-- Stored in row major order
data BenchmarkMatrix = BenchmarkMatrix
    { BenchmarkMatrix -> [String]
colNames  :: ![String]
    , BenchmarkMatrix -> [(String, [AnalyzedField])]
rowValues :: ![(String, [AnalyzedField])] -- (Benchmark, columns)
    } deriving Int -> BenchmarkMatrix -> ShowS
[BenchmarkMatrix] -> ShowS
BenchmarkMatrix -> String
(Int -> BenchmarkMatrix -> ShowS)
-> (BenchmarkMatrix -> String)
-> ([BenchmarkMatrix] -> ShowS)
-> Show BenchmarkMatrix
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BenchmarkMatrix] -> ShowS
$cshowList :: [BenchmarkMatrix] -> ShowS
show :: BenchmarkMatrix -> String
$cshow :: BenchmarkMatrix -> String
showsPrec :: Int -> BenchmarkMatrix -> ShowS
$cshowsPrec :: Int -> BenchmarkMatrix -> ShowS
Show

foldBenchmark :: BenchmarkIterMatrix -> IO BenchmarkMatrix
foldBenchmark :: BenchmarkIterMatrix -> IO BenchmarkMatrix
foldBenchmark BenchmarkIterMatrix{[String]
[(String, [([Double], [Double])])]
iterRowValues :: [(String, [([Double], [Double])])]
iterRespColNames :: [String]
iterPredColNames :: [String]
iterRowValues :: BenchmarkIterMatrix -> [(String, [([Double], [Double])])]
iterRespColNames :: BenchmarkIterMatrix -> [String]
iterPredColNames :: BenchmarkIterMatrix -> [String]
..} = do
    Gen RealWorld
randGen <- IO (Gen RealWorld)
IO GenIO
createSystemRandom
    [(String, [AnalyzedField])]
rows <- ((String, [([Double], [Double])]) -> IO (String, [AnalyzedField]))
-> [(String, [([Double], [Double])])]
-> IO [(String, [AnalyzedField])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Gen RealWorld
-> (String, [([Double], [Double])]) -> IO (String, [AnalyzedField])
foldIters Gen RealWorld
randGen) [(String, [([Double], [Double])])]
iterRowValues
    BenchmarkMatrix -> IO BenchmarkMatrix
forall (m :: * -> *) a. Monad m => a -> m a
return (BenchmarkMatrix -> IO BenchmarkMatrix)
-> BenchmarkMatrix -> IO BenchmarkMatrix
forall a b. (a -> b) -> a -> b
$ BenchmarkMatrix :: [String] -> [(String, [AnalyzedField])] -> BenchmarkMatrix
BenchmarkMatrix
        { colNames :: [String]
colNames = [String]
iterRespColNames
        , rowValues :: [(String, [AnalyzedField])]
rowValues = [(String, [AnalyzedField])]
rows
        }

    where

    foldIters :: Gen RealWorld
-> (String, [([Double], [Double])]) -> IO (String, [AnalyzedField])
foldIters Gen RealWorld
randGen (String
name, [([Double], [Double])]
vals) = do
            [AnalyzedField]
vals' <- GenIO
-> [String]
-> [String]
-> [([Double], [Double])]
-> IO [AnalyzedField]
analyzeBenchmark Gen RealWorld
GenIO
randGen [String]
iterPredColNames
                                      [String]
iterRespColNames [([Double], [Double])]
vals
            (String, [AnalyzedField]) -> IO (String, [AnalyzedField])
forall (m :: * -> *) a. Monad m => a -> m a
return (String
name, [AnalyzedField]
vals')

-- take top samples
-- XXX take equivalent iterations across multiple groups
filterSamples :: BenchmarkIterMatrix -> BenchmarkIterMatrix
filterSamples :: BenchmarkIterMatrix -> BenchmarkIterMatrix
filterSamples BenchmarkIterMatrix
matrix =
    BenchmarkIterMatrix
matrix
        {-
        {
          iterRowValues = map filterIters iterRowValues
        }

    where

    iterIndex = fromMaybe undefined
        $ elemIndex "iters" (map (map toLower) iterPredColNames)
    nivcswIndex = fromMaybe undefined
        $ elemIndex "nivcsw" (map (map toLower) iterPredColNames)
    filterIters (name, vals) =
        let vals'' = take 50 $ reverse $ sortBy (comparing ((!! iterIndex) .  fst)) vals
        let vals' = filter (\(x,_) -> x !! nivcswIndex < 10) vals
            vals'' =
                if null vals'
                then trace "null after filter" vals
                else vals'
        in (name, vals'')
        -}