{-# LANGUAGE FlexibleInstances      #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs                  #-}
-- | 'TDigest' postprocessing functions.
--
-- These are re-exported from "Data.TDigest" module.
--
module Data.TDigest.Postprocess.Internal (
    -- * Histogram
    HasHistogram (..),
    HistBin (..),
    histogramFromCentroids,
    -- * Quantiles
    quantile,
    -- * Mean & variance
    --
    -- | As we have "full" histogram, we can calculate other statistical
    -- variables.
    mean,
    variance,
    -- * CDF
    cdf,
    -- * Debug
    validateHistogram,
    -- * Affine - internal
    Affine (..),
    ) where

import Data.Foldable         (toList, traverse_)
import Data.Foldable1        (foldMap1)
import Data.Functor.Compose  (Compose (..))
import Data.Functor.Identity (Identity (..))
import Data.List.NonEmpty    (NonEmpty (..), nonEmpty)
import Data.Proxy            (Proxy (..))
import Data.Semigroup        (Semigroup (..))
import Prelude ()
import Prelude.Compat

import qualified Data.List.NonEmpty as NE

import Data.TDigest.Internal

-------------------------------------------------------------------------------
-- Histogram
-------------------------------------------------------------------------------

-- | Histogram bin
data HistBin = HistBin
    { HistBin -> Weight
hbMin       :: !Mean    -- ^ lower bound
    , HistBin -> Weight
hbMax       :: !Mean    -- ^ upper bound
    , HistBin -> Weight
hbValue     :: !Mean    -- ^ original value: @(mi + ma) / 2@
    , HistBin -> Weight
hbWeight    :: !Weight  -- ^ weight ("area" of the bar)
    , HistBin -> Weight
hbCumWeight :: !Weight  -- ^ weight from the right, excludes this bin
    }
  deriving (Int -> HistBin -> ShowS
[HistBin] -> ShowS
HistBin -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [HistBin] -> ShowS
$cshowList :: [HistBin] -> ShowS
show :: HistBin -> String
$cshow :: HistBin -> String
showsPrec :: Int -> HistBin -> ShowS
$cshowsPrec :: Int -> HistBin -> ShowS
Show)

-- | Types from which we can extract histogram.
class Affine f => HasHistogram a f | a -> f where
    histogram   :: a -> f (NonEmpty HistBin)
    totalWeight :: a -> Weight

instance (HistBin ~ e) => HasHistogram (NonEmpty HistBin) Identity where
    histogram :: NonEmpty HistBin -> Identity (NonEmpty HistBin)
histogram = forall a. a -> Identity a
Identity
    totalWeight :: NonEmpty HistBin -> Weight
totalWeight = HistBin -> Weight
tw forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. NonEmpty a -> a
NE.last where
        tw :: HistBin -> Weight
tw HistBin
hb =  HistBin -> Weight
hbWeight HistBin
hb forall a. Num a => a -> a -> a
+ HistBin -> Weight
hbCumWeight HistBin
hb

instance (HistBin ~ e) => HasHistogram [HistBin] Maybe where
    histogram :: [HistBin] -> Maybe (NonEmpty HistBin)
histogram = forall a. [a] -> Maybe (NonEmpty a)
nonEmpty
    totalWeight :: [HistBin] -> Weight
totalWeight = forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine Weight
0 forall a (f :: * -> *). HasHistogram a f => a -> Weight
totalWeight forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
histogram

-- | Histogram from centroids
histogramFromCentroids :: NonEmpty Centroid -> NonEmpty HistBin
histogramFromCentroids :: NonEmpty Centroid -> NonEmpty HistBin
histogramFromCentroids = NonEmpty Centroid -> NonEmpty HistBin
make
  where
    make :: NonEmpty Centroid -> NonEmpty HistBin
    -- one
    make :: NonEmpty Centroid -> NonEmpty HistBin
make ((Weight
x, Weight
w) :| []) = Weight -> Weight -> Weight -> Weight -> Weight -> HistBin
HistBin Weight
x Weight
x Weight
x Weight
w Weight
0 forall a. a -> [a] -> NonEmpty a
:| []
    -- first
    make (c1 :: Centroid
c1@(Weight
x1, Weight
w1) :| rest :: [Centroid]
rest@((Weight
x2, Weight
_) : [Centroid]
_))
        = Weight -> Weight -> Weight -> Weight -> Weight -> HistBin
HistBin Weight
x1 (forall {a}. Fractional a => a -> a -> a
mid Weight
x1 Weight
x2) Weight
x1 Weight
w1 Weight
0 forall a. a -> [a] -> NonEmpty a
:| Centroid -> Weight -> [Centroid] -> [HistBin]
iter Centroid
c1 Weight
w1 [Centroid]
rest

    -- zero
    iter :: (Mean, Weight) -> Weight -> [(Mean, Weight)] -> [HistBin]
    iter :: Centroid -> Weight -> [Centroid] -> [HistBin]
iter Centroid
_ Weight
_ [] = []
    -- middle
    iter (Weight
x0, Weight
_) Weight
t (c1 :: Centroid
c1@(Weight
x1, Weight
w1) : rest :: [Centroid]
rest@((Weight
x2, Weight
_) : [Centroid]
_))
        = Weight -> Weight -> Weight -> Weight -> Weight -> HistBin
HistBin (forall {a}. Fractional a => a -> a -> a
mid Weight
x0 Weight
x1) (forall {a}. Fractional a => a -> a -> a
mid Weight
x1 Weight
x2) Weight
x1 Weight
w1 Weight
tforall a. a -> [a] -> [a]
: Centroid -> Weight -> [Centroid] -> [HistBin]
iter Centroid
c1 (Weight
t forall a. Num a => a -> a -> a
+ Weight
w1) [Centroid]
rest
    -- last
    iter (Weight
x0, Weight
_) Weight
t [(Weight
x1, Weight
w1)]
        = [Weight -> Weight -> Weight -> Weight -> Weight -> HistBin
HistBin (forall {a}. Fractional a => a -> a -> a
mid Weight
x0 Weight
x1) Weight
x1 Weight
x1 Weight
w1 Weight
t]

    mid :: a -> a -> a
mid a
a a
b = (a
a forall a. Num a => a -> a -> a
+ a
b) forall {a}. Fractional a => a -> a -> a
/ a
2

-------------------------------------------------------------------------------
-- Quantile
-------------------------------------------------------------------------------

-- | Quantile from the histogram.
quantile :: Double -> Weight -> NonEmpty HistBin -> Double
quantile :: Weight -> Weight -> NonEmpty HistBin -> Weight
quantile Weight
q Weight
tw = [HistBin] -> Weight
iter forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
  where
    q' :: Weight
q' = Weight
q forall a. Num a => a -> a -> a
* Weight
tw

    iter :: [HistBin] -> Weight
iter []                          = forall a. HasCallStack => String -> a
error String
"quantile: empty NonEmpty"
    iter [HistBin Weight
a Weight
b Weight
_ Weight
w Weight
t]           = Weight
a forall a. Num a => a -> a -> a
+ (Weight
b forall a. Num a => a -> a -> a
- Weight
a) forall a. Num a => a -> a -> a
* (Weight
q' forall a. Num a => a -> a -> a
- Weight
t) forall {a}. Fractional a => a -> a -> a
/ Weight
w
    iter (HistBin Weight
a Weight
b Weight
_ Weight
w Weight
t : [HistBin]
rest)
        | {- t < q' && -} Weight
q' forall a. Ord a => a -> a -> Bool
< Weight
t forall a. Num a => a -> a -> a
+ Weight
w = Weight
a forall a. Num a => a -> a -> a
+ (Weight
b forall a. Num a => a -> a -> a
- Weight
a) forall a. Num a => a -> a -> a
* (Weight
q' forall a. Num a => a -> a -> a
- Weight
t) forall {a}. Fractional a => a -> a -> a
/ Weight
w
        | Bool
otherwise                  = [HistBin] -> Weight
iter [HistBin]
rest

-------------------------------------------------------------------------------
-- Mean
-------------------------------------------------------------------------------

-- | Mean from the histogram.
mean :: NonEmpty HistBin -> Double
mean :: NonEmpty HistBin -> Weight
mean = Mean' -> Weight
getMean forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) m a.
(Foldable1 t, Semigroup m) =>
(a -> m) -> t a -> m
foldMap1 HistBin -> Mean'
toMean
  where
    toMean :: HistBin -> Mean'
toMean (HistBin Weight
_ Weight
_ Weight
x Weight
w Weight
_) = Weight -> Weight -> Mean'
Mean Weight
w Weight
x

data Mean' = Mean !Double !Double

getMean :: Mean' -> Double
getMean :: Mean' -> Weight
getMean (Mean Weight
_ Weight
x) = Weight
x

instance Semigroup Mean' where
    Mean Weight
w1 Weight
x1 <> :: Mean' -> Mean' -> Mean'
<> Mean Weight
w2 Weight
x2 = Weight -> Weight -> Mean'
Mean Weight
w Weight
x
      where
        w :: Weight
w = Weight
w1 forall a. Num a => a -> a -> a
+ Weight
w2
        x :: Weight
x = (Weight
x1 forall a. Num a => a -> a -> a
* Weight
w1 forall a. Num a => a -> a -> a
+ Weight
x2 forall a. Num a => a -> a -> a
* Weight
w2) forall {a}. Fractional a => a -> a -> a
/ Weight
w

-- | Variance from the histogram.
variance :: NonEmpty HistBin -> Double
variance :: NonEmpty HistBin -> Weight
variance = Variance -> Weight
getVariance forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) m a.
(Foldable1 t, Semigroup m) =>
(a -> m) -> t a -> m
foldMap1 HistBin -> Variance
toVariance
  where
    toVariance :: HistBin -> Variance
toVariance (HistBin Weight
_ Weight
_ Weight
x Weight
w Weight
_) = Weight -> Weight -> Weight -> Variance
Variance Weight
w Weight
x Weight
0

data Variance = Variance !Double !Double !Double

getVariance :: Variance -> Double
getVariance :: Variance -> Weight
getVariance (Variance Weight
w Weight
_ Weight
d) = Weight
d forall {a}. Fractional a => a -> a -> a
/ (Weight
w forall a. Num a => a -> a -> a
- Weight
1)

-- See: https://izbicki.me/blog/gausian-distributions-are-monoids
instance Semigroup Variance where
    Variance Weight
w1 Weight
x1 Weight
d1 <> :: Variance -> Variance -> Variance
<> Variance Weight
w2 Weight
x2 Weight
d2 = Weight -> Weight -> Weight -> Variance
Variance Weight
w Weight
x Weight
d
      where
        w :: Weight
w = Weight
w1 forall a. Num a => a -> a -> a
+ Weight
w2
        x :: Weight
x = (Weight
x1 forall a. Num a => a -> a -> a
* Weight
w1 forall a. Num a => a -> a -> a
+ Weight
x2 forall a. Num a => a -> a -> a
* Weight
w2) forall {a}. Fractional a => a -> a -> a
/ Weight
w
        d :: Weight
d = Weight
d1 forall a. Num a => a -> a -> a
+ Weight
d2 forall a. Num a => a -> a -> a
+ Weight
w1 forall a. Num a => a -> a -> a
* (Weight
x1 forall a. Num a => a -> a -> a
* Weight
x1) forall a. Num a => a -> a -> a
+ Weight
w2 forall a. Num a => a -> a -> a
* (Weight
x2 forall a. Num a => a -> a -> a
* Weight
x2) forall a. Num a => a -> a -> a
- Weight
w forall a. Num a => a -> a -> a
* Weight
x forall a. Num a => a -> a -> a
* Weight
x

-------------------------------------------------------------------------------
-- CDF - cumulative distribution function
-------------------------------------------------------------------------------

-- | Cumulative distribution function.
cdf :: Double
    -> Double  -- ^ total weight
    -> [HistBin] -> Double
cdf :: Weight -> Weight -> [HistBin] -> Weight
cdf Weight
x Weight
n = [HistBin] -> Weight
iter
  where
    iter :: [HistBin] -> Weight
iter [] = Weight
1
    iter (HistBin Weight
a Weight
b Weight
_ Weight
w Weight
t : [HistBin]
rest)
        | Weight
x forall a. Ord a => a -> a -> Bool
< Weight
a     = Weight
0
        | Weight
x forall a. Ord a => a -> a -> Bool
< Weight
b     = (Weight
t forall a. Num a => a -> a -> a
+ Weight
w forall a. Num a => a -> a -> a
* (Weight
x forall a. Num a => a -> a -> a
- Weight
a) forall {a}. Fractional a => a -> a -> a
/ (Weight
b forall a. Num a => a -> a -> a
- Weight
a)) forall {a}. Fractional a => a -> a -> a
/ Weight
n
        | Bool
otherwise = [HistBin] -> Weight
iter [HistBin]
rest

-------------------------------------------------------------------------------
-- Debug
-------------------------------------------------------------------------------

-- | Validate that list of 'HistBin' is a valid "histogram".
validateHistogram :: Foldable f => f HistBin -> Either String (f HistBin)
validateHistogram :: forall (f :: * -> *).
Foldable f =>
f HistBin -> Either String (f HistBin)
validateHistogram f HistBin
bs = forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (HistBin, HistBin) -> Either String ()
validPair (forall {b}. [b] -> [(b, b)]
pairs forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> [a]
toList f HistBin
bs) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Applicative f => a -> f a
pure f HistBin
bs
  where
    validPair :: (HistBin, HistBin) -> Either String ()
validPair (lb :: HistBin
lb@(HistBin Weight
_ Weight
lmax Weight
_ Weight
lwt Weight
lcw), rb :: HistBin
rb@(HistBin Weight
rmin Weight
_ Weight
_ Weight
_ Weight
rcw)) = do
        Bool -> String -> Either String ()
check (Weight
lmax forall a. Eq a => a -> a -> Bool
== Weight
rmin)     String
"gap between bins"
        Bool -> String -> Either String ()
check (Weight
lcw forall a. Num a => a -> a -> a
+ Weight
lwt forall a. Eq a => a -> a -> Bool
== Weight
rcw) String
"mismatch in weight cumulation"
      where
        check :: Bool -> String -> Either String ()
check Bool
False String
err = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
err forall a. [a] -> [a] -> [a]
++ String
" " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (HistBin
lb, HistBin
rb)
        check Bool
True  String
_   = forall a b. b -> Either a b
Right ()
    pairs :: [b] -> [(b, b)]
pairs [b]
xs = forall a b. [a] -> [b] -> [(a, b)]
zip [b]
xs forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
tail [b]
xs

-------------------------------------------------------------------------------
-- Affine
-------------------------------------------------------------------------------

-- | Affine containers, i.e. containing at most 1 element
--
-- This class doesn't have 'traverse' analogie
-- as it would require using 'Pointed' which is disputed type class.
--
-- > traverseAff :: Pointed f => (a -> f b) -> t a -> f (t b)
--
class Traversable t => Affine t where
    -- | Like `foldMap`
    affine :: b -> (a -> b) -> t a -> b
    affine b
x a -> b
f = forall (t :: * -> *) a. Affine t => a -> t a -> a
fromAffine b
x forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f

    fromAffine :: a -> t a -> a
    fromAffine a
x = forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine a
x forall a. a -> a
id

    {-# MINIMAL fromAffine | affine #-}

instance Affine Identity    where fromAffine :: forall a. a -> Identity a -> a
fromAffine a
_ = forall a. Identity a -> a
runIdentity
instance Affine Maybe       where affine :: forall b a. b -> (a -> b) -> Maybe a -> b
affine = forall b a. b -> (a -> b) -> Maybe a -> b
maybe
instance Affine Proxy       where affine :: forall b a. b -> (a -> b) -> Proxy a -> b
affine b
x a -> b
_ Proxy a
_ = b
x

-- | Composition of 'Affine' containers is 'Affine'
instance (Affine f, Affine g) => Affine (Compose f g) where
    affine :: forall b a. b -> (a -> b) -> Compose f g a -> b
affine b
x a -> b
f (Compose f (g a)
c) = forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine b
x (forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine b
x a -> b
f) f (g a)
c