{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs #-}
module Data.TDigest.Postprocess.Internal (
HasHistogram (..),
HistBin (..),
histogramFromCentroids,
quantile,
mean,
variance,
cdf,
validateHistogram,
Affine (..),
) where
import Data.Foldable (toList, traverse_)
import Data.Functor.Compose (Compose (..))
import Data.Functor.Identity (Identity (..))
import Data.List.NonEmpty (NonEmpty (..), nonEmpty)
import Data.Proxy (Proxy (..))
import Data.Semigroup (Semigroup (..))
import Data.Semigroup.Foldable (foldMap1)
import Prelude ()
import Prelude.Compat
import qualified Data.List.NonEmpty as NE
import Data.TDigest.Internal
data HistBin = HistBin
{ HistBin -> Mean
hbMin :: !Mean
, HistBin -> Mean
hbMax :: !Mean
, HistBin -> Mean
hbValue :: !Mean
, HistBin -> Mean
hbWeight :: !Weight
, HistBin -> Mean
hbCumWeight :: !Weight
}
deriving (Int -> HistBin -> ShowS
[HistBin] -> ShowS
HistBin -> String
(Int -> HistBin -> ShowS)
-> (HistBin -> String) -> ([HistBin] -> ShowS) -> Show HistBin
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [HistBin] -> ShowS
$cshowList :: [HistBin] -> ShowS
show :: HistBin -> String
$cshow :: HistBin -> String
showsPrec :: Int -> HistBin -> ShowS
$cshowsPrec :: Int -> HistBin -> ShowS
Show)
class Affine f => HasHistogram a f | a -> f where
histogram :: a -> f (NonEmpty HistBin)
totalWeight :: a -> Weight
instance (HistBin ~ e) => HasHistogram (NonEmpty HistBin) Identity where
histogram :: NonEmpty HistBin -> Identity (NonEmpty HistBin)
histogram = NonEmpty HistBin -> Identity (NonEmpty HistBin)
forall a. a -> Identity a
Identity
totalWeight :: NonEmpty HistBin -> Mean
totalWeight = HistBin -> Mean
tw (HistBin -> Mean)
-> (NonEmpty HistBin -> HistBin) -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty HistBin -> HistBin
forall a. NonEmpty a -> a
NE.last where
tw :: HistBin -> Mean
tw HistBin
hb = HistBin -> Mean
hbWeight HistBin
hb Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ HistBin -> Mean
hbCumWeight HistBin
hb
instance (HistBin ~ e) => HasHistogram [HistBin] Maybe where
histogram :: [HistBin] -> Maybe (NonEmpty HistBin)
histogram = [HistBin] -> Maybe (NonEmpty HistBin)
forall a. [a] -> Maybe (NonEmpty a)
nonEmpty
totalWeight :: [HistBin] -> Mean
totalWeight = Mean
-> (NonEmpty HistBin -> Mean) -> Maybe (NonEmpty HistBin) -> Mean
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine Mean
0 NonEmpty HistBin -> Mean
forall a (f :: * -> *). HasHistogram a f => a -> Mean
totalWeight (Maybe (NonEmpty HistBin) -> Mean)
-> ([HistBin] -> Maybe (NonEmpty HistBin)) -> [HistBin] -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [HistBin] -> Maybe (NonEmpty HistBin)
forall a (f :: * -> *).
HasHistogram a f =>
a -> f (NonEmpty HistBin)
histogram
histogramFromCentroids :: NonEmpty Centroid -> NonEmpty HistBin
histogramFromCentroids :: NonEmpty Centroid -> NonEmpty HistBin
histogramFromCentroids = NonEmpty Centroid -> NonEmpty HistBin
make
where
make :: NonEmpty Centroid -> NonEmpty HistBin
make :: NonEmpty Centroid -> NonEmpty HistBin
make ((Mean
x, Mean
w) :| []) = Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin Mean
x Mean
x Mean
x Mean
w Mean
0 HistBin -> [HistBin] -> NonEmpty HistBin
forall a. a -> [a] -> NonEmpty a
:| []
make (c1 :: Centroid
c1@(Mean
x1, Mean
w1) :| rest :: [Centroid]
rest@((Mean
x2, Mean
_) : [Centroid]
_))
= Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin Mean
x1 (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x1 Mean
x2) Mean
x1 Mean
w1 Mean
0 HistBin -> [HistBin] -> NonEmpty HistBin
forall a. a -> [a] -> NonEmpty a
:| Centroid -> Mean -> [Centroid] -> [HistBin]
iter Centroid
c1 Mean
w1 [Centroid]
rest
iter :: (Mean, Weight) -> Weight -> [(Mean, Weight)] -> [HistBin]
iter :: Centroid -> Mean -> [Centroid] -> [HistBin]
iter Centroid
_ Mean
_ [] = []
iter (Mean
x0, Mean
_) Mean
t (c1 :: Centroid
c1@(Mean
x1, Mean
w1) : rest :: [Centroid]
rest@((Mean
x2, Mean
_) : [Centroid]
_))
= Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x0 Mean
x1) (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x1 Mean
x2) Mean
x1 Mean
w1 Mean
tHistBin -> [HistBin] -> [HistBin]
forall a. a -> [a] -> [a]
: Centroid -> Mean -> [Centroid] -> [HistBin]
iter Centroid
c1 (Mean
t Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w1) [Centroid]
rest
iter (Mean
x0, Mean
_) Mean
t [(Mean
x1, Mean
w1)]
= [Mean -> Mean -> Mean -> Mean -> Mean -> HistBin
HistBin (Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
mid Mean
x0 Mean
x1) Mean
x1 Mean
x1 Mean
w1 Mean
t]
mid :: a -> a -> a
mid a
a a
b = (a
a a -> a -> a
forall a. Num a => a -> a -> a
+ a
b) a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
2
quantile :: Double -> Weight -> NonEmpty HistBin -> Double
quantile :: Mean -> Mean -> NonEmpty HistBin -> Mean
quantile Mean
q Mean
tw = [HistBin] -> Mean
iter ([HistBin] -> Mean)
-> (NonEmpty HistBin -> [HistBin]) -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty HistBin -> [HistBin]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
where
q' :: Mean
q' = Mean
q Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
tw
iter :: [HistBin] -> Mean
iter [] = String -> Mean
forall a. HasCallStack => String -> a
error String
"quantile: empty NonEmpty"
iter [HistBin Mean
a Mean
b Mean
_ Mean
w Mean
t] = Mean
a Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ (Mean
b Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
q' Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
t) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
iter (HistBin Mean
a Mean
b Mean
_ Mean
w Mean
t : [HistBin]
rest)
| Mean
q' Mean -> Mean -> Bool
forall a. Ord a => a -> a -> Bool
< Mean
t Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w = Mean
a Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ (Mean
b Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
q' Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
t) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
| Bool
otherwise = [HistBin] -> Mean
iter [HistBin]
rest
mean :: NonEmpty HistBin -> Double
mean :: NonEmpty HistBin -> Mean
mean = Mean' -> Mean
getMean (Mean' -> Mean)
-> (NonEmpty HistBin -> Mean') -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (HistBin -> Mean') -> NonEmpty HistBin -> Mean'
forall (t :: * -> *) m a.
(Foldable1 t, Semigroup m) =>
(a -> m) -> t a -> m
foldMap1 HistBin -> Mean'
toMean
where
toMean :: HistBin -> Mean'
toMean (HistBin Mean
_ Mean
_ Mean
x Mean
w Mean
_) = Mean -> Mean -> Mean'
Mean Mean
w Mean
x
data Mean' = Mean !Double !Double
getMean :: Mean' -> Double
getMean :: Mean' -> Mean
getMean (Mean Mean
_ Mean
x) = Mean
x
instance Semigroup Mean' where
Mean Mean
w1 Mean
x1 <> :: Mean' -> Mean' -> Mean'
<> Mean Mean
w2 Mean
x2 = Mean -> Mean -> Mean'
Mean Mean
w Mean
x
where
w :: Mean
w = Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w2
x :: Mean
x = (Mean
x1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
x2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w2) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
variance :: NonEmpty HistBin -> Double
variance :: NonEmpty HistBin -> Mean
variance = Variance -> Mean
getVariance (Variance -> Mean)
-> (NonEmpty HistBin -> Variance) -> NonEmpty HistBin -> Mean
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (HistBin -> Variance) -> NonEmpty HistBin -> Variance
forall (t :: * -> *) m a.
(Foldable1 t, Semigroup m) =>
(a -> m) -> t a -> m
foldMap1 HistBin -> Variance
toVariance
where
toVariance :: HistBin -> Variance
toVariance (HistBin Mean
_ Mean
_ Mean
x Mean
w Mean
_) = Mean -> Mean -> Mean -> Variance
Variance Mean
w Mean
x Mean
0
data Variance = Variance !Double !Double !Double
getVariance :: Variance -> Double
getVariance :: Variance -> Mean
getVariance (Variance Mean
w Mean
_ Mean
d) = Mean
d Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ (Mean
w Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
1)
instance Semigroup Variance where
Variance Mean
w1 Mean
x1 Mean
d1 <> :: Variance -> Variance -> Variance
<> Variance Mean
w2 Mean
x2 Mean
d2 = Mean -> Mean -> Mean -> Variance
Variance Mean
w Mean
x Mean
d
where
w :: Mean
w = Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w2
x :: Mean
x = (Mean
x1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
x2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
w2) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
w
d :: Mean
d = Mean
d1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
d2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
x1 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x1) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
x2 Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x2) Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
w Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* Mean
x
cdf :: Double
-> Double
-> [HistBin] -> Double
cdf :: Mean -> Mean -> [HistBin] -> Mean
cdf Mean
x Mean
n = [HistBin] -> Mean
iter
where
iter :: [HistBin] -> Mean
iter [] = Mean
1
iter (HistBin Mean
a Mean
b Mean
_ Mean
w Mean
t : [HistBin]
rest)
| Mean
x Mean -> Mean -> Bool
forall a. Ord a => a -> a -> Bool
< Mean
a = Mean
0
| Mean
x Mean -> Mean -> Bool
forall a. Ord a => a -> a -> Bool
< Mean
b = (Mean
t Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
w Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
* (Mean
x Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ (Mean
b Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
- Mean
a)) Mean -> Mean -> Mean
forall a. Fractional a => a -> a -> a
/ Mean
n
| Bool
otherwise = [HistBin] -> Mean
iter [HistBin]
rest
validateHistogram :: Foldable f => f HistBin -> Either String (f HistBin)
validateHistogram :: f HistBin -> Either String (f HistBin)
validateHistogram f HistBin
bs = ((HistBin, HistBin) -> Either String ())
-> [(HistBin, HistBin)] -> Either String ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (HistBin, HistBin) -> Either String ()
validPair ([HistBin] -> [(HistBin, HistBin)]
forall b. [b] -> [(b, b)]
pairs ([HistBin] -> [(HistBin, HistBin)])
-> [HistBin] -> [(HistBin, HistBin)]
forall a b. (a -> b) -> a -> b
$ f HistBin -> [HistBin]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList f HistBin
bs) Either String ()
-> Either String (f HistBin) -> Either String (f HistBin)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> f HistBin -> Either String (f HistBin)
forall (f :: * -> *) a. Applicative f => a -> f a
pure f HistBin
bs
where
validPair :: (HistBin, HistBin) -> Either String ()
validPair (lb :: HistBin
lb@(HistBin Mean
_ Mean
lmax Mean
_ Mean
lwt Mean
lcw), rb :: HistBin
rb@(HistBin Mean
rmin Mean
_ Mean
_ Mean
_ Mean
rcw)) = do
Bool -> String -> Either String ()
check (Mean
lmax Mean -> Mean -> Bool
forall a. Eq a => a -> a -> Bool
== Mean
rmin) String
"gap between bins"
Bool -> String -> Either String ()
check (Mean
lcw Mean -> Mean -> Mean
forall a. Num a => a -> a -> a
+ Mean
lwt Mean -> Mean -> Bool
forall a. Eq a => a -> a -> Bool
== Mean
rcw) String
"mismatch in weight cumulation"
where
check :: Bool -> String -> Either String ()
check Bool
False String
err = String -> Either String ()
forall a b. a -> Either a b
Left (String -> Either String ()) -> String -> Either String ()
forall a b. (a -> b) -> a -> b
$ String
err String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (HistBin, HistBin) -> String
forall a. Show a => a -> String
show (HistBin
lb, HistBin
rb)
check Bool
True String
_ = () -> Either String ()
forall a b. b -> Either a b
Right ()
pairs :: [b] -> [(b, b)]
pairs [b]
xs = [b] -> [b] -> [(b, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip [b]
xs ([b] -> [(b, b)]) -> [b] -> [(b, b)]
forall a b. (a -> b) -> a -> b
$ [b] -> [b]
forall a. [a] -> [a]
tail [b]
xs
class Traversable t => Affine t where
affine :: b -> (a -> b) -> t a -> b
affine b
x a -> b
f = b -> t b -> b
forall (t :: * -> *) a. Affine t => a -> t a -> a
fromAffine b
x (t b -> b) -> (t a -> t b) -> t a -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> b) -> t a -> t b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f
fromAffine :: a -> t a -> a
fromAffine a
x = a -> (a -> a) -> t a -> a
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine a
x a -> a
forall a. a -> a
id
{-# MINIMAL fromAffine | affine #-}
instance Affine Identity where fromAffine :: a -> Identity a -> a
fromAffine a
_ = Identity a -> a
forall a. Identity a -> a
runIdentity
instance Affine Maybe where affine :: b -> (a -> b) -> Maybe a -> b
affine = b -> (a -> b) -> Maybe a -> b
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
instance Affine Proxy where affine :: b -> (a -> b) -> Proxy a -> b
affine b
x a -> b
_ Proxy a
_ = b
x
instance (Affine f, Affine g) => Affine (Compose f g) where
affine :: b -> (a -> b) -> Compose f g a -> b
affine b
x a -> b
f (Compose f (g a)
c) = b -> (g a -> b) -> f (g a) -> b
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine b
x (b -> (a -> b) -> g a -> b
forall (t :: * -> *) b a. Affine t => b -> (a -> b) -> t a -> b
affine b
x a -> b
f) f (g a)
c