{-# LANGUAGE DataKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.TDigest.Tree.Internal where
import Control.DeepSeq (NFData (..))
import Control.Monad.ST (ST, runST)
import Data.Binary (Binary (..))
import Data.Either (isRight)
import Data.Foldable (toList)
import Data.List.Compat (foldl')
import Data.List.NonEmpty (nonEmpty)
import Data.Ord (comparing)
import Data.Proxy (Proxy (..))
import Data.Semigroup (Semigroup (..))
import Data.Semigroup.Reducer (Reducer (..))
import GHC.TypeLits (KnownNat, Nat, natVal)
import Prelude ()
import Prelude.Compat
import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as MVU
import Data.TDigest.Internal
import qualified Data.TDigest.Postprocess.Internal as PP
data TDigest (compression :: Nat)
= Node
{-# UNPACK #-} !Size
{-# UNPACK #-} !Mean
{-# UNPACK #-} !Weight
{-# UNPACK #-} !Weight
!(TDigest compression)
!(TDigest compression)
| Nil
deriving (Int -> TDigest compression -> ShowS
forall (compression :: Nat). Int -> TDigest compression -> ShowS
forall (compression :: Nat). [TDigest compression] -> ShowS
forall (compression :: Nat). TDigest compression -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TDigest compression] -> ShowS
$cshowList :: forall (compression :: Nat). [TDigest compression] -> ShowS
show :: TDigest compression -> String
$cshow :: forall (compression :: Nat). TDigest compression -> String
showsPrec :: Int -> TDigest compression -> ShowS
$cshowsPrec :: forall (compression :: Nat). Int -> TDigest compression -> ShowS
Show)
instance KnownNat comp => Semigroup (TDigest comp) where
<> :: TDigest comp -> TDigest comp -> TDigest comp
(<>) = forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest
instance KnownNat comp => Reducer Double (TDigest comp) where
cons :: Double -> TDigest comp -> TDigest comp
cons = forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
snoc :: TDigest comp -> Double -> TDigest comp
snoc = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
unit :: Double -> TDigest comp
unit = forall (comp :: Nat). KnownNat comp => Double -> TDigest comp
singleton
instance KnownNat comp => Monoid (TDigest comp) where
mempty :: TDigest comp
mempty = forall (comp :: Nat). TDigest comp
emptyTDigest
mappend :: TDigest comp -> TDigest comp -> TDigest comp
mappend = forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest
instance NFData (TDigest comp) where
rnf :: TDigest comp -> ()
rnf TDigest comp
x = TDigest comp
x seq :: forall a b. a -> b -> b
`seq` ()
instance KnownNat comp => Binary (TDigest comp) where
put :: TDigest comp -> Put
put = forall t. Binary t => t -> Put
put forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids
get :: Get (TDigest comp)
get = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) forall (comp :: Nat). TDigest comp
emptyTDigest forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> [Centroid]
lc forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall t. Binary t => Get t
get
where
lc :: [Centroid] -> [Centroid]
lc :: [Centroid] -> [Centroid]
lc = forall a. a -> a
id
instance PP.HasHistogram (TDigest comp) Maybe where
histogram :: TDigest comp -> Maybe (NonEmpty HistBin)
histogram = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap NonEmpty Centroid -> NonEmpty HistBin
PP.histogramFromCentroids forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> Maybe (NonEmpty a)
nonEmpty forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids
totalWeight :: TDigest comp -> Double
totalWeight = forall (comp :: Nat). TDigest comp -> Double
totalWeight
getCentroids :: TDigest comp -> [Centroid]
getCentroids :: forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids = (forall a b. (a -> b) -> a -> b
$ []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {compression :: Nat}.
TDigest compression -> [Centroid] -> [Centroid]
go
where
go :: TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
Nil = forall a. a -> a
id
go (Node Int
_ Double
x Double
w Double
_ TDigest compression
l TDigest compression
r) = TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
l forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Double
x,Double
w) forall a. a -> [a] -> [a]
: ) forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
r
totalWeight :: TDigest comp -> Weight
totalWeight :: forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
Nil = Double
0
totalWeight (Node Int
_ Double
_ Double
_ Double
tw TDigest comp
_ TDigest comp
_) = Double
tw
size :: TDigest comp -> Int
size :: forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
Nil = Int
0
size (Node Int
s Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_) = Int
s
minimumValue :: TDigest comp -> Mean
minimumValue :: forall (comp :: Nat). TDigest comp -> Double
minimumValue = forall {compression :: Nat}.
Double -> TDigest compression -> Double
go Double
posInf
where
go :: Double -> TDigest compression -> Double
go Double
acc TDigest compression
Nil = Double
acc
go Double
_acc (Node Int
_ Double
x Double
_ Double
_ TDigest compression
l TDigest compression
_) = Double -> TDigest compression -> Double
go Double
x TDigest compression
l
maximumValue :: TDigest comp -> Mean
maximumValue :: forall (comp :: Nat). TDigest comp -> Double
maximumValue = forall {compression :: Nat}.
Double -> TDigest compression -> Double
go Double
negInf
where
go :: Double -> TDigest compression -> Double
go Double
acc TDigest compression
Nil = Double
acc
go Double
_acc (Node Int
_ Double
x Double
_ Double
_ TDigest compression
_ TDigest compression
r) = Double -> TDigest compression -> Double
go Double
x TDigest compression
r
emptyTDigest :: TDigest comp
emptyTDigest :: forall (comp :: Nat). TDigest comp
emptyTDigest = forall (comp :: Nat). TDigest comp
Nil
combineDigest
:: KnownNat comp
=> TDigest comp
-> TDigest comp
-> TDigest comp
combineDigest :: forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest TDigest comp
a TDigest comp
Nil = TDigest comp
a
combineDigest TDigest comp
Nil TDigest comp
b = TDigest comp
b
combineDigest a :: TDigest comp
a@(Node Int
n Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_) b :: TDigest comp
b@(Node Int
m Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_)
| Int
n forall a. Ord a => a -> a -> Bool
< Int
m = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
b (forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids TDigest comp
a)
| Bool
otherwise = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
a (forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids TDigest comp
b)
insertCentroid
:: forall comp. KnownNat comp
=> Centroid
-> TDigest comp
-> TDigest comp
insertCentroid :: forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
w) TDigest comp
Nil = forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
x Double
w
insertCentroid (Double
mean, Double
weight) TDigest comp
td = Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
0 Double
mean Double
weight Bool
False TDigest comp
td
where
n :: Weight
n :: Double
n = forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td forall a. Num a => a -> a -> a
+ Double
weight
compression :: Double
compression :: Double
compression = forall a. Num a => Integer -> a
fromInteger forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)
go
:: Weight
-> Mean
-> Weight
-> Bool
-> TDigest comp
-> TDigest comp
go :: Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
_ Double
newX Double
newW Bool
_ TDigest comp
Nil = forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
newX Double
newW
go Double
cum Double
newX Double
newW Bool
e (Node Int
s Double
x Double
w Double
tw TDigest comp
l TDigest comp
r) = case forall a. Ord a => a -> a -> Ordering
compare Double
newX Double
x of
Ordering
EQ -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node Int
s Double
x (Double
w forall a. Num a => a -> a -> a
+ Double
newW) (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
r
Ordering
LT | Double
thr forall a. Ord a => a -> a -> Bool
<= Double
w -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
Ordering
GT | Double
thr forall a. Ord a => a -> a -> Bool
<= Double
w -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
e TDigest comp
r)
Ordering
LT | Bool
e -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
Ordering
LT -> case TDigest comp
l of
TDigest comp
Nil -> case Maybe Double
mrw of
Maybe Double
Nothing -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) forall (comp :: Nat). TDigest comp
Nil TDigest comp
r
Just Double
rw -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
nx Double
nw (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
rw Bool
True forall (comp :: Nat). TDigest comp
Nil) TDigest comp
r
Node {}
| Double
lmax forall a. Ord a => a -> a -> Bool
< Double
newX Bool -> Bool -> Bool
&& forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
x) forall a. Ord a => a -> a -> Bool
< forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
lmax) -> case Maybe Double
mrw of
Maybe Double
Nothing -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
nw forall a. Num a => a -> a -> a
- Double
w) TDigest comp
l TDigest comp
r
Just Double
rw -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
nx Double
nw (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
rw Bool
True TDigest comp
l) TDigest comp
r
| Bool
otherwise -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
where
lmax :: Double
lmax = forall (comp :: Nat). TDigest comp -> Double
maximumValue TDigest comp
l
Ordering
GT | Bool
e -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
True TDigest comp
r)
Ordering
GT -> case TDigest comp
r of
TDigest comp
Nil -> case Maybe Double
mrw of
Maybe Double
Nothing -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l forall (comp :: Nat). TDigest comp
Nil
Just Double
rw -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
nx Double
nw TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
nw) Double
newX Double
rw Bool
True forall (comp :: Nat). TDigest comp
Nil)
Node {}
| Double
rmin forall a. Ord a => a -> a -> Bool
> Double
newX Bool -> Bool -> Bool
&& forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
x) forall a. Ord a => a -> a -> Bool
< forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
rmin) -> case Maybe Double
mrw of
Maybe Double
Nothing -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
r
Just Double
rw -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
nx Double
nw TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
nw) Double
newX Double
rw Bool
True TDigest comp
r)
| Bool
otherwise -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
e TDigest comp
r)
where
rmin :: Double
rmin = forall (comp :: Nat). TDigest comp -> Double
minimumValue TDigest comp
r
where
cum' :: Double
cum' = Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l
q :: Double
q = (Double
w forall a. Fractional a => a -> a -> a
/ Double
2 forall a. Num a => a -> a -> a
+ Double
cum') forall a. Fractional a => a -> a -> a
/ Double
n
thr :: Double
thr = Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression
dw :: Weight
mrw :: Maybe Weight
(Double
dw, Maybe Double
mrw) =
let diff :: Double
diff = forall a. Bool -> String -> a -> a
assert (Double
thr forall a. Ord a => a -> a -> Bool
> Double
w) String
"threshold should be larger than current node weight"
forall a b. (a -> b) -> a -> b
$ Double
w forall a. Num a => a -> a -> a
+ Double
newW forall a. Num a => a -> a -> a
- Double
thr
in if Double
diff forall a. Ord a => a -> a -> Bool
< Double
0
then (Double
newW, forall a. Maybe a
Nothing)
else (Double
thr forall a. Num a => a -> a -> a
- Double
w, forall a. a -> Maybe a
Just Double
diff)
(Double
nx, Double
nw) = Double -> Double -> Double -> Double -> Centroid
combinedCentroid Double
x Double
w Double
x Double
dw
node :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
node :: forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r = forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node
(Int
1 forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r)
Double
x Double
w
(Double
w forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
r)
TDigest comp
l TDigest comp
r
balanceR :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
balanceR :: forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l TDigest comp
r
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
<= Int
1 = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
> Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l = case TDigest comp
r of
TDigest comp
Nil -> forall a. HasCallStack => String -> a
error String
"balanceR: impossible happened"
(Node Int
_ Double
rx Double
rw Double
_ TDigest comp
Nil TDigest comp
rr) ->
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l forall (comp :: Nat). TDigest comp
Nil) TDigest comp
rr
(Node Int
_ Double
rx Double
rw Double
_ TDigest comp
rl TDigest comp
rr)
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
rl forall a. Ord a => a -> a -> Bool
< Int
balAlpha forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
rr ->
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
rl) TDigest comp
rr
(Node Int
_ Double
rx Double
rw Double
_ (Node Int
_ Double
rlx Double
rlw Double
_ TDigest comp
rll TDigest comp
rlr) TDigest comp
rr) ->
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rlx Double
rlw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
rll) (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw TDigest comp
rlr TDigest comp
rr)
| Bool
otherwise = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
balanceL :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
balanceL :: forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w TDigest comp
l TDigest comp
r
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
<= Int
1 = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Ord a => a -> a -> Bool
> Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r = case TDigest comp
l of
TDigest comp
Nil -> forall a. HasCallStack => String -> a
error String
"balanceL: impossible happened"
(Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll TDigest comp
Nil) ->
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w forall (comp :: Nat). TDigest comp
Nil TDigest comp
r)
(Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll TDigest comp
lr)
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
lr forall a. Ord a => a -> a -> Bool
< Int
balAlpha forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
ll ->
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
lr TDigest comp
r)
(Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll (Node Int
_ Double
lrx Double
lrw Double
_ TDigest comp
lrl TDigest comp
lrr)) ->
forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lrx Double
lrw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll TDigest comp
lrl) (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
lrr TDigest comp
r)
| Bool
otherwise = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
node' :: Int -> Mean -> Weight -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
node' :: forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' = forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node
singNode :: Mean -> Weight -> TDigest comp
singNode :: forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
x Double
w = forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node Int
1 Double
x Double
w Double
w forall (comp :: Nat). TDigest comp
Nil forall (comp :: Nat). TDigest comp
Nil
combinedCentroid
:: Mean -> Weight
-> Mean -> Weight
-> Centroid
combinedCentroid :: Double -> Double -> Double -> Double -> Centroid
combinedCentroid Double
x Double
w Double
x' Double
w' =
( (Double
x forall a. Num a => a -> a -> a
* Double
w forall a. Num a => a -> a -> a
+ Double
x' forall a. Num a => a -> a -> a
* Double
w') forall a. Fractional a => a -> a -> a
/ Double
w''
, Double
w''
)
where
w'' :: Double
w'' = Double
w forall a. Num a => a -> a -> a
+ Double
w'
threshold
:: Double
-> Double
-> Double
-> Double
threshold :: Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression = Double
4 forall a. Num a => a -> a -> a
* Double
n forall a. Num a => a -> a -> a
* Double
q forall a. Num a => a -> a -> a
* (Double
1 forall a. Num a => a -> a -> a
- Double
q) forall a. Fractional a => a -> a -> a
/ Double
compression
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
compress :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress TDigest comp
Nil = forall (comp :: Nat). TDigest comp
Nil
compress TDigest comp
td
| forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td forall a. Ord a => a -> a -> Bool
> Int
relMaxSize forall a. Num a => a -> a -> a
* Int
compression Bool -> Bool -> Bool
&& forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td forall a. Ord a => a -> a -> Bool
> Int
absMaxSize
= forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
td
| Bool
otherwise
= TDigest comp
td
where
compression :: Int
compression = forall a. Num a => Integer -> a
fromInteger forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
Nil = forall (comp :: Nat). TDigest comp
Nil
forceCompress TDigest comp
td =
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) forall (comp :: Nat). TDigest comp
emptyTDigest forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Vector a -> [a]
VU.toList Vector (Centroid, Double)
centroids
where
centroids :: VU.Vector (Centroid, Double)
centroids :: Vector (Centroid, Double)
centroids = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
MVector s (Centroid, Double)
v <- forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest comp
td
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VHeap.sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd) MVector s (Centroid, Double)
v
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s (Centroid, Double)
v
toMVector
:: forall comp s. KnownNat comp
=> TDigest comp
-> ST s (VU.MVector s (Centroid, Double))
toMVector :: forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest comp
td = do
MVector s (Centroid, Double)
v <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MVU.new (forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td)
(Int
i, Double
cum) <- forall {f :: * -> *} {compression :: Nat}.
PrimMonad f =>
MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector s (Centroid, Double)
v (Int
0 :: Int) (Double
0 :: Double) TDigest comp
td
forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a. Bool -> String -> a -> a
assert (Int
i forall a. Eq a => a -> a -> Bool
== forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td Bool -> Bool -> Bool
&& forall a. Num a => a -> a
abs (Double
cum forall a. Num a => a -> a -> a
- forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td) forall a. Ord a => a -> a -> Bool
< Double
1e-6) String
"traversal in toMVector:" MVector s (Centroid, Double)
v
where
go :: MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
_ Int
i Double
cum TDigest compression
Nil = forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i, Double
cum)
go MVector (PrimState f) (Centroid, Double)
v Int
i Double
cum (Node Int
_ Double
x Double
w Double
_ TDigest compression
l TDigest compression
r) = do
(Int
i', Double
cum') <- MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
v Int
i Double
cum TDigest compression
l
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVU.unsafeWrite MVector (PrimState f) (Centroid, Double)
v Int
i' ((Double
x, Double
w), Double -> Double -> Double
space Double
w Double
cum')
MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
v (Int
i' forall a. Num a => a -> a -> a
+ Int
1) (Double
cum' forall a. Num a => a -> a -> a
+ Double
w) TDigest compression
r
n :: Double
n = forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td
compression :: Double
compression = forall a. Num a => Integer -> a
fromInteger forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)
space :: Double -> Double -> Double
space Double
w Double
cum = Double
thr forall a. Num a => a -> a -> a
- Double
w
where
q :: Double
q = (Double
w forall a. Fractional a => a -> a -> a
/ Double
2 forall a. Num a => a -> a -> a
+ Double
cum) forall a. Fractional a => a -> a -> a
/ Double
n
thr :: Double
thr = Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression
relMaxSize :: Int
relMaxSize :: Int
relMaxSize = Int
25
absMaxSize :: Int
absMaxSize :: Int
absMaxSize = Int
1000
balOmega :: Int
balOmega :: Int
balOmega = Int
3
balAlpha :: Int
balAlpha :: Int
balAlpha = Int
2
debugPrint :: TDigest comp -> IO ()
debugPrint :: forall (comp :: Nat). TDigest comp -> IO ()
debugPrint TDigest comp
td = forall {compression :: Nat}. Int -> TDigest compression -> IO ()
go Int
0 TDigest comp
td
where
go :: Int -> TDigest compression -> IO ()
go Int
i TDigest compression
Nil = String -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate (Int
i forall a. Num a => a -> a -> a
* Int
3) Char
' ' forall a. [a] -> [a] -> [a]
++ String
"Nil"
go Int
i (Node Int
s Double
m Double
w Double
tw TDigest compression
l TDigest compression
r) = do
Int -> TDigest compression -> IO ()
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) TDigest compression
l
String -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate (Int
i forall a. Num a => a -> a -> a
* Int
3) Char
' ' forall a. [a] -> [a] -> [a]
++ String
"Node " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Int
s,Double
m,Double
w,Double
tw)
Int -> TDigest compression -> IO ()
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) TDigest compression
r
valid :: TDigest comp -> Bool
valid :: forall (comp :: Nat). TDigest comp -> Bool
valid = forall a b. Either a b -> Bool
isRight forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate
validate :: TDigest comp -> Either String (TDigest comp)
validate :: forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate TDigest comp
td
| Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
sizeValid [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"invalid sizes"
| Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
weightValid [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"invalid weights"
| Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
orderValid [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"invalid ordering"
| Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
balanced [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"tree is ill-balanced"
| Bool
otherwise = forall a b. b -> Either a b
Right TDigest comp
td
where
centroids :: [TDigest comp]
centroids = forall {compression :: Nat}.
TDigest compression -> [TDigest compression]
goc TDigest comp
td
goc :: TDigest compression -> [TDigest compression]
goc TDigest compression
Nil = []
goc n :: TDigest compression
n@(Node Int
_ Double
_ Double
_ Double
_ TDigest compression
l TDigest compression
r) = TDigest compression
n forall a. a -> [a] -> [a]
: TDigest compression -> [TDigest compression]
goc TDigest compression
l forall a. [a] -> [a] -> [a]
++ TDigest compression -> [TDigest compression]
goc TDigest compression
r
sizeValid :: TDigest comp -> Bool
sizeValid TDigest comp
Nil = Bool
True
sizeValid (Node Int
s Double
_ Double
_ Double
_ TDigest comp
l TDigest comp
r) = Int
s forall a. Eq a => a -> a -> Bool
== forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Num a => a -> a -> a
+ Int
1
weightValid :: TDigest comp -> Bool
weightValid TDigest comp
Nil = Bool
True
weightValid (Node Int
_ Double
_ Double
w Double
tw TDigest comp
l TDigest comp
r) = Double -> Double -> Bool
eq Double
tw forall a b. (a -> b) -> a -> b
$ Double
w forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
r
orderValid :: TDigest compression -> Bool
orderValid TDigest compression
Nil = Bool
True
orderValid (Node Int
_ Double
_ Double
_ Double
_ TDigest compression
Nil TDigest compression
Nil) = Bool
True
orderValid (Node Int
_ Double
x Double
_ Double
_ (Node Int
_ Double
lx Double
_ Double
_ TDigest compression
_ TDigest compression
_) TDigest compression
Nil) = Double
lx forall a. Ord a => a -> a -> Bool
< Double
x
orderValid (Node Int
_ Double
x Double
_ Double
_ TDigest compression
Nil (Node Int
_ Double
rx Double
_ Double
_ TDigest compression
_ TDigest compression
_)) = Double
x forall a. Ord a => a -> a -> Bool
< Double
rx
orderValid (Node Int
_ Double
x Double
_ Double
_ (Node Int
_ Double
lx Double
_ Double
_ TDigest compression
_ TDigest compression
_) (Node Int
_ Double
rx Double
_ Double
_ TDigest compression
_ TDigest compression
_)) = Double
lx forall a. Ord a => a -> a -> Bool
< Double
x Bool -> Bool -> Bool
&& Double
x forall a. Ord a => a -> a -> Bool
< Double
rx
balanced :: TDigest comp -> Bool
balanced TDigest comp
Nil = Bool
True
balanced (Node Int
_ Double
_ Double
_ Double
_ TDigest comp
l TDigest comp
r) =
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Ord a => a -> a -> Bool
<= forall a. Ord a => a -> a -> a
max Int
1 (Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r) Bool -> Bool -> Bool
&&
forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
<= forall a. Ord a => a -> a -> a
max Int
1 (Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l)
insert
:: KnownNat comp
=> Double
-> TDigest comp
-> TDigest comp
insert :: forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert Double
x = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x
insert'
:: KnownNat comp
=> Double
-> TDigest comp
-> TDigest comp
insert' :: forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x = forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
1)
singleton :: KnownNat comp => Double -> TDigest comp
singleton :: forall (comp :: Nat). KnownNat comp => Double -> TDigest comp
singleton Double
x = forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert Double
x forall (comp :: Nat). TDigest comp
emptyTDigest
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
tdigest :: forall (f :: * -> *) (comp :: Nat).
(Foldable f, KnownNat comp) =>
f Double -> TDigest comp
tdigest = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {comp :: Nat} {t :: * -> *}.
(KnownNat comp, Foldable t) =>
TDigest comp -> t Double -> TDigest comp
insertChunk forall (comp :: Nat). TDigest comp
emptyTDigest forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. [a] -> [[a]]
chunks forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
where
insertChunk :: TDigest comp -> t Double -> TDigest comp
insertChunk TDigest comp
td t Double
xs =
forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress (forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert') TDigest comp
td t Double
xs)
chunks :: [a] -> [[a]]
chunks [] = []
chunks [a]
xs =
let ([a]
a, [a]
b) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
1000 [a]
xs
in [a]
a forall a. a -> [a] -> [a]
: [a] -> [[a]]
chunks [a]
b