{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE KindSignatures        #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables   #-}
-- | Internals of 'TDigest'.
--
-- Tree implementation is based on /Adams’ Trees Revisited/ by Milan Straka
-- <http://fox.ucw.cz/papers/bbtree/bbtree.pdf>
module Data.TDigest.Tree.Internal where

import Control.DeepSeq        (NFData (..))
import Control.Monad.ST       (ST, runST)
import Data.Binary            (Binary (..))
import Data.Either            (isRight)
import Data.Foldable          (toList)
import Data.List.Compat       (foldl')
import Data.List.NonEmpty     (nonEmpty)
import Data.Ord               (comparing)
import Data.Proxy             (Proxy (..))
import Data.Semigroup         (Semigroup (..))
import Data.Semigroup.Reducer (Reducer (..))
import GHC.TypeLits           (KnownNat, Nat, natVal)
import Prelude ()
import Prelude.Compat

import qualified Data.Vector.Algorithms.Heap as VHeap
import qualified Data.Vector.Unboxed         as VU
import qualified Data.Vector.Unboxed.Mutable as MVU

import           Data.TDigest.Internal
import qualified Data.TDigest.Postprocess.Internal as PP

-------------------------------------------------------------------------------
-- TDigest
-------------------------------------------------------------------------------

-- | 'TDigest' is a tree of centroids.
--
-- @compression@ is a @1/δ@. The greater the value of @compression@ the less
-- likely value merging will happen.
data TDigest (compression :: Nat)
    -- | Tree node
    = Node
        {-# UNPACK #-} !Size     -- size of this tree/centroid
        {-# UNPACK #-} !Mean     -- mean of the centroid
        {-# UNPACK #-} !Weight   -- weight of the centrod
        {-# UNPACK #-} !Weight   -- total weight of the tree
        !(TDigest compression)   -- left subtree
        !(TDigest compression)   -- right subtree
    -- | Empty tree
    | Nil
  deriving (Int -> TDigest compression -> ShowS
forall (compression :: Nat). Int -> TDigest compression -> ShowS
forall (compression :: Nat). [TDigest compression] -> ShowS
forall (compression :: Nat). TDigest compression -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TDigest compression] -> ShowS
$cshowList :: forall (compression :: Nat). [TDigest compression] -> ShowS
show :: TDigest compression -> String
$cshow :: forall (compression :: Nat). TDigest compression -> String
showsPrec :: Int -> TDigest compression -> ShowS
$cshowsPrec :: forall (compression :: Nat). Int -> TDigest compression -> ShowS
Show)

-- [Note: keep min & max in the tree]
--
-- We tried it, but it seems the alloc/update cost is bigger than
-- re-calculating them on need (it's O(log n) - calculation!)

-- [Note: singleton node]
-- We tried to add one, but haven't seen change in performance

-- [Note: inlining balanceR and balanceL]
-- We probably can squueze some performance by making
-- 'balanceL' and 'balanceR' check arguments only once (like @containers@ do)
-- and not use 'node' function.
-- *But*, the benefit vs. code explosion is not yet worth.

instance KnownNat comp => Semigroup (TDigest comp) where
    <> :: TDigest comp -> TDigest comp -> TDigest comp
(<>) = forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest

-- | Both 'cons' and 'snoc' are 'insert'
instance KnownNat comp => Reducer Double (TDigest comp) where
    cons :: Double -> TDigest comp -> TDigest comp
cons = forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
    snoc :: TDigest comp -> Double -> TDigest comp
snoc = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
    unit :: Double -> TDigest comp
unit = forall (comp :: Nat). KnownNat comp => Double -> TDigest comp
singleton

instance  KnownNat comp => Monoid (TDigest comp) where
    mempty :: TDigest comp
mempty  = forall (comp :: Nat). TDigest comp
emptyTDigest
    mappend :: TDigest comp -> TDigest comp -> TDigest comp
mappend = forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest

-- | 'TDigest' has only strict fields.
instance NFData (TDigest comp) where
    rnf :: TDigest comp -> ()
rnf TDigest comp
x = TDigest comp
x seq :: forall a b. a -> b -> b
`seq` ()

-- | 'TDigest' isn't compressed after de-serialisation,
-- but it can be still smaller.
instance KnownNat comp => Binary (TDigest comp) where
    put :: TDigest comp -> Put
put = forall t. Binary t => t -> Put
put forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids
    get :: Get (TDigest comp)
get = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) forall (comp :: Nat). TDigest comp
emptyTDigest forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Centroid] -> [Centroid]
lc forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall t. Binary t => Get t
get
      where
        lc :: [Centroid] -> [Centroid]
        lc :: [Centroid] -> [Centroid]
lc = forall a. a -> a
id

instance PP.HasHistogram (TDigest comp) Maybe where
    histogram :: TDigest comp -> Maybe (NonEmpty HistBin)
histogram = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap NonEmpty Centroid -> NonEmpty HistBin
PP.histogramFromCentroids forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> Maybe (NonEmpty a)
nonEmpty forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids
    totalWeight :: TDigest comp -> Double
totalWeight = forall (comp :: Nat). TDigest comp -> Double
totalWeight

getCentroids :: TDigest comp -> [Centroid]
getCentroids :: forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids = (forall a b. (a -> b) -> a -> b
$ []) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {compression :: Nat}.
TDigest compression -> [Centroid] -> [Centroid]
go
  where
    go :: TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
Nil                = forall a. a -> a
id
    go (Node Int
_ Double
x Double
w Double
_ TDigest compression
l TDigest compression
r) = TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
l forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Double
x,Double
w) forall a. a -> [a] -> [a]
: ) forall b c a. (b -> c) -> (a -> b) -> a -> c
. TDigest compression -> [Centroid] -> [Centroid]
go TDigest compression
r

-- | Total count of samples.
--
-- >>> totalWeight (tdigest [1..100] :: TDigest 5)
-- 100.0
--
totalWeight :: TDigest comp -> Weight
totalWeight :: forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
Nil                 = Double
0
totalWeight (Node Int
_ Double
_ Double
_ Double
tw TDigest comp
_ TDigest comp
_) = Double
tw

size :: TDigest comp -> Int
size :: forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
Nil                    = Int
0
size (Node Int
s Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_) = Int
s

-- | Center of left-most centroid. Note: may be different than min element inserted.
--
-- >>> minimumValue (tdigest [1..100] :: TDigest 3)
-- 1.0
--
minimumValue :: TDigest comp -> Mean
minimumValue :: forall (comp :: Nat). TDigest comp -> Double
minimumValue = forall {compression :: Nat}.
Double -> TDigest compression -> Double
go Double
posInf
  where
    go :: Double -> TDigest compression -> Double
go  Double
acc TDigest compression
Nil                    = Double
acc
    go Double
_acc (Node Int
_ Double
x Double
_ Double
_ TDigest compression
l TDigest compression
_) = Double -> TDigest compression -> Double
go Double
x TDigest compression
l

-- | Center of right-most centroid. Note: may be different than max element inserted.
--
-- >>> maximumValue (tdigest [1..100] :: TDigest 3)
-- 99.0
--
maximumValue :: TDigest comp -> Mean
maximumValue :: forall (comp :: Nat). TDigest comp -> Double
maximumValue = forall {compression :: Nat}.
Double -> TDigest compression -> Double
go Double
negInf
  where
    go :: Double -> TDigest compression -> Double
go  Double
acc TDigest compression
Nil                    = Double
acc
    go Double
_acc (Node Int
_ Double
x Double
_ Double
_ TDigest compression
_ TDigest compression
r) = Double -> TDigest compression -> Double
go Double
x TDigest compression
r

-------------------------------------------------------------------------------
-- Implementation
-------------------------------------------------------------------------------

emptyTDigest :: TDigest comp
emptyTDigest :: forall (comp :: Nat). TDigest comp
emptyTDigest = forall (comp :: Nat). TDigest comp
Nil

combineDigest
    :: KnownNat comp
    => TDigest comp
    -> TDigest comp
    -> TDigest comp
combineDigest :: forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineDigest TDigest comp
a TDigest comp
Nil = TDigest comp
a
combineDigest TDigest comp
Nil TDigest comp
b = TDigest comp
b
combineDigest a :: TDigest comp
a@(Node Int
n Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_) b :: TDigest comp
b@(Node Int
m Double
_ Double
_ Double
_ TDigest comp
_ TDigest comp
_)
    -- TODO: merge first, then shuffle and insert (part of compress)
    | Int
n forall a. Ord a => a -> a -> Bool
< Int
m     = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
b (forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids TDigest comp
a)
    | Bool
otherwise = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) TDigest comp
a (forall (comp :: Nat). TDigest comp -> [Centroid]
getCentroids TDigest comp
b)

insertCentroid
    :: forall comp. KnownNat comp
    => Centroid
    -> TDigest comp
    -> TDigest comp
insertCentroid :: forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
w) TDigest comp
Nil        = forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
x Double
w
insertCentroid (Double
mean, Double
weight) TDigest comp
td = Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
0 Double
mean Double
weight Bool
False TDigest comp
td
  where
    -- New weight of the tree
    n :: Weight
    n :: Double
n = forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td forall a. Num a => a -> a -> a
+ Double
weight

    -- 1/delta
    compression :: Double
    compression :: Double
compression = forall a. Num a => Integer -> a
fromInteger forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)

    go
        :: Weight        -- weight to the left of this tree
        -> Mean          -- mean to insert
        -> Weight        -- weight to insert
        -> Bool          -- should insert everything.
                         -- if we merged somewhere on top, rest is inserted as is
        -> TDigest comp  -- subtree to insert/merge centroid into
        -> TDigest comp
    go :: Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
_   Double
newX Double
newW Bool
_ TDigest comp
Nil                 = forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
newX Double
newW
    go Double
cum Double
newX Double
newW Bool
e (Node Int
s Double
x Double
w Double
tw TDigest comp
l TDigest comp
r) = case forall a. Ord a => a -> a -> Ordering
compare Double
newX Double
x of
        -- Exact match, insert here
        Ordering
EQ -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node Int
s Double
x (Double
w forall a. Num a => a -> a -> a
+ Double
newW) (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
r -- node x (w + newW) l r

        -- there is *no* room to insert into this node
        Ordering
LT | Double
thr forall a. Ord a => a -> a -> Bool
<= Double
w -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
        Ordering
GT | Double
thr forall a. Ord a => a -> a -> Bool
<= Double
w -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
e TDigest comp
r)

        -- otherwise go left ... or later right
        Ordering
LT | Bool
e -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
        Ordering
LT -> case TDigest comp
l of
            -- always create a new node
            TDigest comp
Nil -> case Maybe Double
mrw of
                Maybe Double
Nothing     -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) forall (comp :: Nat). TDigest comp
Nil TDigest comp
r
                Just Double
rw     -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
nx Double
nw (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
rw Bool
True forall (comp :: Nat). TDigest comp
Nil) TDigest comp
r
            Node {}
                | Double
lmax forall a. Ord a => a -> a -> Bool
< Double
newX Bool -> Bool -> Bool
&& forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
x) forall a. Ord a => a -> a -> Bool
< forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
lmax) {- && newX < x -} -> case Maybe Double
mrw of
                    Maybe Double
Nothing -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
nw forall a. Num a => a -> a -> a
- Double
w) TDigest comp
l TDigest comp
r
                    -- in this two last LT cases, we have to recalculate size
                    Just Double
rw -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
nx Double
nw (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
rw Bool
True TDigest comp
l) TDigest comp
r
                | Bool
otherwise -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go Double
cum Double
newX Double
newW Bool
e TDigest comp
l) TDigest comp
r
              where
                lmax :: Double
lmax = forall (comp :: Nat). TDigest comp -> Double
maximumValue TDigest comp
l

        -- ... or right
        Ordering
GT | Bool
e -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
True TDigest comp
r)
        Ordering
GT -> case TDigest comp
r of
            TDigest comp
Nil -> case Maybe Double
mrw of
                Maybe Double
Nothing     -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l forall (comp :: Nat). TDigest comp
Nil
                Just Double
rw     -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
nx Double
nw TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
nw) Double
newX Double
rw Bool
True forall (comp :: Nat). TDigest comp
Nil)
            Node {}
                | Double
rmin forall a. Ord a => a -> a -> Bool
> Double
newX Bool -> Bool -> Bool
&& forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
x) forall a. Ord a => a -> a -> Bool
< forall a. Num a => a -> a
abs (Double
newX forall a. Num a => a -> a -> a
- Double
rmin) {- && newX > x -} -> case Maybe Double
mrw of
                    Maybe Double
Nothing -> forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' Int
s Double
nx Double
nw (Double
tw forall a. Num a => a -> a -> a
+ Double
newW) TDigest comp
l TDigest comp
r
                    -- in this two last GT cases, we have to recalculate size
                    Just Double
rw -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
nx Double
nw TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
nw) Double
newX Double
rw Bool
True TDigest comp
r)
                | Bool
otherwise -> forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l (Double -> Double -> Double -> Bool -> TDigest comp -> TDigest comp
go (Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ Double
w) Double
newX Double
newW Bool
e TDigest comp
r)
              where
                rmin :: Double
rmin = forall (comp :: Nat). TDigest comp -> Double
minimumValue TDigest comp
r
      where
        -- quantile approximation of current node
        cum' :: Double
cum' = Double
cum forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l
        q :: Double
q   = (Double
w forall a. Fractional a => a -> a -> a
/ Double
2 forall a. Num a => a -> a -> a
+ Double
cum') forall a. Fractional a => a -> a -> a
/ Double
n

        -- threshold, max size of current node/centroid
        thr :: Double
thr = {- traceShowId $ traceShow (n, q) $ -} Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression

        -- We later use nx, nw and mrw:

        -- max size of current node
        dw :: Weight
        mrw :: Maybe Weight
        (Double
dw, Maybe Double
mrw) =
            let diff :: Double
diff = forall a. Bool -> String -> a -> a
assert (Double
thr forall a. Ord a => a -> a -> Bool
> Double
w) String
"threshold should be larger than current node weight"
                     forall a b. (a -> b) -> a -> b
$ Double
w forall a. Num a => a -> a -> a
+ Double
newW forall a. Num a => a -> a -> a
- Double
thr
            in if Double
diff forall a. Ord a => a -> a -> Bool
< Double
0 -- i.e. there is room
                then (Double
newW, forall a. Maybe a
Nothing)
                else (Double
thr forall a. Num a => a -> a -> a
- Double
w, forall a. a -> Maybe a
Just Double
diff)

        -- the change of current node
        (Double
nx, Double
nw) = {- traceShowId $ traceShow (newX, newW, x, dw, mrw) $ -} Double -> Double -> Double -> Double -> Centroid
combinedCentroid Double
x Double
w Double
x Double
dw

-- | Constructor which calculates size and total weight.
node :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
node :: forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r = forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node
    (Int
1 forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r)
    Double
x Double
w
    (Double
w forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
r)
    TDigest comp
l TDigest comp
r

-- | Balance after right insertion.
balanceR :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
balanceR :: forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceR Double
x Double
w TDigest comp
l TDigest comp
r
    | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
<= Int
1 = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
    | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
> Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l = case TDigest comp
r of
        TDigest comp
Nil -> forall a. HasCallStack => String -> a
error String
"balanceR: impossible happened"
        (Node Int
_ Double
rx Double
rw Double
_ TDigest comp
Nil TDigest comp
rr) ->
            -- assert (0 < balAlpha * size rr) "balanceR" $
                -- single left rotation
                forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l forall (comp :: Nat). TDigest comp
Nil) TDigest comp
rr
        (Node Int
_ Double
rx Double
rw Double
_ TDigest comp
rl TDigest comp
rr)
            | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
rl forall a. Ord a => a -> a -> Bool
< Int
balAlpha forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
rr ->
                -- single left rotation
                forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
rl) TDigest comp
rr
        (Node Int
_ Double
rx Double
rw Double
_ (Node Int
_ Double
rlx Double
rlw Double
_ TDigest comp
rll TDigest comp
rlr) TDigest comp
rr) ->
                -- double left rotation
                forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rlx Double
rlw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
rll) (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
rx Double
rw TDigest comp
rlr TDigest comp
rr)
    | Bool
otherwise            = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r

-- | Balance after left insertion.
balanceL :: Mean -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
balanceL :: forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
balanceL Double
x Double
w TDigest comp
l TDigest comp
r
    | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
<= Int
1 = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r
    | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Ord a => a -> a -> Bool
> Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r = case TDigest comp
l of
        TDigest comp
Nil -> forall a. HasCallStack => String -> a
error String
"balanceL: impossible happened"
        (Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll TDigest comp
Nil) ->
            -- assert (0 < balAlpha * size ll) "balanceL" $
                -- single right rotation
                forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w forall (comp :: Nat). TDigest comp
Nil TDigest comp
r)
        (Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll TDigest comp
lr)
            | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
lr forall a. Ord a => a -> a -> Bool
< Int
balAlpha forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
ll ->
                -- single right rotation
                forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
lr TDigest comp
r)
        (Node Int
_ Double
lx Double
lw Double
_ TDigest comp
ll (Node Int
_ Double
lrx Double
lrw Double
_ TDigest comp
lrl TDigest comp
lrr)) ->
                -- double left rotation
                forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lrx Double
lrw (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
lx Double
lw TDigest comp
ll TDigest comp
lrl) (forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
lrr TDigest comp
r)
    | Bool
otherwise = forall (comp :: Nat).
Double -> Double -> TDigest comp -> TDigest comp -> TDigest comp
node Double
x Double
w TDigest comp
l TDigest comp
r

-- | Alias to 'Node'
node' :: Int -> Mean -> Weight -> Weight -> TDigest comp -> TDigest comp -> TDigest comp
node' :: forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
node' = forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node

-- | Create singular node.
singNode :: Mean -> Weight -> TDigest comp
singNode :: forall (comp :: Nat). Double -> Double -> TDigest comp
singNode Double
x Double
w = forall (compression :: Nat).
Int
-> Double
-> Double
-> Double
-> TDigest compression
-> TDigest compression
-> TDigest compression
Node Int
1 Double
x Double
w Double
w forall (comp :: Nat). TDigest comp
Nil forall (comp :: Nat). TDigest comp
Nil

-- | Add two weighted means together.
combinedCentroid
    :: Mean -> Weight
    -> Mean -> Weight
    -> Centroid
combinedCentroid :: Double -> Double -> Double -> Double -> Centroid
combinedCentroid Double
x Double
w Double
x' Double
w' =
    ( (Double
x forall a. Num a => a -> a -> a
* Double
w forall a. Num a => a -> a -> a
+ Double
x' forall a. Num a => a -> a -> a
* Double
w') forall a. Fractional a => a -> a -> a
/ Double
w'' -- this is probably not num. stable
    , Double
w''
    )
  where
    w'' :: Double
w'' = Double
w forall a. Num a => a -> a -> a
+ Double
w'

-- | Calculate the threshold, i.e. maximum weight of centroid.
threshold
    :: Double  -- ^ total weight
    -> Double  -- ^ quantile
    -> Double  -- ^ compression (1/δ)
    -> Double
threshold :: Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression = Double
4 forall a. Num a => a -> a -> a
* Double
n forall a. Num a => a -> a -> a
* Double
q forall a. Num a => a -> a -> a
* (Double
1 forall a. Num a => a -> a -> a
- Double
q) forall a. Fractional a => a -> a -> a
/ Double
compression

-------------------------------------------------------------------------------
-- Compression
-------------------------------------------------------------------------------

-- | Compress 'TDigest'.
--
-- Reinsert the centroids in "better" order (in original paper: in random)
-- so they have opportunity to merge.
--
-- Compression will happen only if size is both:
-- bigger than @'relMaxSize' * comp@ and bigger than 'absMaxSize'.
--
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
compress :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress TDigest comp
Nil = forall (comp :: Nat). TDigest comp
Nil
compress TDigest comp
td
    | forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td forall a. Ord a => a -> a -> Bool
> Int
relMaxSize forall a. Num a => a -> a -> a
* Int
compression Bool -> Bool -> Bool
&& forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td forall a. Ord a => a -> a -> Bool
> Int
absMaxSize
        = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
td
    | Bool
otherwise
        = TDigest comp
td
  where
    compression :: Int
compression = forall a. Num a => Integer -> a
fromInteger forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)

-- | Perform compression, even if current size says it's not necessary.
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
Nil = forall (comp :: Nat). TDigest comp
Nil
forceCompress TDigest comp
td =
    forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid) forall (comp :: Nat). TDigest comp
emptyTDigest forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Vector a -> [a]
VU.toList Vector (Centroid, Double)
centroids
  where
    -- Centroids are shuffled based on space
    centroids :: VU.Vector (Centroid, Double)
    centroids :: Vector (Centroid, Double)
centroids = forall a. (forall s. ST s a) -> a
runST forall a b. (a -> b) -> a -> b
$ do
        MVector s (Centroid, Double)
v <- forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest comp
td
        -- sort by cumulative weight
        forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VHeap.sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> b
snd) MVector s (Centroid, Double)
v
        forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s (Centroid, Double)
v

toMVector
    :: forall comp s. KnownNat comp
    => TDigest comp                           -- ^ t-Digest
    -> ST s (VU.MVector s (Centroid, Double)) -- ^ return also a "space left in the centroid" value for "shuffling"
toMVector :: forall (comp :: Nat) s.
KnownNat comp =>
TDigest comp -> ST s (MVector s (Centroid, Double))
toMVector TDigest comp
td = do
    MVector s (Centroid, Double)
v <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MVU.new (forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td)
    (Int
i, Double
cum) <- forall {f :: * -> *} {compression :: Nat}.
PrimMonad f =>
MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector s (Centroid, Double)
v (Int
0 :: Int) (Double
0 :: Double) TDigest comp
td
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a. Bool -> String -> a -> a
assert (Int
i forall a. Eq a => a -> a -> Bool
== forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
td Bool -> Bool -> Bool
&& forall a. Num a => a -> a
abs (Double
cum forall a. Num a => a -> a -> a
- forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td) forall a. Ord a => a -> a -> Bool
< Double
1e-6) String
"traversal in toMVector:" MVector s (Centroid, Double)
v
  where
    go :: MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
_ Int
i Double
cum TDigest compression
Nil                   = forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i, Double
cum)
    go MVector (PrimState f) (Centroid, Double)
v Int
i Double
cum (Node Int
_ Double
x Double
w Double
_ TDigest compression
l TDigest compression
r) = do
        (Int
i', Double
cum') <- MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
v Int
i Double
cum TDigest compression
l
        forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVU.unsafeWrite MVector (PrimState f) (Centroid, Double)
v Int
i' ((Double
x, Double
w), Double -> Double -> Double
space Double
w Double
cum')
        MVector (PrimState f) (Centroid, Double)
-> Int -> Double -> TDigest compression -> f (Int, Double)
go MVector (PrimState f) (Centroid, Double)
v (Int
i' forall a. Num a => a -> a -> a
+ Int
1) (Double
cum' forall a. Num a => a -> a -> a
+ Double
w) TDigest compression
r

    n :: Double
n = forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
td
    compression :: Double
compression = forall a. Num a => Integer -> a
fromInteger forall a b. (a -> b) -> a -> b
$ forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)

    space :: Double -> Double -> Double
space Double
w Double
cum = Double
thr forall a. Num a => a -> a -> a
- Double
w
      where
        q :: Double
q     = (Double
w forall a. Fractional a => a -> a -> a
/ Double
2 forall a. Num a => a -> a -> a
+ Double
cum) forall a. Fractional a => a -> a -> a
/ Double
n
        thr :: Double
thr   = Double -> Double -> Double -> Double
threshold Double
n Double
q Double
compression

-------------------------------------------------------------------------------
-- Params
-------------------------------------------------------------------------------

-- | Relative size parameter. Hard-coded value: 25.
relMaxSize :: Int
relMaxSize :: Int
relMaxSize = Int
25

-- | Absolute size parameter. Hard-coded value: 1000.
absMaxSize :: Int
absMaxSize :: Int
absMaxSize = Int
1000

-------------------------------------------------------------------------------
-- Tree balance parameters
-------------------------------------------------------------------------------

balOmega :: Int
balOmega :: Int
balOmega = Int
3

balAlpha :: Int
balAlpha :: Int
balAlpha = Int
2

-- balDelta = 0

-------------------------------------------------------------------------------
-- Debug
-------------------------------------------------------------------------------

-- | Output the 'TDigest' tree.
debugPrint :: TDigest comp -> IO ()
debugPrint :: forall (comp :: Nat). TDigest comp -> IO ()
debugPrint TDigest comp
td = forall {compression :: Nat}. Int -> TDigest compression -> IO ()
go Int
0 TDigest comp
td
  where
    go :: Int -> TDigest compression -> IO ()
go Int
i TDigest compression
Nil = String -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate (Int
i forall a. Num a => a -> a -> a
* Int
3) Char
' ' forall a. [a] -> [a] -> [a]
++ String
"Nil"
    go Int
i (Node Int
s Double
m Double
w Double
tw TDigest compression
l TDigest compression
r) = do
        Int -> TDigest compression -> IO ()
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) TDigest compression
l
        String -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate (Int
i forall a. Num a => a -> a -> a
* Int
3) Char
' ' forall a. [a] -> [a] -> [a]
++ String
"Node " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Int
s,Double
m,Double
w,Double
tw)
        Int -> TDigest compression -> IO ()
go (Int
i forall a. Num a => a -> a -> a
+ Int
1) TDigest compression
r

-- | @'isRight' . 'validate'@
valid :: TDigest comp -> Bool
valid :: forall (comp :: Nat). TDigest comp -> Bool
valid = forall a b. Either a b -> Bool
isRight forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate

-- | Check various invariants in the 'TDigest' tree.
validate :: TDigest comp -> Either String (TDigest comp)
validate :: forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate TDigest comp
td
    | Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
sizeValid   [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"invalid sizes"
    | Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
weightValid [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"invalid weights"
    | Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
orderValid  [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"invalid ordering"
    | Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall (comp :: Nat). TDigest comp -> Bool
balanced    [TDigest comp]
centroids) = forall a b. a -> Either a b
Left String
"tree is ill-balanced"
    | Bool
otherwise = forall a b. b -> Either a b
Right TDigest comp
td
  where
    centroids :: [TDigest comp]
centroids = forall {compression :: Nat}.
TDigest compression -> [TDigest compression]
goc TDigest comp
td

    goc :: TDigest compression -> [TDigest compression]
goc TDigest compression
Nil = []
    goc n :: TDigest compression
n@(Node Int
_ Double
_ Double
_ Double
_ TDigest compression
l TDigest compression
r) = TDigest compression
n forall a. a -> [a] -> [a]
: TDigest compression -> [TDigest compression]
goc TDigest compression
l forall a. [a] -> [a] -> [a]
++ TDigest compression -> [TDigest compression]
goc TDigest compression
r

    sizeValid :: TDigest comp -> Bool
sizeValid TDigest comp
Nil = Bool
True
    sizeValid (Node Int
s Double
_ Double
_ Double
_ TDigest comp
l TDigest comp
r) = Int
s forall a. Eq a => a -> a -> Bool
== forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Num a => a -> a -> a
+ Int
1

    weightValid :: TDigest comp -> Bool
weightValid TDigest comp
Nil = Bool
True
    weightValid (Node Int
_ Double
_ Double
w Double
tw TDigest comp
l TDigest comp
r) = Double -> Double -> Bool
eq Double
tw forall a b. (a -> b) -> a -> b
$ Double
w forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
l forall a. Num a => a -> a -> a
+ forall (comp :: Nat). TDigest comp -> Double
totalWeight TDigest comp
r

    orderValid :: TDigest compression -> Bool
orderValid TDigest compression
Nil = Bool
True
    orderValid (Node Int
_ Double
_ Double
_ Double
_ TDigest compression
Nil                 TDigest compression
Nil)                 = Bool
True
    orderValid (Node Int
_ Double
x Double
_ Double
_ (Node Int
_ Double
lx Double
_ Double
_ TDigest compression
_ TDigest compression
_) TDigest compression
Nil)                 = Double
lx forall a. Ord a => a -> a -> Bool
< Double
x
    orderValid (Node Int
_ Double
x Double
_ Double
_ TDigest compression
Nil                 (Node Int
_ Double
rx Double
_ Double
_ TDigest compression
_ TDigest compression
_)) = Double
x forall a. Ord a => a -> a -> Bool
< Double
rx
    orderValid (Node Int
_ Double
x Double
_ Double
_ (Node Int
_ Double
lx Double
_ Double
_ TDigest compression
_ TDigest compression
_) (Node Int
_ Double
rx Double
_ Double
_ TDigest compression
_ TDigest compression
_)) = Double
lx forall a. Ord a => a -> a -> Bool
< Double
x Bool -> Bool -> Bool
&& Double
x forall a. Ord a => a -> a -> Bool
< Double
rx

    balanced :: TDigest comp -> Bool
balanced TDigest comp
Nil = Bool
True
    balanced (Node Int
_ Double
_ Double
_ Double
_ TDigest comp
l TDigest comp
r) =
        forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l forall a. Ord a => a -> a -> Bool
<= forall a. Ord a => a -> a -> a
max Int
1 (Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r) Bool -> Bool -> Bool
&&
        forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
r forall a. Ord a => a -> a -> Bool
<= forall a. Ord a => a -> a -> a
max Int
1 (Int
balOmega forall a. Num a => a -> a -> a
* forall (comp :: Nat). TDigest comp -> Int
size TDigest comp
l)

-------------------------------------------------------------------------------
-- Higher level helpers
-------------------------------------------------------------------------------

-- | Insert single value into 'TDigest'.
insert
    :: KnownNat comp
    => Double         -- ^ element
    -> TDigest comp
    -> TDigest comp
insert :: forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert Double
x = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x

-- | Insert single value, don't compress 'TDigest' even if needed.
--
-- For sensibly bounded input, it makes sense to let 'TDigest' grow (it might
-- grow linearly in size), and after that compress it once.
insert'
    :: KnownNat comp
    => Double         -- ^ element
    -> TDigest comp
    -> TDigest comp
insert' :: forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x = forall (comp :: Nat).
KnownNat comp =>
Centroid -> TDigest comp -> TDigest comp
insertCentroid (Double
x, Double
1)

-- | Make a 'TDigest' of a single data point.
singleton :: KnownNat comp => Double -> TDigest comp
singleton :: forall (comp :: Nat). KnownNat comp => Double -> TDigest comp
singleton Double
x = forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert Double
x forall (comp :: Nat). TDigest comp
emptyTDigest

-- | Strict 'foldl'' over 'Foldable' structure.
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
tdigest :: forall (f :: * -> *) (comp :: Nat).
(Foldable f, KnownNat comp) =>
f Double -> TDigest comp
tdigest = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall {comp :: Nat} {t :: * -> *}.
(KnownNat comp, Foldable t) =>
TDigest comp -> t Double -> TDigest comp
insertChunk forall (comp :: Nat). TDigest comp
emptyTDigest forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. [a] -> [[a]]
chunks forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> [a]
toList
  where
    -- compress after each chunk, forceCompress at the very end.
    insertChunk :: TDigest comp -> t Double -> TDigest comp
insertChunk TDigest comp
td t Double
xs =
        forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress (forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert') TDigest comp
td t Double
xs)

    chunks :: [a] -> [[a]]
chunks [] = []
    chunks [a]
xs =
        let ([a]
a, [a]
b) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
1000 [a]
xs -- 1000 is totally arbitrary.
        in [a]
a forall a. a -> [a] -> [a]
: [a] -> [[a]]
chunks [a]
b

-- $setup
-- >>> :set -XDataKinds