module Data.TDigest.Postprocess (
histogram,
HistBin (..),
median,
quantile,
cdf,
icdf,
) where
import Prelude ()
import Prelude.Compat
import Data.TDigest.Internal.Tree
-- | One bin of the histogram produced by 'histogram'.
data HistBin = HistBin
    { hbMin       :: !Double
      -- ^ Lower edge of the bin.
    , hbMax       :: !Double
      -- ^ Upper edge of the bin.
    , hbWeight    :: !Double
      -- ^ Weight carried by this bin.
    , hbCumWeight :: !Double
      -- ^ Sum of the weights of all bins that come before this one.
    }
    deriving Show
-- | Turn the centroids of a digest into a list of histogram bins.
--
-- Every centroid yields exactly one bin carrying that centroid's weight.
-- A bin's edges are the midpoints between the centroid's mean and the means
-- of its neighbours in the list; the first bin starts at the first mean and
-- the last bin ends at the last mean.  'hbCumWeight' is the running sum of
-- the weights of the centroids already emitted.
--
-- NOTE(review): presumably 'getCentroids' returns centroids ordered by mean;
-- this function relies on the order it is given — confirm.
histogram :: TDigest comp -> [HistBin]
histogram = go Nothing 0 . getCentroids
  where
    -- Arguments: mean of the previous centroid (if any), weight accumulated
    -- so far, and the centroids still to be processed.
    go :: Maybe Mean -> Weight -> [(Mean, Weight)] -> [HistBin]
    go _ _ [] = []
    go prev acc ((x, w) : more) =
        HistBin lower upper w acc : go (Just x) (acc + w) more
      where
        -- No predecessor: the bin starts at the centroid itself.
        lower = case prev of
            Nothing -> x
            Just p  -> mid p x
        -- No successor: the bin ends at the centroid itself.
        upper = case more of
            []            -> x
            (next, _) : _ -> mid x next

    mid a b = (a + b) / 2
-- | Median of the digest, i.e. @'quantile' 0.5@.
--
-- The result is whatever 'quantile' returns for @0.5@, including 'Nothing'.
median :: TDigest comp -> Maybe Double
median td = quantile 0.5 td
-- | Estimate the value at quantile @q@ of the digest.
--
-- @q@ is scaled by 'totalWeight' to a target cumulative weight.  The bins of
-- 'histogram' are then scanned in order, and the first bin whose cumulative
-- range (@hbCumWeight@ up to @hbCumWeight + hbWeight@) lies above the target
-- is used for a linear interpolation between its edges.  If no earlier bin
-- matches, the last bin is used unconditionally.
--
-- Returns 'Nothing' when the histogram has no bins.
quantile :: Double -> TDigest comp -> Maybe Double
quantile q td =
    iter $ histogram td
  where
    -- Target cumulative weight corresponding to @q@.
    q' = q * totalWeight td

    -- Linear interpolation inside a bin: move from the lower edge @a@
    -- towards the upper edge @b@ by the fraction of the bin's weight @w@
    -- that lies below the target (@t@ is the weight before the bin).
    interpolate a b w t = a + (b - a) * (q' - t) / w

    iter []                   = Nothing
    -- Last bin: taken regardless of where the target falls.
    iter [HistBin a b w t]    = Just $ interpolate a b w t
    iter (HistBin a b w t : rest)
        | q' < t + w          = Just $ interpolate a b w t
        | otherwise           = iter rest
-- | Inverse cumulative distribution function.
--
-- This is another name for 'quantile': same arguments, same result.
icdf :: Double -> TDigest comp -> Maybe Double
icdf q td = quantile q td
-- | Estimate the cumulative distribution function of the digest at @x@.
--
-- The bins of 'histogram' are scanned in order:
--
-- * if @x@ is below the lower edge of the bin being examined, the result
--   is @0@;
-- * if @x@ falls inside the bin, the cumulative weight is interpolated
--   linearly across the bin and divided by 'totalWeight';
-- * if the bins are exhausted (including when there are none), the result
--   is @1@.
cdf :: Double -> TDigest comp -> Double
cdf x td =
    iter $ histogram td
  where
    -- Normalising constant: the result is a fraction of the total weight.
    n = totalWeight td

    iter []                   = 1
    iter (HistBin a b w t : rest)
        | x < a               = 0
        -- Here a <= x < b, so b - a is strictly positive.
        | x < b               = (t + w * (x - a) / (b - a)) / n
        | otherwise           = iter rest