-- | 'TDigest' postprocessing functions.
--
-- These are re-exported from "Data.TDigest" module.
--
module Data.TDigest.Postprocess (
    -- * Histogram
    histogram,
    HistBin (..),
    -- * Quantiles
    median,
    quantile,
    -- * CDF
    cdf,
    icdf,
    ) where

import Prelude ()
import Prelude.Compat

import Data.TDigest.Internal.Tree

-------------------------------------------------------------------------------
-- Histogram
-------------------------------------------------------------------------------

-- | One bar of the histogram produced by 'histogram'.
data HistBin = HistBin
    { hbMin       :: !Double  -- ^ lower bound of the bin
    , hbMax       :: !Double  -- ^ upper bound of the bin
    , hbWeight    :: !Double  -- ^ weight of the bin ("area" of the bar)
    , hbCumWeight :: !Double  -- ^ running sum of the weights of all bins
                              --   emitted before this one (this bin excluded)
    }
  deriving (Show)

-- | Build a histogram from the centroids of a 'TDigest'.
--
-- Every centroid yields exactly one 'HistBin' whose weight is the centroid's
-- weight. Bin edges are placed halfway between neighbouring centroid means;
-- the first bin starts at its own mean and the last bin ends at its own mean,
-- so a digest with a single centroid produces one zero-width bin.
--
-- An empty centroid list gives an empty histogram.
--
-- NOTE(review): the bins are only contiguous and increasing if
-- 'getCentroids' returns centroids ordered by ascending mean -- confirm
-- against "Data.TDigest.Internal.Tree".
histogram :: TDigest comp -> [HistBin]
histogram = go Nothing 0 . getCentroids
  where
    -- Arguments: mean of the previous centroid (if any), cumulative weight
    -- of everything already emitted, and the centroids still to process.
    go :: Maybe Mean -> Weight -> [(Mean, Weight)] -> [HistBin]
    go _ _ [] = []
    go prev acc ((x, w) : rest) =
        HistBin lo hi w acc : go (Just x) (acc + w) rest
      where
        -- No predecessor: the bin starts at the centroid itself.
        lo = maybe x (`midpoint` x) prev
        -- No successor: the bin ends at the centroid itself.
        hi = case rest of
            []            -> x
            (next, _) : _ -> midpoint x next

    midpoint a b = (a + b) / 2

-------------------------------------------------------------------------------
-- Quantile
-------------------------------------------------------------------------------

-- | Median, i.e. @'quantile' 0.5@.
median :: TDigest comp -> Maybe Double
median = quantile 0.5

-- | Calculate quantile of a specific value.
--
-- The requested fraction @q@ is scaled by the digest's total weight and the
-- histogram bins are scanned in order; the answer is found by linear
-- interpolation inside the first bin whose cumulative range reaches past the
-- scaled target. If no earlier bin qualifies, the last bin is used
-- unconditionally.
--
-- Returns 'Nothing' when the histogram is empty.
--
-- NOTE(review): @q@ is not range-checked here; values outside @[0, 1]@
-- appear to extrapolate past the bounds of the first or last bin.
quantile :: Double -> TDigest comp -> Maybe Double
quantile q td = go (histogram td)
  where
    -- Target position expressed in cumulative-weight units.
    target = q * totalWeight td

    -- Linear interpolation of the target inside a single bin.
    interpolate (HistBin lo hi w acc) = lo + (hi - lo) * (target - acc) / w

    go [] = Nothing
    go (bin@(HistBin _ _ w acc) : more)
        -- The final bin always answers, regardless of the target.
        | null more || target < acc + w = Just (interpolate bin)
        | otherwise                     = go more

-- | Inverse cumulative distribution function; a synonym for 'quantile'.
icdf :: Double -> TDigest comp -> Maybe Double
icdf = quantile

-------------------------------------------------------------------------------
-- CDF - cumulative distribution function
-------------------------------------------------------------------------------

-- | Cumulative distribution function.
--
-- Walks the histogram bins in order. The result is @0@ when @x@ lies below
-- the lower bound of the first bin it is compared against, @1@ when @x@ is
-- not below the upper bound of any bin (which includes the empty histogram),
-- and otherwise the linearly interpolated cumulative weight inside the
-- matching bin divided by the digest's total weight.
--
-- /Note:/ if this is the only thing you need, it's more efficient to count
-- this directly.
cdf :: Double -> TDigest comp -> Double
cdf x td = foldr step 1 (histogram td)
  where
    total = totalWeight td

    -- @beyond@ is the (lazily evaluated) answer from the remaining bins.
    step (HistBin lo hi w acc) beyond
        | x < lo    = 0
        | x < hi    = (acc + w * (x - lo) / (hi - lo)) / total
        | otherwise = beyond