{-# LANGUAGE DataKinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -Wall #-}
module NumHask.Space.Histogram
( Histogram (..),
DealOvers (..),
fill,
cutI,
regular,
makeRects,
regularQuantiles,
quantileFold,
fromQuantiles,
freq,
)
where
import qualified Control.Foldl as L
import qualified Data.List as List
import qualified Data.Map as Map
import Data.TDigest
import NumHask.Space.Range
import NumHask.Space.Rect
import NumHask.Space.Types
import Protolude
data Histogram
= Histogram
{ cuts :: [Double],
values :: Map.Map Int Double
}
deriving (Show, Eq)
data DealOvers = IgnoreOvers | IncludeOvers Double
fill :: (Foldable f) => [Double] -> f Double -> Histogram
fill cs xs = Histogram cs (foldl' (\x a -> Map.insertWith (+) (cutI cs a) 1 x) Map.empty xs)
cutI :: (Ord a) => [a] -> a -> Int
cutI bs n = go bs 0
where
go [] i = i
go (x : xs) i = bool i (go xs (i + 1)) (n > x)
regular :: Int -> [Double] -> Histogram
regular n xs = fill cs xs
where
cs = grid OuterPos (space1 xs :: Range Double) n
makeRects :: DealOvers -> Histogram -> [Rect Double]
makeRects o (Histogram cs counts) = List.zipWith4 Rect x z y w'
where
y = repeat 0
w =
zipWith
(/)
((\x' -> Map.findWithDefault 0 x' counts) <$> [f .. l])
(zipWith (-) z x)
f = case o of
IgnoreOvers -> 1
IncludeOvers _ -> 0
l = case o of
IgnoreOvers -> length cs - 1
IncludeOvers _ -> length cs
w' = (/ sum w) <$> w
x = case o of
IgnoreOvers -> cs
IncludeOvers outw ->
[List.head cs - outw]
<> cs
<> [List.last cs + outw]
z = drop 1 x
regularQuantiles :: Double -> [Double] -> [Double]
regularQuantiles n = L.fold (quantileFold qs)
where
qs = ((1 / n) *) <$> [0 .. n]
quantileFold :: [Double] -> L.Fold Double [Double]
quantileFold qs = L.Fold step begin done
where
step x a = Data.TDigest.insert a x
begin = tdigest ([] :: [Double]) :: TDigest 25
done x = fromMaybe (0 / 0) . (`quantile` compress x) <$> qs
fromQuantiles :: [Double] -> [Double] -> Histogram
fromQuantiles qs xs = Histogram xs (Map.fromList $ zip [1 ..] (diffq qs))
where
diffq [] = []
diffq [_] = []
diffq (x : xs') = L.fold (L.Fold step (x, []) (reverse . snd)) xs'
step (a0, xs') a = (a, (a - a0) : xs')
freq :: Histogram -> Histogram
freq (Histogram cs vs) = Histogram cs $ Map.map (* recip (sum vs)) vs