{-# LANGUAGE DataKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.TDigest.Vector.Internal where
import Control.DeepSeq (NFData (..))
import Data.Either (isRight)
import Data.Foldable (toList)
import Data.List (foldl', sortBy)
import Data.List.NonEmpty (nonEmpty)
import Data.Ord (comparing)
import Data.Proxy (Proxy (..))
import Data.Semigroup (Semigroup (..))
import Data.Semigroup.Reducer (Reducer (..))
import GHC.TypeLits (KnownNat, Nat, natVal)
import Prelude ()
import Prelude.Compat
import qualified Data.Vector.Unboxed as VU
import Data.TDigest.Internal
import qualified Data.TDigest.Postprocess.Internal as PP
data TDigest (compression :: Nat) = TDigest
{ forall (compression :: Nat). TDigest compression -> Size
tdigestTotalWeight :: !Size
, forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData :: !(VU.Vector Centroid)
, forall (compression :: Nat). TDigest compression -> Size
tdigestBufferSize :: !Size
, forall (compression :: Nat). TDigest compression -> [Double]
tdigestBuffer :: [Double]
, forall (compression :: Nat). TDigest compression -> Bool
tdigestDirection :: !Bool
}
deriving Size -> TDigest compression -> ShowS
forall (compression :: Nat). Size -> TDigest compression -> ShowS
forall (compression :: Nat). [TDigest compression] -> ShowS
forall (compression :: Nat). TDigest compression -> String
forall a.
(Size -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TDigest compression] -> ShowS
$cshowList :: forall (compression :: Nat). [TDigest compression] -> ShowS
show :: TDigest compression -> String
$cshow :: forall (compression :: Nat). TDigest compression -> String
showsPrec :: Size -> TDigest compression -> ShowS
$cshowsPrec :: forall (compression :: Nat). Size -> TDigest compression -> ShowS
Show
instance KnownNat comp => Semigroup (TDigest comp) where
<> :: TDigest comp -> TDigest comp -> TDigest comp
(<>) = forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineTDigest
instance KnownNat comp => Monoid (TDigest comp) where
mempty :: TDigest comp
mempty = forall (comp :: Nat). TDigest comp
emptyTDigest
mappend :: TDigest comp -> TDigest comp -> TDigest comp
mappend = forall a. Semigroup a => a -> a -> a
(<>)
instance KnownNat comp => Reducer Double (TDigest comp) where
cons :: Double -> TDigest comp -> TDigest comp
cons = forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
snoc :: TDigest comp -> Double -> TDigest comp
snoc = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert
unit :: Double -> TDigest comp
unit = forall (comp :: Nat). Double -> TDigest comp
singleton
instance NFData (TDigest comp) where
rnf :: TDigest comp -> ()
rnf (TDigest Size
_ Vector Centroid
_ Size
_ [Double]
b Bool
_) = forall a. NFData a => a -> ()
rnf [Double]
b
instance KnownNat comp => PP.HasHistogram (TDigest comp) Maybe where
histogram :: TDigest comp -> Maybe (NonEmpty HistBin)
histogram = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap NonEmpty Centroid -> NonEmpty HistBin
PP.histogramFromCentroids forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> Maybe (NonEmpty a)
nonEmpty forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Vector a -> [a]
VU.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize
totalWeight :: TDigest comp -> Double
totalWeight = forall (comp :: Nat). TDigest comp -> Double
totalWeight
size :: TDigest comp -> Int
size :: forall (compression :: Nat). TDigest compression -> Size
size TDigest comp
td = forall a. Unbox a => Vector a -> Size
VU.length (forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData TDigest comp
td) forall a. Num a => a -> a -> a
+ forall (compression :: Nat). TDigest compression -> Size
tdigestBufferSize TDigest comp
td
totalWeight :: TDigest comp -> Weight
totalWeight :: forall (comp :: Nat). TDigest comp -> Double
totalWeight = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (compression :: Nat). TDigest compression -> Size
tdigestTotalWeight
minimumValue :: KnownNat comp => TDigest comp -> Mean
minimumValue :: forall (comp :: Nat). KnownNat comp => TDigest comp -> Double
minimumValue TDigest comp
td
| forall a. Unbox a => Vector a -> Bool
VU.null Vector Centroid
d = Double
posInf
| Bool
otherwise = forall a b. (a, b) -> a
fst (forall a. Unbox a => Vector a -> a
VU.head Vector Centroid
d)
where
d :: Vector Centroid
d = forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData (forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize TDigest comp
td)
maximumValue :: KnownNat comp => TDigest comp -> Mean
maximumValue :: forall (comp :: Nat). KnownNat comp => TDigest comp -> Double
maximumValue TDigest comp
td
| forall a. Unbox a => Vector a -> Bool
VU.null Vector Centroid
d = Double
posInf
| Bool
otherwise = forall a b. (a, b) -> a
fst (forall a. Unbox a => Vector a -> a
VU.last Vector Centroid
d)
where
d :: Vector Centroid
d = forall (compression :: Nat). TDigest compression -> Vector Centroid
tdigestData (forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize TDigest comp
td)
ksize
:: Double
-> Double
-> Double
ksize :: Double -> Double -> Double
ksize Double
comp Double
q = Double
comp forall a. Num a => a -> a -> a
* (forall a. Floating a => a -> a
asin (Double
2 forall a. Num a => a -> a -> a
* Double -> Double
clamp Double
q forall a. Num a => a -> a -> a
- Double
1) forall a. Fractional a => a -> a -> a
/ forall a. Floating a => a
pi forall a. Num a => a -> a -> a
+ Double
0.5)
clamp :: Double -> Double
clamp :: Double -> Double
clamp Double
x
| Double
x forall a. Ord a => a -> a -> Bool
< Double
0.0 = Double
0.0
| Double
x forall a. Ord a => a -> a -> Bool
> Double
1.0 = Double
1.0
| Bool
otherwise = Double
x
ksizeInv
:: Double
-> Double
-> Double
ksizeInv :: Double -> Double -> Double
ksizeInv Double
comp Double
k
| Double
k forall a. Ord a => a -> a -> Bool
> Double
comp = Double
1
| Double
k forall a. Ord a => a -> a -> Bool
< Double
0 = Double
0
| Bool
otherwise = Double
0.5 forall a. Num a => a -> a -> a
* (forall a. Floating a => a -> a
sin ((Double
k forall a. Fractional a => a -> a -> a
/ Double
comp forall a. Num a => a -> a -> a
- Double
0.5) forall a. Num a => a -> a -> a
* forall a. Floating a => a
pi) forall a. Num a => a -> a -> a
+ Double
1)
merge :: Int -> Double -> [(Mean, Weight)] -> [(Mean, Weight)]
merge :: Size -> Double -> [Centroid] -> [Centroid]
merge Size
_ Double
_ [] = []
merge Size
tw' Double
comp (Centroid
y:[Centroid]
ys) = Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
0 (Double -> Double
qLimit' Double
0) Centroid
y [Centroid]
ys
where
tw :: Double
tw = forall a b. (Integral a, Num b) => a -> b
fromIntegral Size
tw'
qLimit' :: Double -> Double
qLimit' :: Double -> Double
qLimit' Double
q0 = Double -> Double -> Double
ksizeInv Double
comp (Double -> Double -> Double
ksize Double
comp Double
q0 forall a. Num a => a -> a -> a
+ Double
1)
go :: Double
-> Double
-> (Mean, Weight)
-> [(Mean, Weight)]
-> [(Mean, Weight)]
go :: Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
_q0 Double
_qLimit Centroid
sigma [] = [Centroid
sigma]
go Double
q0 Double
qLimit Centroid
sigma (Centroid
x:[Centroid]
xs)
| Double
q forall a. Ord a => a -> a -> Bool
<= Double
qLimit = Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
q0 Double
qLimit (Centroid -> Centroid -> Centroid
plus Centroid
sigma Centroid
x) [Centroid]
xs
| Bool
otherwise = Centroid
sigma forall a. a -> [a] -> [a]
: Double -> Double -> Centroid -> [Centroid] -> [Centroid]
go Double
q0' (Double -> Double
qLimit' Double
q0') Centroid
x [Centroid]
xs
where
q :: Double
q = Double
q0 forall a. Num a => a -> a -> a
+ (forall a b. (a, b) -> b
snd Centroid
sigma forall a. Num a => a -> a -> a
+ forall a b. (a, b) -> b
snd Centroid
x) forall a. Fractional a => a -> a -> a
/ Double
tw
q0' :: Double
q0' = Double
q0 forall a. Num a => a -> a -> a
+ forall a b. (a, b) -> b
snd Centroid
sigma forall a. Fractional a => a -> a -> a
/ Double
tw
plus :: Centroid -> Centroid -> Centroid
plus :: Centroid -> Centroid -> Centroid
plus (Double
m1,Double
w1) (Double
m2,Double
w2) = ((Double
m1 forall a. Num a => a -> a -> a
* Double
w1 forall a. Num a => a -> a -> a
+ Double
m2 forall a. Num a => a -> a -> a
* Double
w2) forall a. Fractional a => a -> a -> a
/ Double
w, Double
w) where w :: Double
w = Double
w1 forall a. Num a => a -> a -> a
+ Double
w2
emptyTDigest :: TDigest comp
emptyTDigest :: forall (comp :: Nat). TDigest comp
emptyTDigest = forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest Size
0 forall a. Monoid a => a
mempty Size
0 forall a. Monoid a => a
mempty Bool
True
combineTDigest :: forall comp. KnownNat comp => TDigest comp -> TDigest comp -> TDigest comp
combineTDigest :: forall (comp :: Nat).
KnownNat comp =>
TDigest comp -> TDigest comp -> TDigest comp
combineTDigest (TDigest Size
tw Vector Centroid
d Size
_ [Double]
b Bool
dir) (TDigest Size
tw' Vector Centroid
d' Size
_ [Double]
b' Bool
dir') =
forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest (Size
tw forall a. Num a => a -> a -> a
+ Size
tw') Vector Centroid
newD Size
0 [] (Bool
dir forall a. Eq a => a -> a -> Bool
/= Bool
dir')
where
newD :: Vector Centroid
newD = forall a. Unbox a => [a] -> Vector a
VU.fromList
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Size -> Double -> [Centroid] -> [Centroid]
merge Size
tw Double
comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> a
fst)
forall a b. (a -> b) -> a -> b
$ forall a. Unbox a => Vector a -> [a]
VU.toList Vector Centroid
d forall a. [a] -> [a] -> [a]
++ forall a. Unbox a => Vector a -> [a]
VU.toList Vector Centroid
d' forall a. [a] -> [a] -> [a]
++ forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> b -> a -> c
flip (,) Double
1) ([Double]
b forall a. [a] -> [a] -> [a]
++ [Double]
b')
comp :: Double
comp = forall a. Num a => Integer -> a
fromInteger (forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)) forall a. Num a => a -> a -> a
* forall a. Num a => a
sizeCoefficient
finalize :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
finalize :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
finalize TDigest comp
td
| forall (t :: * -> *) a. Foldable t => t a -> Bool
null (forall (compression :: Nat). TDigest compression -> [Double]
tdigestBuffer TDigest comp
td) = TDigest comp
td
| Bool
otherwise = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
td
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress (TDigest Size
tw Vector Centroid
d Size
_bs [Double]
b Bool
dir) = forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest Size
tw Vector Centroid
d' Size
0 [] (Bool -> Bool
not Bool
dir)
where
d' :: Vector Centroid
d' = forall a. Unbox a => [a] -> Vector a
VU.fromList
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. [a] -> [a]
rev
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Size -> Double -> [Centroid] -> [Centroid]
merge Size
tw Double
comp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {a}. [a] -> [a]
rev
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> a
fst)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. [a] -> [a] -> [a]
++ forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> b -> a -> c
flip (,) Double
1) [Double]
b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Unbox a => Vector a -> [a]
VU.toList
forall a b. (a -> b) -> a -> b
$ Vector Centroid
d
comp :: Double
comp = forall a. Num a => Integer -> a
fromInteger (forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)) forall a. Num a => a -> a -> a
* forall a. Num a => a
sizeCoefficient
rev :: [a] -> [a]
rev | Bool
dir = forall a. a -> a
id
| Bool
otherwise = forall {a}. [a] -> [a]
reverse
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
compress :: forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress t :: TDigest comp
t@(TDigest Size
_ Vector Centroid
_ Size
bs [Double]
_ Bool
_)
| Size
bs forall a. Ord a => a -> a -> Bool
> Size
compInt forall a. Num a => a -> a -> a
* Size
2 = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
forceCompress TDigest comp
t
| Bool
otherwise = TDigest comp
t
where
compInt :: Size
compInt = forall a. Num a => Integer -> a
fromInteger (forall (n :: Nat) (proxy :: Nat -> *).
KnownNat n =>
proxy n -> Integer
natVal (forall {k} (t :: k). Proxy t
Proxy :: Proxy comp)) forall a. Num a => a -> a -> a
* forall a. Num a => a
sizeCoefficient
sizeCoefficient :: Num a => a
sizeCoefficient :: forall a. Num a => a
sizeCoefficient = a
32
valid :: TDigest comp -> Bool
valid :: forall (compression :: Nat). TDigest compression -> Bool
valid = forall a b. Either a b -> Bool
isRight forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate
validate :: TDigest comp -> Either String (TDigest comp)
validate :: forall (comp :: Nat). TDigest comp -> Either String (TDigest comp)
validate td :: TDigest comp
td@(TDigest Size
tw Vector Centroid
d Size
bs [Double]
b Bool
_dir)
| Bool -> Bool
not (Size
bs forall a. Eq a => a -> a -> Bool
== forall (t :: * -> *) a. Foldable t => t a -> Size
length [Double]
b) =
forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"Buffer lenght don't match: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Size
bs, forall (t :: * -> *) a. Foldable t => t a -> Size
length [Double]
b)
| Bool -> Bool
not (Size
tw forall a. Eq a => a -> a -> Bool
== Size
bs forall a. Num a => a -> a -> a
+ forall a b. (RealFrac a, Integral b) => a -> b
round Double
dw) =
forall a b. a -> Either a b
Left String
"Total weight doesn't match"
| [Centroid]
dl forall a. Eq a => a -> a -> Bool
/= forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing forall a b. (a, b) -> a
fst) [Centroid]
dl =
forall a b. a -> Either a b
Left String
"Data buffer isn't ordered"
| Bool
otherwise = forall a b. b -> Either a b
Right TDigest comp
td
where
dl :: [Centroid]
dl :: [Centroid]
dl = forall a. Unbox a => Vector a -> [a]
VU.toList Vector Centroid
d
dw :: Double
dw :: Double
dw = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [Centroid]
dl)
insert
:: KnownNat comp
=> Double
-> TDigest comp
-> TDigest comp
insert :: forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert Double
x = forall (comp :: Nat). KnownNat comp => TDigest comp -> TDigest comp
compress forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x
insert'
:: KnownNat comp
=> Double
-> TDigest comp
-> TDigest comp
insert' :: forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert' Double
x (TDigest Size
s Vector Centroid
d Size
sb [Double]
b Bool
dir) = forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest (Size
s forall a. Num a => a -> a -> a
+ Size
1) Vector Centroid
d (Size
sb forall a. Num a => a -> a -> a
+ Size
1) (Double
x forall a. a -> [a] -> [a]
: [Double]
b) Bool
dir
singleton :: Double -> TDigest comp
singleton :: forall (comp :: Nat). Double -> TDigest comp
singleton Double
x = forall (compression :: Nat).
Size
-> Vector Centroid
-> Size
-> [Double]
-> Bool
-> TDigest compression
TDigest Size
1 (forall a. Unbox a => a -> Vector a
VU.singleton (Double
x, Double
1)) Size
0 [] Bool
True
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
tdigest :: forall (f :: * -> *) (comp :: Nat).
(Foldable f, KnownNat comp) =>
f Double -> TDigest comp
tdigest = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (comp :: Nat).
KnownNat comp =>
Double -> TDigest comp -> TDigest comp
insert) forall a. Monoid a => a
mempty forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> [a]
toList