{-# LANGUAGE OverloadedStrings, PatternGuards, DeriveDataTypeable, DeriveGeneric #-} -- | -- Module : Statistics.Distribution.NegativeBinomial -- Copyright : (c) 2022 Lorenz Minder -- License : BSD3 -- -- Maintainer : lminder@gmx.net -- Stability : experimental -- Portability : portable -- -- The negative binomial distribution. This is the discrete probability -- distribution of the number of failures in a sequence of independent -- yes\/no experiments before a specified number of successes /r/. Each -- Bernoulli trial has success probability /p/ in the range (0, 1]. The -- parameter /r/ must be positive, but does not have to be integer. module Statistics.Distribution.NegativeBinomial ( NegativeBinomialDistribution -- * Constructors , negativeBinomial , negativeBinomialE -- * Accessors , nbdSuccesses , nbdProbability ) where import Control.Applicative import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) import Data.Binary (Binary(..)) import Data.Data (Data, Typeable) import Data.Foldable (foldl') import GHC.Generics (Generic) import Numeric.SpecFunctions (incompleteBeta, log1p) import Numeric.SpecFunctions.Extra (logChooseFast) import Numeric.MathFunctions.Constants (m_epsilon, m_tiny) import qualified Statistics.Distribution as D import Statistics.Internal -- Math helper functions -- | Generalized binomial coefficients. -- -- These computes binomial coefficients with the small generalization -- that the /n/ need not be integer, but can be real. gChoose :: Double -> Int -> Double gChoose n k | k < 0 = 0 | k' >= 50 = exp $ logChooseFast n k' | otherwise = foldl' (*) 1 factors where factors = [ (n - k' + j) / j | j <- [1..k'] ] k' = fromIntegral k -- Implementation of Negative Binomial -- | The negative binomial distribution. data NegativeBinomialDistribution = NBD { nbdSuccesses :: {-# UNPACK #-} !Double -- ^ Number of successes until stop , nbdProbability :: {-# UNPACK #-} !Double -- ^ Success probability. } deriving (Eq, Typeable, Data, Generic) instance Show NegativeBinomialDistribution where showsPrec i (NBD r p) = defaultShow2 "negativeBinomial" r p i instance Read NegativeBinomialDistribution where readPrec = defaultReadPrecM2 "negativeBinomial" negativeBinomialE instance ToJSON NegativeBinomialDistribution instance FromJSON NegativeBinomialDistribution where parseJSON (Object v) = do r <- v .: "nbdSuccesses" p <- v .: "nbdProbability" maybe (fail $ errMsg r p) return $ negativeBinomialE r p parseJSON _ = empty instance Binary NegativeBinomialDistribution where put (NBD r p) = put r >> put p get = do r <- get p <- get maybe (fail $ errMsg r p) return $ negativeBinomialE r p instance D.Distribution NegativeBinomialDistribution where cumulative = cumulative complCumulative = complCumulative instance D.DiscreteDistr NegativeBinomialDistribution where probability = probability logProbability = logProbability instance D.Mean NegativeBinomialDistribution where mean = mean instance D.Variance NegativeBinomialDistribution where variance = variance instance D.MaybeMean NegativeBinomialDistribution where maybeMean = Just . D.mean instance D.MaybeVariance NegativeBinomialDistribution where maybeStdDev = Just . D.stdDev maybeVariance = Just . D.variance instance D.Entropy NegativeBinomialDistribution where entropy = directEntropy instance D.MaybeEntropy NegativeBinomialDistribution where maybeEntropy = Just . D.entropy -- This could be slow for big n probability :: NegativeBinomialDistribution -> Int -> Double probability d@(NBD r p) k | k < 0 = 0 -- Switch to log domain for large k + r to avoid overflows. -- -- We also want to avoid underflow when computing (1-p)^k & -- p^r. | k' + r < 1000 , pK >= m_tiny , pR >= m_tiny = gChoose (k' + r - 1) k * pK * pR | otherwise = exp $ logProbability d k where pK = exp $ log1p (-p) * k' pR = p**r k' = fromIntegral k logProbability :: NegativeBinomialDistribution -> Int -> Double logProbability (NBD r p) k | k < 0 = (-1)/0 | otherwise = logChooseFast (k' + r - 1) k' + log1p (-p) * k' + log p * r where k' = fromIntegral k cumulative :: NegativeBinomialDistribution -> Double -> Double cumulative (NBD r p) x | isNaN x = error "Statistics.Distribution.NegativeBinomial.cumulative: NaN input" | isInfinite x = if x > 0 then 1 else 0 | k < 0 = 0 | otherwise = incompleteBeta r (fromIntegral (k+1)) p where k = floor x :: Integer complCumulative :: NegativeBinomialDistribution -> Double -> Double complCumulative (NBD r p) x | isNaN x = error "Statistics.Distribution.NegativeBinomial.complCumulative: NaN input" | isInfinite x = if x > 0 then 0 else 1 | k < 0 = 1 | otherwise = incompleteBeta (fromIntegral (k+1)) r (1 - p) where k = (floor x)::Integer mean :: NegativeBinomialDistribution -> Double mean (NBD r p) = r * (1 - p)/p variance :: NegativeBinomialDistribution -> Double variance (NBD r p) = r * (1 - p)/(p * p) directEntropy :: NegativeBinomialDistribution -> Double directEntropy d = negate . sum $ takeWhile (< -m_epsilon) $ dropWhile (>= -m_epsilon) $ [ let x = probability d k in x * log x | k <- [0..]] -- | Construct negative binomial distribution. Number of failures /r/ -- must be positive and probability must be in (0,1] range negativeBinomial :: Double -- ^ Number of successes. -> Double -- ^ Success probability. -> NegativeBinomialDistribution negativeBinomial r p = maybe (error $ errMsg r p) id $ negativeBinomialE r p -- | Construct negative binomial distribution. Number of failures /r/ -- must be positive and probability must be in (0,1] range negativeBinomialE :: Double -- ^ Number of successes. -> Double -- ^ Success probability. -> Maybe NegativeBinomialDistribution negativeBinomialE r p | r > 0 && 0 < p && p <= 1 = Just (NBD r p) | otherwise = Nothing errMsg :: Double -> Double -> String errMsg r p = "Statistics.Distribution.NegativeBinomial.negativeBinomial: r=" ++ show r ++ " p=" ++ show p ++ ", but need r>0 and p in (0,1]"