{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module    : Statistics.Types
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Data types common used in statistics
module Statistics.Types
    ( -- * Confidence level
      CL
      -- ** Accessors
    , confidenceLevel
    , significanceLevel
      -- ** Constructors
    , mkCL
    , mkCLE
    , mkCLFromSignificance
    , mkCLFromSignificanceE
      -- ** Constants and conversion to nσ
    , cl90
    , cl95
    , cl99
      -- *** Normal approximation
    , nSigma
    , nSigma1
    , getNSigma
    , getNSigma1
      -- * p-value
    , PValue
      -- ** Accessors
    , pValue
      -- ** Constructors
    , mkPValue
    , mkPValueE
      -- * Estimates and upper/lower limits
    , Estimate(..)
    , NormalErr(..)
    , ConfInt(..)
    , UpperLimit(..)
    , LowerLimit(..)
      -- ** Constructors
    , estimateNormErr
    , (±)
    , estimateFromInterval
    , estimateFromErr
      -- ** Accessors
    , confidenceInterval
    , asymErrors
    , Scale(..)
      -- * Other
    , Sample
    , WeightedSample
    , Weights
    ) where

import Control.Monad                ((<=<), liftM2, liftM3)
import Control.DeepSeq              (NFData(..))
import Data.Aeson                   (FromJSON(..), ToJSON)
import Data.Binary                  (Binary(..))
import Data.Data                    (Data,Typeable)
import Data.Maybe                   (fromMaybe)
import Data.Vector.Unboxed          (Unbox)
import Data.Vector.Unboxed.Deriving (derivingUnbox)
import GHC.Generics                 (Generic)

#if __GLASGOW_HASKELL__ == 704
import qualified Data.Vector.Generic
import qualified Data.Vector.Generic.Mutable
#endif

import Statistics.Internal
import Statistics.Types.Internal
import Statistics.Distribution
import Statistics.Distribution.Normal


----------------------------------------------------------------
-- Data type for confidence level
----------------------------------------------------------------

-- |
-- Confidence level. In context of confidence intervals it's
-- probability of said interval covering true value of measured
-- value. In context of statistical tests it's @1-α@ where α is
-- significance of test.
--
-- Since confidence level are usually close to 1 they are stored as
-- @1-CL@ internally. There are two smart constructors for @CL@:
-- 'mkCL' and 'mkCLFromSignificance' (and corresponding variant
-- returning @Maybe@). First creates @CL@ from confidence level and
-- second from @1 - CL@ or significance level.
--
-- >>> cl95
-- mkCLFromSignificance 0.05
--
-- Prior to 0.14 confidence levels were passed to function as plain
-- @Doubles@. Use 'mkCL' to convert them to @CL@.
newtype CL a = CL a
               deriving (Eq, Typeable, Data, Generic)

instance Show a => Show (CL a) where
  showsPrec n (CL p) = defaultShow1 "mkCLFromSignificance" p n
instance (Num a, Ord a, Read a) => Read (CL a) where
  readPrec = defaultReadPrecM1 "mkCLFromSignificance" mkCLFromSignificanceE

instance (Binary a, Num a, Ord a) => Binary (CL a) where
  put (CL p) = put p
  get        = maybe (fail errMkCL) return . mkCLFromSignificanceE =<< get

instance (ToJSON a)                 => ToJSON   (CL a)
instance (FromJSON a, Num a, Ord a) => FromJSON (CL a) where
  parseJSON = maybe (fail errMkCL) return . mkCLFromSignificanceE <=< parseJSON

instance NFData   a => NFData   (CL a) where
  rnf (CL a) = rnf a

-- |
-- >>> cl95 > cl90
-- True
instance Ord a => Ord (CL a) where
  CL a <  CL b = a >  b
  CL a <= CL b = a >= b
  CL a >  CL b = a <  b
  CL a >= CL b = a <= b
  max (CL a) (CL b) = CL (min a b)
  min (CL a) (CL b) = CL (max a b)


-- | Create confidence level from probability β or probability
--   confidence interval contain true value of estimate. Will throw
--   exception if parameter is out of [0,1] range
--
-- >>> mkCL 0.95    -- same as cl95
-- mkCLFromSignificance 0.05
mkCL :: (Ord a, Num a) => a -> CL a
mkCL
  = fromMaybe (error "Statistics.Types.mkCL: probability is out if [0,1] range")
  . mkCLE

-- | Same as 'mkCL' but returns @Nothing@ instead of error if
--   parameter is out of [0,1] range
--
-- >>> mkCLE 0.95    -- same as cl95
-- Just (mkCLFromSignificance 0.05)
mkCLE :: (Ord a, Num a) => a -> Maybe (CL a)
mkCLE p
  | p >= 0 && p <= 1 = Just $ CL (1 - p)
  | otherwise        = Nothing

-- | Create confidence level from probability α or probability that
--   confidence interval does not contain true value of estimate. Will
--   throw exception if parameter is out of [0,1] range
--
-- >>> mkCLFromSignificance 0.05    -- same as cl95
-- mkCLFromSignificance 0.05
mkCLFromSignificance :: (Ord a, Num a) => a -> CL a
mkCLFromSignificance = fromMaybe (error errMkCL) . mkCLFromSignificanceE

-- | Same as 'mkCLFromSignificance' but returns @Nothing@ instead of error if
--   parameter is out of [0,1] range
--
-- >>> mkCLFromSignificanceE 0.05    -- same as cl95
-- Just (mkCLFromSignificance 0.05)
mkCLFromSignificanceE :: (Ord a, Num a) => a -> Maybe (CL a)
mkCLFromSignificanceE p
  | p >= 0 && p <= 1 = Just $ CL p
  | otherwise        = Nothing

errMkCL :: String
errMkCL = "Statistics.Types.mkPValCL: probability is out if [0,1] range"


-- | Get confidence level. This function is subject to rounding
--   errors. If @1 - CL@ is needed use 'significanceLevel' instead
confidenceLevel :: (Num a) => CL a -> a
confidenceLevel (CL p) = 1 - p

-- | Get significance level.
significanceLevel :: CL a -> a
significanceLevel (CL p) = p



-- | 90% confidence level
cl90 :: Fractional a => CL a
cl90 = CL 0.10

-- | 95% confidence level
cl95 :: Fractional a => CL a
cl95 = CL 0.05

-- | 99% confidence level
cl99 :: Fractional a => CL a
cl99 = CL 0.01



----------------------------------------------------------------
-- Data type for p-value
----------------------------------------------------------------

-- | Newtype wrapper for p-value.
newtype PValue a = PValue a
               deriving (Eq,Ord, Typeable, Data, Generic)

instance Show a => Show (PValue a) where
  showsPrec n (PValue p) = defaultShow1 "mkPValue" p n
instance (Num a, Ord a, Read a) => Read (PValue a) where
  readPrec = defaultReadPrecM1 "mkPValue" mkPValueE

instance (Binary a, Num a, Ord a) => Binary (PValue a) where
  put (PValue p) = put p
  get            = maybe (fail errMkPValue) return . mkPValueE =<< get

instance (ToJSON a)                 => ToJSON   (PValue a)
instance (FromJSON a, Num a, Ord a) => FromJSON (PValue a) where
  parseJSON = maybe (fail errMkPValue) return . mkPValueE <=< parseJSON

instance NFData a => NFData (PValue a) where
  rnf (PValue a) = rnf a


-- | Construct PValue. Throws error if argument is out of [0,1] range.
--
mkPValue :: (Ord a, Num a) => a -> PValue a
mkPValue = fromMaybe (error errMkPValue) . mkPValueE

-- | Construct PValue. Returns @Nothing@ if argument is out of [0,1] range.
mkPValueE :: (Ord a, Num a) => a -> Maybe (PValue a)
mkPValueE p
  | p >= 0 && p <= 1 = Just $ PValue p
  | otherwise        = Nothing

-- | Get p-value
pValue :: PValue a -> a
pValue (PValue p) = p


-- | P-value expressed in sigma. This is convention widely used in
--   experimental physics. N sigma confidence level corresponds to
--   probability within N sigma of normal distribution.
--
--   Note that this correspondence is for normal distribution. Other
--   distribution will have different dependency. Also experimental
--   distribution usually only approximately normal (especially at
--   extreme tails).
nSigma :: Double -> PValue Double
nSigma n
  | n > 0     = PValue $ 2 * cumulative standard (-n)
  | otherwise = error "Statistics.Extra.Error.nSigma: non-positive number of sigma"

-- | P-value expressed in sigma for one-tail hypothesis. This correspond to
--   probability of obtaining value less than @N·σ@.
nSigma1 :: Double -> PValue Double
nSigma1 n
  | n > 0     = PValue $ cumulative standard (-n)
  | otherwise = error "Statistics.Extra.Error.nSigma1: non-positive number of sigma"

-- | Express confidence level in sigmas
getNSigma :: PValue Double -> Double
getNSigma (PValue p) = negate $ quantile standard (p / 2)

-- | Express confidence level in sigmas for one-tailed hypothesis.
getNSigma1 :: PValue Double -> Double
getNSigma1 (PValue p) = negate $ quantile standard p



errMkPValue :: String
errMkPValue = "Statistics.Types.mkPValue: probability is out if [0,1] range"



----------------------------------------------------------------
-- Point estimates
----------------------------------------------------------------

-- |
-- A point estimate and its confidence interval. It's parametrized by
-- both error type @e@ and value type @a@. This module provides two
-- types of error: 'NormalErr' for normally distributed errors and
-- 'ConfInt' for error with normal distribution. See their
-- documentation for more details.
--
-- For example @144 ± 5@ (assuming normality) could be expressed as
--
-- > Estimate { estPoint = 144
-- >          , estError = NormalErr 5
-- >          }
--
-- Or if we want to express @144 + 6 - 4@ at CL95 we could write:
--
-- > Estimate { estPoint = 144
-- >          , estError = ConfInt
-- >                       { confIntLDX = 4
-- >                       , confIntUDX = 6
-- >                       , confIntCL  = cl95
-- >                       }
--
-- Prior to statistics 0.14 @Estimate@ data type used following definition:
--
-- > data Estimate = Estimate {
-- >      estPoint           :: {-# UNPACK #-} !Double
-- >    , estLowerBound      :: {-# UNPACK #-} !Double
-- >    , estUpperBound      :: {-# UNPACK #-} !Double
-- >    , estConfidenceLevel :: {-# UNPACK #-} !Double
-- >    }
--
-- Now type @Estimate ConfInt Double@ should be used instead. Function
-- 'estimateFromInterval' allow to easily construct estimate from same inputs.
data Estimate e a = Estimate
    { estPoint           :: !a
      -- ^ Point estimate.
    , estError           :: !(e a)
      -- ^ Confidence interval for estimate.
    } deriving (Eq, Read, Show, Generic
#if __GLASGOW_HASKELL__ >= 708
               , Typeable, Data
#endif
               )

instance (Binary   (e a), Binary   a) => Binary   (Estimate e a) where
  get = liftM2 Estimate get get
  put (Estimate ep ee) = put ep >> put ee
instance (FromJSON (e a), FromJSON a) => FromJSON (Estimate e a)
instance (ToJSON   (e a), ToJSON   a) => ToJSON   (Estimate e a)
instance (NFData   (e a), NFData   a) => NFData   (Estimate e a) where
    rnf (Estimate x dx) = rnf x `seq` rnf dx



-- |
-- Normal errors. They are stored as 1σ errors which corresponds to
-- 68.8% CL. Since we can recalculate them to any confidence level if
-- needed we don't store it.
newtype NormalErr a = NormalErr
  { normalError :: a
  }
  deriving (Eq, Read, Show, Typeable, Data, Generic)

instance Binary   a => Binary   (NormalErr a) where
  get = fmap NormalErr get
  put = put . normalError
instance FromJSON a => FromJSON (NormalErr a)
instance ToJSON   a => ToJSON   (NormalErr a)
instance NFData   a => NFData   (NormalErr a) where
    rnf (NormalErr x) = rnf x


-- | Confidence interval. It assumes that confidence interval forms
--   single interval and isn't set of disjoint intervals.
data ConfInt a = ConfInt
  { confIntLDX :: !a
    -- ^ Lower error estimate, or distance between point estimate and
    --   lower bound of confidence interval.
  , confIntUDX :: !a
    -- ^ Upper error estimate, or distance between point estimate and
    --   upper bound of confidence interval.
  , confIntCL  :: !(CL Double)
    -- ^ Confidence level corresponding to given confidence interval.
  }
  deriving (Read,Show,Eq,Typeable,Data,Generic)

instance Binary   a => Binary   (ConfInt a) where
  get = liftM3 ConfInt get get get
  put (ConfInt l u cl) = put l >> put u >> put cl 
instance FromJSON a => FromJSON (ConfInt a)
instance ToJSON   a => ToJSON   (ConfInt a)
instance NFData   a => NFData   (ConfInt a) where
    rnf (ConfInt x y _) = rnf x `seq` rnf y



----------------------------------------
-- Constructors

-- | Create estimate with normal errors
estimateNormErr :: a            -- ^ Point estimate
                -> a            -- ^ 1σ error
                -> Estimate NormalErr a
estimateNormErr x dx = Estimate x (NormalErr dx)

-- | Synonym for 'estimateNormErr'
(±) :: a      -- ^ Point estimate
    -> a      -- ^ 1σ error
    -> Estimate NormalErr a
(±) = estimateNormErr

-- | Create estimate with asymmetric error.
estimateFromErr
  :: a                     -- ^ Central estimate
  -> (a,a)                 -- ^ Lower and upper errors. Both should be
                           --   positive but it's not checked.
  -> CL Double             -- ^ Confidence level for interval
  -> Estimate ConfInt a
estimateFromErr x (ldx,udx) cl = Estimate x (ConfInt ldx udx cl)

-- | Create estimate with asymmetric error.
estimateFromInterval
  :: Num a
  => a                     -- ^ Point estimate. Should lie within
                           --   interval but it's not checked.
  -> (a,a)                 -- ^ Lower and upper bounds of interval
  -> CL Double             -- ^ Confidence level for interval
  -> Estimate ConfInt a
estimateFromInterval x (lx,ux) cl
  = Estimate x (ConfInt (x-lx) (ux-x) cl)


----------------------------------------
-- Accessors

-- | Get confidence interval
confidenceInterval :: Num a => Estimate ConfInt a -> (a,a)
confidenceInterval (Estimate x (ConfInt ldx udx _))
  = (x - ldx, x + udx)

-- | Get asymmetric errors
asymErrors :: Estimate ConfInt a -> (a,a)
asymErrors (Estimate _ (ConfInt ldx udx _)) = (ldx,udx)



-- | Data types which could be multiplied by constant.
class Scale e where
  scale :: (Ord a, Num a) => a -> e a -> e a

instance Scale NormalErr where
  scale a (NormalErr e) = NormalErr (abs a * e)

instance Scale ConfInt where
  scale a (ConfInt l u cl) | a >= 0    = ConfInt  (a*l)  (a*u) cl
                           | otherwise = ConfInt (-a*u) (-a*l) cl

instance Scale e => Scale (Estimate e) where
  scale a (Estimate x dx) = Estimate (a*x) (scale a dx)



----------------------------------------------------------------
-- Upper/lower limit
----------------------------------------------------------------

-- | Upper limit. They are usually given for small non-negative values
--   when it's not possible detect difference from zero.
data UpperLimit a = UpperLimit
    { upperLimit        :: !a
      -- ^ Upper limit
    , ulConfidenceLevel :: !(CL Double)
      -- ^ Confidence level for which limit was calculated
    } deriving (Eq, Read, Show, Typeable, Data, Generic)


instance Binary   a => Binary   (UpperLimit a) where
  get = liftM2 UpperLimit get get
  put (UpperLimit l cl) = put l >> put cl
instance FromJSON a => FromJSON (UpperLimit a)
instance ToJSON   a => ToJSON   (UpperLimit a)
instance NFData   a => NFData   (UpperLimit a) where
    rnf (UpperLimit x cl) = rnf x `seq` rnf cl



-- | Lower limit. They are usually given for large quantities when
--   it's not possible to measure them. For example: proton half-life
data LowerLimit a = LowerLimit {
    lowerLimit        :: !a
    -- ^ Lower limit
  , llConfidenceLevel :: !(CL Double)
    -- ^ Confidence level for which limit was calculated
  } deriving (Eq, Read, Show, Typeable, Data, Generic)

instance Binary   a => Binary   (LowerLimit a) where
  get = liftM2 LowerLimit get get
  put (LowerLimit l cl) = put l >> put cl
instance FromJSON a => FromJSON (LowerLimit a)
instance ToJSON   a => ToJSON   (LowerLimit a)
instance NFData   a => NFData   (LowerLimit a) where
    rnf (LowerLimit x cl) = rnf x `seq` rnf cl


----------------------------------------------------------------
-- Deriving unbox instances
----------------------------------------------------------------

derivingUnbox "CL"
  [t| forall a. Unbox a => CL a -> a |]
  [| \(CL a) -> a |]
  [| CL           |]

derivingUnbox "PValue"
  [t| forall a. Unbox a => PValue a -> a |]
  [| \(PValue a) -> a |]
  [| PValue           |]

derivingUnbox "Estimate"
  [t| forall a e. (Unbox a, Unbox (e a)) => Estimate e a -> (a, e a) |]
  [| \(Estimate x dx) -> (x,dx) |]
  [| \(x,dx) -> (Estimate x dx) |]

derivingUnbox "NormalErr"
  [t| forall a. Unbox a => NormalErr a -> a |]
  [| \(NormalErr a) -> a |]
  [| NormalErr           |]

derivingUnbox "ConfInt"
  [t| forall a. Unbox a => ConfInt a -> (a, a, CL Double) |]
  [| \(ConfInt a b c) -> (a,b,c) |]
  [| \(a,b,c) -> ConfInt a b c   |]

derivingUnbox "UpperLimit"
  [t| forall a. Unbox a => UpperLimit a -> (a, CL Double) |]
  [| \(UpperLimit a b) -> (a,b) |]
  [| \(a,b) -> UpperLimit a b   |]

derivingUnbox "LowerLimit"
  [t| forall a. Unbox a => LowerLimit a -> (a, CL Double) |]
  [| \(LowerLimit a b) -> (a,b) |]
  [| \(a,b) -> LowerLimit a b   |]