-- | Various instances of 'Statistical' 'Manifold's. module Goal.Probability.Distributions ( -- * General Statistical Manifolds CurvedCategorical (CurvedCategorical) , Uniform (Uniform) -- * Exponential Family Manifolds , Bernoulli (Bernoulli) , Binomial (Binomial) , Categorical (Categorical) , Poisson (Poisson) , Normal (Normal) , MeanNormal (MeanNormal) , MultivariateNormal (MultivariateNormal) -- * Util , muSigmaToMultivariateNormal ) where -- Package -- import Goal.Core import Goal.Probability.Statistical import Goal.Probability.ExponentialFamily import Goal.Geometry -- Qualified -- import qualified Data.Vector.Storable as C import qualified Numeric.LinearAlgebra.HMatrix as M -- Unqualified -- import System.Random.MWC.Monad import System.Random.MWC.Distributions.Monad import Statistics.Sample hiding (mean) import Numeric.SpecFunctions -- Uniform -- data Uniform = Uniform Double Double deriving (Eq, Read, Show) instance Manifold Uniform where dimension _ = 0 instance Statistical Uniform where type SampleSpace Uniform = Continuum sampleSpace _ = Continuum instance Generative Standard Uniform where generate p = let (Uniform a b) = manifold p in uniformR (a,b) instance AbsolutelyContinuous Standard Uniform where density p x = let (Uniform a b) = manifold p in if x >= a && x <= b then recip $ b - a else 0 -- Bernoulli Distribution -- -- | The Bernoulli 'Family' with 'SampleSpace' 'Bernoulli' = 'Bool' (because why not). data Bernoulli = Bernoulli deriving (Eq, Read, Show) instance Manifold Bernoulli where dimension _ = 1 instance Statistical Bernoulli where type SampleSpace Bernoulli = Boolean sampleSpace Bernoulli = Boolean instance Generative Standard Bernoulli where generate p = bernoulli . C.head $ coordinates p instance AbsolutelyContinuous Standard Bernoulli where density p True = C.head $ coordinates p density p False = 1 - C.head (coordinates p) instance MaximumLikelihood Standard Bernoulli where mle _ bls = fromList Bernoulli [mean $ toDouble <$> bls] where toDouble True = 1 toDouble False = 0 instance Legendre Natural Bernoulli where potential p = log $ 1 + exp (coordinate 0 p) potentialDifferentials p = fromList (Tangent p) [logistic $ coordinate 0 p] instance Legendre Mixture Bernoulli where potential p = let eta = coordinate 0 p in logit eta * eta - log (1 / (1 - eta)) potentialDifferentials p = fromList (Tangent p) [logit $ coordinate 0 p] instance ExponentialFamily Bernoulli where baseMeasure _ _ = 1 sufficientStatistic Bernoulli True = fromList Bernoulli [1] sufficientStatistic Bernoulli False = fromList Bernoulli [0] instance Riemannian Natural Bernoulli where metric p = let tht = coordinate 0 p stht = logistic tht in fromList (Tensor (Tangent p) (Tangent p)) [stht * (1-stht)] instance Transition Standard Mixture Bernoulli where transition = breakChart instance Transition Mixture Standard Bernoulli where transition = breakChart instance Transition Standard Natural Bernoulli where transition = potentialMapping . chart Mixture . transition instance Transition Natural Standard Bernoulli where transition = transition . potentialMapping instance Generative Natural Bernoulli where generate = standardGenerate -- Binomial Distribution -- newtype Binomial = Binomial { binomialTrials :: Int } deriving (Eq, Read, Show) instance Manifold Binomial where dimension _ = 1 instance Statistical Binomial where type SampleSpace Binomial = [Int] sampleSpace (Binomial n) = [0..n] instance Generative Standard Binomial where generate p = do let n = binomialTrials $ manifold p bls <- replicateM n . bernoulli . head $ listCoordinates p return $ sum [ if bl then 1 else 0 | bl <- bls ] instance AbsolutelyContinuous Standard Binomial where density p k = let n = binomialTrials $ manifold p [c] = listCoordinates p in choose n k * c^k * (1 - c)^(n-k) instance Legendre Natural Binomial where potential p = let n = fromIntegral . binomialTrials $ manifold p tht = coordinate 0 p in n * log (1 + exp tht) potentialDifferentials p = let n = fromIntegral . binomialTrials $ manifold p in fromList (Tangent p) [n * logistic (coordinate 0 p)] instance Legendre Mixture Binomial where potential p = let n = fromIntegral . binomialTrials $ manifold p eta = coordinate 0 p in eta * log (eta / (n - eta)) - n * log (n / (n - eta)) potentialDifferentials p = let n = fromIntegral . binomialTrials $ manifold p eta = coordinate 0 p in fromList (Tangent p) [log $ eta / (n - eta) ] instance ExponentialFamily Binomial where baseMeasure (Binomial n) = choose n sufficientStatistic s k = fromList s [fromIntegral k] instance Transition Standard Natural Binomial where transition = potentialMapping . chart Mixture . transition instance Transition Natural Standard Binomial where transition = chart Standard . transition . potentialMapping instance Transition Standard Mixture Binomial where transition p = breakChart $ alterCoordinates (* (fromIntegral . binomialTrials $ manifold p)) p instance Transition Mixture Standard Binomial where transition p = breakChart $ alterCoordinates (/ (fromIntegral . binomialTrials $ manifold p)) p -- Categorical Distribution -- newtype Categorical s = Categorical s deriving (Show,Eq,Read) -- | A 'Categorical' distribution where the probability of the last category is -- given by the normalization constraint. generateCategorical :: [k] -> Coordinates -> RandST s k -- | Takes a weighted list of elements representing a probability mass function, and -- returns a sample from the Categorical distribution. generateCategorical ks0 cs0 = do c0 <- uniform return $ findProbability ks0 cs0 c0 where findProbability ks cs c | C.null cs = head ks | c < C.head cs = head ks | otherwise = findProbability (tail ks) (C.tail cs) (c - C.head cs) instance Discrete s => Manifold (Categorical s) where dimension (Categorical s) = length (elements s) - 1 instance Discrete s => Statistical (Categorical s) where type SampleSpace (Categorical s) = s sampleSpace (Categorical ks) = ks instance Discrete s => Generative Standard (Categorical s) where generate p = generateCategorical (samples $ manifold p) (coordinates p) instance Discrete s => AbsolutelyContinuous Standard (Categorical s) where density p k | idx == dimension (manifold p) = 1 - C.sum cs | otherwise = cs C.! idx where cs = coordinates p idx = fromMaybe (error "attempted to calculate density of non-categorical element") $ elemIndex k (samples $ manifold p) instance Discrete s => MaximumLikelihood Standard (Categorical s) where mle m ks0' = fromIntegral (length ks0') /> fromList m (builder $ samples m) where builder ks | null $ tail ks = [] | otherwise = let k = head ks kn = length $ filter (== k) ks0' in fromIntegral kn : builder (tail ks) instance Discrete s => Legendre Natural (Categorical s) where potential p = log $ 1 + C.sum (exp $ coordinates p) potentialDifferentials p = let exps = exp $ coordinates p nrm = 1 + C.sum exps in nrm /> fromCoordinates (Tangent p) exps instance Discrete s => Legendre Mixture (Categorical s) where potential p = let cs = coordinates p scs = 1 - C.sum cs in C.sum (C.zipWith (*) cs $ log cs) + scs * log scs potentialDifferentials p = let ps = coordinates p nrm = 1 - C.sum ps in fromCoordinates (Tangent p) (log $ C.map (/nrm) ps) instance Discrete s => ExponentialFamily (Categorical s) where baseMeasure _ _ = 1 sufficientStatistic m k = fromCoordinates m $ C.generate (dimension m) (\j -> if i == j then 1 else 0) where ks = samples m i = fromMaybe (error "Categorical distribution given uncategorized element") $ elemIndex k ks instance Discrete s => Transition Standard Mixture (Categorical s) where transition = breakChart instance Discrete s => Transition Mixture Standard (Categorical s) where transition = breakChart instance Discrete s => Transition Standard Natural (Categorical s) where transition = potentialMapping . chart Mixture . transition instance Discrete s => Transition Natural Standard (Categorical s) where transition = transition . potentialMapping -- Curved Categorical Distribution -- newtype CurvedCategorical s = CurvedCategorical s deriving (Show,Eq,Read) instance Discrete s => Manifold (CurvedCategorical s) where dimension = length . samples instance Discrete s => Statistical (CurvedCategorical s) where type SampleSpace (CurvedCategorical s) = s sampleSpace (CurvedCategorical s) = s instance Discrete s => Generative Standard (CurvedCategorical s) where generate p = generateCategorical (samples $ manifold p) (coordinates p) instance Discrete s => AbsolutelyContinuous Standard (CurvedCategorical s) where density p k = cs C.! idx where ks = samples $ manifold p cs = coordinates p idx = fromMaybe (error "attempted to calculate density of non-categorical element") $ elemIndex k ks -- Poisson Distribution -- generatePoisson :: Double -> RandST s Int -- | Returns a sample from a Poisson distribution with the given rate. generatePoisson rt = uniform >>= renew 0 where l = exp (-rt) renew k p | p <= l = return k | otherwise = do u <- uniform renew (k+1) (p*u) data Poisson = Poisson deriving (Eq, Read, Show) instance Manifold Poisson where dimension _ = 1 instance Statistical Poisson where type SampleSpace Poisson = NaturalNumbers sampleSpace _ = NaturalNumbers instance Generative Standard Poisson where generate d = generatePoisson . C.head $ coordinates d instance AbsolutelyContinuous Standard Poisson where density d k = let ps = coordinates d lmda = C.head ps in lmda^k / factorial k * exp (-lmda) instance MaximumLikelihood Standard Poisson where mle _ xs = fromList Poisson . (:[]) . mean $ fromIntegral <$> xs instance ExponentialFamily Poisson where sufficientStatistic Poisson = fromCoordinates Poisson . C.singleton . fromIntegral baseMeasure _ k = recip $ factorial k instance Legendre Natural Poisson where potential p = exp $ coordinate 0 p potentialDifferentials p = fromCoordinates (Tangent p) . exp $ coordinates p instance Legendre Mixture Poisson where potential p = let eta = coordinate 0 p in eta * log eta - eta potentialDifferentials p = fromCoordinates (Tangent p) . log $ coordinates p instance Riemannian Natural Poisson where metric p = let tht = coordinate 0 p in fromList (Tensor (Tangent p) (Tangent p)) [exp tht] instance Transition Standard Natural Poisson where transition = transition . chart Mixture . transition instance Transition Natural Standard Poisson where transition = transition . potentialMapping instance Transition Standard Mixture Poisson where transition = breakChart instance Transition Mixture Standard Poisson where transition = breakChart instance Generative Natural Poisson where generate = standardGenerate -- Normal Distribution -- data Normal = Normal deriving (Show,Eq,Read) instance Manifold Normal where dimension _ = 2 instance Statistical Normal where type SampleSpace Normal = Continuum sampleSpace _ = Continuum instance Generative Standard Normal where generate p = let [mu,vr] = listCoordinates p in normal mu $ sqrt vr instance AbsolutelyContinuous Standard Normal where density p x = let [mu,vr] = listCoordinates p in recip (sqrt $ vr*2*pi) * exp (negate $ (x - mu) ** 2 / (2*vr)) instance MaximumLikelihood Standard Normal where mle _ xs = let (mu,vr) = meanVariance $ C.fromList xs in fromList Normal [mu,vr] instance ExponentialFamily Normal where sufficientStatistic Normal x = fromList Normal [x,x**2] baseMeasure _ _ = recip . sqrt $ 2 * pi instance Legendre Natural Normal where potential p = let [tht0,tht1] = listCoordinates p in -(tht0^2 / (4*tht1)) - 0.5 * log(-2*tht1) potentialDifferentials p = let [tht0,tht1] = listCoordinates p dv = tht0/tht1 in fromList (Tangent p) [-0.5*dv, 0.25 * dv^2 - 0.5/tht1] instance Legendre Mixture Normal where potential p = let [eta0,eta1] = listCoordinates p in -0.5 * log(eta1 - eta0^2) - 1/2 potentialDifferentials p = let [eta0,eta1] = listCoordinates p dff = eta0^2 - eta1 in fromList (Tangent p) [-eta0 / dff, 0.5 / dff] instance Riemannian Natural Normal where metric p = let [tht1,tht2] = listCoordinates p in fromList (Tensor (Tangent p) (Tangent p)) [-1/(2*tht2),tht1/(2*tht2^2),tht1/(2*tht2^2),(-tht1^2 + tht2)/(2*tht2^3) ] instance Riemannian Standard Normal where metric p = let [_,vr] = listCoordinates p in fromList (Tensor (Tangent p) (Tangent p)) [recip vr,0,0,recip $ 2*vr^2] instance Transition Standard Mixture Normal where transition p = let [mu,vr] = listCoordinates p in fromList Normal [mu, vr + mu^2] instance Transition Mixture Standard Normal where transition p = let [eta0,eta1] = listCoordinates p in fromList Normal [eta0, eta1 - eta0^2] instance Transition Standard Natural Normal where transition p = let [mu,vr] = listCoordinates p in fromList Normal [mu / vr, negate . recip $ 2 * vr] instance Transition Natural Standard Normal where transition p = let [tht0,tht1] = listCoordinates p in fromList Normal [-0.5 * tht0 / tht1, negate . recip $ 2 * tht1] instance Generative Natural Normal where generate = standardGenerate -- MeanNormal Distribution -- data MeanNormal = MeanNormal Double deriving (Show,Eq,Read) instance Manifold MeanNormal where dimension _ = 1 instance Statistical MeanNormal where type SampleSpace MeanNormal = Continuum sampleSpace _ = Continuum instance Generative Standard MeanNormal where generate p = do let (MeanNormal vr) = manifold p normal (coordinate 0 p) $ sqrt vr instance AbsolutelyContinuous Standard MeanNormal where density p = let (MeanNormal vr) = manifold p mu = coordinate 0 p in density . chart Standard $ fromList Normal [mu,vr] instance MaximumLikelihood Standard MeanNormal where mle mnrm xs = fromList mnrm [mean xs] instance Legendre Natural MeanNormal where potential p = let (MeanNormal vr) = manifold p in 0.5 * vr * coordinate 0 p^2 potentialDifferentials p = let (MeanNormal vr) = manifold p in fromList (Tangent p) [vr * coordinate 0 p] instance Legendre Mixture MeanNormal where potential p = let (MeanNormal vr) = manifold p in 0.5 / vr * coordinate 0 p^2 potentialDifferentials p = let (MeanNormal vr) = manifold p in fromList (Tangent p) [coordinate 0 p / vr] instance ExponentialFamily MeanNormal where sufficientStatistic mnrm x = fromList mnrm [x] baseMeasure (MeanNormal vr) x = (exp . negate $ 0.5 * x^2 / vr) / sqrt (2*pi*vr) instance Riemannian Natural MeanNormal where metric p = let (MeanNormal vr) = manifold p in fromList (Tensor (Tangent p) (Tangent p)) [vr] instance Transition Standard Natural MeanNormal where transition = potentialMapping . chart Mixture . breakChart instance Transition Natural Standard MeanNormal where transition = breakChart . potentialMapping instance Transition Standard Mixture MeanNormal where transition = breakChart instance Transition Mixture Standard MeanNormal where transition = breakChart -- Multivariate Normal -- data MultivariateNormal = MultivariateNormal { sampleSpaceDimension :: Int } deriving (Eq, Read, Show) generateMultivariateNormal :: C.Vector Double -> M.Matrix Double -> RandST s (C.Vector Double) -- | Samples from a multivariate Normal. generateMultivariateNormal mus rtsgma = do nrms <- C.replicateM n $ normal 0 1 return $ mus + (M.#>) rtsgma nrms where n = C.length mus muSigmaToMultivariateNormal :: C.Vector Double -> M.Matrix Double -> Standard :#: MultivariateNormal -- | Generates a multivariateNormal by way of a covariance matrix i.e. by taking -- the square root. muSigmaToMultivariateNormal mus sgma = fromCoordinates (MultivariateNormal $ C.length mus) $ mus C.++ M.flatten sgma splitCoordinates :: c :#: MultivariateNormal -> (Coordinates, M.Matrix Double) splitCoordinates p = let (MultivariateNormal n) = manifold p (mus,sgms) = C.splitAt n $ coordinates p in (mus,M.reshape n sgms) instance Manifold MultivariateNormal where dimension (MultivariateNormal n) = n + n^2 instance Statistical MultivariateNormal where type SampleSpace MultivariateNormal = Euclidean sampleSpace (MultivariateNormal n) = Euclidean n instance Generative Standard MultivariateNormal where generate p = let n = sampleSpaceDimension $ manifold p (mus,sds) = C.splitAt n $ coordinates p in generateMultivariateNormal mus $ M.reshape n sds instance AbsolutelyContinuous Standard MultivariateNormal where density p xs = let n = sampleSpaceDimension $ manifold p (mus,sgma) = splitCoordinates p flx = M.sqrtm sgma in recip ((2*pi)**(fromIntegral n / 2) * M.det flx) * exp (-0.5 * ((M.tr (M.inv sgma) M.#> C.zipWith (-) xs mus) `M.dot` C.zipWith (-) xs mus)) instance MaximumLikelihood Standard MultivariateNormal where mle _ xss = let n = fromIntegral $ length xss mus = recip (fromIntegral n) * sum xss sgma = recip (fromIntegral $ n - 1) * sum (map (\xs -> let xs' = xs - mus in M.outer xs' xs') xss) in muSigmaToMultivariateNormal mus sgma instance ExponentialFamily MultivariateNormal where sufficientStatistic m x = fromCoordinates m $ x C.++ M.flatten (M.outer x x) baseMeasure (MultivariateNormal n) _ = (2*pi)**(-fromIntegral n/2) instance Legendre Natural MultivariateNormal where potential p = let (tmu,tsgma) = splitCoordinates p invtsgma = M.inv tsgma in -0.25 * M.dot tmu (invtsgma M.#> tmu) - 0.5 * log(M.det $ M.scale (-2) tsgma) potentialDifferentials p = let (tmu,tsgma) = splitCoordinates p invtsgma = M.inv tsgma invapp = M.app invtsgma tmu in fromCoordinates (Tangent p) $ (-0.5 * invapp) C.++ M.flatten (M.scale (-0.5) invtsgma + M.scale 0.25 (M.outer invapp invapp)) instance Legendre Mixture MultivariateNormal where potential p = let (mmu,msgma) = splitCoordinates p --n = fromIntegral . sampleSpaceDimension $ manifold p in -0.5 * (1 + M.dot mmu (M.inv msgma M.#> mmu)) - 0.5 * log (M.det msgma) potentialDifferentials p = let (mmu,msgma) = splitCoordinates p invmsgma' = M.inv $ M.outer mmu mmu - msgma in fromCoordinates (Tangent p) $ (negate invmsgma' M.#> mmu) C.++ M.flatten (M.scale 0.5 invmsgma') instance Transition Standard Natural MultivariateNormal where transition p = let (mu,sgma) = splitCoordinates p invsgma = M.inv sgma in fromCoordinates (manifold p) $ (invsgma M.#> mu) C.++ M.flatten (M.scale (-0.5) invsgma) instance Transition Natural Standard MultivariateNormal where transition p = let (emu,esgma) = splitCoordinates p invesgma = M.inv esgma in fromCoordinates (manifold p) $ M.scale 0.5 (invesgma M.#> emu) C.++ M.flatten (M.scale 0.5 invesgma) instance Transition Standard Mixture MultivariateNormal where transition p = let (mu,sgma) = splitCoordinates p in fromCoordinates (manifold p) $ mu C.++ M.flatten (sgma + M.outer mu mu) instance Transition Mixture Standard MultivariateNormal where transition p = let (mmu,msgma) = splitCoordinates p in fromCoordinates (manifold p) $ mmu C.++ M.flatten (msgma -M.outer mmu mmu) {- --- Graveyard --- functionToCategorical :: Double -> Double -> Int -> (Double -> Double) -> Standard :#: Categorical Double -- | Takes range information in the form of a minimum, maximum, and sample count, -- and a function which represents an unnomralized pdf, and returns a normalized list of -- pairs (x,f(x)) over the specified range such that the sum of the f(x)s is 1. -- -- In principle, f should be strictly positive, but this is not checked. functionToCategorical mn mx n f = let (ks,fks) = unzip $ discretizeFunction mn mx n f in recip (sum fks) .> fromList (Categorical ks) fks -- Exponential Distribution -- data Exponential = Exponential deriving (Eq,Read,Show) instance Manifold Exponential where dimension _ = 1 type instance SampleSpace Exponential = Continuum instance Statistical Exponential where sampleSpace _ = Continuum instance Generative Standard Exponential where generate = exponential . C.head . coordinates instance AbsolutelyContinuous Standard Exponential where density p x = let lmda = C.head $ coordinates p in lmda * exp (negate $ lmda * x) instance MaximumLikelihood Standard Exponential where mle _ xs = chart Standard . fromList Exponential . (:[]) . recip . mean $ xs instance Legendre Natural Exponential where potential p = negate . log . negate $ coordinate 0 p potentialDifferentials p = fromCoordinates (Tangent p) . negate $ coordinates p instance Legendre Mixture Exponential where potential p = 1 - log eta potentialDifferentials p = instance ExponentialFamily Exponential where sufficientStatistic Exponential = fromCoordinates Exponential . C.singleton baseMeasure _ _ = 1 instance Transition Standard Natural Exponential where transition = breakChart . alterCoordinates negate instance Transition Natural Standard Exponential where transition = breakChart . alterCoordinates negate -}