{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      :  Mcmc.Proposal.Simplex
-- Description :  Proposals on simplices
-- Copyright   :  (c) Dominik Schrempf, 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Mon Oct 19 15:32:31 2020.
module Mcmc.Proposal.Simplex
  ( -- * Elements of simplices
    Simplex (toVector),
    simplexUniform,
    simplexFromVector,

    -- * Proposals on simplices
    dirichlet,
    beta,
  )
where

import Data.Aeson
import Data.Aeson.TH
import qualified Data.Vector.Unboxed as V
import Mcmc.Proposal
import Mcmc.Statistics.Types
import Numeric.Log
import Statistics.Distribution
import Statistics.Distribution.Beta
import Statistics.Distribution.Dirichlet

-- import Debug.Trace

-- | An element of a simplex.
--
-- A vector of non-negative values summing to one.
--
-- The nomenclature is not very consistent, because a K-dimensional simplex is
-- usually considered to be the set containing all @K@-dimensional vectors with
-- non-negative elements that sum to 1.0. However, I couldn't come up with a
-- better name. Maybe @SimplexElement@, but that was too long.
newtype Simplex = SimplexUnsafe {Simplex -> Vector Double
toVector :: V.Vector Double}
  deriving (Simplex -> Simplex -> Bool
(Simplex -> Simplex -> Bool)
-> (Simplex -> Simplex -> Bool) -> Eq Simplex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Simplex -> Simplex -> Bool
$c/= :: Simplex -> Simplex -> Bool
== :: Simplex -> Simplex -> Bool
$c== :: Simplex -> Simplex -> Bool
Eq, Int -> Simplex -> ShowS
[Simplex] -> ShowS
Simplex -> String
(Int -> Simplex -> ShowS)
-> (Simplex -> String) -> ([Simplex] -> ShowS) -> Show Simplex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Simplex] -> ShowS
$cshowList :: [Simplex] -> ShowS
show :: Simplex -> String
$cshow :: Simplex -> String
showsPrec :: Int -> Simplex -> ShowS
$cshowsPrec :: Int -> Simplex -> ShowS
Show)

$(deriveJSON defaultOptions ''Simplex)

-- Tolerance.
eps :: Double
eps :: Double
eps = Double
1e-14

-- Check if vector is normalized with tolerance 'eps'.
isNormalized :: V.Vector Double -> Bool
isNormalized :: Vector Double -> Bool
isNormalized Vector Double
v
  | Double -> Double
forall a. Num a => a -> a
abs (Vector Double -> Double
forall a. (Unbox a, Num a) => Vector a -> a
V.sum Vector Double
v Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1.0) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
eps = Bool
False
  | Bool
otherwise = Bool
True

-- Check if vector contains negative elements.
isNegative :: V.Vector Double -> Bool
isNegative :: Vector Double -> Bool
isNegative = (Double -> Bool) -> Vector Double -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
V.any (Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0)

-- | Ensure that the value vector is an element of a simplex.
--
-- Return 'Left' if:
-- - The value vector is empty.
-- - The value vector contains negative elements.
-- - The value vector is not normalized.
simplexFromVector :: V.Vector Double -> Either String Simplex
simplexFromVector :: Vector Double -> Either String Simplex
simplexFromVector Vector Double
v
  | Vector Double -> Bool
forall a. Unbox a => Vector a -> Bool
V.null Vector Double
v = String -> Either String Simplex
forall a b. a -> Either a b
Left String
"simplexFromVector: Vector is empty."
  | Vector Double -> Bool
isNegative Vector Double
v = String -> Either String Simplex
forall a b. a -> Either a b
Left String
"simplexFromVector: Vector contains negative elements."
  | Bool -> Bool
not (Vector Double -> Bool
isNormalized Vector Double
v) = String -> Either String Simplex
forall a b. a -> Either a b
Left String
"simplexFromVector: Vector is not normalized."
  | Bool
otherwise = Simplex -> Either String Simplex
forall a b. b -> Either a b
Right (Simplex -> Either String Simplex)
-> Simplex -> Either String Simplex
forall a b. (a -> b) -> a -> b
$ Vector Double -> Simplex
SimplexUnsafe Vector Double
v

-- | Create the uniform element of the K-dimensional simplex.
--
-- Set all values to \(1/D\).
simplexUniform :: Dimension -> Simplex
--                 Should never fail.
simplexUniform :: Int -> Simplex
simplexUniform Int
k = (String -> Simplex)
-> (Simplex -> Simplex) -> Either String Simplex -> Simplex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Simplex
forall a. HasCallStack => String -> a
error Simplex -> Simplex
forall a. a -> a
id (Either String Simplex -> Simplex)
-> Either String Simplex -> Simplex
forall a b. (a -> b) -> a -> b
$ Vector Double -> Either String Simplex
simplexFromVector (Vector Double -> Either String Simplex)
-> Vector Double -> Either String Simplex
forall a b. (a -> b) -> a -> b
$ Int -> Double -> Vector Double
forall a. Unbox a => Int -> a -> Vector a
V.replicate Int
k (Double
1.0 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k)

-- Tuning function is inverted (high alpha means small steps).
getTuningFunction :: TuningParameter -> (TuningParameter -> TuningParameter)
getTuningFunction :: Double -> Double -> Double
getTuningFunction Double
t = (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
t'')
  where
    -- Start with small steps.
    t' :: Double
t' = Double
t Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
10000
    -- Extremely small tuning parameters lead to numeric overflow. The square
    -- root pulls the tuning parameter closer to 1.0. However, overflow may
    -- still occur (the involved gamma functions grow faster than the
    -- exponential). I did not observe numeric underflow in my tests.
    t'' :: Double
t'' = Double -> Double
forall a. Floating a => a -> a
sqrt Double
t'

-- The tuning parameter is proportional to the inverted mean of the shape
-- parameter values.
--
-- The values determining the proposal size have been set using an example
-- analysis. They are good values for this analysis, but may fail for other
-- analyses.
dirichletSimple :: TuningParameter -> ProposalSimple Simplex
dirichletSimple :: Double -> ProposalSimple Simplex
dirichletSimple Double
t (SimplexUnsafe Vector Double
xs) GenIO
g = do
  -- If @t@ is high and above 1.0, the parameter vector will be low, and the
  -- variance will be high. If @t@ is low and below 1.0, the parameter vector
  -- will be high, and the Dirichlet distribution will be very concentrated with
  -- low variance.
  let ddXs :: DirichletDistribution
ddXs = (String -> DirichletDistribution)
-> (DirichletDistribution -> DirichletDistribution)
-> Either String DirichletDistribution
-> DirichletDistribution
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> DirichletDistribution
forall a. HasCallStack => String -> a
error DirichletDistribution -> DirichletDistribution
forall a. a -> a
id (Either String DirichletDistribution -> DirichletDistribution)
-> Either String DirichletDistribution -> DirichletDistribution
forall a b. (a -> b) -> a -> b
$ Vector Double -> Either String DirichletDistribution
dirichletDistribution (Vector Double -> Either String DirichletDistribution)
-> Vector Double -> Either String DirichletDistribution
forall a b. (a -> b) -> a -> b
$ (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
V.map Double -> Double
tf Vector Double
xs
  -- traceShowM $ V.map tf xs
  Vector Double
ys <- DirichletDistribution -> GenIO -> IO (Vector Double)
forall (m :: * -> *).
PrimMonad m =>
DirichletDistribution -> Gen (PrimState m) -> m (Vector Double)
dirichletSample DirichletDistribution
ddXs GenIO
g
  -- traceShowM ys
  -- Have to check if parameters are valid (because zeroes do occur).
  let eitherDdYs :: Either String DirichletDistribution
eitherDdYs = Vector Double -> Either String DirichletDistribution
dirichletDistribution (Vector Double -> Either String DirichletDistribution)
-> Vector Double -> Either String DirichletDistribution
forall a b. (a -> b) -> a -> b
$ (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
V.map Double -> Double
tf Vector Double
ys
  let r :: KernelRatio
r = case Either String DirichletDistribution
eitherDdYs of
        -- Set ratio to 0; so that the proposal will not be accepted.
        Left String
_ -> KernelRatio
0
        Right DirichletDistribution
ddYs -> DirichletDistribution -> Vector Double -> KernelRatio
dirichletDensity DirichletDistribution
ddYs Vector Double
xs KernelRatio -> KernelRatio -> KernelRatio
forall a. Fractional a => a -> a -> a
/ DirichletDistribution -> Vector Double -> KernelRatio
dirichletDensity DirichletDistribution
ddXs Vector Double
ys
  -- I do not think a Jacobian is necessary in this case. I do know that if a
  -- subset of states is updated a Jacobian would be necessary.
  --
  -- traceShowM mhRatio
  (Simplex, KernelRatio, KernelRatio)
-> IO (Simplex, KernelRatio, KernelRatio)
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Double -> Simplex
SimplexUnsafe Vector Double
ys, KernelRatio
r, KernelRatio
1.0)
  where
    tf :: Double -> Double
tf = Double -> Double -> Double
getTuningFunction Double
t

-- | Dirichlet proposal on a simplex.
--
-- For a given element of a K-dimensional simplex, propose a new element of the
-- K-dimensional simplex. The new element is sampled from the multivariate
-- Dirichlet distribution with parameter vector being proportional to the
-- current element of the simplex.
--
-- The tuning parameter is used to determine the concentration of the Dirichlet
-- distribution: the lower the tuning parameter, the higher the concentration.
--
-- The proposal dimension, which is the dimension of the simplex, is used to
-- determine the optimal acceptance rate.
--
-- For high dimensional simplices, this proposal may have low acceptance rates.
-- In this case, please see the coordinate wise 'beta' proposal.
dirichlet :: PDimension -> PName -> PWeight -> Tune -> Proposal Simplex
dirichlet :: PDimension -> PName -> PWeight -> Tune -> Proposal Simplex
dirichlet = PDescription
-> (Double -> ProposalSimple Simplex)
-> PDimension
-> PName
-> PWeight
-> Tune
-> Proposal Simplex
forall a.
PDescription
-> (Double -> ProposalSimple a)
-> PDimension
-> PName
-> PWeight
-> Tune
-> Proposal a
createProposal (String -> PDescription
PDescription String
"Dirichlet") Double -> ProposalSimple Simplex
dirichletSimple

-- The tuning parameter is the inverted mean of the shape values.
--
-- The values determining the proposal size have been set using an example
-- analysis. They are good values for this analysis, but may fail for other
-- analyses.
--
-- See also the 'dirichlet' proposal.
betaSimple :: Dimension -> TuningParameter -> ProposalSimple Simplex
betaSimple :: Int -> Double -> ProposalSimple Simplex
betaSimple Int
i Double
t (SimplexUnsafe Vector Double
xs) GenIO
g = do
  -- Shape parameters of beta distribution. Do not assume that the sum of the
  -- elements of 'xs' is 1.0, because then repeated proposals let the sum of the
  -- vector diverge.
  let aX :: Double
aX = Double
xI
      bX :: Double
bX = Double
xsSum Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
xI
      bdXI :: BetaDistribution
bdXI = Double -> Double -> BetaDistribution
betaDistr (Double -> Double
tf Double
aX) (Double -> Double
tf Double
bX)
  -- New value of element i.
  Double
yI <- BetaDistribution -> Gen RealWorld -> IO Double
forall d g (m :: * -> *).
(ContGen d, StatefulGen g m) =>
d -> g -> m Double
genContVar BetaDistribution
bdXI Gen RealWorld
GenIO
g
  -- Shape parameters of beta distribution.
  let aY :: Double
aY = Double
yI
      bY :: Double
bY = Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
yI
      eitherBdYI :: Maybe BetaDistribution
eitherBdYI = Double -> Double -> Maybe BetaDistribution
betaDistrE (Double -> Double
tf Double
aY) (Double -> Double
tf Double
bY)
  -- See 'dirichlet', which has the same construct.
  let r :: KernelRatio
r = case Maybe BetaDistribution
eitherBdYI of
        Maybe BetaDistribution
Nothing -> KernelRatio
0
        Just BetaDistribution
bdYI -> Double -> KernelRatio
forall a. a -> Log a
Exp (Double -> KernelRatio) -> Double -> KernelRatio
forall a b. (a -> b) -> a -> b
$ BetaDistribution -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
logDensity BetaDistribution
bdYI Double
xI Double -> Double -> Double
forall a. Num a => a -> a -> a
- BetaDistribution -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
logDensity BetaDistribution
bdXI Double
yI
      -- The absolute value of the determinant of the Jacobian. Derivation takes
      -- a while...
      ja1 :: Double
ja1 = Double
bY Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
bX
      jac :: KernelRatio
jac = Double -> KernelRatio
forall a. a -> Log a
Exp (Double -> KernelRatio) -> Double -> KernelRatio
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector Double -> Int
forall a. Unbox a => Vector a -> Int
V.length Vector Double
xs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
forall a. Floating a => a -> a
log Double
ja1
  -- Construct new vector.
  let -- Normalization function for other elements.
      -- nf x = x * bY / bX
      --
      -- It turns out, that this factor is also needed to compute the determinant
      -- of the Jacobian above.
      nf :: Double -> Double
nf Double
x = Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
ja1
      ys :: Vector Double
ys = Int -> (Int -> Double) -> Vector Double
forall a. Unbox a => Int -> (Int -> a) -> Vector a
V.generate (Vector Double -> Int
forall a. Unbox a => Vector a -> Int
V.length Vector Double
xs) (\Int
j -> if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
j then Double
yI else Double -> Double
nf (Vector Double
xs Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
V.! Int
j))
  (Simplex, KernelRatio, KernelRatio)
-> IO (Simplex, KernelRatio, KernelRatio)
forall (m :: * -> *) a. Monad m => a -> m a
return ((String -> Simplex)
-> (Simplex -> Simplex) -> Either String Simplex -> Simplex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Simplex
forall a. HasCallStack => String -> a
error Simplex -> Simplex
forall a. a -> a
id (Either String Simplex -> Simplex)
-> Either String Simplex -> Simplex
forall a b. (a -> b) -> a -> b
$ Vector Double -> Either String Simplex
simplexFromVector Vector Double
ys, KernelRatio
r, KernelRatio
jac)
  where
    xI :: Double
xI = Vector Double
xs Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
V.! Int
i
    xsSum :: Double
xsSum = Vector Double -> Double
forall a. (Unbox a, Num a) => Vector a -> a
V.sum Vector Double
xs
    tf :: Double -> Double
tf = Double -> Double -> Double
getTuningFunction Double
t

-- | Beta proposal on a specific coordinate @i@ on a simplex.
--
-- For a given element of a K-dimensional simplex, propose a new element of the
-- K-dimensional simplex. The coordinate @i@ of the new element is sampled from
-- the beta distribution. The other coordinates are normalized such that the
-- values sum to 1.0. The parameters of the beta distribution are chosen such
-- that the expected value of the beta distribution is the value of the old
-- coordinate.
--
-- The tuning parameter is used to determine the concentration of the beta
-- distribution: the lower the tuning parameter, the higher the concentration.
--
-- See also the 'dirichlet' proposal.
--
-- No "out of bounds" checks are performed during compile time. Run time errors
-- can occur if @i@ is negative, or if @i-1@ is larger than the length of the
-- element vector of the simplex.
--
-- This proposal has been assigned a dimension of 2. See the discussion at
-- 'PDimension'.
beta :: Dimension -> PName -> PWeight -> Tune -> Proposal Simplex
beta :: Int -> PName -> PWeight -> Tune -> Proposal Simplex
beta Int
i = PDescription
-> (Double -> ProposalSimple Simplex)
-> PDimension
-> PName
-> PWeight
-> Tune
-> Proposal Simplex
forall a.
PDescription
-> (Double -> ProposalSimple a)
-> PDimension
-> PName
-> PWeight
-> Tune
-> Proposal a
createProposal PDescription
description (Int -> Double -> ProposalSimple Simplex
betaSimple Int
i) (Int -> PDimension
PDimension Int
2)
  where
    description :: PDescription
description = String -> PDescription
PDescription (String -> PDescription) -> String -> PDescription
forall a b. (a -> b) -> a -> b
$ String
"Beta; coordinate: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i