-- Creation date: Thu Jun  9 15:12:39 2022.
--
-- See "Mcmc.Proposal.Hamiltonian.Hamiltonian".
--
-- References:
--
-- - [1] Chapter 5 of Handbook of Monte Carlo: Neal, R. M., MCMC Using
--   Hamiltonian Dynamics, In S. Brooks, A. Gelman, G. Jones, & X. Meng (Eds.),
--   Handbook of Markov Chain Monte Carlo (2011), CRC press.
--
-- - [2] Gelman, A., Carlin, J. B., Stern, H. S., & Rubin, D. B., Bayesian data
--   analysis (2014), CRC Press.
--
-- - [3] Review by Betancourt and notes: Betancourt, M., A conceptual
--   introduction to Hamiltonian Monte Carlo, arXiv, 1701–02434 (2017).
--
-- - [4] Matthew D. Hoffman, Andrew Gelman (2014) The No-U-Turn Sampler:
--   Adaptively Setting Path Lengths in Hamiltonian Monte Carlo, Journal of
--   Machine Learning Research.
{-# LANGUAGE BangPatterns #-}

-- |
-- Module      :  Mcmc.Proposal.Hamiltonian.Internal
-- Description :  Internal definitions related to Hamiltonian dynamics
-- Copyright   :  2022 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  experimental
-- Portability :  portable
module Mcmc.Proposal.Hamiltonian.Internal
  ( -- * Parameters
    HParamsI (..),
    hParamsIWith,

    -- * Tuning
    toAuxiliaryTuningParameters,
    fromAuxiliaryTuningParameters,
    findReasonableEpsilon,
    hTuningFunctionWith,

    -- * Structure of state
    checkHStructureWith,

    -- * Hamiltonian dynamics
    generateMomenta,
    exponentialKineticEnergy,

    -- * Leapfrog integrator
    Target,
    leapfrog,
  )
where

import Control.Monad
import Control.Monad.ST
import Data.Foldable
import Data.Maybe
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed as VU
import Mcmc.Proposal
import Mcmc.Proposal.Hamiltonian.Common
import Mcmc.Proposal.Hamiltonian.Masses
import qualified Numeric.LinearAlgebra as L
import Numeric.Log
import System.Random.Stateful

-- Variable tuning parameters.
--
-- See Algorithm 5 or 6 in [4].
data TParamsVar = TParamsVar
  { -- \bar{eps} of Algorithm 5 or 6.
    tpvLeapfrogScalingFactorMean :: LeapfrogScalingFactor,
    -- H_i of Algorithm 5 or 6.
    tpvHStatistics :: Double,
    -- m of Algorithm 5 or 6.
    tpvCurrentTuningStep :: Double
  }
  deriving (Show)

tParamsVar :: TParamsVar
tParamsVar = TParamsVar 1.0 0.0 1.0

-- Fixed tuning parameters.
--
-- See Algorithm 5 and 6 in [4].
data TParamsFixed = TParamsFixed
  { tpfEps0 :: Double,
    tpfMu :: Double,
    tpfGa :: Double,
    tpfT0 :: Double,
    tpfKa :: Double
  }
  deriving (Show)

-- The default tuning parameters in [4] which have been tweaked for tuning the
-- proposals after every iteration are:
--
--   mu = log $ 10 * eps
--   ga = 0.05
--   t0 = 10
--   ka = 0.75
--
-- For reference, I used the following default parameters with longer auto
-- tuning intervals.
--
--   mu = log $ 10 * eps
--   ga = 0.1
--   t0 = 3
--   ka = 0.5
--
-- Another good resource:
-- https://mc-stan.org/docs/2_29/reference-manual/hmc-algorithm-parameters.html.
--
-- NOTE: In theory, we could expose these internal tuning parameters to the
-- user.
tParamsFixedWith :: LeapfrogScalingFactor -> TParamsFixed
tParamsFixedWith eps = TParamsFixed eps mu ga t0 ka
  where
    -- "Mu is a freely chosen point that the iterators are shrunk towards". I am
    -- not exactly sure what this means. The parameter does not seem to have
    -- much of an effect.
    mu = log $ 10 * eps
    -- Gamma "controls the amount of shrinkage towards mu". The larger gamma is,
    -- the less variant epsilon is.
    --
    -- I changed this parameter from 0.05 to get better results in test runs.
    ga = 0.15
    -- "Free parameter that stabilizes the initial iterations". The larger t0
    -- is, the stabler epsilon is in the first iterations.
    t0 = 10
    -- "Setting the parameter ka < 1 allows us to give higher weight to more
    -- recent iterates and to more quickly forget the iterates produced during
    -- the early warmup stages."
    --
    -- I changed this parameter from 0.75 to get better results in test runs.
    ka = 0.75

-- All internal parameters.
data HParamsI = HParamsI
  { hpsLeapfrogScalingFactor :: LeapfrogScalingFactor,
    hpsLeapfrogSimulationLength :: LeapfrogSimulationLength,
    hpsMasses :: Masses,
    hpsTParamsVar :: TParamsVar,
    hpsTParamsFixed :: TParamsFixed,
    hpsMassesI :: MassesI,
    hpsMu :: Mu
  }
  deriving (Show)

-- NOTE: If changed, amend help text of 'defaultHParams', and 'defaultNParams'.
defaultLeapfrogScalingFactor :: LeapfrogScalingFactor
defaultLeapfrogScalingFactor = 0.1

-- NOTE: If changed, amend help text of 'defaultHParams'.
defaultLeapfrogSimulationLength :: LeapfrogSimulationLength
defaultLeapfrogSimulationLength = 0.5

-- NOTE: If changed, amend help text of 'defaultHParams'.
defaultMassesWith :: Int -> Masses
defaultMassesWith d = L.trustSym $ L.ident d

-- Instantiate all internal parameters.
hParamsIWith ::
  Target ->
  Positions ->
  Maybe LeapfrogScalingFactor ->
  Maybe LeapfrogSimulationLength ->
  Maybe Masses ->
  Either String HParamsI
hParamsIWith htarget p mEps mLa mMs = do
  d <- case VS.length p of
    0 -> eWith "Empty position vector."
    d -> Right d
  ms <- case mMs of
    Nothing -> Right $ defaultMassesWith d
    Just ms -> do
      let ms' = cleanMatrix $ L.unSym ms
          diagonalMs = L.toList $ L.takeDiag ms'
      when (any (<= 0) diagonalMs) $ eWith "Some diagonal masses are zero or negative."
      let nrows = L.rows ms'
          ncols = L.cols ms'
      when (nrows /= ncols) $ eWith "Mass matrix is not square."
      Right ms
  let msI = getMassesI ms
      mus = getMus ms
  la <- case mLa of
    Nothing -> Right defaultLeapfrogSimulationLength
    Just l
      | l <= 0 -> eWith "Leapfrog simulation length is zero or negative."
      | otherwise -> Right l
  eps <- case mEps of
    Nothing -> Right $ runST $ do
      -- NOTE: This is not random. However, I do not want to provide a generator
      -- when creating the proposal.
      g <- newSTGenM $ mkStdGen 42
      findReasonableEpsilon htarget ms p g
    Just e
      | e <= 0 -> eWith "Leapfrog scaling factor is zero or negative."
      | otherwise -> Right e
  let tParamsFixed = tParamsFixedWith eps
  pure $ HParamsI eps la ms tParamsVar tParamsFixed msI mus
  where
    eWith m = Left $ "hParamsIWith: " <> m

-- Save internal parameters.
toAuxiliaryTuningParameters :: HParamsI -> AuxiliaryTuningParameters
toAuxiliaryTuningParameters (HParamsI eps la ms tpv tpf _ _) =
  -- Put masses to the end. Like so, conversion is easier.
  VU.fromList $ eps : la : epsMean : h : m : eps0 : mu : ga : t0 : ka : msL
  where
    (TParamsVar epsMean h m) = tpv
    (TParamsFixed eps0 mu ga t0 ka) = tpf
    msL = VU.toList $ massesToVector ms

-- Load internal parameters.
fromAuxiliaryTuningParameters :: Dimension -> AuxiliaryTuningParameters -> Either String HParamsI
fromAuxiliaryTuningParameters d xs
  | (d * d) + 10 /= len = Left "fromAuxiliaryTuningParameters: Dimension mismatch."
  | fromIntegral (d * d) /= lenMs = Left "fromAuxiliaryTuningParameters: Masses dimension mismatch."
  | otherwise = case VU.toList $ VU.take 10 xs of
      [eps, la, epsMean, h, m, eps0, mu, ga, t0, ka] ->
        let tpv = TParamsVar epsMean h m
            tpf = TParamsFixed eps0 mu ga t0 ka
         in Right $ HParamsI eps la ms tpv tpf msI mus
      -- To please the exhaustive pattern match checker.
      _ -> Left "fromAuxiliaryTuningParameters: Impossible dimension mismatch."
  where
    len = VU.length xs
    msV = VU.drop 10 xs
    lenMs = VU.length msV
    ms = vectorToMasses d msV
    msI = getMassesI ms
    mus = getMus ms

-- See Algorithm 4 in [4].
findReasonableEpsilon ::
  StatefulGen g m =>
  Target ->
  Masses ->
  Positions ->
  g ->
  m LeapfrogScalingFactor
findReasonableEpsilon t ms q g = do
  p <- generateMomenta mu ms g
  case leapfrog t msI 1 eI q p of
    Nothing -> pure defaultLeapfrogScalingFactor
    Just (_, p', prQ, prQ') -> do
      let expEKin = exponentialKineticEnergy msI p
          expEKin' = exponentialKineticEnergy msI p'
          rI :: Double
          rI = exp $ ln $ prQ' * expEKin' / (prQ * expEKin)
          a :: Double
          a = if rI > 0.5 then 1 else (-1)
          go e r =
            if r ** a > 2 ** negate a
              then case leapfrog t msI 1 e q p of
                Nothing -> e
                Just (_, p'', _, prQ'') ->
                  let expEKin'' = exponentialKineticEnergy msI p''
                      r' :: Double
                      r' = exp $ ln $ prQ'' * expEKin'' / (prQ * expEKin)
                      e' = (2 ** a) * e
                   in go e' r'
              else e
      pure $ go eI rI
  where
    eI = 1.0
    msI = getMassesI ms
    mu = getMus ms

hTuningFunctionWith ::
  Dimension ->
  -- Conversion from value to vector.
  (a -> Positions) ->
  HTuningConf ->
  Maybe (TuningFunction a)
hTuningFunctionWith _ _ (HTuningConf HNoTuneLeapfrog HNoTuneMasses) = Nothing
hTuningFunctionWith n toVec (HTuningConf lc mc) = Just $ \tt pdim mar mxs (_, !ts) ->
  case tt of
    IntermediateTuningFastProposalsOnly -> err "fast intermediate tuning step but slow proposal"
    NormalTuningFastProposalsOnly -> err "fast normal tuning step but slow proposal"
    _ ->
      let (HParamsI eps la ms tpv tpf msI mus) =
            -- NOTE: Use error here, because a dimension mismatch is a serious bug.
            either error id $ fromAuxiliaryTuningParameters n ts
          (TParamsVar epsMean h m) = tpv
          (TParamsFixed eps0 mu ga t0 ka) = tpf
          m' = SmoothingParameter $ round m
          (ms', msI') = case tt of
            IntermediateTuningAllProposals -> (ms, msI)
            _ ->
              let xs = fromMaybe (err "empty trace") mxs
               in case mc of
                    HNoTuneMasses -> (ms, msI)
                    HTuneDiagonalMassesOnly -> tuneDiagonalMassesOnly m' toVec xs (ms, msI)
                    HTuneAllMasses -> tuneAllMasses m' toVec xs (ms, msI)
          (eps'', epsMean'', h'') = case tt of
            LastTuningFastProposalsOnly -> (eps, epsMean, h)
            _ -> case lc of
              HNoTuneLeapfrog -> (eps, epsMean, h)
              HTuneLeapfrog ->
                let ar = fromMaybe (err "no acceptance rate") mar
                    delta = getOptimalRate pdim
                    -- Algorithm 6; explained in Section 3.2.
                    --
                    -- Another good resource is the Tensorflow API
                    -- documentation:
                    -- https://www.tensorflow.org/probability/api_docs/python/tfp/mcmc/DualAveragingStepSizeAdaptation.
                    --
                    -- See also Nesterov (2007) Primal-dual subgradient methods
                    -- for convex problems, Mathematical Programming.
                    c = recip $ m + t0
                    h' = (1.0 - c) * h + c * (delta - ar)
                    eps' = exp $ mu - (sqrt m / ga) * h'
                    mMKa = m ** negate ka
                    -- Original formula is:
                    -- epsMean' = exp $ mMKa * logEps' + (1 - mMKa) * log epsMean
                    -- Which is the same as:
                    epsMean' = (eps' ** mMKa) * (epsMean ** (1 - mMKa))
                    epsF = if tt == LastTuningAllProposals then epsMean' else eps'
                 in (epsF, epsMean', h')
          tpv' = TParamsVar epsMean'' h'' (m + 1.0)
       in (eps'' / eps0, toAuxiliaryTuningParameters $ HParamsI eps'' la ms' tpv' tpf msI' mus)
  where
    err msg = error $ "hTuningFunctionWith: " <> msg

checkHStructureWith :: Foldable s => Masses -> HStructure s -> Maybe String
checkHStructureWith ms (HStructure x toVec fromVec)
  | toList (fromVec x xVec) /= toList x = eWith "'fromVectorWith x (toVector x) /= x' for sample state."
  | L.size xVec /= nrows = eWith "Mass matrix and 'toVector x' have different sizes for sample state."
  | otherwise = Nothing
  where
    eWith m = Just $ "checkHStructureWith: " <> m
    nrows = L.rows $ L.unSym ms
    xVec = toVec x

-- Generate momenta for a new iteration.
generateMomenta ::
  StatefulGen g m =>
  Mu ->
  Masses ->
  g ->
  m Momenta
generateMomenta mu masses gen = do
  seed <- uniformM gen
  let momenta = L.gaussianSample seed 1 mu masses
  return $ L.flatten momenta

-- Compute exponent of kinetic energy.
--
-- Use a general matrix which has special representations for diagonal and
-- sparse matrices, both of which are really useful here.
exponentialKineticEnergy ::
  MassesI ->
  Momenta ->
  Log Double
exponentialKineticEnergy msI xs =
  -- NOTE: Because of numerical errors, the following formulas exhibit different
  -- traces (although the posterior appears to be the same):
  -- - This one we cannot use with general matrices:
  --   Exp $ (-0.5) * ((xs L.#> msI) L.<.> xs)
  Exp $ (-0.5) * (xs L.<.> (msI L.!#> xs))

-- Function calculating target value and gradient.
--
-- The function acts on the subset of the state manipulated by the proposal but
-- the value and gradient have to be calculated for the complete state. The
-- reason is that parameters untouched by the Hamiltonian proposal may affect
-- the result or the gradient.
--
-- Make sure that the value is calculated lazily because many times, only the
-- gradient is required.
type Target = Positions -> (Log Double, Positions)

-- Leapfrog integrator.
leapfrog ::
  Target ->
  MassesI ->
  --
  LeapfrogTrajectoryLength ->
  LeapfrogScalingFactor ->
  --
  Positions ->
  Momenta ->
  -- | (New positions, new momenta, old target, new target).
  --
  -- Fail if state is not valid.
  Maybe (Positions, Momenta, Log Double, Log Double)
leapfrog tF msI l eps q p = do
  -- The first half step of the momenta.
  (x, pHalf) <-
    let (x, pHalf) = leapfrogStepMomenta (0.5 * eps) tF q p
     in if x > 0.0
          then Just (x, pHalf)
          else Nothing
  -- L-1 full steps for positions and momenta. This gives the positions q_{L-1},
  -- and the momenta p_{L-1/2}.
  (qLM1, pLM1Half) <- go (l - 1) $ Just (q, pHalf)
  -- The last full step of the positions.
  let qL = leapfrogStepPositions msI eps qLM1 pLM1Half
  -- The last half step of the momenta.
  (x', pL) <-
    let (x', pL) = leapfrogStepMomenta (0.5 * eps) tF qL pLM1Half
     in if x' > 0.0
          then Just (x', pL)
          else Nothing
  return (qL, pL, x, x')
  where
    go _ Nothing = Nothing
    go n (Just (qs, ps))
      | n <= 0 = Just (qs, ps)
      | otherwise =
          let qs' = leapfrogStepPositions msI eps qs ps
              (x, ps') = leapfrogStepMomenta eps tF qs' p
           in if x > 0.0
                then go (n - 1) $ Just (qs', ps')
                else Nothing

leapfrogStepMomenta ::
  LeapfrogScalingFactor ->
  Target ->
  -- Current positions.
  Positions ->
  -- Current momenta.
  Momenta ->
  -- New momenta; also return value target function to be collected at the end
  -- of the leapfrog integration.
  (Log Double, Momenta)
leapfrogStepMomenta eps tf q p = (x, p + L.scale eps g)
  where
    (x, g) = tf q

leapfrogStepPositions ::
  MassesI ->
  LeapfrogScalingFactor ->
  -- Current positions.
  Positions ->
  -- Current momenta.
  Momenta ->
  -- New positions.
  Positions
-- NOTE: Because of numerical errors, the following formulas exhibit different
-- traces (although the posterior appears to be the same):
-- 1. This one we cannot use with general matrices:
--    leapfrogStepPositions msI eps q p = q + (L.scale eps msI L.!#> p)
-- 2. This one seems to be more numerically unstable:
--    leapfrogStepPositions msI eps q p = q + L.scale eps (msI L.!#> p)
leapfrogStepPositions msI eps q p = q + (msI L.!#> L.scale eps p)