-- |
-- Module      :  ELynx.Data.MarkovProcess.SubstitutionModel
-- Description :  Data type describing substitution model
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Tue Jan 29 19:10:46 2019.
--
-- To be imported qualified.
module ELynx.Data.MarkovProcess.SubstitutionModel
  ( -- * Types
    Name,
    Params,
    SubstitutionModel,

    -- * Accessors
    alphabet,
    name,
    params,
    stationaryDistribution,
    exchangeabilityMatrix,
    rateMatrix,
    totalRate,

    -- * Building substitution models
    substitutionModel,

    -- * Transformations
    scale,
    normalize,
    appendName,
  )
where

import qualified Data.Vector.Storable as V
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.MarkovProcess.RateMatrix as R
import qualified Numeric.LinearAlgebra as LinAlg

-- | Name of substitution model; abstracted and subject to change.
type Name = String

-- | Parameters of substitution model. May be the empty list.
type Params = [Double]

-- XXX: Use a proper data type. For example:
-- data SubstitutionModelAA = LG | WAG | LG-Custom dist | ...
-- data SubstitutionModelNuc = JC | HKY p1 p2 ... | GTR p1 p2 ...
--
-- I thought about this a lot, and it seems easier like it is at the moment.
-- Since the data types are abstracted anyways, not much harm can be done. Of
-- course, conflicting substitution models can be declared, or duplicate ones
-- with different names, but well...

-- | Complete definition of a substitution model. Create instances with
-- 'substitutionModel'. A substitution model has an alphabet, a name, and a list
-- of parameters (e.g., the kappa value for the HKY model). Further, the
-- transition rate matrix is defined by a stationary distribution and a set of
-- exchangeabilities.
data SubstitutionModel = SubstitutionModel
  { -- | Alphabet
    SubstitutionModel -> Alphabet
alphabet :: Alphabet,
    -- | Name
    SubstitutionModel -> Name
name :: Name,
    -- | List of parameters
    SubstitutionModel -> Params
params :: Params,
    -- | Stationary distribution
    SubstitutionModel -> StationaryDistribution
stationaryDistribution :: R.StationaryDistribution,
    -- | Exchangeability matrix
    SubstitutionModel -> ExchangeabilityMatrix
exchangeabilityMatrix :: R.ExchangeabilityMatrix
  }
  deriving (Int -> SubstitutionModel -> ShowS
[SubstitutionModel] -> ShowS
SubstitutionModel -> Name
(Int -> SubstitutionModel -> ShowS)
-> (SubstitutionModel -> Name)
-> ([SubstitutionModel] -> ShowS)
-> Show SubstitutionModel
forall a.
(Int -> a -> ShowS) -> (a -> Name) -> ([a] -> ShowS) -> Show a
showList :: [SubstitutionModel] -> ShowS
$cshowList :: [SubstitutionModel] -> ShowS
show :: SubstitutionModel -> Name
$cshow :: SubstitutionModel -> Name
showsPrec :: Int -> SubstitutionModel -> ShowS
$cshowsPrec :: Int -> SubstitutionModel -> ShowS
Show, ReadPrec [SubstitutionModel]
ReadPrec SubstitutionModel
Int -> ReadS SubstitutionModel
ReadS [SubstitutionModel]
(Int -> ReadS SubstitutionModel)
-> ReadS [SubstitutionModel]
-> ReadPrec SubstitutionModel
-> ReadPrec [SubstitutionModel]
-> Read SubstitutionModel
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [SubstitutionModel]
$creadListPrec :: ReadPrec [SubstitutionModel]
readPrec :: ReadPrec SubstitutionModel
$creadPrec :: ReadPrec SubstitutionModel
readList :: ReadS [SubstitutionModel]
$creadList :: ReadS [SubstitutionModel]
readsPrec :: Int -> ReadS SubstitutionModel
$creadsPrec :: Int -> ReadS SubstitutionModel
Read)

-- | Calculate rate matrix from substitution model.
rateMatrix :: SubstitutionModel -> R.RateMatrix
rateMatrix :: SubstitutionModel -> ExchangeabilityMatrix
rateMatrix SubstitutionModel
sm =
  ExchangeabilityMatrix
-> StationaryDistribution -> ExchangeabilityMatrix
R.fromExchangeabilityMatrix
    (SubstitutionModel -> ExchangeabilityMatrix
exchangeabilityMatrix SubstitutionModel
sm)
    (SubstitutionModel -> StationaryDistribution
stationaryDistribution SubstitutionModel
sm)

-- | Get scale of substitution model.
totalRate :: SubstitutionModel -> Double
totalRate :: SubstitutionModel -> Double
totalRate SubstitutionModel
sm = ExchangeabilityMatrix -> Double
R.totalRate (SubstitutionModel -> ExchangeabilityMatrix
rateMatrix SubstitutionModel
sm)

normalizeSumVec :: V.Vector Double -> V.Vector Double
normalizeSumVec :: StationaryDistribution -> StationaryDistribution
normalizeSumVec StationaryDistribution
v = (Double -> Double)
-> StationaryDistribution -> StationaryDistribution
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
V.map (Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
s) StationaryDistribution
v
  where
    s :: Double
s = StationaryDistribution -> Double
forall a. (Storable a, Num a) => Vector a -> a
V.sum StationaryDistribution
v
{-# INLINE normalizeSumVec #-}

-- | Create normalized 'SubstitutionModel'. See 'normalize'.
substitutionModel ::
  Alphabet ->
  Name ->
  Params ->
  R.StationaryDistribution ->
  R.ExchangeabilityMatrix ->
  SubstitutionModel
substitutionModel :: Alphabet
-> Name
-> Params
-> StationaryDistribution
-> ExchangeabilityMatrix
-> SubstitutionModel
substitutionModel Alphabet
c Name
n Params
ps StationaryDistribution
d ExchangeabilityMatrix
e =
  if StationaryDistribution -> Bool
R.isValid StationaryDistribution
d
    then SubstitutionModel -> SubstitutionModel
normalize (SubstitutionModel -> SubstitutionModel)
-> SubstitutionModel -> SubstitutionModel
forall a b. (a -> b) -> a -> b
$ Alphabet
-> Name
-> Params
-> StationaryDistribution
-> ExchangeabilityMatrix
-> SubstitutionModel
SubstitutionModel Alphabet
c Name
n Params
ps (StationaryDistribution -> StationaryDistribution
normalizeSumVec StationaryDistribution
d) ExchangeabilityMatrix
e
    else
      Name -> SubstitutionModel
forall a. HasCallStack => Name -> a
error (Name -> SubstitutionModel) -> Name -> SubstitutionModel
forall a b. (a -> b) -> a -> b
$
        Name
"substitionModel: Stationary distribution does not sum to 1.0: "
          Name -> ShowS
forall a. [a] -> [a] -> [a]
++ StationaryDistribution -> Name
forall a. Show a => a -> Name
show StationaryDistribution
d

-- | Scale the rate of a substitution model by given factor.
scale :: Double -> SubstitutionModel -> SubstitutionModel
scale :: Double -> SubstitutionModel -> SubstitutionModel
scale Double
r SubstitutionModel
sm = SubstitutionModel
sm {exchangeabilityMatrix :: ExchangeabilityMatrix
exchangeabilityMatrix = ExchangeabilityMatrix
em'}
  where
    em' :: ExchangeabilityMatrix
em' = Double -> ExchangeabilityMatrix -> ExchangeabilityMatrix
forall t (c :: * -> *). Linear t c => t -> c t -> c t
LinAlg.scale Double
r (ExchangeabilityMatrix -> ExchangeabilityMatrix)
-> ExchangeabilityMatrix -> ExchangeabilityMatrix
forall a b. (a -> b) -> a -> b
$ SubstitutionModel -> ExchangeabilityMatrix
exchangeabilityMatrix SubstitutionModel
sm

-- | Normalize a substitution model, so that, on average, one substitution
-- happens per unit time.
normalize :: SubstitutionModel -> SubstitutionModel
normalize :: SubstitutionModel -> SubstitutionModel
normalize SubstitutionModel
sm = Double -> SubstitutionModel -> SubstitutionModel
scale (Double
1.0 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
r) SubstitutionModel
sm where r :: Double
r = SubstitutionModel -> Double
totalRate SubstitutionModel
sm

-- | Abbend to name.
appendName :: Name -> SubstitutionModel -> SubstitutionModel
appendName :: Name -> SubstitutionModel -> SubstitutionModel
appendName Name
n SubstitutionModel
sm = SubstitutionModel
sm {name :: Name
name = Name
n'} where n' :: Name
n' = SubstitutionModel -> Name
name SubstitutionModel
sm Name -> ShowS
forall a. Semigroup a => a -> a -> a
<> Name
n