-- |
-- Module      :  ELynx.Data.MarkovProcess.Nucleotide
-- Description :  Substitution models using nucleotides
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Jan 24 08:33:26 2019.
--
-- XXX: Maybe rename to something like /DNA substitution models/. Nucleotide ~
-- Alphabet; DNA ~ Character.
--
-- The order of nucleotides is A, C, G, T; see 'ELynx.Data.Character.Nucleotide'.
--
-- For the different DNA substitution models, please see
-- https://en.wikipedia.org/wiki/Models_of_DNA_evolution
module ELynx.Data.MarkovProcess.Nucleotide
  ( jc,
    f81,
    hky,
    gtr4,
  )
where

import qualified Data.Vector.Storable as V
import ELynx.Data.Alphabet.Alphabet
import ELynx.Data.MarkovProcess.RateMatrix
import ELynx.Data.MarkovProcess.SubstitutionModel
import Numeric.LinearAlgebra hiding (normalize)

-- XXX: Another idea of structuring the code. This would probably be cleaner in
-- the long run.

-- data PhyloModel = MixtureModel | SubstitutionModel

--
-- I think it could simply be:
-- data PhyloModel = [(Weight, SubstitutionModel)]
--

-- data MixtureModel = [(Weight, SubstitutionModel)]

-- data SubstitutionModel = SMDNA DNASubstitutionModel | SMAA AASubstitutionModel

-- data DNASubstitutionModel = JC | HKY Double StationaryDistribution

-- data AASubstitutionModel = LG | ...

-- | Number of states (nucleotides) of the DNA substitution models.
--
-- Hard coded instead of being computed as
-- @length (alphabet :: [Nucleotide])@. This reduces model dependencies, and
-- the number of nucleotides will not change.
n :: Int
n = 4

-- | JC model matrix.
--
-- An @n@ x @n@ exchangeability matrix with zeros on the diagonal and ones
-- everywhere else, i.e., all exchangeabilities between distinct nucleotides
-- are equal.
jcExch :: ExchangeabilityMatrix
jcExch =
  (n >< n)
    -- One source line per matrix row; '><' fills the matrix row by row.
    [ 0.0, 1.0, 1.0, 1.0,
      1.0, 0.0, 1.0, 1.0,
      1.0, 1.0, 0.0, 1.0,
      1.0, 1.0, 1.0, 0.0
    ]

-- | Uniform distribution over the nucleotides: a vector of length 'n' in which
-- every entry is @1 / n@.
uniformVec :: Vector Double
uniformVec = V.replicate n (1 / fromIntegral n)

-- | JC substitution model.
--
-- Built with 'substitutionModel' for the 'DNA' alphabet under the name
-- @\"JC\"@, with an empty parameter list, the uniform stationary distribution
-- 'uniformVec', and the exchangeability matrix 'jcExch'.
jc :: SubstitutionModel
jc = substitutionModel DNA "JC" [] uniformVec jcExch

-- | F81 substitution model.
--
-- Uses the same exchangeability matrix as 'jc' ('jcExch'), but the stationary
-- distribution @d@ is supplied by the caller. The model is named @\"F81\"@ and
-- has an empty parameter list.
f81 :: StationaryDistribution -> SubstitutionModel
f81 d = substitutionModel DNA "F81" [] d jcExch

-- | HKY model matrix for a given value @k@.
--
-- An @n@ x @n@ exchangeability matrix with zeros on the diagonal. With the
-- nucleotide order A, C, G, T, the entries for the pairs A-G and C-T are @k@;
-- all other off-diagonal entries are 1.
hkyExch :: Double -> ExchangeabilityMatrix
hkyExch k =
  (n >< n)
    -- One source line per matrix row; '><' fills the matrix row by row.
    [ 0.0, 1.0, k, 1.0,
      1.0, 0.0, 1.0, k,
      k, 1.0, 0.0, 1.0,
      1.0, k, 1.0, 0.0
    ]

-- | HKY substitution model.
--
-- Takes the value @k@ used to build the exchangeability matrix (see 'hkyExch')
-- and the stationary distribution @d@. The model is named @\"HKY\"@ and records
-- @[k]@ as its parameter list.
hky :: Double -> StationaryDistribution -> SubstitutionModel
hky k d = substitutionModel DNA "HKY" [k] d (hkyExch k)

-- | GTR substitution model for nucleotides.
--
-- Takes a list of exchangeabilities @es@ and the stationary distribution @d@.
-- The exchangeability matrix is built with @exchFromListUpper n es@;
-- presumably @es@ lists the entries of the upper triangle of the matrix (see
-- 'exchFromListUpper' for the exact layout and required length). The model is
-- named @\"GTR\"@ and records @es@ as its parameter list.
gtr4 :: [Double] -> StationaryDistribution -> SubstitutionModel
gtr4 es d = substitutionModel DNA "GTR" es d e
  where
    e = exchFromListUpper n es