module ELynx.Data.MarkovProcess.AminoAcid
( lg,
lgCustom,
wag,
wagCustom,
poisson,
poissonCustom,
gtr20,
)
where
import Data.ByteString.Internal (c2w)
import Data.List (elemIndex)
import Data.Maybe (fromMaybe)
import qualified Data.Vector.Storable as V
import Data.Word (Word8)
import ELynx.Data.Alphabet.Alphabet
import ELynx.Data.MarkovProcess.RateMatrix
import ELynx.Data.MarkovProcess.SubstitutionModel
import Numeric.LinearAlgebra
n :: Int
n = 20
aaAlphaOrder :: [Word8]
aaAlphaOrder =
map
c2w
[ 'A',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'K',
'L',
'M',
'N',
'P',
'Q',
'R',
'S',
'T',
'V',
'W',
'Y'
]
aaPamlOrder :: [Word8]
aaPamlOrder =
map
c2w
[ 'A',
'R',
'N',
'D',
'C',
'Q',
'E',
'G',
'H',
'I',
'L',
'K',
'M',
'F',
'P',
'S',
'T',
'W',
'Y',
'V'
]
alphaIndexToPamlIndex :: Int -> Int
alphaIndexToPamlIndex i =
fromMaybe
(error $ "Could not convert index " ++ show i ++ ".")
(elemIndex aa aaPamlOrder)
where
aa = aaAlphaOrder !! i
pamlToAlphaVec :: Vector R -> Vector R
pamlToAlphaVec v = build n (\i -> v ! alphaIndexToPamlIndex (round i))
pamlToAlphaMat :: Matrix R -> Matrix R
pamlToAlphaMat m =
build
(n, n)
( \i j -> m ! alphaIndexToPamlIndex (round i) ! alphaIndexToPamlIndex (round j)
)
lgExchRawPaml :: [Double]
lgExchRawPaml =
[ 0.425093,
0.276818,
0.751878,
0.395144,
0.123954,
5.076149,
2.489084,
0.534551,
0.528768,
0.062556,
0.969894,
2.807908,
1.695752,
0.523386,
0.084808,
1.038545,
0.363970,
0.541712,
5.243870,
0.003499,
4.128591,
2.066040,
0.390192,
1.437645,
0.844926,
0.569265,
0.267959,
0.348847,
0.358858,
2.426601,
4.509238,
0.927114,
0.640543,
4.813505,
0.423881,
0.311484,
0.149830,
0.126991,
0.191503,
0.010690,
0.320627,
0.072854,
0.044265,
0.008705,
0.108882,
0.395337,
0.301848,
0.068427,
0.015076,
0.594007,
0.582457,
0.069673,
0.044261,
0.366317,
4.145067,
0.536518,
6.326067,
2.145078,
0.282959,
0.013266,
3.234294,
1.807177,
0.296636,
0.697264,
0.159069,
0.137500,
1.124035,
0.484133,
0.371004,
0.025548,
0.893680,
1.672569,
0.173735,
0.139538,
0.442472,
4.273607,
6.312358,
0.656604,
0.253701,
0.052722,
0.089525,
0.017416,
1.105251,
0.035855,
0.018811,
0.089586,
0.682139,
1.112727,
2.592692,
0.023918,
1.798853,
1.177651,
0.332533,
0.161787,
0.394456,
0.075382,
0.624294,
0.419409,
0.196961,
0.508851,
0.078281,
0.249060,
0.390322,
0.099849,
0.094464,
4.727182,
0.858151,
4.008358,
1.240275,
2.784478,
1.223828,
0.611973,
1.739990,
0.990012,
0.064105,
0.182287,
0.748683,
0.346960,
0.361819,
1.338132,
2.139501,
0.578987,
2.000679,
0.425860,
1.143480,
1.080136,
0.604545,
0.129836,
0.584262,
1.033739,
0.302936,
1.136863,
2.020366,
0.165001,
0.571468,
6.472279,
0.180717,
0.593607,
0.045376,
0.029890,
0.670128,
0.236199,
0.077852,
0.268491,
0.597054,
0.111660,
0.619632,
0.049906,
0.696175,
2.457121,
0.095131,
0.248862,
0.140825,
0.218959,
0.314440,
0.612025,
0.135107,
1.165532,
0.257336,
0.120037,
0.054679,
5.306834,
0.232523,
0.299648,
0.131932,
0.481306,
7.803902,
0.089613,
0.400547,
0.245841,
3.151815,
2.547870,
0.170887,
0.083688,
0.037967,
1.959291,
0.210332,
0.245034,
0.076701,
0.119013,
10.649107,
1.702745,
0.185202,
1.898718,
0.654683,
0.296501,
0.098369,
2.188158,
0.189510,
0.249313
]
lgExch :: ExchangeabilityMatrix
lgExch = pamlToAlphaMat $ exchFromListLower n lgExchRawPaml
normalizeSumVec :: Vector Double -> Vector Double
normalizeSumVec v = V.map (/ s) v
where
s = V.sum v
{-# INLINE normalizeSumVec #-}
lgStatDistPaml :: StationaryDistribution
lgStatDistPaml =
normalizeSumVec $
fromList
[ 0.079066,
0.055941,
0.041977,
0.053052,
0.012937,
0.040767,
0.071586,
0.057337,
0.022355,
0.062157,
0.099081,
0.064600,
0.022951,
0.042302,
0.044040,
0.061197,
0.053287,
0.012066,
0.034155,
0.069147
]
lgStatDist :: StationaryDistribution
lgStatDist = pamlToAlphaVec lgStatDistPaml
lg :: SubstitutionModel
lg = substitutionModel Protein "LG" [] lgStatDist lgExch
lgCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
lgCustom mnm d = substitutionModel Protein nm [] d lgExch
where
nm = fromMaybe "LG-Custom" mnm
wagExchRawPaml :: [Double]
wagExchRawPaml =
[ 55.15710,
50.98480,
63.53460,
73.89980,
14.73040,
542.94200,
102.70400,
52.81910,
26.52560,
3.02949,
90.85980,
303.55000,
154.36400,
61.67830,
9.88179,
158.28500,
43.91570,
94.71980,
617.41600,
2.13520,
546.94700,
141.67200,
58.46650,
112.55600,
86.55840,
30.66740,
33.00520,
56.77170,
31.69540,
213.71500,
395.62900,
93.06760,
24.89720,
429.41100,
57.00250,
24.94100,
19.33350,
18.69790,
55.42360,
3.94370,
17.01350,
11.39170,
12.73950,
3.04501,
13.81900,
39.79150,
49.76710,
13.15280,
8.48047,
38.42870,
86.94890,
15.42630,
6.13037,
49.94620,
317.09700,
90.62650,
535.14200,
301.20100,
47.98550,
7.40339,
389.49000,
258.44300,
37.35580,
89.04320,
32.38320,
25.75550,
89.34960,
68.31620,
19.82210,
10.37540,
39.04820,
154.52600,
31.51240,
17.41000,
40.41410,
425.74600,
485.40200,
93.42760,
21.04940,
10.27110,
9.61621,
4.67304,
39.80200,
9.99208,
8.11339,
4.99310,
67.93710,
105.94700,
211.51700,
8.88360,
119.06300,
143.85500,
67.94890,
19.50810,
42.39840,
10.94040,
93.33720,
68.23550,
24.35700,
69.61980,
9.99288,
41.58440,
55.68960,
17.13290,
16.14440,
337.07900,
122.41900,
397.42300,
107.17600,
140.76600,
102.88700,
70.49390,
134.18200,
74.01690,
31.94400,
34.47390,
96.71300,
49.39050,
54.59310,
161.32800,
212.11100,
55.44130,
203.00600,
37.48660,
51.29840,
85.79280,
82.27650,
22.58330,
47.33070,
145.81600,
32.66220,
138.69800,
151.61200,
17.19030,
79.53840,
437.80200,
11.31330,
116.39200,
7.19167,
12.97670,
71.70700,
21.57370,
15.65570,
33.69830,
26.25690,
21.24830,
66.53090,
13.75050,
51.57060,
152.96400,
13.94050,
52.37420,
11.08640,
24.07350,
38.15330,
108.60000,
32.57110,
54.38330,
22.77100,
19.63030,
10.36040,
387.34400,
42.01700,
39.86180,
13.32640,
42.84370,
645.42800,
21.60460,
78.69930,
29.11480,
248.53900,
200.60100,
25.18490,
19.62460,
15.23350,
100.21400,
30.12810,
58.87310,
18.72470,
11.83580,
782.13000,
180.03400,
30.54340,
205.84500,
64.98920,
31.48870,
23.27390,
138.82300,
36.53690,
31.47300
]
wagExch :: ExchangeabilityMatrix
wagExch = pamlToAlphaMat $ exchFromListLower n wagExchRawPaml
wagStatDistPaml :: StationaryDistribution
wagStatDistPaml =
normalizeSumVec $
fromList
[ 0.0866279,
0.043972,
0.0390894,
0.0570451,
0.0193078,
0.0367281,
0.0580589,
0.0832518,
0.0244313,
0.048466,
0.086209,
0.0620286,
0.0195027,
0.0384319,
0.0457631,
0.0695179,
0.0610127,
0.0143859,
0.0352742,
0.0708957
]
wagStatDist :: StationaryDistribution
wagStatDist = pamlToAlphaVec wagStatDistPaml
wag :: SubstitutionModel
wag = substitutionModel Protein "WAG" [] wagStatDist wagExch
wagCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
wagCustom mnm d = substitutionModel Protein nm [] d wagExch
where
nm = fromMaybe "WAG-Custom" mnm
matrixSetDiagToZero :: Matrix R -> Matrix R
matrixSetDiagToZero m = m - diag (takeDiag m)
{-# INLINE matrixSetDiagToZero #-}
uniformExch :: ExchangeabilityMatrix
uniformExch = matrixSetDiagToZero $ matrix n $ replicate (n * n) 1.0
poissonExch :: ExchangeabilityMatrix
poissonExch = uniformExch
uniformVec :: Vector Double
uniformVec = V.replicate n (1 / fromIntegral n)
poisson :: SubstitutionModel
poisson = substitutionModel Protein "Poisson" [] uniformVec poissonExch
poissonCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
poissonCustom mnm d = substitutionModel Protein nm [] d poissonExch
where
nm = fromMaybe "Poisson-Custom" mnm
gtr20 :: [Double] -> StationaryDistribution -> SubstitutionModel
gtr20 es d = substitutionModel Protein "GTR" es d e
where
e = exchFromListUpper n es