module ELynx.Data.MarkovProcess.AminoAcid
( lg,
lgCustom,
wag,
wagCustom,
poisson,
poissonCustom,
gtr20,
)
where
import Data.ByteString.Internal (c2w)
import Data.List (elemIndex)
import Data.Maybe (fromMaybe)
import qualified Data.Vector.Storable as V
import Data.Word (Word8)
import ELynx.Data.Alphabet.Alphabet
import ELynx.Data.MarkovProcess.RateMatrix
import ELynx.Data.MarkovProcess.SubstitutionModel
import Numeric.LinearAlgebra
n :: Int
n :: Int
n = Int
20
aaAlphaOrder :: [Word8]
aaAlphaOrder :: [Word8]
aaAlphaOrder =
(Char -> Word8) -> [Char] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map
Char -> Word8
c2w
[ Char
'A',
Char
'C',
Char
'D',
Char
'E',
Char
'F',
Char
'G',
Char
'H',
Char
'I',
Char
'K',
Char
'L',
Char
'M',
Char
'N',
Char
'P',
Char
'Q',
Char
'R',
Char
'S',
Char
'T',
Char
'V',
Char
'W',
Char
'Y'
]
aaPamlOrder :: [Word8]
aaPamlOrder :: [Word8]
aaPamlOrder =
(Char -> Word8) -> [Char] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map
Char -> Word8
c2w
[ Char
'A',
Char
'R',
Char
'N',
Char
'D',
Char
'C',
Char
'Q',
Char
'E',
Char
'G',
Char
'H',
Char
'I',
Char
'L',
Char
'K',
Char
'M',
Char
'F',
Char
'P',
Char
'S',
Char
'T',
Char
'W',
Char
'Y',
Char
'V'
]
alphaIndexToPamlIndex :: Int -> Int
alphaIndexToPamlIndex :: Int -> Int
alphaIndexToPamlIndex Int
i =
Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe
([Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ [Char]
"Could not convert index " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
".")
(Word8 -> [Word8] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
elemIndex Word8
aa [Word8]
aaPamlOrder)
where
aa :: Word8
aa = [Word8]
aaAlphaOrder [Word8] -> Int -> Word8
forall a. [a] -> Int -> a
!! Int
i
pamlToAlphaVec :: Vector R -> Vector R
pamlToAlphaVec :: Vector R -> Vector R
pamlToAlphaVec Vector R
v = Int -> (R -> R) -> Vector R
forall d f (c :: * -> *) e. Build d f c e => d -> f -> c e
build Int
n (\R
i -> Vector R
v Vector R -> Int -> R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
alphaIndexToPamlIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
i))
pamlToAlphaMat :: Matrix R -> Matrix R
pamlToAlphaMat :: Matrix R -> Matrix R
pamlToAlphaMat Matrix R
m =
(Int, Int) -> (R -> R -> R) -> Matrix R
forall d f (c :: * -> *) e. Build d f c e => d -> f -> c e
build
(Int
n, Int
n)
( \R
i R
j -> Matrix R
m Matrix R -> Int -> Vector R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
alphaIndexToPamlIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
i) Vector R -> Int -> R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
alphaIndexToPamlIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
j)
)
lgExchRawPaml :: [Double]
lgExchRawPaml :: [R]
lgExchRawPaml =
[ R
0.425093,
R
0.276818,
R
0.751878,
R
0.395144,
R
0.123954,
R
5.076149,
R
2.489084,
R
0.534551,
R
0.528768,
R
0.062556,
R
0.969894,
R
2.807908,
R
1.695752,
R
0.523386,
R
0.084808,
R
1.038545,
R
0.363970,
R
0.541712,
R
5.243870,
R
0.003499,
R
4.128591,
R
2.066040,
R
0.390192,
R
1.437645,
R
0.844926,
R
0.569265,
R
0.267959,
R
0.348847,
R
0.358858,
R
2.426601,
R
4.509238,
R
0.927114,
R
0.640543,
R
4.813505,
R
0.423881,
R
0.311484,
R
0.149830,
R
0.126991,
R
0.191503,
R
0.010690,
R
0.320627,
R
0.072854,
R
0.044265,
R
0.008705,
R
0.108882,
R
0.395337,
R
0.301848,
R
0.068427,
R
0.015076,
R
0.594007,
R
0.582457,
R
0.069673,
R
0.044261,
R
0.366317,
R
4.145067,
R
0.536518,
R
6.326067,
R
2.145078,
R
0.282959,
R
0.013266,
R
3.234294,
R
1.807177,
R
0.296636,
R
0.697264,
R
0.159069,
R
0.137500,
R
1.124035,
R
0.484133,
R
0.371004,
R
0.025548,
R
0.893680,
R
1.672569,
R
0.173735,
R
0.139538,
R
0.442472,
R
4.273607,
R
6.312358,
R
0.656604,
R
0.253701,
R
0.052722,
R
0.089525,
R
0.017416,
R
1.105251,
R
0.035855,
R
0.018811,
R
0.089586,
R
0.682139,
R
1.112727,
R
2.592692,
R
0.023918,
R
1.798853,
R
1.177651,
R
0.332533,
R
0.161787,
R
0.394456,
R
0.075382,
R
0.624294,
R
0.419409,
R
0.196961,
R
0.508851,
R
0.078281,
R
0.249060,
R
0.390322,
R
0.099849,
R
0.094464,
R
4.727182,
R
0.858151,
R
4.008358,
R
1.240275,
R
2.784478,
R
1.223828,
R
0.611973,
R
1.739990,
R
0.990012,
R
0.064105,
R
0.182287,
R
0.748683,
R
0.346960,
R
0.361819,
R
1.338132,
R
2.139501,
R
0.578987,
R
2.000679,
R
0.425860,
R
1.143480,
R
1.080136,
R
0.604545,
R
0.129836,
R
0.584262,
R
1.033739,
R
0.302936,
R
1.136863,
R
2.020366,
R
0.165001,
R
0.571468,
R
6.472279,
R
0.180717,
R
0.593607,
R
0.045376,
R
0.029890,
R
0.670128,
R
0.236199,
R
0.077852,
R
0.268491,
R
0.597054,
R
0.111660,
R
0.619632,
R
0.049906,
R
0.696175,
R
2.457121,
R
0.095131,
R
0.248862,
R
0.140825,
R
0.218959,
R
0.314440,
R
0.612025,
R
0.135107,
R
1.165532,
R
0.257336,
R
0.120037,
R
0.054679,
R
5.306834,
R
0.232523,
R
0.299648,
R
0.131932,
R
0.481306,
R
7.803902,
R
0.089613,
R
0.400547,
R
0.245841,
R
3.151815,
R
2.547870,
R
0.170887,
R
0.083688,
R
0.037967,
R
1.959291,
R
0.210332,
R
0.245034,
R
0.076701,
R
0.119013,
R
10.649107,
R
1.702745,
R
0.185202,
R
1.898718,
R
0.654683,
R
0.296501,
R
0.098369,
R
2.188158,
R
0.189510,
R
0.249313
]
lgExch :: ExchangeabilityMatrix
lgExch :: Matrix R
lgExch = Matrix R -> Matrix R
pamlToAlphaMat (Matrix R -> Matrix R) -> Matrix R -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> [R] -> Matrix R
forall a.
(RealFrac a, Container Vector a) =>
Int -> [a] -> Matrix a
exchFromListLower Int
n [R]
lgExchRawPaml
normalizeSumVec :: Vector Double -> Vector Double
normalizeSumVec :: Vector R -> Vector R
normalizeSumVec Vector R
v = (R -> R) -> Vector R -> Vector R
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
V.map (R -> R -> R
forall a. Fractional a => a -> a -> a
/ R
s) Vector R
v
where
s :: R
s = Vector R -> R
forall a. (Storable a, Num a) => Vector a -> a
V.sum Vector R
v
{-# INLINE normalizeSumVec #-}
lgStatDistPaml :: StationaryDistribution
lgStatDistPaml :: Vector R
lgStatDistPaml =
Vector R -> Vector R
normalizeSumVec (Vector R -> Vector R) -> Vector R -> Vector R
forall a b. (a -> b) -> a -> b
$
[R] -> Vector R
forall a. Storable a => [a] -> Vector a
fromList
[ R
0.079066,
R
0.055941,
R
0.041977,
R
0.053052,
R
0.012937,
R
0.040767,
R
0.071586,
R
0.057337,
R
0.022355,
R
0.062157,
R
0.099081,
R
0.064600,
R
0.022951,
R
0.042302,
R
0.044040,
R
0.061197,
R
0.053287,
R
0.012066,
R
0.034155,
R
0.069147
]
lgStatDist :: StationaryDistribution
lgStatDist :: Vector R
lgStatDist = Vector R -> Vector R
pamlToAlphaVec Vector R
lgStatDistPaml
lg :: SubstitutionModel
lg :: SubstitutionModel
lg = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"LG" [] Vector R
lgStatDist Matrix R
lgExch
lgCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
lgCustom :: Maybe [Char] -> Vector R -> SubstitutionModel
lgCustom Maybe [Char]
mnm Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
nm [] Vector R
d Matrix R
lgExch
where
nm :: [Char]
nm = [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
"LG-Custom" Maybe [Char]
mnm
wagExchRawPaml :: [Double]
wagExchRawPaml :: [R]
wagExchRawPaml =
[ R
55.15710,
R
50.98480,
R
63.53460,
R
73.89980,
R
14.73040,
R
542.94200,
R
102.70400,
R
52.81910,
R
26.52560,
R
3.02949,
R
90.85980,
R
303.55000,
R
154.36400,
R
61.67830,
R
9.88179,
R
158.28500,
R
43.91570,
R
94.71980,
R
617.41600,
R
2.13520,
R
546.94700,
R
141.67200,
R
58.46650,
R
112.55600,
R
86.55840,
R
30.66740,
R
33.00520,
R
56.77170,
R
31.69540,
R
213.71500,
R
395.62900,
R
93.06760,
R
24.89720,
R
429.41100,
R
57.00250,
R
24.94100,
R
19.33350,
R
18.69790,
R
55.42360,
R
3.94370,
R
17.01350,
R
11.39170,
R
12.73950,
R
3.04501,
R
13.81900,
R
39.79150,
R
49.76710,
R
13.15280,
R
8.48047,
R
38.42870,
R
86.94890,
R
15.42630,
R
6.13037,
R
49.94620,
R
317.09700,
R
90.62650,
R
535.14200,
R
301.20100,
R
47.98550,
R
7.40339,
R
389.49000,
R
258.44300,
R
37.35580,
R
89.04320,
R
32.38320,
R
25.75550,
R
89.34960,
R
68.31620,
R
19.82210,
R
10.37540,
R
39.04820,
R
154.52600,
R
31.51240,
R
17.41000,
R
40.41410,
R
425.74600,
R
485.40200,
R
93.42760,
R
21.04940,
R
10.27110,
R
9.61621,
R
4.67304,
R
39.80200,
R
9.99208,
R
8.11339,
R
4.99310,
R
67.93710,
R
105.94700,
R
211.51700,
R
8.88360,
R
119.06300,
R
143.85500,
R
67.94890,
R
19.50810,
R
42.39840,
R
10.94040,
R
93.33720,
R
68.23550,
R
24.35700,
R
69.61980,
R
9.99288,
R
41.58440,
R
55.68960,
R
17.13290,
R
16.14440,
R
337.07900,
R
122.41900,
R
397.42300,
R
107.17600,
R
140.76600,
R
102.88700,
R
70.49390,
R
134.18200,
R
74.01690,
R
31.94400,
R
34.47390,
R
96.71300,
R
49.39050,
R
54.59310,
R
161.32800,
R
212.11100,
R
55.44130,
R
203.00600,
R
37.48660,
R
51.29840,
R
85.79280,
R
82.27650,
R
22.58330,
R
47.33070,
R
145.81600,
R
32.66220,
R
138.69800,
R
151.61200,
R
17.19030,
R
79.53840,
R
437.80200,
R
11.31330,
R
116.39200,
R
7.19167,
R
12.97670,
R
71.70700,
R
21.57370,
R
15.65570,
R
33.69830,
R
26.25690,
R
21.24830,
R
66.53090,
R
13.75050,
R
51.57060,
R
152.96400,
R
13.94050,
R
52.37420,
R
11.08640,
R
24.07350,
R
38.15330,
R
108.60000,
R
32.57110,
R
54.38330,
R
22.77100,
R
19.63030,
R
10.36040,
R
387.34400,
R
42.01700,
R
39.86180,
R
13.32640,
R
42.84370,
R
645.42800,
R
21.60460,
R
78.69930,
R
29.11480,
R
248.53900,
R
200.60100,
R
25.18490,
R
19.62460,
R
15.23350,
R
100.21400,
R
30.12810,
R
58.87310,
R
18.72470,
R
11.83580,
R
782.13000,
R
180.03400,
R
30.54340,
R
205.84500,
R
64.98920,
R
31.48870,
R
23.27390,
R
138.82300,
R
36.53690,
R
31.47300
]
wagExch :: ExchangeabilityMatrix
wagExch :: Matrix R
wagExch = Matrix R -> Matrix R
pamlToAlphaMat (Matrix R -> Matrix R) -> Matrix R -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> [R] -> Matrix R
forall a.
(RealFrac a, Container Vector a) =>
Int -> [a] -> Matrix a
exchFromListLower Int
n [R]
wagExchRawPaml
wagStatDistPaml :: StationaryDistribution
wagStatDistPaml :: Vector R
wagStatDistPaml =
Vector R -> Vector R
normalizeSumVec (Vector R -> Vector R) -> Vector R -> Vector R
forall a b. (a -> b) -> a -> b
$
[R] -> Vector R
forall a. Storable a => [a] -> Vector a
fromList
[ R
0.0866279,
R
0.043972,
R
0.0390894,
R
0.0570451,
R
0.0193078,
R
0.0367281,
R
0.0580589,
R
0.0832518,
R
0.0244313,
R
0.048466,
R
0.086209,
R
0.0620286,
R
0.0195027,
R
0.0384319,
R
0.0457631,
R
0.0695179,
R
0.0610127,
R
0.0143859,
R
0.0352742,
R
0.0708957
]
wagStatDist :: StationaryDistribution
wagStatDist :: Vector R
wagStatDist = Vector R -> Vector R
pamlToAlphaVec Vector R
wagStatDistPaml
wag :: SubstitutionModel
wag :: SubstitutionModel
wag = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"WAG" [] Vector R
wagStatDist Matrix R
wagExch
wagCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
wagCustom :: Maybe [Char] -> Vector R -> SubstitutionModel
wagCustom Maybe [Char]
mnm Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
nm [] Vector R
d Matrix R
wagExch
where
nm :: [Char]
nm = [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
"WAG-Custom" Maybe [Char]
mnm
matrixSetDiagToZero :: Matrix R -> Matrix R
matrixSetDiagToZero :: Matrix R -> Matrix R
matrixSetDiagToZero Matrix R
m = Matrix R
m Matrix R -> Matrix R -> Matrix R
forall a. Num a => a -> a -> a
- Vector R -> Matrix R
forall a. (Num a, Element a) => Vector a -> Matrix a
diag (Matrix R -> Vector R
forall t. Element t => Matrix t -> Vector t
takeDiag Matrix R
m)
{-# INLINE matrixSetDiagToZero #-}
uniformExch :: ExchangeabilityMatrix
uniformExch :: Matrix R
uniformExch = Matrix R -> Matrix R
matrixSetDiagToZero (Matrix R -> Matrix R) -> Matrix R -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> [R] -> Matrix R
matrix Int
n ([R] -> Matrix R) -> [R] -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> R -> [R]
forall a. Int -> a -> [a]
replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
n) R
1.0
poissonExch :: ExchangeabilityMatrix
poissonExch :: Matrix R
poissonExch = Matrix R
uniformExch
uniformVec :: Vector Double
uniformVec :: Vector R
uniformVec = Int -> R -> Vector R
forall a. Storable a => Int -> a -> Vector a
V.replicate Int
n (R
1 R -> R -> R
forall a. Fractional a => a -> a -> a
/ Int -> R
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)
poisson :: SubstitutionModel
poisson :: SubstitutionModel
poisson = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"Poisson" [] Vector R
uniformVec Matrix R
poissonExch
poissonCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
poissonCustom :: Maybe [Char] -> Vector R -> SubstitutionModel
poissonCustom Maybe [Char]
mnm Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
nm [] Vector R
d Matrix R
poissonExch
where
nm :: [Char]
nm = [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
"Poisson-Custom" Maybe [Char]
mnm
gtr20 :: [Double] -> StationaryDistribution -> SubstitutionModel
gtr20 :: [R] -> Vector R -> SubstitutionModel
gtr20 [R]
es Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"GTR" [R]
es Vector R
d Matrix R
e
where
e :: Matrix R
e = Int -> [R] -> Matrix R
forall a.
(RealFrac a, Container Vector a) =>
Int -> [a] -> Matrix a
exchFromListUpper Int
n [R]
es