{- | Module : ELynx.Data.MarkovProcess.AminoAcid Description : Amino acid rate matrices such as LG Copyright : (c) Dominik Schrempf 2020 License : GPL-3.0-or-later Maintainer : dominik.schrempf@gmail.com Stability : unstable Portability : portable Creation date: Tue Jan 29 09:29:19 2019. The order of amino acids is alphabetic. -} module ELynx.Data.MarkovProcess.AminoAcid ( lg , lgCustom , wag , wagCustom , poisson , poissonCustom , gtr20 ) where import Data.List ( elemIndex ) import Data.Maybe ( fromMaybe ) import Data.Word ( Word8 ) import Numeric.LinearAlgebra import ELynx.Data.Alphabet.Alphabet import ELynx.Tools import ELynx.Data.MarkovProcess.RateMatrix import ELynx.Data.MarkovProcess.SubstitutionModel n :: Int n = 20 -- Some matrices have to be converted from PAML order to alphabetical order. See -- 'pamlToAlphaVec' and 'pamlToAlphaMat'. -- Amno acids in alphabetical order. aaAlphaOrder :: [Word8] aaAlphaOrder = map c2w [ 'A' , 'C' , 'D' , 'E' , 'F' , 'G' , 'H' , 'I' , 'K' , 'L' , 'M' , 'N' , 'P' , 'Q' , 'R' , 'S' , 'T' , 'V' , 'W' , 'Y' ] -- Amino acids in PAML oder. aaPamlOrder :: [Word8] aaPamlOrder = map c2w [ 'A' , 'R' , 'N' , 'D' , 'C' , 'Q' , 'E' , 'G' , 'H' , 'I' , 'L' , 'K' , 'M' , 'F' , 'P' , 'S' , 'T' , 'W' , 'Y' , 'V' ] -- -- This is a very slow implementation; since I only convert matrices once it -- -- should not be a problem. A map would be better if performance is an issue. -- pamlIndexToAlphaIndex :: Int -> Int -- pamlIndexToAlphaIndex i = fromMaybe -- (error $ "Could not convert index " ++ show i ++ ".") -- (elemIndex aa aaAlphaOrder) -- where aa = aaPamlOrder !! i -- This is a very slow implementation; since I only convert matrices once it -- should not be a problem. A map would be better if performance is an issue. alphaIndexToPamlIndex :: Int -> Int alphaIndexToPamlIndex i = fromMaybe (error $ "Could not convert index " ++ show i ++ ".") (elemIndex aa aaPamlOrder) where aa = aaAlphaOrder !! i -- Convert an amino acid vector in PAML order to a vector in alphabetical order. pamlToAlphaVec :: Vector R -> Vector R pamlToAlphaVec v = build n (\i -> v ! alphaIndexToPamlIndex (round i)) -- Convert an amino acid matrix in PAML order to a matrix in alphabetical order. pamlToAlphaMat :: Matrix R -> Matrix R pamlToAlphaMat m = build (n, n) (\i j -> m ! alphaIndexToPamlIndex (round i) ! alphaIndexToPamlIndex (round j) ) -- Lower triangular matrix of LG exchangeabilities in PAML order and in form of -- a list. lgExchRawPaml :: [Double] lgExchRawPaml = [ 0.425093 , 0.276818 , 0.751878 , 0.395144 , 0.123954 , 5.076149 , 2.489084 , 0.534551 , 0.528768 , 0.062556 , 0.969894 , 2.807908 , 1.695752 , 0.523386 , 0.084808 , 1.038545 , 0.363970 , 0.541712 , 5.243870 , 0.003499 , 4.128591 , 2.066040 , 0.390192 , 1.437645 , 0.844926 , 0.569265 , 0.267959 , 0.348847 , 0.358858 , 2.426601 , 4.509238 , 0.927114 , 0.640543 , 4.813505 , 0.423881 , 0.311484 , 0.149830 , 0.126991 , 0.191503 , 0.010690 , 0.320627 , 0.072854 , 0.044265 , 0.008705 , 0.108882 , 0.395337 , 0.301848 , 0.068427 , 0.015076 , 0.594007 , 0.582457 , 0.069673 , 0.044261 , 0.366317 , 4.145067 , 0.536518 , 6.326067 , 2.145078 , 0.282959 , 0.013266 , 3.234294 , 1.807177 , 0.296636 , 0.697264 , 0.159069 , 0.137500 , 1.124035 , 0.484133 , 0.371004 , 0.025548 , 0.893680 , 1.672569 , 0.173735 , 0.139538 , 0.442472 , 4.273607 , 6.312358 , 0.656604 , 0.253701 , 0.052722 , 0.089525 , 0.017416 , 1.105251 , 0.035855 , 0.018811 , 0.089586 , 0.682139 , 1.112727 , 2.592692 , 0.023918 , 1.798853 , 1.177651 , 0.332533 , 0.161787 , 0.394456 , 0.075382 , 0.624294 , 0.419409 , 0.196961 , 0.508851 , 0.078281 , 0.249060 , 0.390322 , 0.099849 , 0.094464 , 4.727182 , 0.858151 , 4.008358 , 1.240275 , 2.784478 , 1.223828 , 0.611973 , 1.739990 , 0.990012 , 0.064105 , 0.182287 , 0.748683 , 0.346960 , 0.361819 , 1.338132 , 2.139501 , 0.578987 , 2.000679 , 0.425860 , 1.143480 , 1.080136 , 0.604545 , 0.129836 , 0.584262 , 1.033739 , 0.302936 , 1.136863 , 2.020366 , 0.165001 , 0.571468 , 6.472279 , 0.180717 , 0.593607 , 0.045376 , 0.029890 , 0.670128 , 0.236199 , 0.077852 , 0.268491 , 0.597054 , 0.111660 , 0.619632 , 0.049906 , 0.696175 , 2.457121 , 0.095131 , 0.248862 , 0.140825 , 0.218959 , 0.314440 , 0.612025 , 0.135107 , 1.165532 , 0.257336 , 0.120037 , 0.054679 , 5.306834 , 0.232523 , 0.299648 , 0.131932 , 0.481306 , 7.803902 , 0.089613 , 0.400547 , 0.245841 , 3.151815 , 2.547870 , 0.170887 , 0.083688 , 0.037967 , 1.959291 , 0.210332 , 0.245034 , 0.076701 , 0.119013 , 10.649107 , 1.702745 , 0.185202 , 1.898718 , 0.654683 , 0.296501 , 0.098369 , 2.188158 , 0.189510 , 0.249313 ] -- Exchangeabilities of LG model in alphabetical order. lgExch :: ExchangeabilityMatrix lgExch = pamlToAlphaMat $ exchFromListLower n lgExchRawPaml -- Stationary distribution in PAML order. lgStatDistPaml :: StationaryDistribution lgStatDistPaml = normalizeSumVec 1.0 $ fromList [ 0.079066 , 0.055941 , 0.041977 , 0.053052 , 0.012937 , 0.040767 , 0.071586 , 0.057337 , 0.022355 , 0.062157 , 0.099081 , 0.064600 , 0.022951 , 0.042302 , 0.044040 , 0.061197 , 0.053287 , 0.012066 , 0.034155 , 0.069147 ] -- Stationary distribution of LG model in alphabetical order. lgStatDist :: StationaryDistribution lgStatDist = pamlToAlphaVec lgStatDistPaml -- | LG substitution model. lg :: SubstitutionModel lg = substitutionModel Protein "LG" [] lgStatDist lgExch -- | LG substitution model with maybe a name and a custom stationary distribution. lgCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel lgCustom mnm d = substitutionModel Protein nm [] d lgExch where nm = fromMaybe "LG-Custom" mnm -- WAG exchangeability list in PAML order. wagExchRawPaml :: [Double] wagExchRawPaml = [ 55.15710 , 50.98480 , 63.53460 , 73.89980 , 14.73040 , 542.94200 , 102.70400 , 52.81910 , 26.52560 , 3.02949 , 90.85980 , 303.55000 , 154.36400 , 61.67830 , 9.88179 , 158.28500 , 43.91570 , 94.71980 , 617.41600 , 2.13520 , 546.94700 , 141.67200 , 58.46650 , 112.55600 , 86.55840 , 30.66740 , 33.00520 , 56.77170 , 31.69540 , 213.71500 , 395.62900 , 93.06760 , 24.89720 , 429.41100 , 57.00250 , 24.94100 , 19.33350 , 18.69790 , 55.42360 , 3.94370 , 17.01350 , 11.39170 , 12.73950 , 3.04501 , 13.81900 , 39.79150 , 49.76710 , 13.15280 , 8.48047 , 38.42870 , 86.94890 , 15.42630 , 6.13037 , 49.94620 , 317.09700 , 90.62650 , 535.14200 , 301.20100 , 47.98550 , 7.40339 , 389.49000 , 258.44300 , 37.35580 , 89.04320 , 32.38320 , 25.75550 , 89.34960 , 68.31620 , 19.82210 , 10.37540 , 39.04820 , 154.52600 , 31.51240 , 17.41000 , 40.41410 , 425.74600 , 485.40200 , 93.42760 , 21.04940 , 10.27110 , 9.61621 , 4.67304 , 39.80200 , 9.99208 , 8.11339 , 4.99310 , 67.93710 , 105.94700 , 211.51700 , 8.88360 , 119.06300 , 143.85500 , 67.94890 , 19.50810 , 42.39840 , 10.94040 , 93.33720 , 68.23550 , 24.35700 , 69.61980 , 9.99288 , 41.58440 , 55.68960 , 17.13290 , 16.14440 , 337.07900 , 122.41900 , 397.42300 , 107.17600 , 140.76600 , 102.88700 , 70.49390 , 134.18200 , 74.01690 , 31.94400 , 34.47390 , 96.71300 , 49.39050 , 54.59310 , 161.32800 , 212.11100 , 55.44130 , 203.00600 , 37.48660 , 51.29840 , 85.79280 , 82.27650 , 22.58330 , 47.33070 , 145.81600 , 32.66220 , 138.69800 , 151.61200 , 17.19030 , 79.53840 , 437.80200 , 11.31330 , 116.39200 , 7.19167 , 12.97670 , 71.70700 , 21.57370 , 15.65570 , 33.69830 , 26.25690 , 21.24830 , 66.53090 , 13.75050 , 51.57060 , 152.96400 , 13.94050 , 52.37420 , 11.08640 , 24.07350 , 38.15330 , 108.60000 , 32.57110 , 54.38330 , 22.77100 , 19.63030 , 10.36040 , 387.34400 , 42.01700 , 39.86180 , 13.32640 , 42.84370 , 645.42800 , 21.60460 , 78.69930 , 29.11480 , 248.53900 , 200.60100 , 25.18490 , 19.62460 , 15.23350 , 100.21400 , 30.12810 , 58.87310 , 18.72470 , 11.83580 , 782.13000 , 180.03400 , 30.54340 , 205.84500 , 64.98920 , 31.48870 , 23.27390 , 138.82300 , 36.53690 , 31.47300 ] -- WAG exchangeability matrix n alphabetical order. wagExch :: ExchangeabilityMatrix wagExch = pamlToAlphaMat $ exchFromListLower n wagExchRawPaml -- WAG stationary distribution in PAML order. wagStatDistPaml :: StationaryDistribution wagStatDistPaml = normalizeSumVec 1.0 $ fromList [ 0.0866279 , 0.043972 , 0.0390894 , 0.0570451 , 0.0193078 , 0.0367281 , 0.0580589 , 0.0832518 , 0.0244313 , 0.048466 , 0.086209 , 0.0620286 , 0.0195027 , 0.0384319 , 0.0457631 , 0.0695179 , 0.0610127 , 0.0143859 , 0.0352742 , 0.0708957 ] -- WAG stationary distribution in alphabetical order. wagStatDist :: StationaryDistribution wagStatDist = pamlToAlphaVec wagStatDistPaml -- | LG substitution model. wag :: SubstitutionModel wag = substitutionModel Protein "WAG" [] wagStatDist wagExch -- | LG substitution model with maybe a name and a custom stationary distribution. wagCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel wagCustom mnm d = substitutionModel Protein nm [] d wagExch where nm = fromMaybe "WAG-Custom" mnm uniformExch :: ExchangeabilityMatrix uniformExch = matrixSetDiagToZero $ matrix n $ replicate (n * n) 1.0 poissonExch :: ExchangeabilityMatrix poissonExch = uniformExch -- | Poisson substitution model. poisson :: SubstitutionModel poisson = substitutionModel Protein "Poisson" [] (uniformVec n) poissonExch -- | Poisson substitution model with maybe a name and a custom stationary distribution. poissonCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel poissonCustom mnm d = substitutionModel Protein nm [] d poissonExch where nm = fromMaybe "Poisson-Custom" mnm -- | General time reversible (GTR) substitution model for amino acids. gtr20 :: [Double] -> StationaryDistribution -> SubstitutionModel gtr20 es d = substitutionModel Protein "GTR" es d e where e = exchFromListUpper n es