{-# LANGUAGE OverloadedStrings #-}
module SLynx.Simulate.PhyloModel
( getPhyloModel,
)
where
import Control.Applicative
import Control.Monad (when)
import Data.Attoparsec.ByteString.Char8
import qualified Data.ByteString.Char8 as BS
import Data.Either (rights)
import Data.List.NonEmpty (fromList)
import Data.Maybe
import ELynx.Data.MarkovProcess.AminoAcid
import ELynx.Data.MarkovProcess.CXXModels
import qualified ELynx.Data.MarkovProcess.MixtureModel as M
import ELynx.Data.MarkovProcess.Nucleotide
import qualified ELynx.Data.MarkovProcess.PhyloModel as P
import ELynx.Data.MarkovProcess.RateMatrix
import qualified ELynx.Data.MarkovProcess.SubstitutionModel as S
import ELynx.Import.MarkovProcess.EDMModelPhylobayes
( EDMComponent,
)
import ELynx.Tools
import Numeric.LinearAlgebra
( norm_1,
size,
vector,
)
nNuc :: Int
nNuc = 4
nAA :: Int
nAA = 20
paramsStart :: Char
paramsStart = '['
paramsEnd :: Char
paramsEnd = ']'
sdStart :: Char
sdStart = '{'
sdEnd :: Char
sdEnd = '}'
mmStart :: Char
mmStart = '('
mmEnd :: Char
mmEnd = ')'
separator :: Char
separator = ','
name :: Parser String
name =
BS.unpack
<$> takeWhile1 (notInClass [paramsStart, paramsEnd, sdStart, sdEnd, mmStart, mmEnd, separator])
params :: Parser [Double]
params = char paramsStart *> double `sepBy1` char separator <* char paramsEnd
stationaryDistribution :: Parser StationaryDistribution
stationaryDistribution = do
_ <- char sdStart
f <- vector <$> double `sepBy1` char separator
_ <- char sdEnd
if nearlyEq (norm_1 f) 1.0
then return f
else
error $
"Sum of stationary distribution is "
++ show (norm_1 f)
++ " but should be 1.0."
assertLength :: StationaryDistribution -> Int -> a -> a
assertLength d n r =
if size d /= n
then
error $
"Length of stationary distribution is "
++ show (size d)
++ " but should be "
++ show n
++ "."
else r
assembleSubstitutionModel ::
String ->
Maybe S.Params ->
Maybe StationaryDistribution ->
Either String S.SubstitutionModel
assembleSubstitutionModel "JC" Nothing Nothing = Right jc
assembleSubstitutionModel "F81" Nothing (Just d) =
Right $ assertLength d nNuc $ f81 d
assembleSubstitutionModel "HKY" (Just [k]) (Just d) =
Right $ assertLength d nNuc $ hky k d
assembleSubstitutionModel "GTR4" (Just es) (Just d) =
Right $ assertLength d nNuc $ gtr4 es d
assembleSubstitutionModel "Poisson" Nothing Nothing = Right poisson
assembleSubstitutionModel "Poisson-Custom" Nothing (Just d) =
Right $ assertLength d nAA $ poissonCustom Nothing d
assembleSubstitutionModel "LG" Nothing Nothing = Right lg
assembleSubstitutionModel "LG-Custom" Nothing (Just d) =
Right $ assertLength d nAA $ lgCustom Nothing d
assembleSubstitutionModel "WAG" Nothing Nothing = Right wag
assembleSubstitutionModel "WAG-Custom" Nothing (Just d) =
Right $ assertLength d nAA $ wagCustom Nothing d
assembleSubstitutionModel "GTR20" (Just es) (Just d) =
Right $ assertLength d nAA $ gtr20 es d
assembleSubstitutionModel n mps mf =
Left $
unlines
[ "Cannot assemble substitution model.",
"Name: " ++ show n,
"Parameters: " ++ show mps,
"Stationary distribution: " ++ show mf
]
parseSubstitutionModel :: Parser S.SubstitutionModel
parseSubstitutionModel = do
n <- name
mps <- optional params
mf <- optional stationaryDistribution
let esm = assembleSubstitutionModel n mps mf
case esm of
Left err -> fail err
Right sm -> return sm
edmModel :: [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
edmModel cs mws = do
_ <- string "EDM"
_ <- char mmStart
n <- name
mps <- optional params
_ <- char mmEnd
when (null cs) $ error "edmModel: no EDM components given."
let sms = map (\c -> assembleSubstitutionModel n mps (Just $ snd c)) cs
edmName = "EDM" ++ show (length cs)
ws = fromMaybe (map fst cs) mws
errs = [e | (Left e) <- sms]
when (length sms /= length ws) $
error "edmModel: number of substitution models and weights differs."
if not $ null errs
then fail $ head errs
else
return $
M.fromSubstitutionModels edmName (fromList ws) (fromList $ rights sms)
cxxModel :: Maybe [M.Weight] -> Parser M.MixtureModel
cxxModel mws = do
_ <- char 'C'
n <- decimal :: Parser Int
return $ cxx n mws
standardMixtureModel :: [M.Weight] -> Parser M.MixtureModel
standardMixtureModel ws = do
_ <- string "MIXTURE"
_ <- char mmStart
sms <- parseSubstitutionModel `sepBy1` char separator
_ <- char mmEnd
return $ M.fromSubstitutionModels "MIXTURE" (fromList ws) (fromList sms)
mixtureModel ::
Maybe [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
mixtureModel Nothing Nothing =
try (cxxModel Nothing) <|> fail "No weights provided."
mixtureModel Nothing mws@(Just ws) =
try (cxxModel mws) <|> standardMixtureModel ws
mixtureModel (Just cs) mws = edmModel cs mws
getPhyloModel ::
Maybe String ->
Maybe String ->
Maybe [M.Weight] ->
Maybe [EDMComponent] ->
Either String P.PhyloModel
getPhyloModel Nothing Nothing _ _ = Left "No model was given. See help."
getPhyloModel (Just _) (Just _) _ _ =
Left "Both, substitution and mixture model string given; use only one."
getPhyloModel (Just s) Nothing Nothing Nothing =
Right $
P.SubstitutionModel $
parseStringWith
parseSubstitutionModel
s
getPhyloModel (Just _) Nothing (Just _) _ =
Left "Weights given; but cannot be used with substitution model."
getPhyloModel (Just _) Nothing _ (Just _) =
Left
"Empirical distribution mixture model components given; but cannot be used with substitution model."
getPhyloModel Nothing (Just m) mws mcs =
Right $
P.MixtureModel $
parseStringWith
(mixtureModel mcs mws)
m