{-# LANGUAGE OverloadedStrings #-}
module SLynx.Simulate.PhyloModel
( getPhyloModel,
)
where
import Control.Applicative
import Control.Monad (when)
import Data.Attoparsec.ByteString.Char8
import qualified Data.ByteString.Char8 as BS
import Data.Either (rights)
import Data.Maybe
import qualified Data.Vector as V
import ELynx.Import.MarkovProcess.EDMModelPhylobayes
( EDMComponent,
)
import ELynx.MarkovProcess.AminoAcid
import ELynx.MarkovProcess.CXXModels
import qualified ELynx.MarkovProcess.MixtureModel as M
import ELynx.MarkovProcess.Nucleotide
import qualified ELynx.MarkovProcess.PhyloModel as P
import ELynx.MarkovProcess.RateMatrix
import qualified ELynx.MarkovProcess.SubstitutionModel as S
import ELynx.Tools.Equality
import ELynx.Tools.InputOutput
import Numeric.LinearAlgebra (norm_1, size, vector)
import SLynx.Simulate.Options (MixtureModelGlobalNormalization (..))
nNuc :: Int
nNuc :: Int
nNuc = Int
4
nAA :: Int
nAA :: Int
nAA = Int
20
paramsStart :: Char
paramsStart :: Char
paramsStart = Char
'['
paramsEnd :: Char
paramsEnd :: Char
paramsEnd = Char
']'
sdStart :: Char
sdStart :: Char
sdStart = Char
'{'
sdEnd :: Char
sdEnd :: Char
sdEnd = Char
'}'
mmStart :: Char
mmStart :: Char
mmStart = Char
'('
mmEnd :: Char
mmEnd :: Char
mmEnd = Char
')'
separator :: Char
separator :: Char
separator = Char
','
name :: Parser String
name :: Parser [Char]
name =
ByteString -> [Char]
BS.unpack
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeWhile1 ([Char] -> Char -> Bool
notInClass [Char
paramsStart, Char
paramsEnd, Char
sdStart, Char
sdEnd, Char
mmStart, Char
mmEnd, Char
separator])
params :: Parser [Double]
params :: Parser Params
params = Char -> Parser Char
char Char
paramsStart forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser R
double forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
paramsEnd
stationaryDistribution :: Parser StationaryDistribution
stationaryDistribution :: Parser (Vector R)
stationaryDistribution = do
Char
_ <- Char -> Parser Char
char Char
sdStart
Vector R
f <- Params -> Vector R
vector forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser R
double forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator
Char
_ <- Char -> Parser Char
char Char
sdEnd
if R -> R -> Bool
nearlyEq (forall a. Normed a => a -> R
norm_1 Vector R
f) R
1.0
then forall (m :: * -> *) a. Monad m => a -> m a
return Vector R
f
else
forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$
[Char]
"Sum of stationary distribution is "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall a. Normed a => a -> R
norm_1 Vector R
f)
forall a. [a] -> [a] -> [a]
++ [Char]
" but should be 1.0."
assertLength :: StationaryDistribution -> Int -> a -> a
assertLength :: forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
n a
r =
if forall (c :: * -> *) t. Container c t => c t -> IndexOf c
size Vector R
d forall a. Eq a => a -> a -> Bool
/= Int
n
then
forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$
[Char]
"Length of stationary distribution is "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall (c :: * -> *) t. Container c t => c t -> IndexOf c
size Vector R
d)
forall a. [a] -> [a] -> [a]
++ [Char]
" but should be "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
n
forall a. [a] -> [a] -> [a]
++ [Char]
"."
else a
r
assembleSubstitutionModel ::
S.Normalize ->
String ->
Maybe S.Params ->
Maybe StationaryDistribution ->
Either String S.SubstitutionModel
assembleSubstitutionModel :: Normalize
-> [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel Normalize
nz [Char]
"JC" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Normalize -> SubstitutionModel
jc Normalize
nz
assembleSubstitutionModel Normalize
nz [Char]
"F81" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Normalize -> Vector R -> SubstitutionModel
f81 Normalize
nz Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
"HKY" (Just [R
k]) (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Normalize -> R -> Vector R -> SubstitutionModel
hky Normalize
nz R
k Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
"GTR4" (Just Params
es) (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Normalize -> Params -> Vector R -> SubstitutionModel
gtr4 Normalize
nz Params
es Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
"Poisson" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Normalize -> SubstitutionModel
poisson Normalize
nz
assembleSubstitutionModel Normalize
nz [Char]
"Poisson-Custom" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Normalize -> Vector R -> SubstitutionModel
poissonCustom forall a. Maybe a
Nothing Normalize
nz Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
"LG" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Normalize -> SubstitutionModel
lg Normalize
nz
assembleSubstitutionModel Normalize
nz [Char]
"LG-Custom" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Normalize -> Vector R -> SubstitutionModel
lgCustom forall a. Maybe a
Nothing Normalize
nz Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
"WAG" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Normalize -> SubstitutionModel
wag Normalize
nz
assembleSubstitutionModel Normalize
nz [Char]
"WAG-Custom" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Normalize -> Vector R -> SubstitutionModel
wagCustom forall a. Maybe a
Nothing Normalize
nz Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
"GTR20" (Just Params
es) (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Normalize -> Params -> Vector R -> SubstitutionModel
gtr20 Normalize
nz Params
es Vector R
d
assembleSubstitutionModel Normalize
nz [Char]
n Maybe Params
mps Maybe (Vector R)
mf =
forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$
[[Char]] -> [Char]
unlines
[ [Char]
"Cannot assemble substitution model.",
[Char]
"Normalize: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Normalize
nz,
[Char]
"Name: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show [Char]
n,
[Char]
"Parameters: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe Params
mps,
[Char]
"Stationary distribution: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe (Vector R)
mf
]
parseSubstitutionModel :: S.Normalize -> Parser S.SubstitutionModel
parseSubstitutionModel :: Normalize -> Parser SubstitutionModel
parseSubstitutionModel Normalize
nz = do
[Char]
n <- Parser [Char]
name
Maybe Params
mps <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Params
params
Maybe (Vector R)
mf <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser (Vector R)
stationaryDistribution
let esm :: Either [Char] SubstitutionModel
esm = Normalize
-> [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel Normalize
nz [Char]
n Maybe Params
mps Maybe (Vector R)
mf
case Either [Char] SubstitutionModel
esm of
Left [Char]
err -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
err
Right SubstitutionModel
sm -> forall (m :: * -> *) a. Monad m => a -> m a
return SubstitutionModel
sm
edmModel :: MixtureModelGlobalNormalization -> [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
edmModel :: MixtureModelGlobalNormalization
-> [EDMComponent] -> Maybe Params -> Parser MixtureModel
edmModel MixtureModelGlobalNormalization
nz [EDMComponent]
cs Maybe Params
mws = do
ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"EDM"
Char
_ <- Char -> Parser Char
char Char
mmStart
[Char]
n <- Parser [Char]
name
Maybe Params
mps <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Params
params
Char
_ <- Char -> Parser Char
char Char
mmEnd
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Bool
null [EDMComponent]
cs) forall a b. (a -> b) -> a -> b
$ forall a. HasCallStack => [Char] -> a
error [Char]
"edmModel: no EDM components given."
let sms :: [Either [Char] SubstitutionModel]
sms = forall a b. (a -> b) -> [a] -> [b]
map (\EDMComponent
c -> Normalize
-> [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel Normalize
subNz [Char]
n Maybe Params
mps (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd EDMComponent
c)) [EDMComponent]
cs
edmName :: [Char]
edmName = [Char]
"EDM" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [EDMComponent]
cs)
ws :: Params
ws = forall a. a -> Maybe a -> a
fromMaybe (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [EDMComponent]
cs) Maybe Params
mws
errs :: [[Char]]
errs = [[Char]
e | (Left [Char]
e) <- [Either [Char] SubstitutionModel]
sms]
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Either [Char] SubstitutionModel]
sms forall a. Eq a => a -> a -> Bool
/= forall (t :: * -> *) a. Foldable t => t a -> Int
length Params
ws) forall a b. (a -> b) -> a -> b
$
forall a. HasCallStack => [Char] -> a
error [Char]
"edmModel: number of substitution models and weights differs."
if Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[Char]]
errs
then forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail forall a b. (a -> b) -> a -> b
$ forall a. [a] -> a
head [[Char]]
errs
else
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
[Char]
-> Normalize
-> Vector R
-> Vector SubstitutionModel
-> MixtureModel
M.fromSubstitutionModels [Char]
edmName Normalize
mmNz (forall a. [a] -> Vector a
V.fromList Params
ws) (forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ forall a b. [Either a b] -> [b]
rights [Either [Char] SubstitutionModel]
sms)
where
(Normalize
subNz, Normalize
mmNz) = case MixtureModelGlobalNormalization
nz of
MixtureModelGlobalNormalization
GlobalNormalization -> (Normalize
S.DoNotNormalize, Normalize
S.DoNormalize)
MixtureModelGlobalNormalization
LocalNormalization -> (Normalize
S.DoNormalize, Normalize
S.DoNotNormalize)
cxxModel :: MixtureModelGlobalNormalization -> Maybe [M.Weight] -> Parser M.MixtureModel
cxxModel :: MixtureModelGlobalNormalization
-> Maybe Params -> Parser MixtureModel
cxxModel MixtureModelGlobalNormalization
LocalNormalization Maybe Params
_ = forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"Local normalization impossible with CXX models."
cxxModel MixtureModelGlobalNormalization
_ Maybe Params
mws = do
Char
_ <- Char -> Parser Char
char Char
'C'
Int
n <- forall a. Integral a => Parser a
decimal :: Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Maybe Params -> MixtureModel
cxx Int
n Maybe Params
mws
standardMixtureModel :: MixtureModelGlobalNormalization -> [M.Weight] -> Parser M.MixtureModel
standardMixtureModel :: MixtureModelGlobalNormalization -> Params -> Parser MixtureModel
standardMixtureModel MixtureModelGlobalNormalization
nz Params
ws = do
ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"MIXTURE"
Char
_ <- Char -> Parser Char
char Char
mmStart
[SubstitutionModel]
sms <- Normalize -> Parser SubstitutionModel
parseSubstitutionModel Normalize
subNz forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator
Char
_ <- Char -> Parser Char
char Char
mmEnd
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char]
-> Normalize
-> Vector R
-> Vector SubstitutionModel
-> MixtureModel
M.fromSubstitutionModels [Char]
"MIXTURE" Normalize
mmNz (forall a. [a] -> Vector a
V.fromList Params
ws) (forall a. [a] -> Vector a
V.fromList [SubstitutionModel]
sms)
where
(Normalize
subNz, Normalize
mmNz) = case MixtureModelGlobalNormalization
nz of
MixtureModelGlobalNormalization
GlobalNormalization -> (Normalize
S.DoNotNormalize, Normalize
S.DoNormalize)
MixtureModelGlobalNormalization
LocalNormalization -> (Normalize
S.DoNormalize, Normalize
S.DoNotNormalize)
mixtureModel :: MixtureModelGlobalNormalization -> Maybe [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
mixtureModel :: MixtureModelGlobalNormalization
-> Maybe [EDMComponent] -> Maybe Params -> Parser MixtureModel
mixtureModel MixtureModelGlobalNormalization
nz Maybe [EDMComponent]
Nothing Maybe Params
Nothing = forall i a. Parser i a -> Parser i a
try (MixtureModelGlobalNormalization
-> Maybe Params -> Parser MixtureModel
cxxModel MixtureModelGlobalNormalization
nz forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No weights provided."
mixtureModel MixtureModelGlobalNormalization
nz Maybe [EDMComponent]
Nothing mws :: Maybe Params
mws@(Just Params
ws) = forall i a. Parser i a -> Parser i a
try (MixtureModelGlobalNormalization
-> Maybe Params -> Parser MixtureModel
cxxModel MixtureModelGlobalNormalization
nz Maybe Params
mws) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> MixtureModelGlobalNormalization -> Params -> Parser MixtureModel
standardMixtureModel MixtureModelGlobalNormalization
nz Params
ws
mixtureModel MixtureModelGlobalNormalization
nz (Just [EDMComponent]
cs) Maybe Params
mws = MixtureModelGlobalNormalization
-> [EDMComponent] -> Maybe Params -> Parser MixtureModel
edmModel MixtureModelGlobalNormalization
nz [EDMComponent]
cs Maybe Params
mws
getPhyloModel ::
Maybe String ->
Maybe String ->
MixtureModelGlobalNormalization ->
Maybe [M.Weight] ->
Maybe [EDMComponent] ->
Either String P.PhyloModel
getPhyloModel :: Maybe [Char]
-> Maybe [Char]
-> MixtureModelGlobalNormalization
-> Maybe Params
-> Maybe [EDMComponent]
-> Either [Char] PhyloModel
getPhyloModel Maybe [Char]
Nothing Maybe [Char]
Nothing MixtureModelGlobalNormalization
_ Maybe Params
_ Maybe [EDMComponent]
_ = forall a b. a -> Either a b
Left [Char]
"No model was given. See help."
getPhyloModel (Just [Char]
_) (Just [Char]
_) MixtureModelGlobalNormalization
_ Maybe Params
_ Maybe [EDMComponent]
_ =
forall a b. a -> Either a b
Left [Char]
"Both, substitution and mixture model string given; use only one."
getPhyloModel (Just [Char]
s) Maybe [Char]
Nothing MixtureModelGlobalNormalization
nz Maybe Params
Nothing Maybe [EDMComponent]
Nothing
| MixtureModelGlobalNormalization
nz forall a. Eq a => a -> a -> Bool
== MixtureModelGlobalNormalization
GlobalNormalization = forall a b. a -> Either a b
Left [Char]
"Global normalization not possible for substitution models."
| Bool
otherwise = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ SubstitutionModel -> PhyloModel
P.SubstitutionModel forall a b. (a -> b) -> a -> b
$ forall a. Parser a -> [Char] -> a
parseStringWith (Normalize -> Parser SubstitutionModel
parseSubstitutionModel Normalize
S.DoNormalize) [Char]
s
getPhyloModel (Just [Char]
_) Maybe [Char]
Nothing MixtureModelGlobalNormalization
_ (Just Params
_) Maybe [EDMComponent]
_ =
forall a b. a -> Either a b
Left [Char]
"Weights given; but cannot be used with substitution model."
getPhyloModel (Just [Char]
_) Maybe [Char]
Nothing MixtureModelGlobalNormalization
_ Maybe Params
_ (Just [EDMComponent]
_) =
let msg1 :: [Char]
msg1 = [Char]
"Empirical distribution mixture model components given;"
msg2 :: [Char]
msg2 = [Char]
" but cannot be used with substitution model."
in forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ [Char]
msg1 forall a. Semigroup a => a -> a -> a
<> [Char]
msg2
getPhyloModel Maybe [Char]
Nothing (Just [Char]
m) MixtureModelGlobalNormalization
nz Maybe Params
mws Maybe [EDMComponent]
mcs =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ MixtureModel -> PhyloModel
P.MixtureModel forall a b. (a -> b) -> a -> b
$ forall a. Parser a -> [Char] -> a
parseStringWith (MixtureModelGlobalNormalization
-> Maybe [EDMComponent] -> Maybe Params -> Parser MixtureModel
mixtureModel MixtureModelGlobalNormalization
nz Maybe [EDMComponent]
mcs Maybe Params
mws) [Char]
m