{-# LANGUAGE OverloadedStrings #-}
module SLynx.Simulate.PhyloModel
( getPhyloModel,
)
where
import Control.Applicative
import Control.Monad (when)
import Data.Attoparsec.ByteString.Char8
import qualified Data.ByteString.Char8 as BS
import Data.Either (rights)
import Data.Maybe
import qualified Data.Vector as V
import ELynx.Import.MarkovProcess.EDMModelPhylobayes
( EDMComponent,
)
import ELynx.MarkovProcess.AminoAcid
import ELynx.MarkovProcess.CXXModels
import qualified ELynx.MarkovProcess.MixtureModel as M
import ELynx.MarkovProcess.Nucleotide
import qualified ELynx.MarkovProcess.PhyloModel as P
import ELynx.MarkovProcess.RateMatrix
import qualified ELynx.MarkovProcess.SubstitutionModel as S
import ELynx.Tools.Equality
import ELynx.Tools.InputOutput
import Numeric.LinearAlgebra
( norm_1,
size,
vector,
)
nNuc :: Int
nNuc :: Int
nNuc = Int
4
nAA :: Int
nAA :: Int
nAA = Int
20
paramsStart :: Char
paramsStart :: Char
paramsStart = Char
'['
paramsEnd :: Char
paramsEnd :: Char
paramsEnd = Char
']'
sdStart :: Char
sdStart :: Char
sdStart = Char
'{'
sdEnd :: Char
sdEnd :: Char
sdEnd = Char
'}'
mmStart :: Char
mmStart :: Char
mmStart = Char
'('
mmEnd :: Char
mmEnd :: Char
mmEnd = Char
')'
separator :: Char
separator :: Char
separator = Char
','
name :: Parser String
name :: Parser [Char]
name =
ByteString -> [Char]
BS.unpack
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeWhile1 ([Char] -> Char -> Bool
notInClass [Char
paramsStart, Char
paramsEnd, Char
sdStart, Char
sdEnd, Char
mmStart, Char
mmEnd, Char
separator])
params :: Parser [Double]
params :: Parser Params
params = Char -> Parser Char
char Char
paramsStart forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser R
double forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
paramsEnd
stationaryDistribution :: Parser StationaryDistribution
stationaryDistribution :: Parser (Vector R)
stationaryDistribution = do
Char
_ <- Char -> Parser Char
char Char
sdStart
Vector R
f <- Params -> Vector R
vector forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser R
double forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator
Char
_ <- Char -> Parser Char
char Char
sdEnd
if R -> R -> Bool
nearlyEq (forall a. Normed a => a -> R
norm_1 Vector R
f) R
1.0
then forall (m :: * -> *) a. Monad m => a -> m a
return Vector R
f
else
forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$
[Char]
"Sum of stationary distribution is "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall a. Normed a => a -> R
norm_1 Vector R
f)
forall a. [a] -> [a] -> [a]
++ [Char]
" but should be 1.0."
assertLength :: StationaryDistribution -> Int -> a -> a
assertLength :: forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
n a
r =
if forall (c :: * -> *) t. Container c t => c t -> IndexOf c
size Vector R
d forall a. Eq a => a -> a -> Bool
/= Int
n
then
forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$
[Char]
"Length of stationary distribution is "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall (c :: * -> *) t. Container c t => c t -> IndexOf c
size Vector R
d)
forall a. [a] -> [a] -> [a]
++ [Char]
" but should be "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
n
forall a. [a] -> [a] -> [a]
++ [Char]
"."
else a
r
assembleSubstitutionModel ::
String ->
Maybe S.Params ->
Maybe StationaryDistribution ->
Either String S.SubstitutionModel
assembleSubstitutionModel :: [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel [Char]
"JC" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
jc
assembleSubstitutionModel [Char]
"F81" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Vector R -> SubstitutionModel
f81 Vector R
d
assembleSubstitutionModel [Char]
"HKY" (Just [R
k]) (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ R -> Vector R -> SubstitutionModel
hky R
k Vector R
d
assembleSubstitutionModel [Char]
"GTR4" (Just Params
es) (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Params -> Vector R -> SubstitutionModel
gtr4 Params
es Vector R
d
assembleSubstitutionModel [Char]
"Poisson" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
poisson
assembleSubstitutionModel [Char]
"Poisson-Custom" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Vector R -> SubstitutionModel
poissonCustom forall a. Maybe a
Nothing Vector R
d
assembleSubstitutionModel [Char]
"LG" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
lg
assembleSubstitutionModel [Char]
"LG-Custom" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Vector R -> SubstitutionModel
lgCustom forall a. Maybe a
Nothing Vector R
d
assembleSubstitutionModel [Char]
"WAG" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
wag
assembleSubstitutionModel [Char]
"WAG-Custom" Maybe Params
Nothing (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Vector R -> SubstitutionModel
wagCustom forall a. Maybe a
Nothing Vector R
d
assembleSubstitutionModel [Char]
"GTR20" (Just Params
es) (Just Vector R
d) =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Params -> Vector R -> SubstitutionModel
gtr20 Params
es Vector R
d
assembleSubstitutionModel [Char]
n Maybe Params
mps Maybe (Vector R)
mf =
forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$
[[Char]] -> [Char]
unlines
[ [Char]
"Cannot assemble substitution model.",
[Char]
"Name: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show [Char]
n,
[Char]
"Parameters: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe Params
mps,
[Char]
"Stationary distribution: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe (Vector R)
mf
]
parseSubstitutionModel :: Parser S.SubstitutionModel
parseSubstitutionModel :: Parser SubstitutionModel
parseSubstitutionModel = do
[Char]
n <- Parser [Char]
name
Maybe Params
mps <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Params
params
Maybe (Vector R)
mf <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser (Vector R)
stationaryDistribution
let esm :: Either [Char] SubstitutionModel
esm = [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel [Char]
n Maybe Params
mps Maybe (Vector R)
mf
case Either [Char] SubstitutionModel
esm of
Left [Char]
err -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
err
Right SubstitutionModel
sm -> forall (m :: * -> *) a. Monad m => a -> m a
return SubstitutionModel
sm
edmModel :: [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
edmModel :: [EDMComponent] -> Maybe Params -> Parser MixtureModel
edmModel [EDMComponent]
cs Maybe Params
mws = do
ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"EDM"
Char
_ <- Char -> Parser Char
char Char
mmStart
[Char]
n <- Parser [Char]
name
Maybe Params
mps <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Params
params
Char
_ <- Char -> Parser Char
char Char
mmEnd
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Bool
null [EDMComponent]
cs) forall a b. (a -> b) -> a -> b
$ forall a. HasCallStack => [Char] -> a
error [Char]
"edmModel: no EDM components given."
let sms :: [Either [Char] SubstitutionModel]
sms = forall a b. (a -> b) -> [a] -> [b]
map (\EDMComponent
c -> [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel [Char]
n Maybe Params
mps (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd EDMComponent
c)) [EDMComponent]
cs
edmName :: [Char]
edmName = [Char]
"EDM" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [EDMComponent]
cs)
ws :: Params
ws = forall a. a -> Maybe a -> a
fromMaybe (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [EDMComponent]
cs) Maybe Params
mws
errs :: [[Char]]
errs = [[Char]
e | (Left [Char]
e) <- [Either [Char] SubstitutionModel]
sms]
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Either [Char] SubstitutionModel]
sms forall a. Eq a => a -> a -> Bool
/= forall (t :: * -> *) a. Foldable t => t a -> Int
length Params
ws) forall a b. (a -> b) -> a -> b
$
forall a. HasCallStack => [Char] -> a
error [Char]
"edmModel: number of substitution models and weights differs."
if Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[Char]]
errs
then forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail forall a b. (a -> b) -> a -> b
$ forall a. [a] -> a
head [[Char]]
errs
else
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
[Char] -> Vector R -> Vector SubstitutionModel -> MixtureModel
M.fromSubstitutionModels [Char]
edmName (forall a. [a] -> Vector a
V.fromList Params
ws) (forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ forall a b. [Either a b] -> [b]
rights [Either [Char] SubstitutionModel]
sms)
cxxModel :: Maybe [M.Weight] -> Parser M.MixtureModel
cxxModel :: Maybe Params -> Parser MixtureModel
cxxModel Maybe Params
mws = do
Char
_ <- Char -> Parser Char
char Char
'C'
Int
n <- forall a. Integral a => Parser a
decimal :: Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Maybe Params -> MixtureModel
cxx Int
n Maybe Params
mws
standardMixtureModel :: [M.Weight] -> Parser M.MixtureModel
standardMixtureModel :: Params -> Parser MixtureModel
standardMixtureModel Params
ws = do
ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"MIXTURE"
Char
_ <- Char -> Parser Char
char Char
mmStart
[SubstitutionModel]
sms <- Parser SubstitutionModel
parseSubstitutionModel forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator
Char
_ <- Char -> Parser Char
char Char
mmEnd
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Vector R -> Vector SubstitutionModel -> MixtureModel
M.fromSubstitutionModels [Char]
"MIXTURE" (forall a. [a] -> Vector a
V.fromList Params
ws) (forall a. [a] -> Vector a
V.fromList [SubstitutionModel]
sms)
mixtureModel ::
Maybe [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
mixtureModel :: Maybe [EDMComponent] -> Maybe Params -> Parser MixtureModel
mixtureModel Maybe [EDMComponent]
Nothing Maybe Params
Nothing =
forall i a. Parser i a -> Parser i a
try (Maybe Params -> Parser MixtureModel
cxxModel forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No weights provided."
mixtureModel Maybe [EDMComponent]
Nothing mws :: Maybe Params
mws@(Just Params
ws) =
forall i a. Parser i a -> Parser i a
try (Maybe Params -> Parser MixtureModel
cxxModel Maybe Params
mws) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Params -> Parser MixtureModel
standardMixtureModel Params
ws
mixtureModel (Just [EDMComponent]
cs) Maybe Params
mws = [EDMComponent] -> Maybe Params -> Parser MixtureModel
edmModel [EDMComponent]
cs Maybe Params
mws
getPhyloModel ::
Maybe String ->
Maybe String ->
Maybe [M.Weight] ->
Maybe [EDMComponent] ->
Either String P.PhyloModel
getPhyloModel :: Maybe [Char]
-> Maybe [Char]
-> Maybe Params
-> Maybe [EDMComponent]
-> Either [Char] PhyloModel
getPhyloModel Maybe [Char]
Nothing Maybe [Char]
Nothing Maybe Params
_ Maybe [EDMComponent]
_ = forall a b. a -> Either a b
Left [Char]
"No model was given. See help."
getPhyloModel (Just [Char]
_) (Just [Char]
_) Maybe Params
_ Maybe [EDMComponent]
_ =
forall a b. a -> Either a b
Left [Char]
"Both, substitution and mixture model string given; use only one."
getPhyloModel (Just [Char]
s) Maybe [Char]
Nothing Maybe Params
Nothing Maybe [EDMComponent]
Nothing =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$
SubstitutionModel -> PhyloModel
P.SubstitutionModel forall a b. (a -> b) -> a -> b
$
forall a. Parser a -> [Char] -> a
parseStringWith
Parser SubstitutionModel
parseSubstitutionModel
[Char]
s
getPhyloModel (Just [Char]
_) Maybe [Char]
Nothing (Just Params
_) Maybe [EDMComponent]
_ =
forall a b. a -> Either a b
Left [Char]
"Weights given; but cannot be used with substitution model."
getPhyloModel (Just [Char]
_) Maybe [Char]
Nothing Maybe Params
_ (Just [EDMComponent]
_) =
forall a b. a -> Either a b
Left
[Char]
"Empirical distribution mixture model components given; but cannot be used with substitution model."
getPhyloModel Maybe [Char]
Nothing (Just [Char]
m) Maybe Params
mws Maybe [EDMComponent]
mcs =
forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$
MixtureModel -> PhyloModel
P.MixtureModel forall a b. (a -> b) -> a -> b
$
forall a. Parser a -> [Char] -> a
parseStringWith
(Maybe [EDMComponent] -> Maybe Params -> Parser MixtureModel
mixtureModel Maybe [EDMComponent]
mcs Maybe Params
mws)
[Char]
m