{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  SLynx.Simulate.PhyloModel
-- Description :  Parse and interpret the model string
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Feb  1 13:32:16 2019.
module SLynx.Simulate.PhyloModel
  ( getPhyloModel,
  )
where

import Control.Applicative
import Control.Monad (when)
import Data.Attoparsec.ByteString.Char8
import qualified Data.ByteString.Char8 as BS
import Data.Either (rights)
import Data.Maybe
import qualified Data.Vector as V
import ELynx.Import.MarkovProcess.EDMModelPhylobayes
  ( EDMComponent,
  )
import ELynx.MarkovProcess.AminoAcid
import ELynx.MarkovProcess.CXXModels
import qualified ELynx.MarkovProcess.MixtureModel as M
import ELynx.MarkovProcess.Nucleotide
import qualified ELynx.MarkovProcess.PhyloModel as P
import ELynx.MarkovProcess.RateMatrix
import qualified ELynx.MarkovProcess.SubstitutionModel as S
import ELynx.Tools.Equality
import ELynx.Tools.InputOutput
import Numeric.LinearAlgebra
  ( norm_1,
    size,
    vector,
  )

nNuc :: Int
-- nNuc = length (alphabet :: [Nucleotide])
nNuc :: Int
nNuc = Int
4

nAA :: Int
-- nAA = length (alphabet :: [AminoAcid])
nAA :: Int
nAA = Int
20

-- Model parameters between square brackets.
paramsStart :: Char
paramsStart :: Char
paramsStart = Char
'['

paramsEnd :: Char
paramsEnd :: Char
paramsEnd = Char
']'

-- Stationary distribution between curly brackets.
sdStart :: Char
sdStart :: Char
sdStart = Char
'{'

sdEnd :: Char
sdEnd :: Char
sdEnd = Char
'}'

-- Mixture model components between round brackets.
mmStart :: Char
mmStart :: Char
mmStart = Char
'('

mmEnd :: Char
mmEnd :: Char
mmEnd = Char
')'

separator :: Char
separator :: Char
separator = Char
','

name :: Parser String
name :: Parser [Char]
name =
  ByteString -> [Char]
BS.unpack
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeWhile1 ([Char] -> Char -> Bool
notInClass [Char
paramsStart, Char
paramsEnd, Char
sdStart, Char
sdEnd, Char
mmStart, Char
mmEnd, Char
separator])

params :: Parser [Double]
params :: Parser Params
params = Char -> Parser Char
char Char
paramsStart forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser R
double forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
paramsEnd

stationaryDistribution :: Parser StationaryDistribution
stationaryDistribution :: Parser (Vector R)
stationaryDistribution = do
  Char
_ <- Char -> Parser Char
char Char
sdStart
  Vector R
f <- Params -> Vector R
vector forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser R
double forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator
  Char
_ <- Char -> Parser Char
char Char
sdEnd
  if R -> R -> Bool
nearlyEq (forall a. Normed a => a -> R
norm_1 Vector R
f) R
1.0
    then forall (m :: * -> *) a. Monad m => a -> m a
return Vector R
f
    else
      forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$
        [Char]
"Sum of stationary distribution is "
          forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall a. Normed a => a -> R
norm_1 Vector R
f)
          forall a. [a] -> [a] -> [a]
++ [Char]
" but should be 1.0."

assertLength :: StationaryDistribution -> Int -> a -> a
assertLength :: forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
n a
r =
  if forall (c :: * -> *) t. Container c t => c t -> IndexOf c
size Vector R
d forall a. Eq a => a -> a -> Bool
/= Int
n
    then
      forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$
        [Char]
"Length of stationary distribution is "
          forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall (c :: * -> *) t. Container c t => c t -> IndexOf c
size Vector R
d)
          forall a. [a] -> [a] -> [a]
++ [Char]
" but should be "
          forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Int
n
          forall a. [a] -> [a] -> [a]
++ [Char]
"."
    else a
r

-- This is the main function that connects the model string, the parameters and
-- the stationary distribution. It should check that the model is valid.
assembleSubstitutionModel ::
  String ->
  Maybe S.Params ->
  Maybe StationaryDistribution ->
  Either String S.SubstitutionModel
-- DNA models.
assembleSubstitutionModel :: [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel [Char]
"JC" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
jc
assembleSubstitutionModel [Char]
"F81" Maybe Params
Nothing (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Vector R -> SubstitutionModel
f81 Vector R
d
assembleSubstitutionModel [Char]
"HKY" (Just [R
k]) (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ R -> Vector R -> SubstitutionModel
hky R
k Vector R
d
assembleSubstitutionModel [Char]
"GTR4" (Just Params
es) (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nNuc forall a b. (a -> b) -> a -> b
$ Params -> Vector R -> SubstitutionModel
gtr4 Params
es Vector R
d
-- Protein models.
assembleSubstitutionModel [Char]
"Poisson" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
poisson
assembleSubstitutionModel [Char]
"Poisson-Custom" Maybe Params
Nothing (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Vector R -> SubstitutionModel
poissonCustom forall a. Maybe a
Nothing Vector R
d
assembleSubstitutionModel [Char]
"LG" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
lg
assembleSubstitutionModel [Char]
"LG-Custom" Maybe Params
Nothing (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Vector R -> SubstitutionModel
lgCustom forall a. Maybe a
Nothing Vector R
d
assembleSubstitutionModel [Char]
"WAG" Maybe Params
Nothing Maybe (Vector R)
Nothing = forall a b. b -> Either a b
Right SubstitutionModel
wag
assembleSubstitutionModel [Char]
"WAG-Custom" Maybe Params
Nothing (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Maybe [Char] -> Vector R -> SubstitutionModel
wagCustom forall a. Maybe a
Nothing Vector R
d
assembleSubstitutionModel [Char]
"GTR20" (Just Params
es) (Just Vector R
d) =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Vector R -> Int -> a -> a
assertLength Vector R
d Int
nAA forall a b. (a -> b) -> a -> b
$ Params -> Vector R -> SubstitutionModel
gtr20 Params
es Vector R
d
-- Ohterwisse, we cannot assemble the model.
assembleSubstitutionModel [Char]
n Maybe Params
mps Maybe (Vector R)
mf =
  forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$
    [[Char]] -> [Char]
unlines
      [ [Char]
"Cannot assemble substitution model.",
        [Char]
"Name: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show [Char]
n,
        [Char]
"Parameters: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe Params
mps,
        [Char]
"Stationary distribution: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Maybe (Vector R)
mf
      ]

parseSubstitutionModel :: Parser S.SubstitutionModel
parseSubstitutionModel :: Parser SubstitutionModel
parseSubstitutionModel = do
  [Char]
n <- Parser [Char]
name
  Maybe Params
mps <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Params
params
  Maybe (Vector R)
mf <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser (Vector R)
stationaryDistribution
  let esm :: Either [Char] SubstitutionModel
esm = [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel [Char]
n Maybe Params
mps Maybe (Vector R)
mf
  case Either [Char] SubstitutionModel
esm of
    Left [Char]
err -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
err
    Right SubstitutionModel
sm -> forall (m :: * -> *) a. Monad m => a -> m a
return SubstitutionModel
sm

edmModel :: [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
edmModel :: [EDMComponent] -> Maybe Params -> Parser MixtureModel
edmModel [EDMComponent]
cs Maybe Params
mws = do
  ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"EDM"
  Char
_ <- Char -> Parser Char
char Char
mmStart
  [Char]
n <- Parser [Char]
name
  Maybe Params
mps <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Params
params
  Char
_ <- Char -> Parser Char
char Char
mmEnd
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Bool
null [EDMComponent]
cs) forall a b. (a -> b) -> a -> b
$ forall a. HasCallStack => [Char] -> a
error [Char]
"edmModel: no EDM components given."
  let sms :: [Either [Char] SubstitutionModel]
sms = forall a b. (a -> b) -> [a] -> [b]
map (\EDMComponent
c -> [Char]
-> Maybe Params
-> Maybe (Vector R)
-> Either [Char] SubstitutionModel
assembleSubstitutionModel [Char]
n Maybe Params
mps (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd EDMComponent
c)) [EDMComponent]
cs
      edmName :: [Char]
edmName = [Char]
"EDM" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [EDMComponent]
cs)
      ws :: Params
ws = forall a. a -> Maybe a -> a
fromMaybe (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [EDMComponent]
cs) Maybe Params
mws
      errs :: [[Char]]
errs = [[Char]
e | (Left [Char]
e) <- [Either [Char] SubstitutionModel]
sms]
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Either [Char] SubstitutionModel]
sms forall a. Eq a => a -> a -> Bool
/= forall (t :: * -> *) a. Foldable t => t a -> Int
length Params
ws) forall a b. (a -> b) -> a -> b
$
    forall a. HasCallStack => [Char] -> a
error [Char]
"edmModel: number of substitution models and weights differs."
  if Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null [[Char]]
errs
    then forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail forall a b. (a -> b) -> a -> b
$ forall a. [a] -> a
head [[Char]]
errs
    else
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
        [Char] -> Vector R -> Vector SubstitutionModel -> MixtureModel
M.fromSubstitutionModels [Char]
edmName (forall a. [a] -> Vector a
V.fromList Params
ws) (forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ forall a b. [Either a b] -> [b]
rights [Either [Char] SubstitutionModel]
sms)

cxxModel :: Maybe [M.Weight] -> Parser M.MixtureModel
cxxModel :: Maybe Params -> Parser MixtureModel
cxxModel Maybe Params
mws = do
  Char
_ <- Char -> Parser Char
char Char
'C'
  Int
n <- forall a. Integral a => Parser a
decimal :: Parser Int
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Maybe Params -> MixtureModel
cxx Int
n Maybe Params
mws

standardMixtureModel :: [M.Weight] -> Parser M.MixtureModel
standardMixtureModel :: Params -> Parser MixtureModel
standardMixtureModel Params
ws = do
  ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"MIXTURE"
  Char
_ <- Char -> Parser Char
char Char
mmStart
  [SubstitutionModel]
sms <- Parser SubstitutionModel
parseSubstitutionModel forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy1` Char -> Parser Char
char Char
separator
  Char
_ <- Char -> Parser Char
char Char
mmEnd
  -- XXX: The use of `Data.List.NonEmpty.fromList` leads to uninformative error messages.
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Vector R -> Vector SubstitutionModel -> MixtureModel
M.fromSubstitutionModels [Char]
"MIXTURE" (forall a. [a] -> Vector a
V.fromList Params
ws) (forall a. [a] -> Vector a
V.fromList [SubstitutionModel]
sms)

mixtureModel ::
  Maybe [EDMComponent] -> Maybe [M.Weight] -> Parser M.MixtureModel
mixtureModel :: Maybe [EDMComponent] -> Maybe Params -> Parser MixtureModel
mixtureModel Maybe [EDMComponent]
Nothing Maybe Params
Nothing =
  forall i a. Parser i a -> Parser i a
try (Maybe Params -> Parser MixtureModel
cxxModel forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No weights provided."
mixtureModel Maybe [EDMComponent]
Nothing mws :: Maybe Params
mws@(Just Params
ws) =
  forall i a. Parser i a -> Parser i a
try (Maybe Params -> Parser MixtureModel
cxxModel Maybe Params
mws) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Params -> Parser MixtureModel
standardMixtureModel Params
ws
mixtureModel (Just [EDMComponent]
cs) Maybe Params
mws = [EDMComponent] -> Maybe Params -> Parser MixtureModel
edmModel [EDMComponent]
cs Maybe Params
mws

-- | Parse the phylogenetic model string. The argument list is somewhat long,
-- but models can have many parameters and we have to check for redundant
-- parameters.
--
-- @
-- getPhyloModel maybeSubstitutionModelString maybeMixtureModelString maybeEDMComponents
-- @
getPhyloModel ::
  Maybe String ->
  Maybe String ->
  Maybe [M.Weight] ->
  Maybe [EDMComponent] ->
  Either String P.PhyloModel
getPhyloModel :: Maybe [Char]
-> Maybe [Char]
-> Maybe Params
-> Maybe [EDMComponent]
-> Either [Char] PhyloModel
getPhyloModel Maybe [Char]
Nothing Maybe [Char]
Nothing Maybe Params
_ Maybe [EDMComponent]
_ = forall a b. a -> Either a b
Left [Char]
"No model was given. See help."
getPhyloModel (Just [Char]
_) (Just [Char]
_) Maybe Params
_ Maybe [EDMComponent]
_ =
  forall a b. a -> Either a b
Left [Char]
"Both, substitution and mixture model string given; use only one."
getPhyloModel (Just [Char]
s) Maybe [Char]
Nothing Maybe Params
Nothing Maybe [EDMComponent]
Nothing =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$
    SubstitutionModel -> PhyloModel
P.SubstitutionModel forall a b. (a -> b) -> a -> b
$
      forall a. Parser a -> [Char] -> a
parseStringWith
        Parser SubstitutionModel
parseSubstitutionModel
        [Char]
s
getPhyloModel (Just [Char]
_) Maybe [Char]
Nothing (Just Params
_) Maybe [EDMComponent]
_ =
  forall a b. a -> Either a b
Left [Char]
"Weights given; but cannot be used with substitution model."
getPhyloModel (Just [Char]
_) Maybe [Char]
Nothing Maybe Params
_ (Just [EDMComponent]
_) =
  forall a b. a -> Either a b
Left
    [Char]
"Empirical distribution mixture model components given; but cannot be used with substitution model."
getPhyloModel Maybe [Char]
Nothing (Just [Char]
m) Maybe Params
mws Maybe [EDMComponent]
mcs =
  forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$
    MixtureModel -> PhyloModel
P.MixtureModel forall a b. (a -> b) -> a -> b
$
      forall a. Parser a -> [Char] -> a
parseStringWith
        (Maybe [EDMComponent] -> Maybe Params -> Parser MixtureModel
mixtureModel Maybe [EDMComponent]
mcs Maybe Params
mws)
        [Char]
m