{-# LANGUAGE TemplateHaskell #-}
module ELynx.Data.MarkovProcess.SubstitutionModel
(
Name
, Params
, SubstitutionModel
, alphabet
, name
, stationaryDistribution
, exchangeabilityMatrix
, getRateMatrix
, totalRate
, substitutionModel
, unnormalized
, scale
, normalize
, appendName
, summarize
) where
import Control.Lens
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Numeric.LinearAlgebra as LinAlg
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.MarkovProcess.RateMatrix as R
import ELynx.Tools.Definitions
import ELynx.Tools.LinearAlgebra
import ELynx.Tools.Numeric
type Name = String
type Params = [Double]
data SubstitutionModel = SubstitutionModel
{ _alphabet :: Alphabet
, _name :: Name
, _params :: Params
, _stationaryDistribution :: R.StationaryDistribution
, _exchangeabilityMatrix :: R.ExchangeabilityMatrix
}
deriving (Show, Read)
makeLenses ''SubstitutionModel
getRateMatrix :: SubstitutionModel -> R.RateMatrix
getRateMatrix sm = R.fromExchangeabilityMatrix (sm^.exchangeabilityMatrix) (sm^.stationaryDistribution)
totalRate :: SubstitutionModel -> Double
totalRate sm = R.totalRate (sm^.stationaryDistribution) (getRateMatrix sm)
substitutionModel :: Alphabet -> Name -> Params
-> R.StationaryDistribution -> R.ExchangeabilityMatrix
-> SubstitutionModel
substitutionModel c n ps d e = normalize $ SubstitutionModel c n ps d e
unnormalized :: Alphabet -> Name -> Params
-> R.StationaryDistribution -> R.ExchangeabilityMatrix
-> SubstitutionModel
unnormalized = SubstitutionModel
scale :: Double -> SubstitutionModel -> SubstitutionModel
scale r = over exchangeabilityMatrix (LinAlg.scale r)
normalize :: SubstitutionModel -> SubstitutionModel
normalize sm = scale (1.0/r) sm
where r = totalRate sm
appendName :: Name -> SubstitutionModel -> SubstitutionModel
appendName n = over name (<> n)
summarize :: SubstitutionModel -> [L.ByteString]
summarize sm = map L.pack $
(show (sm^.alphabet) ++ " substitution model: " ++ sm^.name ++ ".") :
[ "Parameters: " ++ show (sm^.params) ++ "." | not (null (sm^.params))] ++
case sm^.alphabet of
DNA -> [ "Stationary distribution: " ++ dispv precision (sm^.stationaryDistribution) ++ "."
, "Exchangeability matrix:\n" ++ dispmi 2 precision (sm^.exchangeabilityMatrix) ++ "."
, "Scale: " ++ show (roundN precision $ totalRate sm) ++ "."
]
Protein -> [ "Stationary distribution: " ++ dispv precision (sm^.stationaryDistribution) ++ "."
, "Scale: " ++ show (roundN precision $ totalRate sm) ++ "."
]
_ -> error "Extended character sets are not supported with substitution models."