{-# LANGUAGE TemplateHaskell #-} {- | Module : ELynx.Data.MarkovProcess.SubstitutionModel Description : Data type describing substitution model Copyright : (c) Dominik Schrempf 2019 License : GPL-3 Maintainer : dominik.schrempf@gmail.com Stability : unstable Portability : portable Creation date: Tue Jan 29 19:10:46 2019. To be imported qualified. -} module ELynx.Data.MarkovProcess.SubstitutionModel ( -- * Types Name , Params , SubstitutionModel -- * Lenses and other accessors , alphabet , name , stationaryDistribution , exchangeabilityMatrix , getRateMatrix , totalRate -- * Building substitution models , substitutionModel , unnormalized -- * Transformations , scale , normalize , appendName -- * Output , summarize ) where import Control.Lens import qualified Data.ByteString.Lazy.Char8 as L import qualified Numeric.LinearAlgebra as LinAlg import ELynx.Data.Alphabet.Alphabet import qualified ELynx.Data.MarkovProcess.RateMatrix as R import ELynx.Tools.Definitions import ELynx.Tools.LinearAlgebra import ELynx.Tools.Numeric -- | Name of substitution model; abstracted and subject to change. type Name = String -- | Parameters of substitution model. May be the empty list. type Params = [Double] -- | Complete definition of a substitution model. Create instances with -- 'substitutionModel'. data SubstitutionModel = SubstitutionModel { _alphabet :: Alphabet , _name :: Name , _params :: Params , _stationaryDistribution :: R.StationaryDistribution , _exchangeabilityMatrix :: R.ExchangeabilityMatrix } deriving (Show, Read) makeLenses ''SubstitutionModel -- | Calculate rate matrix from substitution model. getRateMatrix :: SubstitutionModel -> R.RateMatrix getRateMatrix sm = R.fromExchangeabilityMatrix (sm^.exchangeabilityMatrix) (sm^.stationaryDistribution) -- | Get scale of substitution model. totalRate :: SubstitutionModel -> Double totalRate sm = R.totalRate (sm^.stationaryDistribution) (getRateMatrix sm) -- | Create normalized 'SubstitutionModel'. See 'normalize'. substitutionModel :: Alphabet -> Name -> Params -> R.StationaryDistribution -> R.ExchangeabilityMatrix -> SubstitutionModel substitutionModel c n ps d e = normalize $ SubstitutionModel c n ps d e -- | Create UNNORMALIZED 'SubstitutionModel'. See 'substitutionModel'. unnormalized :: Alphabet -> Name -> Params -> R.StationaryDistribution -> R.ExchangeabilityMatrix -> SubstitutionModel unnormalized = SubstitutionModel -- | Scale the rate of a substitution model by given factor. scale :: Double -> SubstitutionModel -> SubstitutionModel scale r = over exchangeabilityMatrix (LinAlg.scale r) -- | Normalize a substitution model, so that, on average, one substitution -- happens per unit time. normalize :: SubstitutionModel -> SubstitutionModel normalize sm = scale (1.0/r) sm where r = totalRate sm -- | Abbend to name. appendName :: Name -> SubstitutionModel -> SubstitutionModel appendName n = over name (<> n) -- | Summarize a substitution model; lines to be printed to screen or log. summarize :: SubstitutionModel -> [L.ByteString] summarize sm = map L.pack $ (show (sm^.alphabet) ++ " substitution model: " ++ sm^.name ++ ".") : [ "Parameters: " ++ show (sm^.params) ++ "." | not (null (sm^.params))] ++ case sm^.alphabet of DNA -> [ "Stationary distribution: " ++ dispv precision (sm^.stationaryDistribution) ++ "." , "Exchangeability matrix:\n" ++ dispmi 2 precision (sm^.exchangeabilityMatrix) ++ "." , "Scale: " ++ show (roundN precision $ totalRate sm) ++ "." ] Protein -> [ "Stationary distribution: " ++ dispv precision (sm^.stationaryDistribution) ++ "." , "Scale: " ++ show (roundN precision $ totalRate sm) ++ "." ] _ -> error "Extended character sets are not supported with substitution models."