module Biobase.SubstMatrix where
import Control.DeepSeq (NFData(..))
import Control.Lens
import Control.Monad.Except
import Control.Monad.IO.Class
import Data.Aeson (FromJSON,ToJSON)
import Data.Binary (Binary)
import Data.List (maximumBy,find)
import Data.Serialize (Serialize)
import Data.Vector.Unboxed.Deriving
import GHC.Generics (Generic)
import Numeric.Log
import qualified Data.Map.Strict as M
import qualified Data.Vector.Unboxed as VU
import System.Directory (doesFileExist)
import System.Exit (exitSuccess)
import Text.Printf
import Biobase.GeneticCodes.Translation
import Biobase.GeneticCodes.Types
import Biobase.Primary.AA (aaRange)
import Biobase.Primary.Letter
import Biobase.Primary.Nuc.DNA ()
import Biobase.Primary.Trans
import Biobase.Types.BioSequence (DNA,AA)
import Biobase.Types.Codon
import Data.PrimitiveArray as PA
import Numeric.Discretized
import qualified Biobase.Primary.AA as AA
import qualified Biobase.Primary.Nuc.DNA as DNA
import StatisticalMechanics.Ensemble
import Statistics.Odds
import Statistics.Probability
import Biobase.SubstMatrix.Embedded
import Biobase.SubstMatrix.Import
import Biobase.SubstMatrix.Types
mkANuc3SubstMat
∷ TranslationTable (Letter DNA n) (Letter AA n)
→ AASubstMat t (DiscLogOdds k) n
→ ANuc3SubstMat t (Letter AA n, (DiscLogOdds k)) n n
mkANuc3SubstMat tbl (AASubstMat m)
= ANuc3SubstMat
$ fromAssocs (ZZ:..LtLetter AA.Undef:..LtLetter DNA.N:..LtLetter DNA.N:..LtLetter DNA.N) (AA.Undef, DiscLogOdds . Discretized $ -999)
[ ( (Z:.a:.u:.v:.w)
, (t, m!(Z:.a:.t))
)
| a <- VU.toList aaRange
, u <- [DNA.A .. DNA.N], v <- [DNA.A .. DNA.N], w <- [DNA.A .. DNA.N]
, let b = Codon u v w
, let t = translate tbl b
]
fromFileOrCached ∷ (MonadIO m, MonadError String m) ⇒ FilePath → m (AASubstMat t (DiscLogOdds Unknown) a)
fromFileOrCached fname = do
dfe ← liftIO $ doesFileExist fname
if | fname == "list" → do
mapM_ (liftIO . printf "%s\n" . fst) embeddedPamBlosum
liftIO exitSuccess
| dfe → fromFile fname
| Just (k,v) ← find ((fname==).fst) embeddedPamBlosum → return v
| otherwise → throwError $ fname ++ " is neither a file nor a known substitution matrix"
mkProbabilityMatrix
∷ Double
→ AASubstMat t (DiscLogOdds k) n
→ AASubstMat t (Log (Probability NotNormalized Double)) n
mkProbabilityMatrix invScale (AASubstMat dlo) = AASubstMat $ PA.map (/nrm) $ dbl
where dbl = PA.map (\(DiscLogOdds (Discretized k)) → stateLogProbability (negate invScale) $ fromIntegral @Int @Double k) dlo
nrm = maximum . Prelude.map snd $ PA.assocs dbl