{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}

-- |
-- Module      :  ELynx.Sequence.Divergence
-- Description :  Compute divergence matrix between sequences
-- Copyright   :  2022 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Creation date: Thu Jul  7 17:55:23 2022.
module ELynx.Sequence.Divergence
  ( divergence,
  )
where

import Control.Monad.Primitive
import qualified Data.Matrix.Unboxed as MU
import qualified Data.Matrix.Unboxed.Mutable as MU
import Data.Maybe
import qualified Data.Set as S
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import ELynx.Alphabet.Alphabet
import ELynx.Sequence.Sequence
import GHC.Exts (Constraint)

type Context x = (VUM.Unbox x :: Constraint)

modify :: (Context a, PrimMonad s) => MU.MMatrix (PrimState s) a -> (Int, Int) -> (a -> a) -> s ()
modify :: forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> (a -> a) -> s ()
modify MMatrix (PrimState s) a
m (Int, Int)
ij a -> a
f = do
  a
x <- forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> s a
MU.read MMatrix (PrimState s) a
m (Int, Int)
ij
  forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> a -> s ()
MU.write MMatrix (PrimState s) a
m (Int, Int)
ij (a -> a
f a
x)

divergence :: Sequence -> Sequence -> Either String (MU.Matrix Int)
divergence :: Sequence -> Sequence -> Either String (Matrix Int)
divergence Sequence
s1 Sequence
s2
  | Sequence -> Alphabet
alphabet Sequence
s1 forall a. Eq a => a -> a -> Bool
/= Sequence -> Alphabet
alphabet Sequence
s2 = forall {b}. String -> Either String b
err String
"sequences have different alphabets"
  | Bool -> Bool
not ([Sequence] -> Bool
equalLength [Sequence
s1, Sequence
s2]) = forall {b}. String -> Either String b
err String
"sequences have different lengths"
  | Bool
otherwise = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a. Context a => (forall s. ST s (MMatrix s a)) -> Matrix a
MU.create forall a b. (a -> b) -> a -> b
$ do
      MMatrix s Int
m <- forall a (s :: * -> *).
(Context a, PrimMonad s) =>
(Int, Int) -> s (MMatrix (PrimState s) a)
MU.new (Int
n, Int
n)
      -- Initialize matrix.
      forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, Monad m) =>
t (m a) -> m ()
sequence_ [forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> a -> s ()
MU.write MMatrix s Int
m (Int
i, Int
j) Int
0 | Int
i <- [Int
0 .. (Int
n forall a. Num a => a -> a -> a
- Int
1)], Int
j <- [Int
0 .. (Int
n forall a. Num a => a -> a -> a
- Int
1)]]
      -- Fill matrix.
      forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, Monad m) =>
t (m a) -> m ()
sequence_ forall a b. (a -> b) -> a -> b
$
        forall a. [Maybe a] -> [a]
catMaybes
          [ do
              -- Only treat sites where both characters are standard.
              Int
i <- Maybe Int
mi
              Int
j <- Maybe Int
mj
              forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a (s :: * -> *).
(Context a, PrimMonad s) =>
MMatrix (PrimState s) a -> (Int, Int) -> (a -> a) -> s ()
modify MMatrix s Int
m (Int
i, Int
j) (forall a. Num a => a -> a -> a
+ Int
1)
            | (Character
x, Character
y) <- forall a b. [a] -> [b] -> [(a, b)]
zip (forall a. Unbox a => Vector a -> [a]
VU.toList Characters
cs1) (forall a. Unbox a => Vector a -> [a]
VU.toList Characters
cs2),
              let mi :: Maybe Int
mi = forall a. Ord a => a -> Set a -> Maybe Int
S.lookupIndex Character
x Set Character
a,
              let mj :: Maybe Int
mj = forall a. Ord a => a -> Set a -> Maybe Int
S.lookupIndex Character
y Set Character
a
          ]
      forall (m :: * -> *) a. Monad m => a -> m a
return MMatrix s Int
m
  where
    err :: String -> Either String b
err String
m = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"divergence: " forall a. Semigroup a => a -> a -> a
<> String
m
    a :: Set Character
a = AlphabetSpec -> Set Character
std forall a b. (a -> b) -> a -> b
$ Alphabet -> AlphabetSpec
alphabetSpec forall a b. (a -> b) -> a -> b
$ Sequence -> Alphabet
alphabet Sequence
s1
    n :: Int
n = forall a. Set a -> Int
S.size Set Character
a
    cs1 :: Characters
cs1 = Sequence -> Characters
characters Sequence
s1
    cs2 :: Characters
cs2 = Sequence -> Characters
characters Sequence
s2