-- |
-- Module      :  ELynx.Sequence.Distance
-- Description :  Distance functions between sequences
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Aug 21 15:09:58 2020.
module ELynx.Sequence.Distance
  ( hamming,
  )
where

import qualified Data.Vector.Unboxed as V
import ELynx.Sequence.Sequence

-- | Fold step that counts 'False' values: the running count @n@ is
-- incremented for 'False' and returned unchanged for 'True'.
countFalses :: Int -> Bool -> Int
countFalses n False = succ n
countFalses n True = n

-- | Compute hamming distance between two sequences.
--
-- Returns 'Left' with an error message if the alphabets of the two sequences
-- differ, if their lengths differ, or if a sequence is empty (checked in that
-- order). Otherwise returns 'Right' with the number of positions at which the
-- characters of the two sequences are not equal.
hamming :: Sequence -> Sequence -> Either String Int
hamming l r
  | alphabet l /= alphabet r = Left "hamming: Alphabets of sequences differ."
  | V.length csL /= V.length csR = Left "hamming: Sequence lengths differ."
  | V.null csL || V.null csR = Left "hamming: Empty sequence encountered."
  -- Compare position-wise with (==); every 'False' result is one mismatch.
  | otherwise = Right $ V.foldl' countFalses 0 $ V.zipWith (==) csL csR
  where
    -- Character vectors of the left and right sequence, shared by all guards.
    csL = characters l
    csR = characters r