-- |
-- Module      :  ELynx.Data.Sequence.Translate
-- Description :  Translate sequences
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri May 17 13:49:18 2019.
module ELynx.Data.Sequence.Translate
  ( translateSeq,
  )
where

import qualified Data.Vector.Unboxed as V
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.Alphabet.Character as C
import ELynx.Data.Character.Codon
import ELynx.Data.Sequence.Sequence

-- Chop a vector into consecutive chunks of the given length. If the last chunk
-- is shorter than the given length, it is dropped.
--
-- The chunk length has to be positive. With a non-positive length no elements
-- would ever be consumed and the recursion would not terminate, so 'error' is
-- called instead.
chopVec :: V.Unbox a => Int -> V.Vector a -> [V.Vector a]
chopVec n xs
  | n <= 0 = error "chopVec: chunk length is not positive."
  | otherwise = go xs
  where
    -- Peel off one full chunk at a time; stop as soon as fewer than @n@
    -- elements remain.
    go ys
      | V.length ys < n = []
      | otherwise = V.take n ys : go (V.drop n ys)

-- | Translate a nucleotide sequence to an amino acid sequence.
--
-- The alphabet of the input sequence determines the translation function and
-- the alphabet of the result:
--
-- - 'DNA' is translated with 'translate' to 'ProteinS';
-- - 'DNAX' is translated with 'translateX' to 'ProteinS';
-- - 'DNAI' is translated with 'translateI' to 'ProteinI'.
--
-- The second argument is the reading frame (0, 1, or 2). The name and the
-- description of the sequence are kept unchanged.
--
-- Calls 'error' if the sequence has any other alphabet, or if the reading frame
-- is not 0, 1, or 2.
translateSeq :: UniversalCode -> Int -> Sequence -> Sequence
translateSeq uc rf (Sequence n d a cs) = case a of
  DNA -> Sequence n d ProteinS (translateChars (translate uc))
  DNAX -> Sequence n d ProteinS (translateChars (translateX uc))
  DNAI -> Sequence n d ProteinI (translateChars (translateI uc))
  _ -> error "translate: can only translate DNA, DNAX, and DNAI."
  where
    -- Convert the characters to a typed vector, translate codon by codon
    -- with the given function, and convert the result back.
    translateChars f = C.fromCVec $ translateVecWith f rf (C.toCVec cs)

-- Translate a vector of characters codon by codon using the given translation
-- function and reading frame (0, 1, or 2).
--
-- The first @rf@ characters are skipped. The remainder is cut into consecutive
-- triplets with 'chopVec', so a trailing incomplete triplet is dropped. Each
-- triplet is converted to a codon with 'fromVecUnsafe' and translated with the
-- given function.
--
-- Calls 'error' if the reading frame is negative or larger than 2.
translateVecWith ::
  (V.Unbox a, Ord a, V.Unbox b) =>
  (Codon a -> b) ->
  Int ->
  V.Vector a ->
  V.Vector b
translateVecWith f rf cs
  | rf > 2 = error "translateVecWith: reading frame is larger than 2."
  | rf < 0 = error "translateVecWith: reading frame is negative."
  | otherwise = V.fromList $ map (f . fromVecUnsafe) triplets
  where
    -- Vectors of length three, starting at the reading frame offset.
    triplets = chopVec 3 $ V.drop rf cs