module ELynx.Data.Sequence.Translate
( translateSeq,
)
where
import qualified Data.Vector.Unboxed as V
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.Alphabet.Character as C
import ELynx.Data.Character.Codon
import ELynx.Data.Sequence.Sequence
-- Split a vector into consecutive chunks of exactly @n@ elements.
--
-- A trailing remainder shorter than @n@ is dropped, so every returned chunk
-- has length @n@. A non-positive @n@ is rejected with an 'error': 'V.take' and
-- 'V.drop' would then never shorten the vector and the recursion would not
-- terminate.
chopVec :: V.Unbox a => Int -> V.Vector a -> [V.Vector a]
chopVec n xs
  | n <= 0 = error "chopVec: chunk length must be positive."
  -- Not enough elements left for a complete chunk; discard the remainder.
  | V.length xs < n = []
  | otherwise = V.take n xs : chopVec n (V.drop n xs)
-- | Translate a nucleotide 'Sequence' into an amino acid 'Sequence'.
--
-- The alphabet of the input decides which codon translation is used and which
-- alphabet the result carries:
--
-- * 'DNA' is translated with 'translate' into 'ProteinS';
-- * 'DNAX' is translated with 'translateX' into 'ProteinS';
-- * 'DNAI' is translated with 'translateI' into 'ProteinI'.
--
-- The first two fields of the input 'Sequence' are carried over unchanged.
-- The 'Int' argument is the reading frame; it is forwarded to
-- @translateVecWith@, which calls 'error' for values outside of 0, 1, and 2.
--
-- Calls 'error' if the sequence has any other alphabet.
translateSeq :: UniversalCode -> Int -> Sequence -> Sequence
translateSeq uc rf (Sequence n d a cs) = case a of
  DNA -> Sequence n d ProteinS (cs' $ translate uc)
  DNAX -> Sequence n d ProteinS (cs' $ translateX uc)
  DNAI -> Sequence n d ProteinI (cs' $ translateI uc)
  _ -> error "translate: can only translate DNA, DNAX, and DNAI."
  where
    -- Convert the stored characters to a typed vector, translate codon by
    -- codon with @f@, and convert the result back. No local type signature is
    -- given on purpose: the helper is used at three different character
    -- types, so it has to stay polymorphic (inferred).
    cs' f = C.fromCVec $ translateVecWith f rf (C.toCVec cs)
-- Translate a vector of characters codon by codon with the given function.
--
-- The reading frame @rf@ is the number of leading characters that are skipped
-- before the vector is cut into triplets; only 0, 1, and 2 are accepted, any
-- other value results in a call to 'error'. An incomplete triplet at the end
-- of the vector is dropped (see @chopVec@).
translateVecWith ::
  (V.Unbox a, Ord a, V.Unbox b) =>
  (Codon a -> b) ->
  Int ->
  V.Vector a ->
  V.Vector b
translateVecWith f rf cs
  | rf > 2 = error "translateVecWith: reading frame is larger than 2."
  | rf < 0 = error "translateVecWith: reading frame is negative."
  | otherwise = aas
  where
    -- Every chunk produced by @chopVec 3@ has exactly three elements, which
    -- is presumably the precondition of 'fromVecUnsafe' (TODO confirm).
    codons = map fromVecUnsafe $ chopVec 3 $ V.drop rf cs
    aas = V.fromList $ map f codons