{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- |
-- Module      :  ELynx.Data.Character.AminoAcidI
-- Description :  Amino acid related types and functions
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Oct  4 18:26:35 2018.
--
-- See header of 'ELynx.Data.Alphabet.Alphabet'.
--
-- Amino acid IUPAC code. See also https://www.bioinformatics.org/sms/iupac.html or
-- https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry.
--
-- Remarks:
--
-- - Question marks (@?@) are interpreted as unknowns (same as @X@). However, when
--   a sequence is printed/exported, @X@s will be used.
--
-- - Full stops (@.@) are interpreted as gaps (same as @-@). However, when a
--   sequence is printed/exported, @-@s will be used.
--
-- @
-- Amino Acid Code:  Three letter Code:  Amino Acid:
-- ----------------  ------------------  -----------
-- A                 Ala                 Alanine
-- C                 Cys                 Cysteine
-- D                 Asp                 Aspartic Acid
-- E                 Glu                 Glutamic Acid
-- F                 Phe                 Phenylalanine
-- G                 Gly                 Glycine
-- H                 His                 Histidine
-- I                 Ile                 Isoleucine
-- K                 Lys                 Lysine
-- L                 Leu                 Leucine
-- M                 Met                 Methionine
-- N                 Asn                 Asparagine
-- P                 Pro                 Proline
-- Q                 Gln                 Glutamine
-- R                 Arg                 Arginine
-- S                 Ser                 Serine
-- T                 Thr                 Threonine
-- V                 Val                 Valine
-- W                 Trp                 Tryptophan
-- Y                 Tyr                 Tyrosine
-- ----------------  ------------------  -----------
-- J                                     Leucine or Isoleucine
-- B                 Asx                 Aspartic acid or Asparagine
-- Z                 Glx                 Glutamine or Glutamic acid
-- ----------------  ------------------  -----------
-- X                 Xaa                 Any amino acid (preferred; used for printing)
-- ?                 Xaa                 Any amino acid
-- ----------------  ------------------  -----------
-- *                 Stp                 No amino acid
-- ----------------  ------------------  -----------
-- -                 Gap                 No amino acid (preferred; used for printing)
-- .                 Gap                 No amino acid
-- @
module ELynx.Data.Character.AminoAcidI
  ( AminoAcidI (..),
  )
where

import Data.ByteString.Internal (c2w, w2c)
import Data.Vector.Unboxed.Deriving
import Data.Word8
import qualified ELynx.Data.Character.Character as C

-- | Amino acids, including IUPAC ambiguity codes, the stop character and gaps.
--
-- The constructors are listed in three groups: the twenty standard amino
-- acids ('A' to 'Y'), the ambiguity codes ('J', 'B', 'Z', 'X'), and the two
-- characters that do not code for an amino acid ('Stop', 'Gap'). The derived
-- 'Ord', 'Enum' and 'Bounded' instances follow this declaration order.
data AminoAcidI
  = -- | Alanine
    A
  | -- | Cysteine
    C
  | -- | Aspartic acid
    D
  | -- | Glutamic acid
    E
  | -- | Phenylalanine
    F
  | -- | Glycine
    G
  | -- | Histidine
    H
  | -- | Isoleucine
    I
  | -- | Lysine
    K
  | -- | Leucine
    L
  | -- | Methionine
    M
  | -- | Asparagine
    N
  | -- | Proline
    P
  | -- | Glutamine
    Q
  | -- | Arginine
    R
  | -- | Serine
    S
  | -- | Threonine
    T
  | -- | Valine
    V
  | -- | Tryptophan
    W
  | -- | Tyrosine
    Y
  | -- | Leucine or isoleucine
    J
  | -- | Aspartic acid or asparagine
    B
  | -- | Glutamine or glutamic acid
    Z
  | -- | Any amino acid
    X
  | -- | Stop character (@*@); no amino acid
    Stop
  | -- | Gap (@-@ or @.@); no amino acid
    Gap
  deriving (Show, Read, Eq, Ord, Enum, Bounded)

-- | Encode an amino acid as the 'Word8' of its one-letter code.
--
-- Every letter constructor maps to the upper-case character of the same name;
-- 'Stop' is written as @*@ and 'Gap' as @-@.
toWord :: AminoAcidI -> Word8
toWord A = c2w 'A'
toWord C = c2w 'C'
toWord D = c2w 'D'
toWord E = c2w 'E'
toWord F = c2w 'F'
toWord G = c2w 'G'
toWord H = c2w 'H'
toWord I = c2w 'I'
toWord K = c2w 'K'
toWord L = c2w 'L'
toWord M = c2w 'M'
toWord N = c2w 'N'
toWord P = c2w 'P'
toWord Q = c2w 'Q'
toWord R = c2w 'R'
toWord S = c2w 'S'
toWord T = c2w 'T'
toWord V = c2w 'V'
toWord W = c2w 'W'
toWord Y = c2w 'Y'
toWord J = c2w 'J'
toWord B = c2w 'B'
toWord Z = c2w 'Z'
toWord X = c2w 'X'
toWord Stop = c2w '*'
toWord Gap = c2w '-'

-- | Decode the 'Word8' of a one-letter code into an amino acid.
--
-- Only upper-case letters are recognised. Besides the codes produced by
-- 'toWord', two aliases are accepted: @?@ is read as 'X' and @.@ is read as
-- 'Gap'.
--
-- This function is partial: any other byte results in a call to 'error'. The
-- message names the offending character.
fromWord :: Word8 -> AminoAcidI
fromWord w = case w2c w of
  'A' -> A
  'C' -> C
  'D' -> D
  'E' -> E
  'F' -> F
  'G' -> G
  'H' -> H
  'I' -> I
  'K' -> K
  'L' -> L
  'M' -> M
  'N' -> N
  'P' -> P
  'Q' -> Q
  'R' -> R
  'S' -> S
  'T' -> T
  'V' -> V
  'W' -> W
  'Y' -> Y
  'J' -> J
  'B' -> B
  'Z' -> Z
  'X' -> X
  -- Question marks code for @X@s.
  '?' -> X
  '*' -> Stop
  '-' -> Gap
  -- Full stops code for gaps (@-@s).
  '.' -> Gap
  -- Anything else is not part of the alphabet; report which character it was.
  c -> error $ "fromWord: Cannot convert Word8 to AminoAcidI: " ++ show c

-- Template Haskell splice for 'AminoAcidI'. It is handed the conversion type
-- @AminoAcidI -> Word8@ together with 'toWord' and 'fromWord' as the two
-- directions of that conversion.
$( derivingUnbox
     "AminoAcidI"
     [t|AminoAcidI -> Word8|]
     [|toWord|]
     [|fromWord|]
 )

-- | Byte conversions are delegated to this module's top-level 'toWord' and
-- 'fromWord'. The class itself is imported qualified, so the unqualified
-- names on the right-hand sides refer to the local functions.
instance C.Character AminoAcidI where
  toWord = toWord
  fromWord = fromWord

-- | The gap character of this alphabet is 'Gap'.
instance C.CharacterX AminoAcidI where
  gap = Gap

-- | Expand a character into the standard amino acids it may stand for.
--
-- * Each of the twenty standard amino acids maps to a singleton list of
--   itself.
-- * The ambiguity codes expand to their alternatives: 'J' to @[L, I]@, 'B' to
--   @[D, N]@, 'Z' to @[E, Q]@ and 'X' to all twenty standard amino acids.
-- * 'Stop' and 'Gap' map to the empty list.
toStandard :: AminoAcidI -> [AminoAcidI]
toStandard A = [A]
toStandard C = [C]
toStandard D = [D]
toStandard E = [E]
toStandard F = [F]
toStandard G = [G]
toStandard H = [H]
toStandard I = [I]
toStandard K = [K]
toStandard L = [L]
toStandard M = [M]
toStandard N = [N]
toStandard P = [P]
toStandard Q = [Q]
toStandard R = [R]
toStandard S = [S]
toStandard T = [T]
toStandard V = [V]
toStandard W = [W]
toStandard Y = [Y]
toStandard J = [L, I]
toStandard B = [D, N]
toStandard Z = [E, Q]
toStandard X = [A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y]
toStandard Stop = []
toStandard Gap = []

-- | 'X' is the unknown character, and the ambiguity codes are 'J', 'B', 'Z'
-- and 'X'. Expansion to standard amino acids is delegated to this module's
-- top-level 'toStandard'; the class is imported qualified, so the unqualified
-- name on the right-hand side refers to the local function.
instance C.CharacterI AminoAcidI where
  unknown = X
  iupac = [J, B, Z, X]
  toStandard = toStandard