module Biobase.Primary.IUPAC where
import Control.Arrow ((***))
import Data.ByteString.Char8 (ByteString,unpack)
import Data.Char (toUpper)
import Data.FileEmbed (embedFile)
import Data.List (nub,sort)
import Data.String
import Data.Tuple (swap)
import qualified Data.Vector.Unboxed as VU
import Control.Category ((>>>))
import Biobase.Types.BioSequence
import Biobase.Primary.Letter
import Biobase.Primary.Nuc
import qualified Biobase.Primary.Nuc.RNA as R
data DEG
pattern A = Letter 0 :: Letter DEG n
pattern C = Letter 1 :: Letter DEG n
pattern G = Letter 2 :: Letter DEG n
pattern T = Letter 3 :: Letter DEG n
pattern U = Letter 4 :: Letter DEG n
pattern W = Letter 5 :: Letter DEG n
pattern S = Letter 6 :: Letter DEG n
pattern M = Letter 7 :: Letter DEG n
pattern K = Letter 8 :: Letter DEG n
pattern R = Letter 9 :: Letter DEG n
pattern Y = Letter 10 :: Letter DEG n
pattern B = Letter 11 :: Letter DEG n
pattern D = Letter 12 :: Letter DEG n
pattern H = Letter 13 :: Letter DEG n
pattern V = Letter 14 :: Letter DEG n
pattern N = Letter 15 :: Letter DEG n
instance Bounded (Letter DEG n) where
minBound = A
maxBound = N
instance Enum (Letter DEG n) where
succ N = error "succ/N:DEG"
succ (Letter x) = Letter $ x+1
pred A = error "pred/A:DEG"
pred (Letter x) = Letter $ x-1
toEnum k | k>=0 && k<=15 = Letter k
toEnum k = error $ "toEnum/Letter DEG " ++ show k
fromEnum (Letter k) = k
charDEG = toUpper >>> \case
'A' -> A
'C' -> C
'G' -> G
'T' -> T
'U' -> U
'W' -> W
'S' -> S
'M' -> M
'K' -> K
'R' -> R
'Y' -> Y
'B' -> B
'D' -> D
'H' -> H
'V' -> V
_ -> N
{-# INLINE charDEG #-}
degChar = \case
A -> 'A'
C -> 'C'
G -> 'G'
T -> 'T'
U -> 'U'
W -> 'W'
S -> 'S'
M -> 'M'
K -> 'K'
R -> 'R'
Y -> 'Y'
B -> 'B'
D -> 'D'
H -> 'H'
V -> 'V'
N -> 'N'
{-# INLINE degChar #-}
instance Show (Letter DEG n) where
show c = [degChar c]
degSeq :: MkPrimary p DEG n => p -> Primary DEG n
degSeq = primary
instance MkPrimary (VU.Vector Char) DEG n where
primary = VU.map charDEG
instance IsString [Letter DEG n] where
fromString = map charDEG
class Degenerate x where
fromDegenerate :: Char -> [x]
toDegenerate :: [x] -> Maybe Char
instance Degenerate Char where
fromDegenerate = maybe [] id . flip lookup iupacXDNAchars
toDegenerate = flip lookup (map swap iupacXDNAchars) . nub . sort
instance Degenerate (Letter RNA n) where
fromDegenerate 'T' = []
fromDegenerate x = map dnaTrna $ fromDegenerate x
toDegenerate xs | xs == [R.U] = Just 'U'
| otherwise = toDegenerate $ map rnaTdna xs
instance Degenerate (Letter DNA n) where
fromDegenerate 'U' = []
fromDegenerate x = map charDNA $ fromDegenerate x
toDegenerate = toDegenerate . map dnaChar
instance Degenerate (Letter XNA n) where
fromDegenerate = map charXNA . fromDegenerate
toDegenerate = toDegenerate . map xnaChar
iupacXDNAchars :: [(Char,String)]
iupacXDNAchars = map (go . words) . lines . unpack $ iupacNucleotides where
go [[c],cs] = (c,cs)
{-# NOINLINE iupacXDNAchars #-}
iupacNucleotides :: ByteString
iupacNucleotides = $(embedFile "sources/iupac-nucleotides")