module Biobase.Primary.IUPAC where
import Control.Arrow ((***))
import Data.ByteString.Char8 (ByteString,unpack)
import Data.Char (toUpper)
import Data.FileEmbed (makeRelativeToProject, embedFile)
import Data.List (nub,sort)
import Data.String
import Data.Tuple (swap)
import qualified Data.Vector.Unboxed as VU
import Control.Category ((>>>))
import Biobase.Types.BioSequence
import Biobase.Primary.Letter
import Biobase.Primary.Nuc
import qualified Biobase.Primary.Nuc.RNA as R
data DEG
pattern $bA :: Letter DEG n
$mA :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
A = Letter 0 :: Letter DEG n
pattern $bC :: Letter DEG n
$mC :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
C = Letter 1 :: Letter DEG n
pattern $bG :: Letter DEG n
$mG :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
G = Letter 2 :: Letter DEG n
pattern $bT :: Letter DEG n
$mT :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
T = Letter 3 :: Letter DEG n
pattern $bU :: Letter DEG n
$mU :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
U = Letter 4 :: Letter DEG n
pattern $bW :: Letter DEG n
$mW :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
W = Letter 5 :: Letter DEG n
pattern $bS :: Letter DEG n
$mS :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
S = Letter 6 :: Letter DEG n
pattern $bM :: Letter DEG n
$mM :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
M = Letter 7 :: Letter DEG n
pattern $bK :: Letter DEG n
$mK :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
K = Letter 8 :: Letter DEG n
pattern $bR :: Letter DEG n
$mR :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
R = Letter 9 :: Letter DEG n
pattern $bY :: Letter DEG n
$mY :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
Y = Letter 10 :: Letter DEG n
pattern $bB :: Letter DEG n
$mB :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
B = Letter 11 :: Letter DEG n
pattern $bD :: Letter DEG n
$mD :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
D = Letter 12 :: Letter DEG n
pattern $bH :: Letter DEG n
$mH :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
H = Letter 13 :: Letter DEG n
pattern $bV :: Letter DEG n
$mV :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
V = Letter 14 :: Letter DEG n
pattern $bN :: Letter DEG n
$mN :: forall r k (n :: k).
Letter DEG n -> (Void# -> r) -> (Void# -> r) -> r
N = Letter 15 :: Letter DEG n
instance Bounded (Letter DEG n) where
minBound :: Letter DEG n
minBound = Letter DEG n
forall k (n :: k). Letter DEG n
A
maxBound :: Letter DEG n
maxBound = Letter DEG n
forall k (n :: k). Letter DEG n
N
instance Enum (Letter DEG n) where
succ :: Letter DEG n -> Letter DEG n
succ Letter DEG n
N = [Char] -> Letter DEG n
forall a. HasCallStack => [Char] -> a
error [Char]
"succ/N:DEG"
succ (Letter Int
x) = Int -> Letter DEG n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter (Int -> Letter DEG n) -> Int -> Letter DEG n
forall a b. (a -> b) -> a -> b
$ Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
pred :: Letter DEG n -> Letter DEG n
pred Letter DEG n
A = [Char] -> Letter DEG n
forall a. HasCallStack => [Char] -> a
error [Char]
"pred/A:DEG"
pred (Letter Int
x) = Int -> Letter DEG n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter (Int -> Letter DEG n) -> Int -> Letter DEG n
forall a b. (a -> b) -> a -> b
$ Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1
toEnum :: Int -> Letter DEG n
toEnum Int
k | Int
kInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0 Bool -> Bool -> Bool
&& Int
kInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
15 = Int -> Letter DEG n
forall k seqTy (nameTy :: k). Int -> Letter seqTy nameTy
Letter Int
k
toEnum Int
k = [Char] -> Letter DEG n
forall a. HasCallStack => [Char] -> a
error ([Char] -> Letter DEG n) -> [Char] -> Letter DEG n
forall a b. (a -> b) -> a -> b
$ [Char]
"toEnum/Letter DEG " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
k
fromEnum :: Letter DEG n -> Int
fromEnum (Letter Int
k) = Int
k
charDEG :: Char -> Letter DEG n
charDEG = Char -> Char
toUpper (Char -> Char) -> (Char -> Letter DEG n) -> Char -> Letter DEG n
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> \case
Char
'A' -> Letter DEG n
forall k (n :: k). Letter DEG n
A
Char
'C' -> Letter DEG n
forall k (n :: k). Letter DEG n
C
Char
'G' -> Letter DEG n
forall k (n :: k). Letter DEG n
G
Char
'T' -> Letter DEG n
forall k (n :: k). Letter DEG n
T
Char
'U' -> Letter DEG n
forall k (n :: k). Letter DEG n
U
Char
'W' -> Letter DEG n
forall k (n :: k). Letter DEG n
W
Char
'S' -> Letter DEG n
forall k (n :: k). Letter DEG n
S
Char
'M' -> Letter DEG n
forall k (n :: k). Letter DEG n
M
Char
'K' -> Letter DEG n
forall k (n :: k). Letter DEG n
K
Char
'R' -> Letter DEG n
forall k (n :: k). Letter DEG n
R
Char
'Y' -> Letter DEG n
forall k (n :: k). Letter DEG n
Y
Char
'B' -> Letter DEG n
forall k (n :: k). Letter DEG n
B
Char
'D' -> Letter DEG n
forall k (n :: k). Letter DEG n
D
Char
'H' -> Letter DEG n
forall k (n :: k). Letter DEG n
H
Char
'V' -> Letter DEG n
forall k (n :: k). Letter DEG n
V
Char
_ -> Letter DEG n
forall k (n :: k). Letter DEG n
N
{-# INLINE charDEG #-}
degChar :: Letter DEG n -> Char
degChar = \case
Letter DEG n
A -> Char
'A'
Letter DEG n
C -> Char
'C'
Letter DEG n
G -> Char
'G'
Letter DEG n
T -> Char
'T'
Letter DEG n
U -> Char
'U'
Letter DEG n
W -> Char
'W'
Letter DEG n
S -> Char
'S'
Letter DEG n
M -> Char
'M'
Letter DEG n
K -> Char
'K'
Letter DEG n
R -> Char
'R'
Letter DEG n
Y -> Char
'Y'
Letter DEG n
B -> Char
'B'
Letter DEG n
D -> Char
'D'
Letter DEG n
H -> Char
'H'
Letter DEG n
V -> Char
'V'
Letter DEG n
N -> Char
'N'
{-# INLINE degChar #-}
instance Show (Letter DEG n) where
show :: Letter DEG n -> [Char]
show Letter DEG n
c = [Letter DEG n -> Char
forall k (n :: k). Letter DEG n -> Char
degChar Letter DEG n
c]
degSeq :: MkPrimary p DEG n => p -> Primary DEG n
degSeq :: p -> Primary DEG n
degSeq = p -> Primary DEG n
forall k c t (n :: k). MkPrimary c t n => c -> Primary t n
primary
instance MkPrimary (VU.Vector Char) DEG n where
primary :: Vector Char -> Primary DEG n
primary = (Char -> Letter DEG n) -> Vector Char -> Primary DEG n
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map Char -> Letter DEG n
forall k (n :: k). Char -> Letter DEG n
charDEG
instance IsString [Letter DEG n] where
fromString :: [Char] -> [Letter DEG n]
fromString = (Char -> Letter DEG n) -> [Char] -> [Letter DEG n]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Letter DEG n
forall k (n :: k). Char -> Letter DEG n
charDEG
class Degenerate x where
fromDegenerate :: Char -> [x]
toDegenerate :: [x] -> Maybe Char
instance Degenerate Char where
fromDegenerate :: Char -> [Char]
fromDegenerate = [Char] -> ([Char] -> [Char]) -> Maybe [Char] -> [Char]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] [Char] -> [Char]
forall a. a -> a
id (Maybe [Char] -> [Char])
-> (Char -> Maybe [Char]) -> Char -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> [(Char, [Char])] -> Maybe [Char])
-> [(Char, [Char])] -> Char -> Maybe [Char]
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> [(Char, [Char])] -> Maybe [Char]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup [(Char, [Char])]
iupacXDNAchars
toDegenerate :: [Char] -> Maybe Char
toDegenerate = ([Char] -> [([Char], Char)] -> Maybe Char)
-> [([Char], Char)] -> [Char] -> Maybe Char
forall a b c. (a -> b -> c) -> b -> a -> c
flip [Char] -> [([Char], Char)] -> Maybe Char
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup (((Char, [Char]) -> ([Char], Char))
-> [(Char, [Char])] -> [([Char], Char)]
forall a b. (a -> b) -> [a] -> [b]
map (Char, [Char]) -> ([Char], Char)
forall a b. (a, b) -> (b, a)
swap [(Char, [Char])]
iupacXDNAchars) ([Char] -> Maybe Char)
-> ([Char] -> [Char]) -> [Char] -> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [Char]
forall a. Eq a => [a] -> [a]
nub ([Char] -> [Char]) -> ([Char] -> [Char]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [Char]
forall a. Ord a => [a] -> [a]
sort
instance Degenerate (Letter RNA n) where
fromDegenerate :: Char -> [Letter RNA n]
fromDegenerate Char
'T' = []
fromDegenerate Char
x = (Letter DNA Any -> Letter RNA n)
-> [Letter DNA Any] -> [Letter RNA n]
forall a b. (a -> b) -> [a] -> [b]
map Letter DNA Any -> Letter RNA n
forall k1 k2 (n1 :: k1) (n2 :: k2). Letter DNA n1 -> Letter RNA n2
dnaTrna ([Letter DNA Any] -> [Letter RNA n])
-> [Letter DNA Any] -> [Letter RNA n]
forall a b. (a -> b) -> a -> b
$ Char -> [Letter DNA Any]
forall x. Degenerate x => Char -> [x]
fromDegenerate Char
x
toDegenerate :: [Letter RNA n] -> Maybe Char
toDegenerate [Letter RNA n]
xs | [Letter RNA n]
xs [Letter RNA n] -> [Letter RNA n] -> Bool
forall a. Eq a => a -> a -> Bool
== [Letter RNA n
forall k (n :: k). Letter RNA n
R.U] = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'U'
| Bool
otherwise = [Letter DNA Any] -> Maybe Char
forall x. Degenerate x => [x] -> Maybe Char
toDegenerate ([Letter DNA Any] -> Maybe Char) -> [Letter DNA Any] -> Maybe Char
forall a b. (a -> b) -> a -> b
$ (Letter RNA n -> Letter DNA Any)
-> [Letter RNA n] -> [Letter DNA Any]
forall a b. (a -> b) -> [a] -> [b]
map Letter RNA n -> Letter DNA Any
forall k1 k2 (n1 :: k1) (n2 :: k2). Letter RNA n1 -> Letter DNA n2
rnaTdna [Letter RNA n]
xs
instance Degenerate (Letter DNA n) where
fromDegenerate :: Char -> [Letter DNA n]
fromDegenerate Char
'U' = []
fromDegenerate Char
x = (Char -> Letter DNA n) -> [Char] -> [Letter DNA n]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Letter DNA n
forall k (n :: k). Char -> Letter DNA n
charDNA ([Char] -> [Letter DNA n]) -> [Char] -> [Letter DNA n]
forall a b. (a -> b) -> a -> b
$ Char -> [Char]
forall x. Degenerate x => Char -> [x]
fromDegenerate Char
x
toDegenerate :: [Letter DNA n] -> Maybe Char
toDegenerate = [Char] -> Maybe Char
forall x. Degenerate x => [x] -> Maybe Char
toDegenerate ([Char] -> Maybe Char)
-> ([Letter DNA n] -> [Char]) -> [Letter DNA n] -> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Letter DNA n -> Char) -> [Letter DNA n] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map Letter DNA n -> Char
forall k (n :: k). Letter DNA n -> Char
dnaChar
instance Degenerate (Letter XNA n) where
fromDegenerate :: Char -> [Letter XNA n]
fromDegenerate = (Char -> Letter XNA n) -> [Char] -> [Letter XNA n]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Letter XNA n
forall k (n :: k). Char -> Letter XNA n
charXNA ([Char] -> [Letter XNA n])
-> (Char -> [Char]) -> Char -> [Letter XNA n]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> [Char]
forall x. Degenerate x => Char -> [x]
fromDegenerate
toDegenerate :: [Letter XNA n] -> Maybe Char
toDegenerate = [Char] -> Maybe Char
forall x. Degenerate x => [x] -> Maybe Char
toDegenerate ([Char] -> Maybe Char)
-> ([Letter XNA n] -> [Char]) -> [Letter XNA n] -> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Letter XNA n -> Char) -> [Letter XNA n] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map Letter XNA n -> Char
forall k (n :: k). Letter XNA n -> Char
xnaChar
iupacXDNAchars :: [(Char,String)]
iupacXDNAchars :: [(Char, [Char])]
iupacXDNAchars = ([Char] -> (Char, [Char])) -> [[Char]] -> [(Char, [Char])]
forall a b. (a -> b) -> [a] -> [b]
map ([[Char]] -> (Char, [Char])
forall a. [[a]] -> (a, [a])
go ([[Char]] -> (Char, [Char]))
-> ([Char] -> [[Char]]) -> [Char] -> (Char, [Char])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [[Char]]
words) ([[Char]] -> [(Char, [Char])])
-> (ByteString -> [[Char]]) -> ByteString -> [(Char, [Char])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [[Char]]
lines ([Char] -> [[Char]])
-> (ByteString -> [Char]) -> ByteString -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Char]
unpack (ByteString -> [(Char, [Char])]) -> ByteString -> [(Char, [Char])]
forall a b. (a -> b) -> a -> b
$ ByteString
iupacNucleotides where
go :: [[a]] -> (a, [a])
go [[a
c],[a]
cs] = (a
c,[a]
cs)
{-# NOINLINE iupacXDNAchars #-}
iupacNucleotides :: ByteString
iupacNucleotides :: ByteString
iupacNucleotides = $(makeRelativeToProject "sources/iupac-nucleotides" >>= embedFile)