module Biobase.Types.BioSequence where
import Control.DeepSeq
import Control.Lens
import Data.ByteString.Char8 (ByteString)
import Data.Char (ord,chr,toUpper)
import Data.Data (Data)
import Data.Typeable (Typeable)
import Data.Void
import GHC.Exts (IsString(..))
import GHC.Generics (Generic)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.UTF8 as BSU
import qualified Streaming.Prelude as SP
import qualified Streaming as S
import qualified Test.QuickCheck as TQ
import Test.QuickCheck (Arbitrary(..))
import Biobase.Types.Location
import Biobase.Types.Strand
import qualified Biobase.Types.Index as BTI
newtype SequenceIdentifier (which ∷ k) = SequenceIdentifier { _sequenceIdentifier ∷ ByteString }
deriving (Data, Typeable, Generic, Eq, Ord, Read, Show)
makeWrapped ''SequenceIdentifier
makePrisms ''SequenceIdentifier
instance NFData (SequenceIdentifier w)
instance IsString (SequenceIdentifier w) where
fromString = SequenceIdentifier . BSU.fromString
data RNA
data DNA
data XNA
data AA
newtype BioSequence (which ∷ k) = BioSequence {_bioSequence ∷ ByteString}
deriving (Data, Typeable, Generic, Eq, Ord, Read, Show, Semigroup)
makeWrapped ''BioSequence
makePrisms ''BioSequence
instance NFData (BioSequence w)
type instance Index (BioSequence w) = Int
type instance IxValue (BioSequence w) = Char
instance Ixed (BioSequence w) where
ix k = _BioSequence . ix k . iso (chr . fromIntegral) (fromIntegral . ord)
{-# Inline ix #-}
deriving instance Reversing (BioSequence w)
instance IsString (BioSequence Void) where
fromString = BioSequence . BS.pack
mkRNAseq ∷ ByteString → BioSequence RNA
mkRNAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` acgu = x
| otherwise = 'N'
acgu ∷ String
acgu = "ACGU"
instance IsString (BioSequence RNA) where
fromString = mkRNAseq . BS.pack
instance Arbitrary (BioSequence RNA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ACGU"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
mkDNAseq ∷ ByteString → (BioSequence DNA)
mkDNAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` acgt = x
| otherwise = 'N'
acgt ∷ String
acgt = "ACGT"
instance IsString (BioSequence DNA) where
fromString = mkDNAseq . BS.pack
instance Arbitrary (BioSequence DNA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ACGT"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
mkXNAseq ∷ ByteString → (BioSequence XNA)
mkXNAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` acgtu = x
| otherwise = 'N'
acgtu ∷ String
acgtu = "ACGTU"
instance IsString (BioSequence XNA) where
fromString = mkXNAseq . BS.pack
instance Arbitrary (BioSequence XNA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ACGTU"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
mkAAseq ∷ ByteString → (BioSequence AA)
mkAAseq = BioSequence . BS.map go . BS.map toUpper
where go x | x `elem` aas = x
| otherwise = 'X'
aas ∷ String
aas = "ARNDCEQGHILKMFPSTWYVUO"
instance IsString (BioSequence AA) where
fromString = mkAAseq . BS.pack
instance Arbitrary (BioSequence AA) where
arbitrary = do
k ← TQ.choose (0,100)
xs ← TQ.vectorOf k $ TQ.elements "ARNDCEQGHILKMFPSTWYVUO"
return . BioSequence $ BS.pack xs
shrink = view (to shrink)
data BioSequenceWindow w ty loc = BioSequenceWindow
{ _bswIdentifier ∷ !(SequenceIdentifier w)
, _bswPrefix ∷ !(BioSequence ty)
, _bswSequence ∷ !(BioSequence ty)
, _bswSuffix ∷ !(BioSequence ty)
, _bswLocation ∷ !loc
}
deriving (Data, Typeable, Generic, Eq, Ord, Read, Show)
makeLenses ''BioSequenceWindow
instance (Reversing loc) ⇒ Reversing (BioSequenceWindow w ty loc) where
{-# Inlinable reversing #-}
reversing bsw = bsw
& bswPrefix .~ (bsw^.bswSuffix.reversed)
& bswSuffix .~ (bsw^.bswPrefix.reversed)
& bswSequence .~ (bsw^.bswSequence.reversed)
& bswLocation .~ (bsw^.bswLocation.reversed)
bswFullSequence ∷ Lens' (BioSequenceWindow w ty k) (BioSequence ty)
{-# Inlinable bswFullSequence #-}
bswFullSequence = lens f t
where f bsw = bsw^.bswPrefix <> bsw^.bswSequence <> bsw^.bswSuffix
t bsw (BioSequence s) =
let (pfx,ifxsfx) = BS.splitAt (bsw^.bswPrefix._BioSequence.to BS.length) s
(ifx,sfx) = BS.splitAt (bsw^.bswSequence._BioSequence.to BS.length) ifxsfx
in bsw & bswPrefix._BioSequence .~ pfx
& bswSequence._BioSequence .~ ifx
& bswSuffix._BioSequence .~ sfx
attachPrefixes ∷ (Monad m) ⇒ SP.Stream (SP.Of (BioSequenceWindow w ty k)) m r → SP.Stream (SP.Of (BioSequenceWindow w ty k)) m r
{-# Inlinable attachPrefixes #-}
attachPrefixes =
let go (Left pfx) w = Right (set bswPrefix pfx w)
go (Right p) w = Right (set bswPrefix (view bswSequence p) w)
in SP.map (\(Right w) → w) . SP.drop 1 . SP.scan go (Left $ BioSequence "") id
attachSuffixes ∷ (Monad m) ⇒ SP.Stream (SP.Of (BioSequenceWindow w ty k)) m r → SP.Stream (SP.Of (BioSequenceWindow w ty k)) m r
{-# Inlinable attachSuffixes #-}
attachSuffixes xs = undefined
rna2dna ∷ Char → Char
rna2dna = \case
'U' → 'T'
'u' → 't'
x → x
{-# Inline rna2dna #-}
rnaComplement ∷ Char → Char
rnaComplement = \case
'A' → 'U'
'a' → 'u'
'C' → 'G'
'c' → 'g'
'G' → 'C'
'g' → 'c'
'U' → 'A'
'u' → 'a'
x → x
{-# Inline rnaComplement #-}
dna2rna ∷ Char → Char
dna2rna = \case
'T' → 'U'
't' → 'u'
x → x
{-# Inline dna2rna #-}
dnaComplement ∷ Char → Char
dnaComplement = \case
'A' → 'T'
'a' → 't'
'C' → 'G'
'c' → 'g'
'G' → 'C'
'g' → 'c'
'T' → 'A'
't' → 'a'
x → x
{-# Inline dnaComplement #-}
class Transcribe f where
type TranscribeTo f ∷ *
transcribe ∷ Iso' f (TranscribeTo f)
instance Transcribe (BioSequence DNA) where
type TranscribeTo (BioSequence DNA) = (BioSequence RNA)
transcribe = iso (over _BioSequence (BS.map dna2rna)) (over _BioSequence (BS.map rna2dna))
{-# Inline transcribe #-}
instance Transcribe (BioSequence RNA) where
type TranscribeTo (BioSequence RNA) = (BioSequence DNA)
transcribe = from transcribe
{-# Inline transcribe #-}
class Complement f where
complement ∷ Iso' f f
instance Complement (BioSequence DNA) where
{-# Inline complement #-}
complement = let f = (over _BioSequence (BS.map dnaComplement))
{-# Inline f #-}
in iso f f
instance Complement (BioSequence RNA) where
{-# Inline complement #-}
complement = let f = (over _BioSequence (BS.map rnaComplement))
{-# Inline f #-}
in iso f f
instance (Complement (BioSequence ty)) ⇒ Complement (BioSequenceWindow w ty k) where
{-# Inline complement #-}
complement = let g = view complement
f = (\w → over bswSuffix g . over bswPrefix g . over bswSequence g $ w)
{-# Inline g #-}
{-# Inline f #-}
in iso f f
reverseComplement ∷ (Complement f, Reversing f) ⇒ Iso' f f
{-# Inline reverseComplement #-}
reverseComplement = reversed . complement