-- | Encode the allowed amino acids in a better way.

module Biobase.Types.AminoAcidSequence where

import           Control.DeepSeq
import           Control.Lens
import           Data.ByteString (ByteString)
import           Data.Char (ord,chr,toUpper)
import           Data.Data (Data)
import           Data.Typeable (Typeable)
import           GHC.Exts (IsString(..))
import           GHC.Generics (Generic)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.UTF8 as BSU
import           Test.QuickCheck (Arbitrary(..))
import qualified Test.QuickCheck as TQ



-- | A short amino acid sequence.
--
-- It is an instance of 'Ixed' to allow @AAseq (BS.pack "cag") ^? ix 2 == Just 'g'@.

newtype AAseq = AAseq
  { _aaseq :: ByteString
    -- ^ The wrapped bytes. 'mkAAseq' stores one upper-case one-letter code
    -- per position; the raw constructor performs no normalisation.
  }
  deriving (Data, Typeable, Generic, Eq, Ord, Read, Show)
-- Template Haskell splice: generates the @aaseq@ lens for the '_aaseq' field.
makeLenses ''AAseq

-- No methods are given, so the class defaults are used; presumably this is
-- the 'Generic'-based @rnf@ from deepseq, since 'AAseq' derives 'Generic'.
instance NFData AAseq

-- Positions inside an 'AAseq' are addressed with plain 'Int' offsets.
type instance Index AAseq = Int

-- Every position is presented to the caller as a single 'Char'.
type instance IxValue AAseq = Char

-- | Indexing goes through the @aaseq@ lens into the wrapped 'ByteString'.
-- The element found there is converted to a 'Char' on the way out
-- (@chr . fromIntegral@) and back again on the way in (@fromIntegral . ord@).
instance Ixed AAseq where
  ix pos = aaseq . ix pos . iso (\w -> chr (fromIntegral w)) (\c -> fromIntegral (ord c))
  {-# Inline ix #-}

-- Standalone-derived 'Reversing' instance.
-- NOTE(review): presumably newtype-derived from the 'ByteString' instance, so
-- @reversed@ flips the residue order -- confirm the enabled deriving extensions.
deriving instance Reversing AAseq

-- | Smart constructor for 'AAseq'.
--
-- Every character of the input is upper-cased. Any character that is then
-- not one of the 22 accepted one-letter codes (@ARNDCEQGHILKMFPSTWYVUO@) is
-- replaced by @\'X\'@. The length of the input is preserved.
mkAAseq :: ByteString -> AAseq
mkAAseq = AAseq . BS.map (normalise . toUpper)
  -- A single traversal: upper-casing and validation are fused into one map.
  where
    -- Keep accepted residues, substitute the unknown marker for the rest.
    normalise :: Char -> Char
    normalise c
      | c `elem` residues = c
      | otherwise         = 'X'
    -- The accepted one-letter amino acid codes.
    residues :: String
    residues = "ARNDCEQGHILKMFPSTWYVUO"

-- | A 'String' is packed into a 'ByteString' and then passed through
-- 'mkAAseq', so it receives the same normalisation as any other input.
instance IsString AAseq where
  fromString str = mkAAseq (BS.pack str)

-- | Random sequences of 0 to 100 residues, each drawn from the accepted
-- one-letter codes.
instance Arbitrary AAseq where
  arbitrary = do
    k  <- TQ.choose (0,100)
    xs <- TQ.vectorOf k $ TQ.elements "ARNDCEQGHILKMFPSTWYVUO"
    return . AAseq $ BS.pack xs
  -- The previous definition, @shrink = view (to shrink)@, referred back to
  -- this very method and therefore never terminated. Shrink structurally
  -- instead: only drop residues (@const []@ disables per-character
  -- shrinking), so every candidate still consists of accepted codes.
  shrink (AAseq bs) =
    [ AAseq (BS.pack ys) | ys <- TQ.shrinkList (const []) (BS.unpack bs) ]