module Bio.Core.Sequence (
Qual (..), Offset (..),
SeqData (..), SeqLabel (..), QualData (..),
BioSeq (..), BioSeqQual (..),
toFasta, toFastaQual, toFastQ,
module Data.Stringable
) where
import qualified Data.ByteString.Lazy.Char8 as LC
import qualified Data.ByteString.Lazy as L
import Data.Int
import Data.Typeable (Typeable)
import Data.Word
import Data.String
import Data.Stringable hiding (length)
import Data.Monoid
-- | Sequence data, held as a lazy 'LC.ByteString'.  The newtype keeps it
-- distinct from 'SeqLabel', which wraps the same representation.
newtype SeqData = SeqData
  { unSD :: LC.ByteString -- ^ Unwrap to the underlying lazy ByteString.
  }
  deriving (Eq, Ord, IsString, Show, Typeable, Stringable)
-- | Sequence data concatenates exactly like the wrapped lazy ByteString:
-- the empty sequence is the identity and 'mappend' appends the right operand
-- after the left one.
instance Monoid SeqData where
  mempty = SeqData LC.empty
  mappend lhs rhs = SeqData (unSD lhs `LC.append` unSD rhs)
  -- Unwrap every piece and join them with a single ByteString concat.
  mconcat pieces = SeqData (LC.concat [unSD p | p <- pieces])
-- | A sequence label, held as a lazy 'LC.ByteString'.  Its 'Monoid' instance
-- treats the first whitespace-delimited word specially when combining labels.
newtype SeqLabel = SeqLabel
  { unSL :: LC.ByteString -- ^ Unwrap to the underlying lazy ByteString.
  }
  deriving (Eq, Ord, IsString, Show, Typeable, Stringable)
-- | Labels combine word-wise.  The first whitespace-delimited word of each
-- label is joined with a colon to form the new first word; the remaining
-- words of both labels follow, separated by a lone @:@ word.  For example
-- @"a x"@ combined with @"b y"@ gives @"a:b x : y"@.
--
-- A label containing no words (empty or whitespace only) acts as the
-- identity: the other operand is returned unchanged.  This makes 'mempty' a
-- genuine identity and lets the default 'mconcat' (a right fold that ends in
-- 'mempty') succeed, where it previously failed on an irrefutable pattern
-- match against the empty word list.
instance Monoid SeqLabel where
  mempty = SeqLabel mempty
  mappend (SeqLabel s1) (SeqLabel s2) =
    case (LC.words s1, LC.words s2) of
      -- A blank operand contributes nothing; keep the other label verbatim.
      ([], _) -> SeqLabel s2
      (_, []) -> SeqLabel s1
      (i1 : r1, i2 : r2) ->
        let sid = LC.concat [i1, colon, i2]
        in SeqLabel (LC.unwords ([sid] ++ r1 ++ [colon] ++ r2))
    where
      colon = LC.pack ":"
-- | A single quality value stored as a 'Word8'.  The numeric classes are
-- derived, so numeric literals and ordinary arithmetic work on it directly
-- (with 'Word8' semantics).
newtype Qual = Qual
  { unQual :: Word8 -- ^ Unwrap to the raw 'Word8'.
  }
  deriving (Show, Eq, Ord, Num, Enum, Real, Integral, Typeable)
-- | Quality data, held as a lazy 'L.ByteString' of raw bytes.
newtype QualData = QualData
  { unQD :: L.ByteString -- ^ Unwrap to the underlying lazy ByteString.
  }
  deriving (Eq, Ord, Show, Typeable, Stringable)
-- | Quality data concatenates exactly like the wrapped lazy ByteString:
-- the empty value is the identity and 'mappend' appends the right operand
-- after the left one.
instance Monoid QualData where
  mempty = QualData L.empty
  mappend lhs rhs = QualData (unQD lhs `L.append` unQD rhs)
  -- Unwrap every piece and join them with a single ByteString concat.
  mconcat pieces = QualData (L.concat [unQD p | p <- pieces])
-- | An offset stored as an 'Int64'; also the result type of 'seqlength'.
-- The numeric classes are derived, so numeric literals and ordinary
-- arithmetic work on it directly.
newtype Offset = Offset
  { unOff :: Int64 -- ^ Unwrap to the raw 'Int64'.
  }
  deriving (Show, Eq, Ord, Num, Enum, Real, Integral, Typeable)
-- | Interface for types that carry a labelled sequence.
--
-- 'seqid' and 'seqlabel' default to each other, and 'seqheader' defaults to
-- 'seqlabel'.  An instance must therefore define at least one of 'seqid' or
-- 'seqlabel'; if it defines neither, all three label accessors loop forever.
-- 'seqdata' and 'seqlength' have no defaults.  The @MINIMAL@ pragma below
-- records this so GHC can warn about incomplete instances.
class BioSeq s where
  -- | Label used as the record identifier (e.g. by 'toFastQ').
  -- Defaults to 'seqlabel'.
  seqid :: s -> SeqLabel
  seqid = seqlabel

  -- | Label used as the header line by 'toFasta' and 'toFastaQual'.
  -- Defaults to 'seqlabel'.
  seqheader :: s -> SeqLabel
  seqheader = seqlabel

  -- | The sequence data itself.
  seqdata :: s -> SeqData

  -- | Sequence length as an 'Offset' (presumably the length of 'seqdata';
  -- the class itself does not enforce that).
  seqlength :: s -> Offset

  -- | General label accessor.  Defaults to 'seqid'.
  seqlabel :: s -> SeqLabel
  seqlabel = seqid

  {-# MINIMAL (seqid | seqlabel), seqdata, seqlength #-}
-- | Render a sequence in FASTA layout: a @>@ followed by the 'seqheader' and
-- a newline, then the 'seqdata' broken into lines of at most 60 characters,
-- each terminated by a newline.  Empty sequence data yields only the header
-- line.
toFasta :: BioSeq s => s -> LC.ByteString
toFasta s = LC.concat (marker : unSL (seqheader s) : newline : body)
  where
    marker = LC.pack ">"
    newline = LC.pack "\n"
    body = chunked (unSD (seqdata s))
    -- Peel off 60 characters at a time until nothing is left.
    chunked bytes
      | LC.null bytes = []
      | otherwise = row : newline : chunked remaining
      where
        (row, remaining) = LC.splitAt 60 bytes
-- | Sequences that additionally carry quality data.  Every instance must
-- also be a 'BioSeq'.
class BioSeq s => BioSeqQual s where
  -- | The quality data associated with the sequence.
  seqqual :: s -> QualData
-- | Render the quality data in FASTA-style layout: a @>@ followed by the
-- 'seqheader' and a newline, then the quality bytes written as decimal
-- numbers separated by single spaces, at most 20 per line, each line
-- terminated by a newline.  Empty quality data yields only the header line.
toFastaQual :: BioSeqQual s => s -> LC.ByteString
toFastaQual s = LC.concat (marker : unSL (seqheader s) : newline : rows scores)
  where
    marker = LC.pack ">"
    newline = LC.pack "\n"
    scores = L.unpack (unQD (seqqual s))
    -- Emit 20 values per row until the list is exhausted.
    rows qs
      | null qs = []
      | otherwise = LC.pack (unwords [show q | q <- row]) : newline : rows remaining
      where
        (row, remaining) = splitAt 20 qs
-- | Render a sequence with qualities as a four-line FastQ record:
--
-- 1. @\@@ followed by the 'seqid'
-- 2. the 'seqdata'
-- 3. @+@ followed by the 'seqid'
-- 4. the quality bytes, each increased by 33
--
-- Every line, including the last, is newline-terminated.  The addition is
-- performed on bytes, so it wraps around for values above 222.
toFastQ :: BioSeqQual s => s -> LC.ByteString
toFastQ s = LC.unlines [titleLine, residues, plusLine, encodedQuals]
  where
    ident = unSL (seqid s)
    titleLine = '@' `LC.cons` ident
    residues = unSD (seqdata s)
    plusLine = '+' `LC.cons` ident
    encodedQuals = L.map (\q -> q + 33) (unQD (seqqual s))