module Bio.Sequence.Fasta
( Sequence(..)
, readFasta, writeFasta, hReadFasta, hWriteFasta
, countSeqs
, mkSeqs
, toStr, seqid, seqheader, seqdata, seqlength
) where
import Data.Char (chr, isSpace)
import Data.List (groupBy, intersperse)
import System.IO
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.ByteString.Lazy as BB
import Data.ByteString.Lazy.Char8 (ByteString)
import Bio.Core.Sequence
splitsAt :: Offset -> ByteString -> [ByteString]
splitsAt n s = let (s1,s2) = B.splitAt (unOff n) s
in if B.null s2 then [s1] else s1 : splitsAt n s2
data Sequence = Seq SeqLabel SeqData (Maybe QualData)
deriving (Show, Eq)
instance BioSeq Sequence where
seqid (Seq lab seq mqual) = SeqLabel {unSL = B.takeWhile (/= ' ') $ unSL lab}
seqheader (Seq lab seq mqual) = lab
seqdata (Seq lab seq mqual) = seq
seqlength (Seq lab seq mqual) = Offset {unOff = B.length $ unSD seq}
toStr :: SeqData -> String
toStr = B.unpack . unSD
readFasta :: FilePath -> IO [Sequence]
readFasta f = (mkSeqs . B.lines) `fmap` B.readFile f
writeFasta :: FilePath -> [Sequence] -> IO ()
writeFasta f ss = do
h <- openFile f WriteMode
hWriteFasta h ss
hClose h
hReadFasta :: Handle -> IO [Sequence]
hReadFasta h = (mkSeqs . B.lines) `fmap` B.hGetContents h
hWriteFasta :: Handle -> [Sequence] -> IO ()
hWriteFasta h = mapM_ (wFasta h)
wHead :: Handle -> SeqLabel -> IO ()
wHead h l = do
B.hPut h $ B.pack ">"
B.hPut h (unSL l)
B.hPut h $ B.pack "\n"
wFasta :: Handle -> Sequence -> IO ()
wFasta h (Seq l d _) = do
wHead h l
let ls = splitsAt 60 (unSD d)
mapM_ (B.hPut h) $ intersperse (B.pack "\n") ls
B.hPut h $ B.pack "\n"
mkSeqs :: [ByteString] -> [Sequence]
mkSeqs = map mkSeq . blocks
mkSeq :: [ByteString] -> Sequence
mkSeq (l:ls) = Seq (SeqLabel (B.drop 1 l))
(SeqData (B.filter (not . isSpace) $ B.concat $ takeWhile isSeq ls))
Nothing
where isSeq s = (not . B.null) s &&
(flip elem (['A'..'Z']++['a'..'z']) . B.head) s
mkSeq [] = error "empty input to mkSeq"
blocks :: [ByteString] -> [[ByteString]]
blocks = groupBy (const (('>' /=) . B.head))
. filter ((/='#') . B.head)
. dropWhile (('>' /=) . B.head)
. filter (not . B.null)
countSeqs :: FilePath -> IO Int
countSeqs f = do
ss <- B.readFile f
let hdrs = filter (('>'==).B.head) $ filter (not . B.null) $ B.lines ss
return (length hdrs)