{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Fasta (readNextFastaEntry, loadFastaChrom) where
import SequenceFormats.Utils (Chrom (..))
import Control.Exception.Base (AssertionFailed (..),
throwIO)
import Control.Monad (void)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.State.Strict (runStateT)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (isSpace)
import Lens.Family2 (view)
import Pipes (Producer, next, runEffect,
(>->))
import Pipes.Attoparsec (parse)
import qualified Pipes.ByteString as P
import Pipes.Prelude (drain)
import System.IO (Handle, hPutStr, stderr)
loadFastaChrom :: Handle -> Chrom -> IO (Producer B.ByteString IO ())
loadFastaChrom :: Handle -> Chrom -> IO (Producer ByteString IO ())
loadFastaChrom Handle
refFileHandle Chrom
chrom = do
let prod :: Proxy x' x () ByteString IO ()
prod = Handle -> Producer' ByteString IO ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
P.fromHandle Handle
refFileHandle
Producer ByteString IO () -> IO (Producer ByteString IO ())
go Producer ByteString IO ()
Producer' ByteString IO ()
prod
where
go :: Producer ByteString IO () -> IO (Producer ByteString IO ())
go Producer ByteString IO ()
prod = do
(Chrom
chrom_, Producer ByteString IO (Producer ByteString IO ())
prod') <- Producer ByteString IO ()
-> IO (Chrom, Producer ByteString IO (Producer ByteString IO ()))
forall (m :: * -> *).
MonadIO m =>
Producer ByteString m ()
-> m (Chrom, Producer ByteString m (Producer ByteString m ()))
readNextFastaEntry Producer ByteString IO ()
prod
Handle -> String -> IO ()
hPutStr Handle
stderr (String
"found chromosome " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Chrom -> String
forall a. Show a => a -> String
show Chrom
chrom_)
if Chrom
chrom_ Chrom -> Chrom -> Bool
forall a. Eq a => a -> a -> Bool
== Chrom
chrom
then Producer ByteString IO () -> IO (Producer ByteString IO ())
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer ByteString IO (Producer ByteString IO ())
-> Producer ByteString IO ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Producer ByteString IO (Producer ByteString IO ())
prod')
else do
Producer ByteString IO ()
newProd <- Effect IO (Producer ByteString IO ())
-> IO (Producer ByteString IO ())
forall (m :: * -> *) r. Monad m => Effect m r -> m r
runEffect (Effect IO (Producer ByteString IO ())
-> IO (Producer ByteString IO ()))
-> Effect IO (Producer ByteString IO ())
-> IO (Producer ByteString IO ())
forall a b. (a -> b) -> a -> b
$ Producer ByteString IO (Producer ByteString IO ())
prod' Producer ByteString IO (Producer ByteString IO ())
-> Proxy () ByteString () X IO (Producer ByteString IO ())
-> Effect IO (Producer ByteString IO ())
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> Proxy () ByteString () X IO (Producer ByteString IO ())
forall (m :: * -> *) a r. Functor m => Consumer' a m r
drain
Producer ByteString IO () -> IO (Producer ByteString IO ())
go Producer ByteString IO ()
newProd
readNextFastaEntry :: (MonadIO m) => Producer B.ByteString m () ->
m (Chrom, Producer B.ByteString m (Producer B.ByteString m ()))
readNextFastaEntry :: Producer ByteString m ()
-> m (Chrom, Producer ByteString m (Producer ByteString m ()))
readNextFastaEntry Producer ByteString m ()
prod = do
(Maybe (Either ParsingError Chrom)
res, Producer ByteString m ()
rest) <- StateT
(Producer ByteString m ()) m (Maybe (Either ParsingError Chrom))
-> Producer ByteString m ()
-> m (Maybe (Either ParsingError Chrom), Producer ByteString m ())
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (Parser ByteString Chrom
-> Parser ByteString m (Maybe (Either ParsingError Chrom))
forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser ByteString Chrom
fastaHeaderLineParser) Producer ByteString m ()
prod
Chrom
header <- case Maybe (Either ParsingError Chrom)
res of
Maybe (Either ParsingError Chrom)
Nothing -> IO Chrom -> m Chrom
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Chrom -> m Chrom)
-> (AssertionFailed -> IO Chrom) -> AssertionFailed -> m Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AssertionFailed -> IO Chrom
forall e a. Exception e => e -> IO a
throwIO (AssertionFailed -> m Chrom) -> AssertionFailed -> m Chrom
forall a b. (a -> b) -> a -> b
$ String -> AssertionFailed
AssertionFailed String
"Could not find chromosome. Fasta file exhausted."
Just (Left ParsingError
e_) -> do
Either () (ByteString, Producer ByteString m ())
x <- Producer ByteString m ()
-> m (Either () (ByteString, Producer ByteString m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m ()
rest
case Either () (ByteString, Producer ByteString m ())
x of
(Right (ByteString
chunk, Producer ByteString m ()
_)) -> do
let msg :: String
msg = ParsingError -> String
forall a. Show a => a -> String
show ParsingError
e_ String -> String -> String
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
chunk
IO Chrom -> m Chrom
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Chrom -> m Chrom)
-> (AssertionFailed -> IO Chrom) -> AssertionFailed -> m Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. AssertionFailed -> IO Chrom
forall e a. Exception e => e -> IO a
throwIO (AssertionFailed -> m Chrom) -> AssertionFailed -> m Chrom
forall a b. (a -> b) -> a -> b
$ String -> AssertionFailed
AssertionFailed (String
"Fasta header parsing error: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
msg)
Either () (ByteString, Producer ByteString m ())
_ -> String -> m Chrom
forall a. HasCallStack => String -> a
error String
"should not happen"
Just (Right Chrom
h) -> Chrom -> m Chrom
forall (m :: * -> *) a. Monad m => a -> m a
return Chrom
h
(Chrom, Producer ByteString m (Producer ByteString m ()))
-> m (Chrom, Producer ByteString m (Producer ByteString m ()))
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
header, FoldLike
(Producer ByteString m (Producer ByteString m ()))
(Producer ByteString m ())
(Producer ByteString m ())
(Producer ByteString m (Producer ByteString m ()))
(Producer ByteString m (Producer ByteString m ()))
-> Producer ByteString m ()
-> Producer ByteString m (Producer ByteString m ())
forall a s t b. FoldLike a s t a b -> s -> a
view ((Word8 -> Bool)
-> Lens'
(Producer ByteString m ())
(Producer ByteString m (Producer ByteString m ()))
forall (m :: * -> *) x.
Monad m =>
(Word8 -> Bool)
-> Lens'
(Producer ByteString m x)
(Producer ByteString m (Producer ByteString m x))
P.break (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==Word8
62)) Producer ByteString m ()
rest Producer ByteString m (Producer ByteString m ())
-> Proxy () ByteString () ByteString m (Producer ByteString m ())
-> Producer ByteString m (Producer ByteString m ())
forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> (Word8 -> Bool)
-> Proxy () ByteString () ByteString m (Producer ByteString m ())
forall (m :: * -> *) r.
Monad m =>
(Word8 -> Bool) -> Pipe ByteString ByteString m r
P.filter (\Word8
c -> Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
10 Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
13))
fastaHeaderLineParser :: A.Parser Chrom
= do
Char
_ <- Char -> Parser Char
A.char Char
'>'
ByteString
chrom <- (Char -> Bool) -> Parser ByteString
A.takeWhile ((Char -> Bool) -> Parser ByteString)
-> (Char -> Bool) -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace
(Char -> Bool) -> Parser ()
A.skipWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r')
Parser ()
A.endOfLine
Chrom -> Parser ByteString Chrom
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom -> Parser ByteString Chrom)
-> (ByteString -> Chrom) -> ByteString -> Parser ByteString Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Chrom
Chrom (ByteString -> Parser ByteString Chrom)
-> ByteString -> Parser ByteString Chrom
forall a b. (a -> b) -> a -> b
$ ByteString
chrom