{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Utils (liftParsingErrors,
consumeProducer, readFileProd,
SeqFormatException(..),
Chrom(..), word) where
import Control.Error (readErr)
import Control.Exception (Exception, throw)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.Trans.Class (lift)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (isSpace)
import Pipes (Producer, next)
import Pipes.Attoparsec (ParsingError(..), parsed)
import qualified Pipes.ByteString as PB
import qualified Pipes.Safe as PS
import qualified Pipes.Safe.Prelude as PS
import System.IO (IOMode(..))
data SeqFormatException = SeqFormatException String
deriving (Int -> SeqFormatException -> ShowS
[SeqFormatException] -> ShowS
SeqFormatException -> String
(Int -> SeqFormatException -> ShowS)
-> (SeqFormatException -> String)
-> ([SeqFormatException] -> ShowS)
-> Show SeqFormatException
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SeqFormatException] -> ShowS
$cshowList :: [SeqFormatException] -> ShowS
show :: SeqFormatException -> String
$cshow :: SeqFormatException -> String
showsPrec :: Int -> SeqFormatException -> ShowS
$cshowsPrec :: Int -> SeqFormatException -> ShowS
Show, SeqFormatException -> SeqFormatException -> Bool
(SeqFormatException -> SeqFormatException -> Bool)
-> (SeqFormatException -> SeqFormatException -> Bool)
-> Eq SeqFormatException
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SeqFormatException -> SeqFormatException -> Bool
$c/= :: SeqFormatException -> SeqFormatException -> Bool
== :: SeqFormatException -> SeqFormatException -> Bool
$c== :: SeqFormatException -> SeqFormatException -> Bool
Eq)
instance Exception SeqFormatException
newtype Chrom = Chrom {Chrom -> ByteString
unChrom :: B.ByteString} deriving (Chrom -> Chrom -> Bool
(Chrom -> Chrom -> Bool) -> (Chrom -> Chrom -> Bool) -> Eq Chrom
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Chrom -> Chrom -> Bool
$c/= :: Chrom -> Chrom -> Bool
== :: Chrom -> Chrom -> Bool
$c== :: Chrom -> Chrom -> Bool
Eq)
instance Show Chrom where
show :: Chrom -> String
show (Chrom ByteString
c) = ByteString -> String
B.unpack ByteString
c
instance Ord Chrom where
compare :: Chrom -> Chrom -> Ordering
compare (Chrom ByteString
c1) (Chrom ByteString
c2) =
let [ByteString
c1NoChr, ByteString
c2NoChr] = (ByteString -> ByteString) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
removeChr [ByteString
c1, ByteString
c2]
[ByteString
c1XYMTconvert, ByteString
c2XYMTconvert] = (ByteString -> ByteString) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
convertXYMT [ByteString
c1NoChr, ByteString
c2NoChr]
in case (,) (Int -> Int -> (Int, Int))
-> Either SeqFormatException Int
-> Either SeqFormatException (Int -> (Int, Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Either SeqFormatException Int
readChrom ByteString
c1XYMTconvert Either SeqFormatException (Int -> (Int, Int))
-> Either SeqFormatException Int
-> Either SeqFormatException (Int, Int)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ByteString -> Either SeqFormatException Int
readChrom ByteString
c2XYMTconvert of
Left SeqFormatException
e -> SeqFormatException -> Ordering
forall a e. Exception e => e -> a
throw SeqFormatException
e
Right (Int
cn1, Int
cn2) -> Int
cn1 Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` Int
cn2
where
removeChr :: B.ByteString -> B.ByteString
removeChr :: ByteString -> ByteString
removeChr ByteString
c = if Int -> ByteString -> ByteString
B.take Int
3 ByteString
c ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"chr" then Int -> ByteString -> ByteString
B.drop Int
3 ByteString
c else ByteString
c
convertXYMT :: B.ByteString -> B.ByteString
convertXYMT :: ByteString -> ByteString
convertXYMT ByteString
c = case ByteString
c of
ByteString
"X" -> ByteString
"23"
ByteString
"Y" -> ByteString
"24"
ByteString
"MT" -> ByteString
"90"
ByteString
n -> ByteString
n
readChrom :: B.ByteString -> Either SeqFormatException Int
readChrom :: ByteString -> Either SeqFormatException Int
readChrom ByteString
c = SeqFormatException -> String -> Either SeqFormatException Int
forall a e. Read a => e -> String -> Either e a
readErr (String -> SeqFormatException
SeqFormatException (String -> SeqFormatException) -> String -> SeqFormatException
forall a b. (a -> b) -> a -> b
$ String
"cannot parse chromosome " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
c) (String -> Either SeqFormatException Int)
-> (ByteString -> String)
-> ByteString
-> Either SeqFormatException Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B.unpack (ByteString -> Either SeqFormatException Int)
-> ByteString -> Either SeqFormatException Int
forall a b. (a -> b) -> a -> b
$ ByteString
c
liftParsingErrors :: (MonadThrow m) =>
Either (ParsingError, Producer B.ByteString m r) () -> Producer a m ()
liftParsingErrors :: Either (ParsingError, Producer ByteString m r) ()
-> Producer a m ()
liftParsingErrors Either (ParsingError, Producer ByteString m r) ()
res = case Either (ParsingError, Producer ByteString m r) ()
res of
Left (ParsingError [String]
_ String
msg, Producer ByteString m r
restProd) -> do
Either r (ByteString, Producer ByteString m r)
x <- m (Either r (ByteString, Producer ByteString m r))
-> Proxy
X () () a m (Either r (ByteString, Producer ByteString m r))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either r (ByteString, Producer ByteString m r))
-> Proxy
X () () a m (Either r (ByteString, Producer ByteString m r)))
-> m (Either r (ByteString, Producer ByteString m r))
-> Proxy
X () () a m (Either r (ByteString, Producer ByteString m r))
forall a b. (a -> b) -> a -> b
$ Producer ByteString m r
-> m (Either r (ByteString, Producer ByteString m r))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m r
restProd
case Either r (ByteString, Producer ByteString m r)
x of
Right (ByteString
chunk, Producer ByteString m r
_) -> do
let msg' :: String
msg' = String
"Error while parsing: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
msg String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
". Error occurred when trying to parse this chunk: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
forall a. Show a => a -> String
show ByteString
chunk
SeqFormatException -> Producer a m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (SeqFormatException -> Producer a m ())
-> SeqFormatException -> Producer a m ()
forall a b. (a -> b) -> a -> b
$ String -> SeqFormatException
SeqFormatException String
msg'
Left r
_ -> String -> Producer a m ()
forall a. HasCallStack => String -> a
error String
"should not happen"
Right () -> () -> Producer a m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
consumeProducer :: (MonadThrow m) => A.Parser a -> Producer B.ByteString m () -> Producer a m ()
consumeProducer :: Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser a
parser Producer ByteString m ()
prod = Parser a
-> Producer ByteString m ()
-> Producer
a m (Either (ParsingError, Producer ByteString m ()) ())
forall (m :: * -> *) a b r.
(Monad m, ParserInput a) =>
Parser a b
-> Producer a m r
-> Producer b m (Either (ParsingError, Producer a m r) r)
parsed Parser a
parser Producer ByteString m ()
prod Producer a m (Either (ParsingError, Producer ByteString m ()) ())
-> (Either (ParsingError, Producer ByteString m ()) ()
-> Producer a m ())
-> Producer a m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either (ParsingError, Producer ByteString m ()) ()
-> Producer a m ()
forall (m :: * -> *) r a.
MonadThrow m =>
Either (ParsingError, Producer ByteString m r) ()
-> Producer a m ()
liftParsingErrors
readFileProd :: (PS.MonadSafe m) => FilePath -> Producer B.ByteString m ()
readFileProd :: String -> Producer ByteString m ()
readFileProd String
f = String
-> IOMode
-> (Handle -> Producer ByteString m ())
-> Producer ByteString m ()
forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
f IOMode
ReadMode Handle -> Producer ByteString m ()
forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle
word :: A.Parser B.ByteString
word :: Parser ByteString
word = (Char -> Bool) -> Parser ByteString
A.takeTill Char -> Bool
isSpace