-- | Parser for @FastA/FastQ@, 'ByteStream' style, written such that it
-- works well with module "Bio.Bam".
--
-- Input streams are broken into numbered lines, then into records.
-- Records can start with empty lines, which are ignored, or random
-- junk, which is ignored, but results in a warning, followed by a
-- header indicating either a @FastA@ (begins with @\>@ or @;@) or
-- @FastQ@ record (begins with @\@@).  More description lines begining
-- with @;@ are allowed, and silently ignored.  All following lines not
-- starting with @+@, @\>@, @;@ or @\@@ are sequence lines.  (Only) in a
-- @FastQ@ record, this is followed by a separator line starting with a
-- @+@, which is ignored, and exactly as many quality lines as there
-- were sequence lines.  A missing separator results in a warning and
-- the record being parsed without quality scores.
--
-- In sequence lines, IUPAC-IUB ambiguity codes are converted to
-- 'Nucleotides', white space is skipped silently.  Any other character
-- becomes an unknown base ('=' in SAM) and a warning is emitted.  Note
-- that downstream tools are unlikely to handle the resulting unknown
-- bases and/or empty records gracefully.  If the quality lines do not
-- have the same total length as the sequence lines (this includes
-- missing quality lines due to end-of-stream), a warning is emitted,
-- and the record receives no quality scores (just as if it was a
-- @FastA@ record).  Else, if the quality lines have a different layout
-- than the sequence lines, a warning is emitted, but they are still
-- used.
--
-- Quality scores must be stored as raw bytes with offset 33.  (Other
-- variants, like 454's ASCII qualities and Solexa's raw bytes with
-- offset 64 are difficult to detect, and extinct in the wild anyway.)
-- If the second word of the header stores multiple fields, we try to
-- extract Illumina's \"QC failed\" flag and either an index sequence or
-- a read group name from it.
--
-- Other flags are commonly encoded into the sequence names.  We do not
-- handle those here, but most of the conventions at MPI EVAN are dealt
-- with by 'Bio.Bam.Evan.removeWarts'.

module Bio.Bam.Fastq
    ( parseFastq
    , EmptyRecord(..)
    , IncoherentQualities(..)
    , IncongruentQualities(..)
    , JunkFound(..)
    , QualitiesMissing(..)
    , SequenceHasGaps(..)
    ) where

import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude
import Bio.Streaming
import Bio.Streaming.Bytes ( lines' )

import qualified Data.ByteString                    as B
import qualified Data.ByteString.Char8              as C
import qualified Data.ByteString.Unsafe             as B
import qualified Data.Vector.Storable               as W
import qualified Streaming.Prelude                  as Q

-- | Emitted when random text is found instead of a header.
data JunkFound = JunkFound !Int !Bytes deriving (Typeable, Int -> JunkFound -> ShowS
[JunkFound] -> ShowS
JunkFound -> String
(Int -> JunkFound -> ShowS)
-> (JunkFound -> String)
-> ([JunkFound] -> ShowS)
-> Show JunkFound
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [JunkFound] -> ShowS
$cshowList :: [JunkFound] -> ShowS
show :: JunkFound -> String
$cshow :: JunkFound -> String
showsPrec :: Int -> JunkFound -> ShowS
$cshowsPrec :: Int -> JunkFound -> ShowS
Show)

instance Exception JunkFound where
    displayException :: JunkFound -> String
displayException (JunkFound n :: Int
n s :: Bytes
s) = String -> Int -> ShowS
forall r. PrintfType r => String -> r
printf "junk found at line %d: %s" Int
n (Bytes -> String
forall s. Unpack s => s -> String
unpack Bytes
s)

-- | Emitted when a quality separator was expected, but not found.
data QualitiesMissing = QualitiesMissing !Int !Bytes deriving (Typeable, Int -> QualitiesMissing -> ShowS
[QualitiesMissing] -> ShowS
QualitiesMissing -> String
(Int -> QualitiesMissing -> ShowS)
-> (QualitiesMissing -> String)
-> ([QualitiesMissing] -> ShowS)
-> Show QualitiesMissing
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QualitiesMissing] -> ShowS
$cshowList :: [QualitiesMissing] -> ShowS
show :: QualitiesMissing -> String
$cshow :: QualitiesMissing -> String
showsPrec :: Int -> QualitiesMissing -> ShowS
$cshowsPrec :: Int -> QualitiesMissing -> ShowS
Show)

instance Exception QualitiesMissing where
    displayException :: QualitiesMissing -> String
displayException (QualitiesMissing 0 _) = ShowS
forall r. PrintfType r => String -> r
printf "expected '+' symbol at end of file"
    displayException (QualitiesMissing n :: Int
n s :: Bytes
s) = String -> Int -> ShowS
forall r. PrintfType r => String -> r
printf "expected '+' symbol at line %d, but found %s" Int
n (Bytes -> String
forall s. Unpack s => s -> String
unpack Bytes
s)

-- | Emitted when a quality record does not fit the sequence record.
data IncoherentQualities = IncoherentQualities !Int !Bytes deriving (Typeable, Int -> IncoherentQualities -> ShowS
[IncoherentQualities] -> ShowS
IncoherentQualities -> String
(Int -> IncoherentQualities -> ShowS)
-> (IncoherentQualities -> String)
-> ([IncoherentQualities] -> ShowS)
-> Show IncoherentQualities
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [IncoherentQualities] -> ShowS
$cshowList :: [IncoherentQualities] -> ShowS
show :: IncoherentQualities -> String
$cshow :: IncoherentQualities -> String
showsPrec :: Int -> IncoherentQualities -> ShowS
$cshowsPrec :: Int -> IncoherentQualities -> ShowS
Show)

instance Exception IncoherentQualities where
    displayException :: IncoherentQualities -> String
displayException (IncoherentQualities n :: Int
n s :: Bytes
s) = String -> Int -> ShowS
forall r. PrintfType r => String -> r
printf "quality record of incorrect length ignored at line %d (%s)" Int
n (Bytes -> String
forall s. Unpack s => s -> String
unpack Bytes
s)

-- | Emitted when a quality record has different layout than the
-- sequence.
data IncongruentQualities = IncongruentQualities !Int !Bytes deriving (Typeable, Int -> IncongruentQualities -> ShowS
[IncongruentQualities] -> ShowS
IncongruentQualities -> String
(Int -> IncongruentQualities -> ShowS)
-> (IncongruentQualities -> String)
-> ([IncongruentQualities] -> ShowS)
-> Show IncongruentQualities
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [IncongruentQualities] -> ShowS
$cshowList :: [IncongruentQualities] -> ShowS
show :: IncongruentQualities -> String
$cshow :: IncongruentQualities -> String
showsPrec :: Int -> IncongruentQualities -> ShowS
$cshowsPrec :: Int -> IncongruentQualities -> ShowS
Show)

instance Exception IncongruentQualities where
    displayException :: IncongruentQualities -> String
displayException (IncongruentQualities n :: Int
n s :: Bytes
s) = String -> Int -> ShowS
forall r. PrintfType r => String -> r
printf "quality and sequence have different layouts at line %d (%s)" Int
n (Bytes -> String
forall s. Unpack s => s -> String
unpack Bytes
s)

-- | Emitted when a sequence record contains strange characters
data SequenceHasGaps = SequenceHasGaps !Int !Bytes deriving (Typeable, Int -> SequenceHasGaps -> ShowS
[SequenceHasGaps] -> ShowS
SequenceHasGaps -> String
(Int -> SequenceHasGaps -> ShowS)
-> (SequenceHasGaps -> String)
-> ([SequenceHasGaps] -> ShowS)
-> Show SequenceHasGaps
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SequenceHasGaps] -> ShowS
$cshowList :: [SequenceHasGaps] -> ShowS
show :: SequenceHasGaps -> String
$cshow :: SequenceHasGaps -> String
showsPrec :: Int -> SequenceHasGaps -> ShowS
$cshowsPrec :: Int -> SequenceHasGaps -> ShowS
Show)

instance Exception SequenceHasGaps where
    displayException :: SequenceHasGaps -> String
displayException (SequenceHasGaps n :: Int
n cs :: Bytes
cs) = String -> String -> Int -> String
forall r. PrintfType r => String -> r
printf "undefined characters %s stored as unknown bases at line %d" (Bytes -> String
forall a. Show a => a -> String
show Bytes
cs) Int
n

data EmptyRecord = EmptyRecord !Int !Bytes deriving (Typeable, Int -> EmptyRecord -> ShowS
[EmptyRecord] -> ShowS
EmptyRecord -> String
(Int -> EmptyRecord -> ShowS)
-> (EmptyRecord -> String)
-> ([EmptyRecord] -> ShowS)
-> Show EmptyRecord
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EmptyRecord] -> ShowS
$cshowList :: [EmptyRecord] -> ShowS
show :: EmptyRecord -> String
$cshow :: EmptyRecord -> String
showsPrec :: Int -> EmptyRecord -> ShowS
$cshowsPrec :: Int -> EmptyRecord -> ShowS
Show)

instance Exception EmptyRecord where
    displayException :: EmptyRecord -> String
displayException (EmptyRecord n :: Int
n s :: Bytes
s) = String -> Int -> ShowS
forall r. PrintfType r => String -> r
printf "(effectively) empty record at line %d (%s)" Int
n (Bytes -> String
forall s. Unpack s => s -> String
unpack Bytes
s)


{-# INLINE parseFastq #-}
parseFastq :: MonadLog m => ByteStream m r -> Stream (Of BamRec) m r
parseFastq :: ByteStream m r -> Stream (Of BamRec) m r
parseFastq = (Stream (Of (Int, Bytes)) m r
 -> m (Either r (BamRec, Stream (Of (Int, Bytes)) m r)))
-> Stream (Of (Int, Bytes)) m r -> Stream (Of BamRec) m r
forall (m :: * -> *) s r a.
Monad m =>
(s -> m (Either r (a, s))) -> s -> Stream (Of a) m r
Q.unfoldr Stream (Of (Int, Bytes)) m r
-> m (Either r (BamRec, Stream (Of (Int, Bytes)) m r))
forall a.
Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
go (Stream (Of (Int, Bytes)) m r -> Stream (Of BamRec) m r)
-> (ByteStream m r -> Stream (Of (Int, Bytes)) m r)
-> ByteStream m r
-> Stream (Of BamRec) m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (Of Int) m r
-> Stream (Of Bytes) m r -> Stream (Of (Int, Bytes)) m r
forall (m :: * -> *) a r b.
Monad m =>
Stream (Of a) m r -> Stream (Of b) m r -> Stream (Of (a, b)) m r
Q.zip (Int -> Stream (Of Int) m r
forall (m :: * -> *) n r.
(Monad m, Enum n) =>
n -> Stream (Of n) m r
Q.enumFrom (1::Int)) (Stream (Of Bytes) m r -> Stream (Of (Int, Bytes)) m r)
-> (ByteStream m r -> Stream (Of Bytes) m r)
-> ByteStream m r
-> Stream (Of (Int, Bytes)) m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteStream m r -> Stream (Of Bytes) m r
forall (m :: * -> *) r.
Monad m =>
ByteStream m r -> Stream (Of Bytes) m r
lines'
  where
    go :: Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
go = Stream (Of (Int, Bytes)) m a
-> m (Either a (Of (Int, Bytes) (Stream (Of (Int, Bytes)) m a)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect (Stream (Of (Int, Bytes)) m a
 -> m (Either a (Of (Int, Bytes) (Stream (Of (Int, Bytes)) m a))))
-> (Either a (Of (Int, Bytes) (Stream (Of (Int, Bytes)) m a))
    -> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a)))
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> \case
        Left r :: a
r                                    ->  Either a (BamRec, Stream (Of (Int, Bytes)) m a)
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> Either a (BamRec, Stream (Of (Int, Bytes)) m a)
forall a b. a -> Either a b
Left a
r)
        Right ((i :: Int
i,h :: Bytes
h) :> ls :: Stream (Of (Int, Bytes)) m a
ls)
            | Bytes -> Bool
B.null Bytes
h                            ->  Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
go Stream (Of (Int, Bytes)) m a
ls
            | Bytes -> Char
C.head Bytes
h Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '>' Bool -> Bool -> Bool
|| Bytes -> Char
C.head Bytes
h Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ';'  ->  (Int, Bytes)
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) r a.
MonadLog m =>
(Int, Bytes)
-> Stream (Of (Int, Bytes)) m r
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m r))
goFasta (Int
i, Bytes -> Bytes
B.tail Bytes
h) (((Int, Bytes) -> Bool)
-> Stream (Of (Int, Bytes)) m a -> Stream (Of (Int, Bytes)) m a
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool) -> Stream (Of a) m r -> Stream (Of a) m r
Q.dropWhile (Int, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isDescr Stream (Of (Int, Bytes)) m a
ls)
            | Bytes -> Char
C.head Bytes
h Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '@'                     ->  (Int, Bytes)
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) r a.
MonadLog m =>
(Int, Bytes)
-> Stream (Of (Int, Bytes)) m r
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m r))
goFastq (Int
i, Bytes -> Bytes
B.tail Bytes
h) (((Int, Bytes) -> Bool)
-> Stream (Of (Int, Bytes)) m a -> Stream (Of (Int, Bytes)) m a
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool) -> Stream (Of a) m r -> Stream (Of a) m r
Q.dropWhile (Int, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isDescr Stream (Of (Int, Bytes)) m a
ls)
            | Bool
otherwise                           ->  Level -> JunkFound -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Warning (Int -> Bytes -> JunkFound
JunkFound Int
i Bytes
h) m ()
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
go Stream (Of (Int, Bytes)) m a
ls

    isDescr :: (a, Bytes) -> Bool
isDescr  (_,s :: Bytes
s) = Bool -> Bool
not (Bytes -> Bool
B.null Bytes
s) Bool -> Bool -> Bool
&&  Bytes -> Char
C.head Bytes
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ';'
    isHeader :: (a, Bytes) -> Bool
isHeader (_,s :: Bytes
s) = Bool -> Bool
not (Bytes -> Bool
B.null Bytes
s) Bool -> Bool -> Bool
&& (Bytes -> Char
C.head Bytes
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '>' Bool -> Bool -> Bool
|| Bytes -> Char
C.head Bytes
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '@' Bool -> Bool -> Bool
|| Bytes -> Char
C.head Bytes
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ';')
    isSep :: (a, Bytes) -> Bool
isSep    (_,s :: Bytes
s) = Bool -> Bool
not (Bytes -> Bool
B.null Bytes
s) Bool -> Bool -> Bool
&&  Bytes -> Char
C.head Bytes
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '+'
    isSeq :: (a, Bytes) -> Bool
isSeq x :: (a, Bytes)
x = Bool -> Bool
not ((a, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isHeader (a, Bytes)
x Bool -> Bool -> Bool
|| (a, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isSep (a, Bytes)
x)

    goFasta :: (Int, Bytes)
-> Stream (Of (Int, Bytes)) m r
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m r))
goFasta h :: (Int, Bytes)
h ls :: Stream (Of (Int, Bytes)) m r
ls = do
        sq :: [(Int, Bytes)]
sq :> ls' :: Stream (Of (Int, Bytes)) m r
ls' <- Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m r)
-> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m r))
forall (m :: * -> *) a r.
Monad m =>
Stream (Of a) m r -> m (Of [a] r)
Q.toList (Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m r)
 -> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m r)))
-> Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m r)
-> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m r))
forall a b. (a -> b) -> a -> b
$ ((Int, Bytes) -> Bool)
-> Stream (Of (Int, Bytes)) m r
-> Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m r)
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool)
-> Stream (Of a) m r -> Stream (Of a) m (Stream (Of a) m r)
Q.span (Int, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isSeq Stream (Of (Int, Bytes)) m r
ls
        (Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> Stream (Of (Int, Bytes)) m r
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m r))
forall (m :: * -> *) b a.
MonadLog m =>
(Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> b
-> m (Either a (BamRec, b))
make_record (Int, Bytes)
h [(Int, Bytes)]
sq Maybe [(Int, Bytes)]
forall a. Maybe a
Nothing Stream (Of (Int, Bytes)) m r
ls'

    goFastq :: (Int, Bytes)
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
goFastq h :: (Int, Bytes)
h ls :: Stream (Of (Int, Bytes)) m a
ls = do
        sq :: [(Int, Bytes)]
sq :> ls1 :: Stream (Of (Int, Bytes)) m a
ls1  <- Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
-> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a r.
Monad m =>
Stream (Of a) m r -> m (Of [a] r)
Q.toList (Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
 -> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m a)))
-> Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
-> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m a))
forall a b. (a -> b) -> a -> b
$ ((Int, Bytes) -> Bool)
-> Stream (Of (Int, Bytes)) m a
-> Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
forall (m :: * -> *) a r.
Monad m =>
(a -> Bool)
-> Stream (Of a) m r -> Stream (Of a) m (Stream (Of a) m r)
Q.span (Int, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isSeq Stream (Of (Int, Bytes)) m a
ls
        Stream (Of (Int, Bytes)) m a
-> m (Either a ((Int, Bytes), Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a r.
Monad m =>
Stream (Of a) m r -> m (Either r (a, Stream (Of a) m r))
Q.next Stream (Of (Int, Bytes)) m a
ls1 m (Either a ((Int, Bytes), Stream (Of (Int, Bytes)) m a))
-> (Either a ((Int, Bytes), Stream (Of (Int, Bytes)) m a)
    -> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a)))
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
            Right (sep :: (Int, Bytes)
sep, ls2 :: Stream (Of (Int, Bytes)) m a
ls2)
                | (Int, Bytes) -> Bool
forall a. (a, Bytes) -> Bool
isSep (Int, Bytes)
sep -> do
                    qs :: [(Int, Bytes)]
qs :> ls3 :: Stream (Of (Int, Bytes)) m a
ls3 <- Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
-> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) a r.
Monad m =>
Stream (Of a) m r -> m (Of [a] r)
Q.toList (Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
 -> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m a)))
-> Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
-> m (Of [(Int, Bytes)] (Stream (Of (Int, Bytes)) m a))
forall a b. (a -> b) -> a -> b
$ Int
-> Stream (Of (Int, Bytes)) m a
-> Stream (Of (Int, Bytes)) m (Stream (Of (Int, Bytes)) m a)
forall (m :: * -> *) (f :: * -> *) r.
(Monad m, Functor f) =>
Int -> Stream f m r -> Stream f m (Stream f m r)
Q.splitAt ([(Int, Bytes)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Int, Bytes)]
sq) Stream (Of (Int, Bytes)) m a
ls2
                    if [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Int, Bytes) -> Int) -> [(Int, Bytes)] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes -> Int
B.length (Bytes -> Int) -> ((Int, Bytes) -> Bytes) -> (Int, Bytes) -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd) [(Int, Bytes)]
qs) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Int, Bytes) -> Int) -> [(Int, Bytes)] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes -> Int
B.length (Bytes -> Int) -> ((Int, Bytes) -> Bytes) -> (Int, Bytes) -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd) [(Int, Bytes)]
sq) then do
                        Level -> IncoherentQualities -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Error (IncoherentQualities -> m ()) -> IncoherentQualities -> m ()
forall a b. (a -> b) -> a -> b
$ (Int -> Bytes -> IncoherentQualities)
-> (Int, Bytes) -> IncoherentQualities
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Int -> Bytes -> IncoherentQualities
IncoherentQualities  (Int, Bytes)
h
                        (Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) b a.
MonadLog m =>
(Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> b
-> m (Either a (BamRec, b))
make_record (Int, Bytes)
h [(Int, Bytes)]
sq Maybe [(Int, Bytes)]
forall a. Maybe a
Nothing Stream (Of (Int, Bytes)) m a
ls3
                      else do
                        Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (((Int, Bytes) -> Int) -> [(Int, Bytes)] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes -> Int
B.length (Bytes -> Int) -> ((Int, Bytes) -> Bytes) -> (Int, Bytes) -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd) [(Int, Bytes)]
qs [Int] -> [Int] -> Bool
forall a. Eq a => a -> a -> Bool
/= ((Int, Bytes) -> Int) -> [(Int, Bytes)] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes -> Int
B.length (Bytes -> Int) -> ((Int, Bytes) -> Bytes) -> (Int, Bytes) -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd) [(Int, Bytes)]
sq) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$
                            Level -> IncongruentQualities -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Warning (IncongruentQualities -> m ()) -> IncongruentQualities -> m ()
forall a b. (a -> b) -> a -> b
$ (Int -> Bytes -> IncongruentQualities)
-> (Int, Bytes) -> IncongruentQualities
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Int -> Bytes -> IncongruentQualities
IncongruentQualities (Int, Bytes)
h
                        (Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) b a.
MonadLog m =>
(Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> b
-> m (Either a (BamRec, b))
make_record (Int, Bytes)
h [(Int, Bytes)]
sq ([(Int, Bytes)] -> Maybe [(Int, Bytes)]
forall a. a -> Maybe a
Just [(Int, Bytes)]
qs) Stream (Of (Int, Bytes)) m a
ls3
                | Bool
otherwise -> do
                    Level -> QualitiesMissing -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Error (QualitiesMissing -> m ()) -> QualitiesMissing -> m ()
forall a b. (a -> b) -> a -> b
$ (Int -> Bytes -> QualitiesMissing)
-> (Int, Bytes) -> QualitiesMissing
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Int -> Bytes -> QualitiesMissing
QualitiesMissing (Int, Bytes)
sep
                    (Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) b a.
MonadLog m =>
(Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> b
-> m (Either a (BamRec, b))
make_record (Int, Bytes)
h [(Int, Bytes)]
sq Maybe [(Int, Bytes)]
forall a. Maybe a
Nothing ((Int, Bytes)
-> Stream (Of (Int, Bytes)) m a -> Stream (Of (Int, Bytes)) m a
forall (m :: * -> *) a r.
Monad m =>
a -> Stream (Of a) m r -> Stream (Of a) m r
Q.cons (Int, Bytes)
sep Stream (Of (Int, Bytes)) m a
ls2)
            Left x :: a
x -> do
                    Level -> QualitiesMissing -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Error (QualitiesMissing -> m ()) -> QualitiesMissing -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> Bytes -> QualitiesMissing
QualitiesMissing 0 Bytes
C.empty
                    (Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> Stream (Of (Int, Bytes)) m a
-> m (Either a (BamRec, Stream (Of (Int, Bytes)) m a))
forall (m :: * -> *) b a.
MonadLog m =>
(Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> b
-> m (Either a (BamRec, b))
make_record (Int, Bytes)
h [(Int, Bytes)]
sq Maybe [(Int, Bytes)]
forall a. Maybe a
Nothing (a -> Stream (Of (Int, Bytes)) m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
x)

    make_record :: (Int, Bytes)
-> [(Int, Bytes)]
-> Maybe [(Int, Bytes)]
-> b
-> m (Either a (BamRec, b))
make_record h :: (Int, Bytes)
h sq :: [(Int, Bytes)]
sq qs :: Maybe [(Int, Bytes)]
qs k :: b
k = do
        Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
ngaps Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0) (m () -> m ())
-> (SequenceHasGaps -> m ()) -> SequenceHasGaps -> m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Level -> SequenceHasGaps -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Warning (SequenceHasGaps -> m ()) -> SequenceHasGaps -> m ()
forall a b. (a -> b) -> a -> b
$
            Int -> Bytes -> SequenceHasGaps
SequenceHasGaps ((Int, Bytes) -> Int
forall a b. (a, b) -> a
fst ((Int, Bytes) -> Int) -> (Int, Bytes) -> Int
forall a b. (a -> b) -> a -> b
$ [(Int, Bytes)] -> (Int, Bytes)
forall a. [a] -> a
head [(Int, Bytes)]
sq) ((Word8 -> Bool) -> Bytes -> Bytes
B.filter (Nucleotides -> Bool
isGap (Nucleotides -> Bool) -> (Word8 -> Nucleotides) -> Word8 -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotides
toNucleotides) (Bytes -> Bytes) -> Bytes -> Bytes
forall a b. (a -> b) -> a -> b
$ [Bytes] -> Bytes
B.concat ([Bytes] -> Bytes) -> [Bytes] -> Bytes
forall a b. (a -> b) -> a -> b
$ ((Int, Bytes) -> Bytes) -> [(Int, Bytes)] -> [Bytes]
forall a b. (a -> b) -> [a] -> [b]
map (Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd [(Int, Bytes)]
sq)
        Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0) (m () -> m ()) -> (EmptyRecord -> m ()) -> EmptyRecord -> m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Level -> EmptyRecord -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Warning (EmptyRecord -> m ()) -> EmptyRecord -> m ()
forall a b. (a -> b) -> a -> b
$ (Int -> Bytes -> EmptyRecord) -> (Int, Bytes) -> EmptyRecord
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Int -> Bytes -> EmptyRecord
EmptyRecord (Int, Bytes)
h
        Either a (BamRec, b) -> m (Either a (BamRec, b))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either a (BamRec, b) -> m (Either a (BamRec, b)))
-> Either a (BamRec, b) -> m (Either a (BamRec, b))
forall a b. (a -> b) -> a -> b
$ (BamRec, b) -> Either a (BamRec, b)
forall a b. b -> Either a b
Right (BamRec
r,b
k)
      where
        !l :: Int
l     = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ ((Int, Bytes) -> Int) -> [(Int, Bytes)] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Bytes -> Int
B.length (Bytes -> Int) -> ((Int, Bytes) -> Bytes) -> (Int, Bytes) -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd) [(Int, Bytes)]
sq
        (!Vector_Nucs_half Nucleotides
nseq, !Int
ngaps) = Int -> [(Int, Bytes)] -> (Vector_Nucs_half Nucleotides, Int)
mkSeq Int
l [(Int, Bytes)]
sq
        !qual :: Maybe (Vector Qual)
qual  = case Maybe [(Int, Bytes)]
qs of Nothing -> Maybe (Vector Qual)
forall a. Maybe a
Nothing
                            Just [] -> Maybe (Vector Qual)
forall a. Maybe a
Nothing
                            Just  q -> Vector Qual -> Maybe (Vector Qual)
forall a. a -> Maybe a
Just (Vector Qual -> Maybe (Vector Qual))
-> Vector Qual -> Maybe (Vector Qual)
forall a b. (a -> b) -> a -> b
$! Int -> [(Int, Bytes)] -> Vector Qual
mkQual Int
l [(Int, Bytes)]
q

        (!Bytes
qname, !Bytes
descr) = (Word8 -> Bool) -> Bytes -> (Bytes, Bytes)
B.break (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== 32) ((Int, Bytes) -> Bytes
forall a b. (a, b) -> b
snd (Int, Bytes)
h)
        !fflag :: Bytes
fflag = Int -> Bytes -> Bytes
B.drop 1 (Bytes -> Bytes) -> (Bytes -> Bytes) -> Bytes -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> Bool) -> Bytes -> Bytes
C.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= ':') (Bytes -> Bytes) -> Bytes -> Bytes
forall a b. (a -> b) -> a -> b
$ Bytes
descr

        !r :: BamRec
r = if Bytes -> Int
B.length Bytes
fflag Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 2 Bool -> Bool -> Bool
|| Bytes -> Int -> Char
C.index Bytes
fflag 1 Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= ':' Bool -> Bool -> Bool
|| (Bytes -> Char
C.head Bytes
fflag Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= 'Y' Bool -> Bool -> Bool
&& Bytes -> Char
C.head Bytes
fflag Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= 'N')
             then BamRec
nullBamRec { b_qname :: Bytes
b_qname = Bytes
qname, b_seq :: Vector_Nucs_half Nucleotides
b_seq = Vector_Nucs_half Nucleotides
nseq, b_qual :: Maybe (Vector Qual)
b_qual = Maybe (Vector Qual)
qual }
             else let !flag :: Int
flag | Bytes -> Char
C.head Bytes
fflag Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= 'Y' = BamRec -> Int
b_flag BamRec
nullBamRec Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFailsQC
                            | Bool
otherwise           = BamRec -> Int
b_flag BamRec
nullBamRec

                      !sample :: Bytes
sample = (Word8 -> Bool) -> Bytes -> Bytes
B.takeWhile (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/=32)(Bytes -> Bytes) -> (Bytes -> Bytes) -> Bytes -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Bytes -> Bytes
B.drop 1(Bytes -> Bytes) -> (Bytes -> Bytes) -> Bytes -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> Bool) -> Bytes -> Bytes
C.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=':')(Bytes -> Bytes) -> (Bytes -> Bytes) -> Bytes -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> Bytes -> Bytes
B.drop 1(Bytes -> Bytes) -> (Bytes -> Bytes) -> Bytes -> Bytes
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> Bool) -> Bytes -> Bytes
C.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=':') (Bytes -> Bytes) -> Bytes -> Bytes
forall a b. (a -> b) -> a -> b
$ Bytes
fflag
                      !exts :: [(BamKey, Ext)]
exts | Bytes -> Bool
B.null Bytes
sample                    =  [                   ]
                            | (Char -> Bool) -> Bytes -> Bool
C.all (Char -> Bytes -> Bool
`C.elem` "ACGTN") Bytes
sample  =  [("XI", Bytes -> Ext
Text Bytes
sample)]
                            | Bool
otherwise                        =  [("RG", Bytes -> Ext
Text Bytes
sample)]
                  in BamRec
nullBamRec { b_qname :: Bytes
b_qname = Bytes
qname, b_seq :: Vector_Nucs_half Nucleotides
b_seq = Vector_Nucs_half Nucleotides
nseq, b_qual :: Maybe (Vector Qual)
b_qual = Maybe (Vector Qual)
qual, b_flag :: Int
b_flag = Int
flag, b_exts :: [(BamKey, Ext)]
b_exts = [(BamKey, Ext)]
exts }


mkSeq :: Int -> [(Int,Bytes)] -> (Vector_Nucs_half Nucleotides, Int)
mkSeq :: Int -> [(Int, Bytes)] -> (Vector_Nucs_half Nucleotides, Int)
mkSeq ltot :: Int
ltot xs0 :: [(Int, Bytes)]
xs0 = IO (Vector_Nucs_half Nucleotides, Int)
-> (Vector_Nucs_half Nucleotides, Int)
forall a. IO a -> a
unsafePerformIO (IO (Vector_Nucs_half Nucleotides, Int)
 -> (Vector_Nucs_half Nucleotides, Int))
-> IO (Vector_Nucs_half Nucleotides, Int)
-> (Vector_Nucs_half Nucleotides, Int)
forall a b. (a -> b) -> a -> b
$ do
    ForeignPtr Word8
fp <- Int -> IO (ForeignPtr Word8)
forall a. Int -> IO (ForeignPtr a)
mallocForeignPtrBytes (Int -> Int -> Int
forall a. Bits a => a -> Int -> a
shiftR (Int
ltotInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) 1)
    Int
g  <- ForeignPtr Word8 -> (Ptr Word8 -> IO Int) -> IO Int
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
fp ((Ptr Word8 -> IO Int) -> IO Int)
-> (Ptr Word8 -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \p :: Ptr Word8
p -> Ptr Word8 -> Int -> [(Int, Bytes)] -> IO Int
forall a. Ptr Word8 -> Int -> [(a, Bytes)] -> IO Int
go_even Ptr Word8
p 0 [(Int, Bytes)]
xs0
    (Vector_Nucs_half Nucleotides, Int)
-> IO (Vector_Nucs_half Nucleotides, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return ( Int -> Int -> ForeignPtr Word8 -> Vector_Nucs_half Nucleotides
forall a. Int -> Int -> ForeignPtr Word8 -> Vector_Nucs_half a
Vector_Nucs_half 0 Int
ltot ForeignPtr Word8
fp, Int
g )
  where
    go_even :: Ptr Word8 -> Int -> [(a, Bytes)] -> IO Int
go_even !Ptr Word8
p !Int
g (s :: (a, Bytes)
s:ss :: [(a, Bytes)]
ss)  =  Bytes -> (CStringLen -> IO Int) -> IO Int
forall a. Bytes -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ((a, Bytes) -> Bytes
forall a b. (a, b) -> b
snd (a, Bytes)
s) ((CStringLen -> IO Int) -> IO Int)
-> (CStringLen -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \(q :: Ptr CChar
q,l :: Int
l) -> [(a, Bytes)] -> Ptr Word8 -> Ptr CChar -> Int -> Int -> IO Int
go1_even [(a, Bytes)]
ss Ptr Word8
p Ptr CChar
q Int
g Int
l
    go_even  _ !Int
g [    ]  =  Int -> IO Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
g

    go_odd :: Ptr Word8 -> Int -> Word8 -> [(a, Bytes)] -> IO Int
go_odd !Ptr Word8
p !Int
g !Word8
a (s :: (a, Bytes)
s:ss :: [(a, Bytes)]
ss)  =  Bytes -> (CStringLen -> IO Int) -> IO Int
forall a. Bytes -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ((a, Bytes) -> Bytes
forall a b. (a, b) -> b
snd (a, Bytes)
s) ((CStringLen -> IO Int) -> IO Int)
-> (CStringLen -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \(q :: Ptr CChar
q,l :: Int
l) -> Word8
-> [(a, Bytes)] -> Ptr Word8 -> Ptr CChar -> Int -> Int -> IO Int
go1_odd Word8
a [(a, Bytes)]
ss Ptr Word8
p Ptr CChar
q Int
g Int
l
    go_odd !Ptr Word8
p !Int
g !Word8
a [    ]  =  Ptr Word8 -> Word8 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Word8
p Word8
a IO () -> IO Int -> IO Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> IO Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
g

    go1_odd :: Word8
-> [(a, Bytes)] -> Ptr Word8 -> Ptr CChar -> Int -> Int -> IO Int
go1_odd a :: Word8
a ss :: [(a, Bytes)]
ss !Ptr Word8
p !Ptr CChar
q !Int
g !Int
l
        | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0      =  do !Word8
b <- Nucleotides -> Word8
unNs (Nucleotides -> Word8) -> (Word8 -> Nucleotides) -> Word8 -> Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotides
toNucleotides (Word8 -> Word8) -> IO Word8 -> IO Word8
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr CChar -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
q 0
                           Ptr Word8 -> Word8 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Word8
p (Word8 -> IO ()) -> Word8 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word8
a Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`shiftL` 4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
b
                           [(a, Bytes)] -> Ptr Word8 -> Ptr CChar -> Int -> Int -> IO Int
go1_even [(a, Bytes)]
ss (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
p 1) (Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
q 1) (Int
g Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Bool -> Int
forall a. Enum a => a -> Int
fromEnum (Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== 0)) (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)
        | Bool
otherwise  =  Ptr Word8 -> Int -> Word8 -> [(a, Bytes)] -> IO Int
go_odd Ptr Word8
p Int
g Word8
a [(a, Bytes)]
ss

    go1_even :: [(a, Bytes)] -> Ptr Word8 -> Ptr CChar -> Int -> Int -> IO Int
go1_even ss :: [(a, Bytes)]
ss !Ptr Word8
p !Ptr CChar
q !Int
g !Int
l
        | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 1      =  do !Word8
a <- Nucleotides -> Word8
unNs (Nucleotides -> Word8) -> (Word8 -> Nucleotides) -> Word8 -> Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotides
toNucleotides (Word8 -> Word8) -> IO Word8 -> IO Word8
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr CChar -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
q 0
                           !Word8
b <- Nucleotides -> Word8
unNs (Nucleotides -> Word8) -> (Word8 -> Nucleotides) -> Word8 -> Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotides
toNucleotides (Word8 -> Word8) -> IO Word8 -> IO Word8
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr CChar -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
q 1
                           Ptr Word8 -> Word8 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Word8
p (Word8 -> IO ()) -> Word8 -> IO ()
forall a b. (a -> b) -> a -> b
$ Word8
a Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`shiftL` 4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
b
                           [(a, Bytes)] -> Ptr Word8 -> Ptr CChar -> Int -> Int -> IO Int
go1_even [(a, Bytes)]
ss (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
p 1) (Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
q 2) (Int
g Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Bool -> Int
forall a. Enum a => a -> Int
fromEnum (Word8
a Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== 0) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Bool -> Int
forall a. Enum a => a -> Int
fromEnum (Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== 0)) (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-2)
        | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0      =  do !Word8
a <- Nucleotides -> Word8
unNs (Nucleotides -> Word8) -> (Word8 -> Nucleotides) -> Word8 -> Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotides
toNucleotides (Word8 -> Word8) -> IO Word8 -> IO Word8
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr CChar -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
q 0
                           Ptr Word8 -> Int -> Word8 -> [(a, Bytes)] -> IO Int
go_odd Ptr Word8
p (Int
g Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Bool -> Int
forall a. Enum a => a -> Int
fromEnum (Word8
a Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== 0)) (Word8
a Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`shiftL` 4) [(a, Bytes)]
ss
        | Bool
otherwise  =  Ptr Word8 -> Int -> [(a, Bytes)] -> IO Int
go_even Ptr Word8
p Int
g [(a, Bytes)]
ss


mkQual :: Int -> [(Int, Bytes)] -> W.Vector Qual
mkQual :: Int -> [(Int, Bytes)] -> Vector Qual
mkQual ltot :: Int
ltot qs0 :: [(Int, Bytes)]
qs0 = IO (Vector Qual) -> Vector Qual
forall a. IO a -> a
unsafePerformIO (IO (Vector Qual) -> Vector Qual)
-> IO (Vector Qual) -> Vector Qual
forall a b. (a -> b) -> a -> b
$ do
    ForeignPtr Qual
fp <- Int -> IO (ForeignPtr Qual)
forall a. Int -> IO (ForeignPtr a)
mallocForeignPtrBytes Int
ltot
    ForeignPtr Qual -> (Ptr Qual -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Qual
fp ((Ptr Qual -> IO ()) -> IO ()) -> (Ptr Qual -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \p :: Ptr Qual
p -> Ptr Word8 -> [(Int, Bytes)] -> IO ()
forall a. Ptr Word8 -> [(a, Bytes)] -> IO ()
go (Ptr Qual -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr Qual
p) [(Int, Bytes)]
qs0
    Vector Qual -> IO (Vector Qual)
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Qual -> IO (Vector Qual))
-> Vector Qual -> IO (Vector Qual)
forall a b. (a -> b) -> a -> b
$! ForeignPtr Qual -> Int -> Vector Qual
forall a. Storable a => ForeignPtr a -> Int -> Vector a
W.unsafeFromForeignPtr0 ForeignPtr Qual
fp Int
ltot
  where
    go :: Ptr Word8 -> [(a, Bytes)] -> IO ()
go !Ptr Word8
p (s :: (a, Bytes)
s:ss :: [(a, Bytes)]
ss)  =  Bytes -> (CStringLen -> IO ()) -> IO ()
forall a. Bytes -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ((a, Bytes) -> Bytes
forall a b. (a, b) -> b
snd (a, Bytes)
s) ((CStringLen -> IO ()) -> IO ()) -> (CStringLen -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \(q :: Ptr CChar
q,l :: Int
l) -> [(a, Bytes)] -> Ptr Word8 -> Ptr Word8 -> Int -> IO ()
go1 [(a, Bytes)]
ss Ptr Word8
p (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
q) Int
l
    go  _ [    ]  =  () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

    go1 :: [(a, Bytes)] -> Ptr Word8 -> Ptr Word8 -> Int -> IO ()
go1 ss :: [(a, Bytes)]
ss !Ptr Word8
p !Ptr Word8
q !Int
l
        | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0      =  do Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
q IO Word8 -> (Word8 -> IO ()) -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Ptr Word8 -> Word8 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Word8
p (Word8 -> IO ()) -> (Word8 -> Word8) -> Word8 -> IO ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
subtract (33::Word8)
                           [(a, Bytes)] -> Ptr Word8 -> Ptr Word8 -> Int -> IO ()
go1 [(a, Bytes)]
ss (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
p 1) (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
q 1) (Int -> Int
forall a. Enum a => a -> a
pred Int
l)
        | Bool
otherwise  =  Ptr Word8 -> [(a, Bytes)] -> IO ()
go Ptr Word8
p [(a, Bytes)]
ss