module Bio.Bam.Trim
( trim_3
, trim_3'
, trim_low_quality
, AD_Seqs
, withADSeqs
, default_fwd_adapters
, default_rev_adapters
, find_merge
, mergeBam
, find_trim
, trimBam
, merged_seq
, merged_qual
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Bam.Rmdup ( ECig(..), setMD, toECig )
import Bio.Prelude
import Control.Monad.Trans.Control ( MonadBaseControl, control )
import Foreign.C.Types ( CInt(..) )
import Foreign.Marshal.Array ( allocaArray )
import qualified Data.ByteString as B
import qualified Data.Vector.Generic as V
import qualified Data.Vector.Storable as W
trim_3' :: ([Nucleotides] -> [Qual] -> Bool) -> BamRec -> BamRec
trim_3' :: ([Nucleotides] -> [Qual] -> Bool) -> BamRec -> BamRec
trim_3' p :: [Nucleotides] -> [Qual] -> Bool
p b :: BamRec
b = case BamRec -> Maybe (Vector Qual)
b_qual BamRec
b of
Nothing -> BamRec
b
Just qs :: Vector Qual
qs | BamRec -> Int
b_flag BamRec
b Int -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` 4 -> Int -> BamRec -> BamRec
trim_3 Int
len_rev BamRec
b
| Bool
otherwise -> Int -> BamRec -> BamRec
trim_3 Int
len_fwd BamRec
b
where
len_fwd :: Int
len_fwd = Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract 1 (Int -> Int)
-> ([([Nucleotides], [Qual])] -> Int)
-> [([Nucleotides], [Qual])]
-> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [([Nucleotides], [Qual])] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([([Nucleotides], [Qual])] -> Int)
-> ([([Nucleotides], [Qual])] -> [([Nucleotides], [Qual])])
-> [([Nucleotides], [Qual])]
-> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (([Nucleotides], [Qual]) -> Bool)
-> [([Nucleotides], [Qual])] -> [([Nucleotides], [Qual])]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (([Nucleotides] -> [Qual] -> Bool)
-> ([Nucleotides], [Qual]) -> Bool
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry [Nucleotides] -> [Qual] -> Bool
p) ([([Nucleotides], [Qual])] -> Int)
-> [([Nucleotides], [Qual])] -> Int
forall a b. (a -> b) -> a -> b
$
[[Nucleotides]] -> [[Qual]] -> [([Nucleotides], [Qual])]
forall a b. [a] -> [b] -> [(a, b)]
zip ([Nucleotides] -> [[Nucleotides]]
forall a. [a] -> [[a]]
inits ([Nucleotides] -> [[Nucleotides]])
-> (Vector_Nucs_half Nucleotides -> [Nucleotides])
-> Vector_Nucs_half Nucleotides
-> [[Nucleotides]]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Nucleotides] -> [Nucleotides]
forall a. [a] -> [a]
reverse ([Nucleotides] -> [Nucleotides])
-> (Vector_Nucs_half Nucleotides -> [Nucleotides])
-> Vector_Nucs_half Nucleotides
-> [Nucleotides]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector_Nucs_half Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Vector_Nucs_half Nucleotides -> [[Nucleotides]])
-> Vector_Nucs_half Nucleotides -> [[Nucleotides]]
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b)
([Qual] -> [[Qual]]
forall a. [a] -> [[a]]
inits ([Qual] -> [[Qual]])
-> (Vector Qual -> [Qual]) -> Vector Qual -> [[Qual]]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Qual] -> [Qual]
forall a. [a] -> [a]
reverse ([Qual] -> [Qual])
-> (Vector Qual -> [Qual]) -> Vector Qual -> [Qual]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Qual -> [Qual]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Vector Qual -> [[Qual]]) -> Vector Qual -> [[Qual]]
forall a b. (a -> b) -> a -> b
$ Vector Qual
qs)
len_rev :: Int
len_rev = Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract 1 (Int -> Int)
-> ([([Nucleotides], [Qual])] -> Int)
-> [([Nucleotides], [Qual])]
-> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [([Nucleotides], [Qual])] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([([Nucleotides], [Qual])] -> Int)
-> ([([Nucleotides], [Qual])] -> [([Nucleotides], [Qual])])
-> [([Nucleotides], [Qual])]
-> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (([Nucleotides], [Qual]) -> Bool)
-> [([Nucleotides], [Qual])] -> [([Nucleotides], [Qual])]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (([Nucleotides] -> [Qual] -> Bool)
-> ([Nucleotides], [Qual]) -> Bool
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry [Nucleotides] -> [Qual] -> Bool
p) ([([Nucleotides], [Qual])] -> Int)
-> [([Nucleotides], [Qual])] -> Int
forall a b. (a -> b) -> a -> b
$
[[Nucleotides]] -> [[Qual]] -> [([Nucleotides], [Qual])]
forall a b. [a] -> [b] -> [(a, b)]
zip ([Nucleotides] -> [[Nucleotides]]
forall a. [a] -> [[a]]
inits ([Nucleotides] -> [[Nucleotides]])
-> (Vector_Nucs_half Nucleotides -> [Nucleotides])
-> Vector_Nucs_half Nucleotides
-> [[Nucleotides]]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector_Nucs_half Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Vector_Nucs_half Nucleotides -> [[Nucleotides]])
-> Vector_Nucs_half Nucleotides -> [[Nucleotides]]
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b)
([Qual] -> [[Qual]]
forall a. [a] -> [[a]]
inits ([Qual] -> [[Qual]])
-> (Vector Qual -> [Qual]) -> Vector Qual -> [[Qual]]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Qual -> [Qual]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Vector Qual -> [[Qual]]) -> Vector Qual -> [[Qual]]
forall a b. (a -> b) -> a -> b
$ Vector Qual
qs)
trim_3 :: Int -> BamRec -> BamRec
trim_3 :: Int -> BamRec -> BamRec
trim_3 l :: Int
l b :: BamRec
b | BamRec -> Int
b_flag BamRec
b Int -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
`testBit` 4 = BamRec
trim_rev
| Bool
otherwise = BamRec
trim_fwd
where
trim_fwd :: BamRec
trim_fwd = let (_, cigar' :: Vector Cigar
cigar') = Vector Cigar -> Int -> (Int, Vector Cigar)
forall (v :: * -> *).
Vector v Cigar =>
v Cigar -> Int -> (Int, v Cigar)
trim_back_cigar (BamRec -> Vector Cigar
b_cigar BamRec
b) Int
l
c :: BamRec
c = (ECig -> ECig) -> BamRec -> BamRec
modMd (Int -> ECig -> ECig
takeECig (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l)) BamRec
b
in BamRec
c { b_seq :: Vector_Nucs_half Nucleotides
b_seq = Int -> Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
c) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) (Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides)
-> Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
c
, b_qual :: Maybe (Vector Qual)
b_qual = Int -> Vector Qual -> Vector Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
c) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) (Vector Qual -> Vector Qual)
-> Maybe (Vector Qual) -> Maybe (Vector Qual)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BamRec -> Maybe (Vector Qual)
b_qual BamRec
c
, b_cigar :: Vector Cigar
b_cigar = Vector Cigar
cigar'
, b_exts :: Extensions
b_exts = ((BamKey, Ext) -> (BamKey, Ext)) -> Extensions -> Extensions
forall a b. (a -> b) -> [a] -> [b]
map (\(k :: BamKey
k,e :: Ext
e) -> case Ext
e of
Text t :: Bytes
t | BamKey
k BamKey -> [BamKey] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [BamKey]
trim_set
-> (BamKey
k, Bytes -> Ext
Text (Int -> Bytes -> Bytes
B.take (Bytes -> Int
B.length Bytes
t Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) Bytes
t))
_ -> (BamKey
k,Ext
e)
) (BamRec -> Extensions
b_exts BamRec
c) }
trim_rev :: BamRec
trim_rev = let (off :: Int
off, cigar' :: Vector Cigar
cigar') = Vector Cigar -> Int -> (Int, Vector Cigar)
forall (v :: * -> *).
Vector v Cigar =>
v Cigar -> Int -> (Int, v Cigar)
trim_fwd_cigar (BamRec -> Vector Cigar
b_cigar BamRec
b) Int
l
c :: BamRec
c = (ECig -> ECig) -> BamRec -> BamRec
modMd (Int -> ECig -> ECig
dropECig Int
l) BamRec
b
in BamRec
c { b_seq :: Vector_Nucs_half Nucleotides
b_seq = Int -> Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop Int
l (Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides)
-> Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
c
, b_qual :: Maybe (Vector Qual)
b_qual = Int -> Vector Qual -> Vector Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop Int
l (Vector Qual -> Vector Qual)
-> Maybe (Vector Qual) -> Maybe (Vector Qual)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BamRec -> Maybe (Vector Qual)
b_qual BamRec
c
, b_pos :: Int
b_pos = BamRec -> Int
b_pos BamRec
c Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
off
, b_cigar :: Vector Cigar
b_cigar = Vector Cigar
cigar'
, b_exts :: Extensions
b_exts = ((BamKey, Ext) -> (BamKey, Ext)) -> Extensions -> Extensions
forall a b. (a -> b) -> [a] -> [b]
map (\(k :: BamKey
k,e :: Ext
e) -> case Ext
e of
Text t :: Bytes
t | BamKey
k BamKey -> [BamKey] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [BamKey]
trim_set
-> (BamKey
k, Bytes -> Ext
Text (Int -> Bytes -> Bytes
B.drop Int
l Bytes
t))
_ -> (BamKey
k,Ext
e)
) (BamRec -> Extensions
b_exts BamRec
c) }
trim_set :: [BamKey]
trim_set = ["BQ","CQ","CS","E2","OQ","U2"]
modMd :: (ECig -> ECig) -> BamRec -> BamRec
modMd :: (ECig -> ECig) -> BamRec -> BamRec
modMd f :: ECig -> ECig
f br :: BamRec
br = BamRec -> ([MdOp] -> BamRec) -> Maybe [MdOp] -> BamRec
forall b a. b -> (a -> b) -> Maybe a -> b
maybe BamRec
br (BamRec -> ECig -> BamRec
setMD BamRec
br (ECig -> BamRec) -> ([MdOp] -> ECig) -> [MdOp] -> BamRec
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ECig -> ECig
f (ECig -> ECig) -> ([MdOp] -> ECig) -> [MdOp] -> ECig
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Cigar -> [MdOp] -> ECig
toECig (BamRec -> Vector Cigar
b_cigar BamRec
br)) (BamRec -> Maybe [MdOp]
getMd BamRec
br)
endOf :: ECig -> ECig
endOf :: ECig -> ECig
endOf WithMD = ECig
WithMD
endOf WithoutMD = ECig
WithoutMD
endOf (Mat' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (Ins' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (SMa' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (Rep' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (Del' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (Nop' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (HMa' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
endOf (Pad' _ es :: ECig
es) = ECig -> ECig
endOf ECig
es
takeECig :: Int -> ECig -> ECig
takeECig :: Int -> ECig -> ECig
takeECig 0 es :: ECig
es = ECig -> ECig
endOf ECig
es
takeECig _ WithMD = ECig
WithMD
takeECig _ WithoutMD = ECig
WithoutMD
takeECig n :: Int
n (Mat' m :: Int
m es :: ECig
es) = Int -> ECig -> ECig
Mat' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then Int -> ECig -> ECig
takeECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) ECig
es else ECig
WithMD
takeECig n :: Int
n (Ins' m :: Int
m es :: ECig
es) = Int -> ECig -> ECig
Ins' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then Int -> ECig -> ECig
takeECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) ECig
es else ECig
WithMD
takeECig n :: Int
n (SMa' m :: Int
m es :: ECig
es) = Int -> ECig -> ECig
SMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then Int -> ECig -> ECig
takeECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) ECig
es else ECig
WithMD
takeECig n :: Int
n (Rep' ns :: Nucleotides
ns es :: ECig
es) = Nucleotides -> ECig -> ECig
Rep' Nucleotides
ns (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
takeECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) ECig
es
takeECig n :: Int
n (Del' ns :: Nucleotides
ns es :: ECig
es) = Nucleotides -> ECig -> ECig
Del' Nucleotides
ns (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
takeECig Int
n ECig
es
takeECig n :: Int
n (Nop' m :: Int
m es :: ECig
es) = Int -> ECig -> ECig
Nop' Int
m (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
takeECig Int
n ECig
es
takeECig n :: Int
n (HMa' m :: Int
m es :: ECig
es) = Int -> ECig -> ECig
HMa' Int
m (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
takeECig Int
n ECig
es
takeECig n :: Int
n (Pad' m :: Int
m es :: ECig
es) = Int -> ECig -> ECig
Pad' Int
m (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
takeECig Int
n ECig
es
dropECig :: Int -> ECig -> ECig
dropECig :: Int -> ECig -> ECig
dropECig 0 es :: ECig
es = ECig
es
dropECig _ WithMD = ECig
WithMD
dropECig _ WithoutMD = ECig
WithoutMD
dropECig n :: Int
n (Mat' m :: Int
m es :: ECig
es) = if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then Int -> ECig -> ECig
dropECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) ECig
es else Int -> ECig -> ECig
Mat' Int
n ECig
WithMD
dropECig n :: Int
n (Ins' m :: Int
m es :: ECig
es) = if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then Int -> ECig -> ECig
dropECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) ECig
es else Int -> ECig -> ECig
Ins' Int
n ECig
WithMD
dropECig n :: Int
n (SMa' m :: Int
m es :: ECig
es) = if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m then Int -> ECig -> ECig
dropECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) ECig
es else Int -> ECig -> ECig
SMa' Int
n ECig
WithMD
dropECig n :: Int
n (Rep' _ es :: ECig
es) = Int -> ECig -> ECig
dropECig (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) ECig
es
dropECig n :: Int
n (Del' _ es :: ECig
es) = Int -> ECig -> ECig
dropECig Int
n ECig
es
dropECig n :: Int
n (Nop' _ es :: ECig
es) = Int -> ECig -> ECig
dropECig Int
n ECig
es
dropECig n :: Int
n (HMa' _ es :: ECig
es) = Int -> ECig -> ECig
dropECig Int
n ECig
es
dropECig n :: Int
n (Pad' _ es :: ECig
es) = Int -> ECig -> ECig
dropECig Int
n ECig
es
trim_back_cigar, trim_fwd_cigar :: V.Vector v Cigar => v Cigar -> Int -> ( Int, v Cigar )
trim_back_cigar :: v Cigar -> Int -> (Int, v Cigar)
trim_back_cigar c :: v Cigar
c l :: Int
l = (Int
o, [Cigar] -> v Cigar
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList ([Cigar] -> v Cigar) -> [Cigar] -> v Cigar
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [Cigar]
forall a. [a] -> [a]
reverse [Cigar]
c') where (o :: Int
o,c' :: [Cigar]
c') = (Int, [Cigar]) -> (Int, [Cigar])
sanitize_cigar ((Int, [Cigar]) -> (Int, [Cigar]))
-> ([Cigar] -> (Int, [Cigar])) -> [Cigar] -> (Int, [Cigar])
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> [Cigar] -> (Int, [Cigar])
trim_cigar Int
l ([Cigar] -> (Int, [Cigar])) -> [Cigar] -> (Int, [Cigar])
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [Cigar]
forall a. [a] -> [a]
reverse ([Cigar] -> [Cigar]) -> [Cigar] -> [Cigar]
forall a b. (a -> b) -> a -> b
$ v Cigar -> [Cigar]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList v Cigar
c
trim_fwd_cigar :: v Cigar -> Int -> (Int, v Cigar)
trim_fwd_cigar c :: v Cigar
c l :: Int
l = (Int
o, [Cigar] -> v Cigar
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList [Cigar]
c') where (o :: Int
o,c' :: [Cigar]
c') = (Int, [Cigar]) -> (Int, [Cigar])
sanitize_cigar ((Int, [Cigar]) -> (Int, [Cigar]))
-> (Int, [Cigar]) -> (Int, [Cigar])
forall a b. (a -> b) -> a -> b
$ Int -> [Cigar] -> (Int, [Cigar])
trim_cigar Int
l ([Cigar] -> (Int, [Cigar])) -> [Cigar] -> (Int, [Cigar])
forall a b. (a -> b) -> a -> b
$ v Cigar -> [Cigar]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList v Cigar
c
sanitize_cigar :: (Int, [Cigar]) -> (Int, [Cigar])
sanitize_cigar :: (Int, [Cigar]) -> (Int, [Cigar])
sanitize_cigar (o :: Int
o, [ ]) = (Int
o, [])
sanitize_cigar (o :: Int
o, (op :: CigOp
op:*l :: Int
l):xs :: [Cigar]
xs) | CigOp
op CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
== CigOp
Pad = (Int, [Cigar]) -> (Int, [Cigar])
sanitize_cigar (Int
o,[Cigar]
xs)
| CigOp
op CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
== CigOp
Del Bool -> Bool -> Bool
|| CigOp
op CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
== CigOp
Nop = (Int, [Cigar]) -> (Int, [Cigar])
sanitize_cigar (Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l, [Cigar]
xs)
| CigOp
op CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
== CigOp
Ins = (Int
o, (CigOp
SMa CigOp -> Int -> Cigar
:* Int
l)Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
xs)
| Bool
otherwise = (Int
o, (CigOp
op CigOp -> Int -> Cigar
:* Int
l)Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
xs)
trim_cigar :: Int -> [Cigar] -> (Int, [Cigar])
trim_cigar :: Int -> [Cigar] -> (Int, [Cigar])
trim_cigar 0 cs :: [Cigar]
cs = (0, [Cigar]
cs)
trim_cigar _ [] = (0, [])
trim_cigar l :: Int
l ((op :: CigOp
op:*ll :: Int
ll):cs :: [Cigar]
cs) | CigOp -> Bool
bad_op CigOp
op = let (o :: Int
o,cs' :: [Cigar]
cs') = Int -> [Cigar] -> (Int, [Cigar])
trim_cigar Int
l [Cigar]
cs in (Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
+ CigOp -> Int -> Int
forall a. Num a => CigOp -> a -> a
reflen CigOp
op Int
ll, [Cigar]
cs')
| Bool
otherwise = case Int
l Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` Int
ll of
LT -> (CigOp -> Int -> Int
forall a. Num a => CigOp -> a -> a
reflen CigOp
op Int
l, (CigOp
op CigOp -> Int -> Cigar
:* (Int
llInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l))Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
cs)
EQ -> (CigOp -> Int -> Int
forall a. Num a => CigOp -> a -> a
reflen CigOp
op Int
ll, [Cigar]
cs)
GT -> let (o :: Int
o,cs' :: [Cigar]
cs') = Int -> [Cigar] -> (Int, [Cigar])
trim_cigar (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ll) [Cigar]
cs in (Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
+ CigOp -> Int -> Int
forall a. Num a => CigOp -> a -> a
reflen CigOp
op Int
ll, [Cigar]
cs')
where
reflen :: CigOp -> a -> a
reflen op' :: CigOp
op' = if CigOp -> Bool
ref_op CigOp
op' then a -> a
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else a -> a -> a
forall a b. a -> b -> a
const 0
bad_op :: CigOp -> Bool
bad_op o :: CigOp
o = CigOp
o CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
/= CigOp
Mat Bool -> Bool -> Bool
&& CigOp
o CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
/= CigOp
Ins Bool -> Bool -> Bool
&& CigOp
o CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
/= CigOp
SMa
ref_op :: CigOp -> Bool
ref_op o :: CigOp
o = CigOp
o CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
== CigOp
Mat Bool -> Bool -> Bool
|| CigOp
o CigOp -> CigOp -> Bool
forall a. Eq a => a -> a -> Bool
== CigOp
Del
trim_low_quality :: Qual -> a -> [Qual] -> Bool
trim_low_quality :: Qual -> a -> [Qual] -> Bool
trim_low_quality q :: Qual
q = ([Qual] -> Bool) -> a -> [Qual] -> Bool
forall a b. a -> b -> a
const (([Qual] -> Bool) -> a -> [Qual] -> Bool)
-> ([Qual] -> Bool) -> a -> [Qual] -> Bool
forall a b. (a -> b) -> a -> b
$ (Qual -> Bool) -> [Qual] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Qual -> Qual -> Bool
forall a. Ord a => a -> a -> Bool
< Qual
q)
find_merge :: AD_Seqs -> AD_Seqs
-> W.Vector Nucleotides -> W.Vector Qual
-> W.Vector Nucleotides -> W.Vector Qual
-> IO (Int, Int, Int)
find_merge :: AD_Seqs
-> AD_Seqs
-> Vector Nucleotides
-> Vector Qual
-> Vector Nucleotides
-> Vector Qual
-> IO (Int, Int, Int)
find_merge pads1 :: AD_Seqs
pads1 pads2 :: AD_Seqs
pads2 r1 :: Vector Nucleotides
r1 q1 :: Vector Qual
q1 r2 :: Vector Nucleotides
r2 q2 :: Vector Qual
q2 =
Vector Nucleotides
-> Vector Qual
-> (FW_Seq -> IO (Int, Int, Int))
-> IO (Int, Int, Int)
forall r.
Vector Nucleotides -> Vector Qual -> (FW_Seq -> IO r) -> IO r
with_fw_seq Vector Nucleotides
r1 Vector Qual
q1 ((FW_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int))
-> (FW_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$ \pr1 :: FW_Seq
pr1 ->
Vector Nucleotides
-> Vector Qual
-> (FW_Seq -> IO (Int, Int, Int))
-> IO (Int, Int, Int)
forall r.
Vector Nucleotides -> Vector Qual -> (FW_Seq -> IO r) -> IO r
with_fw_seq Vector Nucleotides
r2 Vector Qual
q2 ((FW_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int))
-> (FW_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$ \pr2 :: FW_Seq
pr2 ->
Vector Nucleotides
-> Vector Qual
-> (RC_Seq -> IO (Int, Int, Int))
-> IO (Int, Int, Int)
forall r.
Vector Nucleotides -> Vector Qual -> (RC_Seq -> IO r) -> IO r
with_rc_seq Vector Nucleotides
r2 Vector Qual
q2 ((RC_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int))
-> (RC_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$ \prv2 :: RC_Seq
prv2 -> do
AD_Seqs
-> AD_Seqs -> FW_Seq -> FW_Seq -> RC_Seq -> IO (Int, Int, Int)
min_merge_score AD_Seqs
pads1 AD_Seqs
pads2 FW_Seq
pr1 FW_Seq
pr2 RC_Seq
prv2
mergeBam :: Int -> Int
-> AD_Seqs -> AD_Seqs
-> BamRec -> BamRec -> IO [BamRec]
mergeBam :: Int -> Int -> AD_Seqs -> AD_Seqs -> BamRec -> BamRec -> IO [BamRec]
mergeBam lowq :: Int
lowq highq :: Int
highq ads1 :: AD_Seqs
ads1 ads2 :: AD_Seqs
ads2 r1 :: BamRec
r1 r2 :: BamRec
r2 = do
let len_r1 :: Int
len_r1 = Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (Vector_Nucs_half Nucleotides -> Int)
-> Vector_Nucs_half Nucleotides -> Int
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r1
len_r2 :: Int
len_r2 = Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (Vector_Nucs_half Nucleotides -> Int)
-> Vector_Nucs_half Nucleotides -> Int
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r2
b_seq_r1 :: Vector Nucleotides
b_seq_r1 = Vector_Nucs_half Nucleotides -> Vector Nucleotides
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert (Vector_Nucs_half Nucleotides -> Vector Nucleotides)
-> Vector_Nucs_half Nucleotides -> Vector Nucleotides
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r1
b_seq_r2 :: Vector Nucleotides
b_seq_r2 = Vector_Nucs_half Nucleotides -> Vector Nucleotides
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert (Vector_Nucs_half Nucleotides -> Vector Nucleotides)
-> Vector_Nucs_half Nucleotides -> Vector Nucleotides
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r2
b_qual_r1 :: Vector Qual
b_qual_r1 = Vector Qual -> Maybe (Vector Qual) -> Vector Qual
forall a. a -> Maybe a -> a
fromMaybe ((Nucleotides -> Qual) -> Vector Nucleotides -> Vector Qual
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
V.map (Qual -> Nucleotides -> Qual
forall a b. a -> b -> a
const (Word8 -> Qual
Q 23)) Vector Nucleotides
b_seq_r1) (BamRec -> Maybe (Vector Qual)
b_qual BamRec
r1)
b_qual_r2 :: Vector Qual
b_qual_r2 = Vector Qual -> Maybe (Vector Qual) -> Vector Qual
forall a. a -> Maybe a -> a
fromMaybe ((Nucleotides -> Qual) -> Vector Nucleotides -> Vector Qual
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
V.map (Qual -> Nucleotides -> Qual
forall a b. a -> b -> a
const (Word8 -> Qual
Q 23)) Vector Nucleotides
b_seq_r2) (BamRec -> Maybe (Vector Qual)
b_qual BamRec
r2)
(mlen :: Int
mlen, qual1 :: Int
qual1, qual2 :: Int
qual2) <- AD_Seqs
-> AD_Seqs
-> Vector Nucleotides
-> Vector Qual
-> Vector Nucleotides
-> Vector Qual
-> IO (Int, Int, Int)
find_merge AD_Seqs
ads1 AD_Seqs
ads2 Vector Nucleotides
b_seq_r1 Vector Qual
b_qual_r1 Vector Nucleotides
b_seq_r2 Vector Qual
b_qual_r2
let flag_alternative :: BamRec -> BamRec
flag_alternative br :: BamRec
br = BamRec
br { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int (Int -> Ext) -> Int -> Ext
forall a b. (a -> b) -> a -> b
$ Int -> BamKey -> BamRec -> Int
extAsInt 0 "FF" BamRec
br Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
eflagAlternative) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
br }
store_quals :: BamRec -> BamRec
store_quals br :: BamRec
br = BamRec
br { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "YM" (Int -> Ext
Int Int
qual1) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamKey -> Ext -> Extensions -> Extensions
updateE "YN" (Int -> Ext
Int Int
qual2) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
br }
pair_flags :: Int
pair_flags = Int
flagPairedInt -> Int -> Int
forall a. Bits a => a -> a -> a
.|.Int
flagProperlyPairedInt -> Int -> Int
forall a. Bits a => a -> a -> a
.|.Int
flagMateUnmappedInt -> Int -> Int
forall a. Bits a => a -> a -> a
.|.Int
flagMateReversedInt -> Int -> Int
forall a. Bits a => a -> a -> a
.|.Int
flagFirstMateInt -> Int -> Int
forall a. Bits a => a -> a -> a
.|.Int
flagSecondMate
r1' :: BamRec
r1' = BamRec -> BamRec
store_quals BamRec
r1
r2' :: BamRec
r2' = BamRec -> BamRec
store_quals BamRec
r2
rm :: BamRec
rm = BamRec -> BamRec
store_quals (BamRec -> BamRec) -> BamRec -> BamRec
forall a b. (a -> b) -> a -> b
$ Int -> Word8 -> BamRec
merged_read Int
mlen (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
forall a. Ord a => a -> a -> a
min 63 Int
qual1)
merged_read :: Int -> Word8 -> BamRec
merged_read l :: Int
l qmax :: Word8
qmax =
BamRec
nullBamRec
{ b_qname :: Bytes
b_qname = BamRec -> Bytes
b_qname BamRec
r1
, b_flag :: Int
b_flag = Int
flagUnmapped Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int -> Int
forall a. Bits a => a -> a
complement Int
pair_flags Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. BamRec -> Int
b_flag BamRec
r1
, b_seq :: Vector_Nucs_half Nucleotides
b_seq = Vector Nucleotides -> Vector_Nucs_half Nucleotides
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert (Vector Nucleotides -> Vector_Nucs_half Nucleotides)
-> Vector Nucleotides -> Vector_Nucs_half Nucleotides
forall a b. (a -> b) -> a -> b
$ Int
-> Vector Nucleotides
-> Vector Qual
-> Vector Nucleotides
-> Vector Qual
-> Vector Nucleotides
forall (v :: * -> *).
(Vector v Nucleotides, Vector v Qual) =>
Int
-> v Nucleotides
-> v Qual
-> v Nucleotides
-> v Qual
-> v Nucleotides
merged_seq Int
l Vector Nucleotides
b_seq_r1 Vector Qual
b_qual_r1 Vector Nucleotides
b_seq_r2 Vector Qual
b_qual_r2
, b_qual :: Maybe (Vector Qual)
b_qual = Vector Qual -> Maybe (Vector Qual)
forall a. a -> Maybe a
Just (Vector Qual -> Maybe (Vector Qual))
-> Vector Qual -> Maybe (Vector Qual)
forall a b. (a -> b) -> a -> b
$ Word8
-> Int
-> Vector Nucleotides
-> Vector Qual
-> Vector Nucleotides
-> Vector Qual
-> Vector Qual
forall (v :: * -> *).
(Vector v Nucleotides, Vector v Qual) =>
Word8
-> Int
-> v Nucleotides
-> v Qual
-> v Nucleotides
-> v Qual
-> v Qual
merged_qual Word8
qmax Int
l Vector Nucleotides
b_seq_r1 Vector Qual
b_qual_r1 Vector Nucleotides
b_seq_r2 Vector Qual
b_qual_r2
, b_exts :: Extensions
b_exts = let ff :: Int
ff = if Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len_r1 then Int
eflagTrimmed else 0
in BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int (Int -> Ext) -> Int -> Ext
forall a b. (a -> b) -> a -> b
$ Int -> BamKey -> BamRec -> Int
extAsInt 0 "FF" BamRec
r1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
eflagMerged Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
ff) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
r1 }
[BamRec] -> IO [BamRec]
forall (m :: * -> *) a. Monad m => a -> m a
return ([BamRec] -> IO [BamRec]) -> [BamRec] -> IO [BamRec]
forall a b. (a -> b) -> a -> b
$ case () of
_ | Vector_Nucs_half Nucleotides -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
V.null (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r1) Bool -> Bool -> Bool
&& Vector_Nucs_half Nucleotides -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
V.null (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r2) -> [ ]
| Int
qual1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lowq Bool -> Bool -> Bool
|| Int
mlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0 -> [ BamRec
r1', BamRec
r2' ]
| Int
qual1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
highq Bool -> Bool -> Bool
&& Int
mlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0 -> [ ]
| Int
qual1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
highq -> [ BamRec
rm ]
| Int
mlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len_r1Int -> Int -> Int
forall a. Num a => a -> a -> a
-20 Bool -> Bool -> Bool
|| Int
mlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len_r2Int -> Int -> Int
forall a. Num a => a -> a -> a
-20 -> [ BamRec
rm ]
| Bool
otherwise -> (BamRec -> BamRec) -> [BamRec] -> [BamRec]
forall a b. (a -> b) -> [a] -> [b]
map BamRec -> BamRec
flag_alternative [ BamRec
r1', BamRec
r2', BamRec
rm ]
{-# INLINE merged_seq #-}
merged_seq :: (V.Vector v Nucleotides, V.Vector v Qual)
=> Int -> v Nucleotides -> v Qual -> v Nucleotides -> v Qual -> v Nucleotides
merged_seq :: Int
-> v Nucleotides
-> v Qual
-> v Nucleotides
-> v Qual
-> v Nucleotides
merged_seq l :: Int
l b_seq_r1 :: v Nucleotides
b_seq_r1 b_qual_r1 :: v Qual
b_qual_r1 b_seq_r2 :: v Nucleotides
b_seq_r2 b_qual_r2 :: v Qual
b_qual_r2 = [v Nucleotides] -> v Nucleotides
forall (v :: * -> *) a. Vector v a => [v a] -> v a
V.concat
[ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r2) v Nucleotides
b_seq_r1
, (Nucleotides -> Qual -> Nucleotides -> Qual -> Nucleotides)
-> v Nucleotides
-> v Qual
-> v Nucleotides
-> v Qual
-> v Nucleotides
forall (v :: * -> *) a b c d e.
(Vector v a, Vector v b, Vector v c, Vector v d, Vector v e) =>
(a -> b -> c -> d -> e) -> v a -> v b -> v c -> v d -> v e
V.zipWith4 Nucleotides -> Qual -> Nucleotides -> Qual -> Nucleotides
zz (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r2) v Nucleotides
b_seq_r1)
(Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r2) v Qual
b_qual_r1)
(v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> v a
V.reverse (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r1) v Nucleotides
b_seq_r2)
(v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => v a -> v a
V.reverse (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r1) v Qual
b_qual_r2)
, v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> v a
V.reverse (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r1) v Nucleotides
b_seq_r2 ]
where
len_r1 :: Int
len_r1 = v Qual -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length v Qual
b_qual_r1
len_r2 :: Int
len_r2 = v Qual -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length v Qual
b_qual_r2
zz :: Nucleotides -> Qual -> Nucleotides -> Qual -> Nucleotides
zz !Nucleotides
n1 (Q !Word8
q1) !Nucleotides
n2 (Q !Word8
q2) | Nucleotides
n1 Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides -> Nucleotides
compls Nucleotides
n2 = Nucleotides
n1
| Word8
q1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
q2 = Nucleotides
n1
| Bool
otherwise = Nucleotides -> Nucleotides
compls Nucleotides
n2
{-# INLINE merged_qual #-}
merged_qual :: (V.Vector v Nucleotides, V.Vector v Qual)
=> Word8 -> Int -> v Nucleotides -> v Qual -> v Nucleotides -> v Qual -> v Qual
merged_qual :: Word8
-> Int
-> v Nucleotides
-> v Qual
-> v Nucleotides
-> v Qual
-> v Qual
merged_qual qmax :: Word8
qmax l :: Int
l b_seq_r1 :: v Nucleotides
b_seq_r1 b_qual_r1 :: v Qual
b_qual_r1 b_seq_r2 :: v Nucleotides
b_seq_r2 b_qual_r2 :: v Qual
b_qual_r2 = [v Qual] -> v Qual
forall (v :: * -> *) a. Vector v a => [v a] -> v a
V.concat
[ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r2) v Qual
b_qual_r1
, (Nucleotides -> Qual -> Nucleotides -> Qual -> Qual)
-> v Nucleotides -> v Qual -> v Nucleotides -> v Qual -> v Qual
forall (v :: * -> *) a b c d e.
(Vector v a, Vector v b, Vector v c, Vector v d, Vector v e) =>
(a -> b -> c -> d -> e) -> v a -> v b -> v c -> v d -> v e
V.zipWith4 Nucleotides -> Qual -> Nucleotides -> Qual -> Qual
zz (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r2) v Nucleotides
b_seq_r1)
(Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r2) v Qual
b_qual_r1)
(v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> v a
V.reverse (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Nucleotides -> v Nucleotides) -> v Nucleotides -> v Nucleotides
forall a b. (a -> b) -> a -> b
$ Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r1) v Nucleotides
b_seq_r2)
(v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => v a -> v a
V.reverse (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.drop (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r1) v Qual
b_qual_r2)
, v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => v a -> v a
V.reverse (v Qual -> v Qual) -> v Qual -> v Qual
forall a b. (a -> b) -> a -> b
$ Int -> v Qual -> v Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len_r1) v Qual
b_qual_r2 ]
where
len_r1 :: Int
len_r1 = v Qual -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length v Qual
b_qual_r1
len_r2 :: Int
len_r2 = v Qual -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length v Qual
b_qual_r2
zz :: Nucleotides -> Qual -> Nucleotides -> Qual -> Qual
zz !Nucleotides
n1 (Q !Word8
q1) !Nucleotides
n2 (Q !Word8
q2) | Nucleotides
n1 Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides -> Nucleotides
compls Nucleotides
n2 = Word8 -> Qual
Q (Word8 -> Qual) -> Word8 -> Qual
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> Word8
forall a. Ord a => a -> a -> a
min Word8
qmax (Word8
q1 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
q2)
| Word8
q1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
q2 = Word8 -> Qual
Q (Word8 -> Qual) -> Word8 -> Qual
forall a b. (a -> b) -> a -> b
$ Word8
q1 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
q2
| Bool
otherwise = Word8 -> Qual
Q (Word8 -> Qual) -> Word8 -> Qual
forall a b. (a -> b) -> a -> b
$ Word8
q2 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
q1
find_trim :: AD_Seqs
-> W.Vector Nucleotides -> W.Vector Qual
-> IO (Int, Int, Int)
find_trim :: AD_Seqs -> Vector Nucleotides -> Vector Qual -> IO (Int, Int, Int)
find_trim pads1 :: AD_Seqs
pads1 r1 :: Vector Nucleotides
r1 q1 :: Vector Qual
q1 =
[Vector Nucleotides]
-> (AD_Seqs -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall (m :: * -> *) r.
MonadBaseControl IO m =>
[Vector Nucleotides] -> (AD_Seqs -> m r) -> m r
withADSeqs [Vector Nucleotides
forall a. Storable a => Vector a
W.empty] ((AD_Seqs -> IO (Int, Int, Int)) -> IO (Int, Int, Int))
-> (AD_Seqs -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$ \pads2 :: AD_Seqs
pads2 ->
Vector Nucleotides
-> Vector Qual
-> (FW_Seq -> IO (Int, Int, Int))
-> IO (Int, Int, Int)
forall r.
Vector Nucleotides -> Vector Qual -> (FW_Seq -> IO r) -> IO r
with_fw_seq Vector Nucleotides
r1 Vector Qual
q1 ((FW_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int))
-> (FW_Seq -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$ \pr1 :: FW_Seq
pr1 ->
AD_Seqs
-> AD_Seqs -> FW_Seq -> FW_Seq -> RC_Seq -> IO (Int, Int, Int)
min_merge_score AD_Seqs
pads1 AD_Seqs
pads2 FW_Seq
pr1 (Ptr Nucleotides -> Ptr Qual -> Int -> FW_Seq
FW_Seq Ptr Nucleotides
forall a. Ptr a
nullPtr Ptr Qual
forall a. Ptr a
nullPtr 0) (Ptr Nucleotides -> Ptr Qual -> Int -> RC_Seq
RC_Seq Ptr Nucleotides
forall a. Ptr a
nullPtr Ptr Qual
forall a. Ptr a
nullPtr 0)
trimBam :: Int -> Int -> AD_Seqs -> BamRec -> IO [BamRec]
trimBam :: Int -> Int -> AD_Seqs -> BamRec -> IO [BamRec]
trimBam lowq :: Int
lowq highq :: Int
highq ads1 :: AD_Seqs
ads1 r1 :: BamRec
r1 = do
let b_seq_r1 :: Vector Nucleotides
b_seq_r1 = Vector_Nucs_half Nucleotides -> Vector Nucleotides
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert (Vector_Nucs_half Nucleotides -> Vector Nucleotides)
-> Vector_Nucs_half Nucleotides -> Vector Nucleotides
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r1
(mlen :: Int
mlen, qual1 :: Int
qual1, qual2 :: Int
qual2) <- AD_Seqs -> Vector Nucleotides -> Vector Qual -> IO (Int, Int, Int)
find_trim AD_Seqs
ads1 Vector Nucleotides
b_seq_r1 (Vector Qual -> IO (Int, Int, Int))
-> Vector Qual -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$
Vector Qual -> Maybe (Vector Qual) -> Vector Qual
forall a. a -> Maybe a -> a
fromMaybe ((Nucleotides -> Qual) -> Vector Nucleotides -> Vector Qual
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
V.map (Qual -> Nucleotides -> Qual
forall a b. a -> b -> a
const (Word8 -> Qual
Q 23)) Vector Nucleotides
b_seq_r1) (BamRec -> Maybe (Vector Qual)
b_qual BamRec
r1)
let flag_alternative :: BamRec -> BamRec
flag_alternative br :: BamRec
br = BamRec
br { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int (Int -> Ext) -> Int -> Ext
forall a b. (a -> b) -> a -> b
$ Int -> BamKey -> BamRec -> Int
extAsInt 0 "FF" BamRec
br Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
eflagAlternative) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
br }
store_quals :: BamRec -> BamRec
store_quals br :: BamRec
br = BamRec
br { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "YM" (Int -> Ext
Int Int
qual1) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamKey -> Ext -> Extensions -> Extensions
updateE "YN" (Int -> Ext
Int Int
qual2) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
br }
r1' :: BamRec
r1' = BamRec -> BamRec
store_quals BamRec
r1
r1t :: BamRec
r1t = BamRec -> BamRec
store_quals (BamRec -> BamRec) -> BamRec -> BamRec
forall a b. (a -> b) -> a -> b
$ Int -> BamRec
trimmed_read Int
mlen
trimmed_read :: Int -> BamRec
trimmed_read l :: Int
l = BamRec
nullBamRec {
b_qname :: Bytes
b_qname = BamRec -> Bytes
b_qname BamRec
r1,
b_flag :: Int
b_flag = Int
flagUnmapped Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. BamRec -> Int
b_flag BamRec
r1,
b_seq :: Vector_Nucs_half Nucleotides
b_seq = Int -> Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides)
-> Vector_Nucs_half Nucleotides -> Vector_Nucs_half Nucleotides
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r1,
b_qual :: Maybe (Vector Qual)
b_qual = Int -> Vector Qual -> Vector Qual
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
V.take Int
l (Vector Qual -> Vector Qual)
-> Maybe (Vector Qual) -> Maybe (Vector Qual)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BamRec -> Maybe (Vector Qual)
b_qual BamRec
r1,
b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int (Int -> Ext) -> Int -> Ext
forall a b. (a -> b) -> a -> b
$ Int -> BamKey -> BamRec -> Int
extAsInt 0 "FF" BamRec
r1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
eflagTrimmed) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
r1 }
[BamRec] -> IO [BamRec]
forall (m :: * -> *) a. Monad m => a -> m a
return ([BamRec] -> IO [BamRec]) -> [BamRec] -> IO [BamRec]
forall a b. (a -> b) -> a -> b
$ case () of
_ | Vector_Nucs_half Nucleotides -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
V.null (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
r1) -> [ ]
| Int
mlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0 Bool -> Bool -> Bool
&& Int
qual1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
highq -> [ ]
| Int
qual1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lowq Bool -> Bool -> Bool
|| Int
mlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0 -> [ BamRec
r1' ]
| Int
qual1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
highq -> [ BamRec
r1t ]
| Bool
otherwise -> (BamRec -> BamRec) -> [BamRec] -> [BamRec]
forall a b. (a -> b) -> [a] -> [b]
map BamRec -> BamRec
flag_alternative [ BamRec
r1', BamRec
r1t ]
default_fwd_adapters :: [W.Vector Nucleotides]
default_fwd_adapters :: [Vector Nucleotides]
default_fwd_adapters = ([Char] -> Vector Nucleotides) -> [[Char]] -> [Vector Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map ([Nucleotides] -> Vector Nucleotides
forall a. Storable a => [a] -> Vector a
W.fromList ([Nucleotides] -> Vector Nucleotides)
-> ([Char] -> [Nucleotides]) -> [Char] -> Vector Nucleotides
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Nucleotides) -> [Word8] -> [Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> Nucleotides
toNucleotides ([Word8] -> [Nucleotides])
-> ([Char] -> [Word8]) -> [Char] -> [Nucleotides]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> Word8) -> [Char] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Word8
c2w)
[ "AGATCGGAAGAGCGGTTCAG"
, "AGATCGGAAGAGCACACGTC"
, "AGATCGGAAGAGCTCGTATG" ]
default_rev_adapters :: [W.Vector Nucleotides]
default_rev_adapters :: [Vector Nucleotides]
default_rev_adapters = ([Char] -> Vector Nucleotides) -> [[Char]] -> [Vector Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map ([Nucleotides] -> Vector Nucleotides
forall a. Storable a => [a] -> Vector a
W.fromList ([Nucleotides] -> Vector Nucleotides)
-> ([Char] -> [Nucleotides]) -> [Char] -> Vector Nucleotides
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Nucleotides) -> [Word8] -> [Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> Nucleotides
toNucleotides ([Word8] -> [Nucleotides])
-> ([Char] -> [Word8]) -> [Char] -> [Nucleotides]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> Word8) -> [Char] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Word8
c2w)
[ "AGATCGGAAGAGCGTCGTGT"
, "GGAAGAGCGTCGTGTAGGGA" ]
min_merge_score
:: AD_Seqs
-> AD_Seqs
-> FW_Seq
-> FW_Seq
-> RC_Seq
-> IO (Int,Int,Int)
min_merge_score :: AD_Seqs
-> AD_Seqs -> FW_Seq -> FW_Seq -> RC_Seq -> IO (Int, Int, Int)
min_merge_score (AD_Seqs !Ptr (Ptr Nucleotides)
p_fwd_ads !Ptr CInt
p_fwd_lns !Int
n_fwd_ads) (AD_Seqs !Ptr (Ptr Nucleotides)
p_rev_ads !Ptr CInt
p_rev_lns !Int
n_rev_ads)
(FW_Seq !Ptr Nucleotides
p_rd1 !Ptr Qual
p_qs1 !Int
l1) (FW_Seq !Ptr Nucleotides
p_rd2 !Ptr Qual
p_qs2 !Int
l2) (RC_Seq !Ptr Nucleotides
p_rrd2 !Ptr Qual
p_rqs2 _) =
Int -> (Ptr CInt -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray 2 ((Ptr CInt -> IO (Int, Int, Int)) -> IO (Int, Int, Int))
-> (Ptr CInt -> IO (Int, Int, Int)) -> IO (Int, Int, Int)
forall a b. (a -> b) -> a -> b
$ \pmins :: Ptr CInt
pmins ->
(Int -> Int -> Int -> (Int, Int, Int))
-> IO Int -> IO Int -> IO Int -> IO (Int, Int, Int)
forall (m :: * -> *) a1 a2 a3 r.
Monad m =>
(a1 -> a2 -> a3 -> r) -> m a1 -> m a2 -> m a3 -> m r
liftM3 (,,)
(CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CInt -> Int) -> IO CInt -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
Ptr (Ptr Nucleotides)
-> Ptr CInt
-> CInt
-> Ptr (Ptr Nucleotides)
-> Ptr CInt
-> CInt
-> Ptr Nucleotides
-> Ptr Qual
-> CInt
-> Ptr Nucleotides
-> Ptr Qual
-> CInt
-> Ptr Nucleotides
-> Ptr Qual
-> Ptr CInt
-> IO CInt
prim_merge_score Ptr (Ptr Nucleotides)
p_fwd_ads Ptr CInt
p_fwd_lns (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n_fwd_ads)
Ptr (Ptr Nucleotides)
p_rev_ads Ptr CInt
p_rev_lns (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n_rev_ads)
Ptr Nucleotides
p_rd1 Ptr Qual
p_qs1 (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
l1)
Ptr Nucleotides
p_rd2 Ptr Qual
p_qs2 (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
l2)
Ptr Nucleotides
p_rrd2 Ptr Qual
p_rqs2 Ptr CInt
pmins)
(CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CInt -> Int) -> IO CInt -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr CInt -> Int -> IO CInt
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr CInt
pmins 0)
(CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CInt -> Int) -> IO CInt -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr CInt -> Int -> IO CInt
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr CInt
pmins 1)
foreign import ccall unsafe "prim_merge_score"
prim_merge_score :: Ptr (Ptr Nucleotides) -> Ptr CInt -> CInt
-> Ptr (Ptr Nucleotides) -> Ptr CInt -> CInt
-> Ptr Nucleotides -> Ptr Qual -> CInt
-> Ptr Nucleotides -> Ptr Qual -> CInt
-> Ptr Nucleotides -> Ptr Qual
-> Ptr CInt -> IO CInt
data AD_Seqs = AD_Seqs !(Ptr (Ptr Nucleotides)) !(Ptr CInt) !Int
data FW_Seq = FW_Seq !(Ptr Nucleotides) !(Ptr Qual) !Int
data RC_Seq = RC_Seq !(Ptr Nucleotides) !(Ptr Qual) !Int
withADSeqs :: MonadBaseControl IO m => [W.Vector Nucleotides] -> (AD_Seqs -> m r) -> m r
withADSeqs :: [Vector Nucleotides] -> (AD_Seqs -> m r) -> m r
withADSeqs ads0 :: [Vector Nucleotides]
ads0 k :: AD_Seqs -> m r
k =
(RunInBase m IO -> IO (StM m r)) -> m r
forall (b :: * -> *) (m :: * -> *) a.
MonadBaseControl b m =>
(RunInBase m b -> b (StM m a)) -> m a
control ((RunInBase m IO -> IO (StM m r)) -> m r)
-> (RunInBase m IO -> IO (StM m r)) -> m r
forall a b. (a -> b) -> a -> b
$ \run_io :: RunInBase m IO
run_io ->
Int -> (Ptr (Ptr Nucleotides) -> IO (StM m r)) -> IO (StM m r)
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray ([Vector Nucleotides] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Vector Nucleotides]
ads0) ((Ptr (Ptr Nucleotides) -> IO (StM m r)) -> IO (StM m r))
-> (Ptr (Ptr Nucleotides) -> IO (StM m r)) -> IO (StM m r)
forall a b. (a -> b) -> a -> b
$ \pps :: Ptr (Ptr Nucleotides)
pps ->
Int -> (Ptr CInt -> IO (StM m r)) -> IO (StM m r)
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray ([Vector Nucleotides] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Vector Nucleotides]
ads0) ((Ptr CInt -> IO (StM m r)) -> IO (StM m r))
-> (Ptr CInt -> IO (StM m r)) -> IO (StM m r)
forall a b. (a -> b) -> a -> b
$ \pls :: Ptr CInt
pls ->
let go :: Int -> [Vector Nucleotides] -> IO (StM m r)
go !Int
n [ ] = m r -> IO (StM m r)
RunInBase m IO
run_io (AD_Seqs -> m r
k (AD_Seqs -> m r) -> AD_Seqs -> m r
forall a b. (a -> b) -> a -> b
$! Ptr (Ptr Nucleotides) -> Ptr CInt -> Int -> AD_Seqs
AD_Seqs Ptr (Ptr Nucleotides)
pps Ptr CInt
pls Int
n)
go !Int
n (v :: Vector Nucleotides
v:vs :: [Vector Nucleotides]
vs) = Vector Nucleotides
-> (Ptr Nucleotides -> IO (StM m r)) -> IO (StM m r)
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
W.unsafeWith Vector Nucleotides
v ((Ptr Nucleotides -> IO (StM m r)) -> IO (StM m r))
-> (Ptr Nucleotides -> IO (StM m r)) -> IO (StM m r)
forall a b. (a -> b) -> a -> b
$ \pa :: Ptr Nucleotides
pa -> do
Ptr (Ptr Nucleotides) -> Int -> Ptr Nucleotides -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr (Ptr Nucleotides)
pps Int
n Ptr Nucleotides
pa
Ptr CInt -> Int -> CInt -> IO ()
forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff Ptr CInt
pls Int
n (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector Nucleotides -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Nucleotides
v))
Int -> [Vector Nucleotides] -> IO (StM m r)
go (Int -> Int
forall a. Enum a => a -> a
succ Int
n) [Vector Nucleotides]
vs
in Int -> [Vector Nucleotides] -> IO (StM m r)
go 0 [Vector Nucleotides]
ads0
with_fw_seq :: W.Vector Nucleotides -> W.Vector Qual -> (FW_Seq -> IO r) -> IO r
with_fw_seq :: Vector Nucleotides -> Vector Qual -> (FW_Seq -> IO r) -> IO r
with_fw_seq ns :: Vector Nucleotides
ns qs :: Vector Qual
qs k :: FW_Seq -> IO r
k
| Vector Nucleotides -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Nucleotides
ns Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Vector Qual -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Qual
qs
= Vector Nucleotides -> (Ptr Nucleotides -> IO r) -> IO r
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
W.unsafeWith Vector Nucleotides
ns ((Ptr Nucleotides -> IO r) -> IO r)
-> (Ptr Nucleotides -> IO r) -> IO r
forall a b. (a -> b) -> a -> b
$ \p_ns :: Ptr Nucleotides
p_ns ->
Vector Qual -> (Ptr Qual -> IO r) -> IO r
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
W.unsafeWith Vector Qual
qs ((Ptr Qual -> IO r) -> IO r) -> (Ptr Qual -> IO r) -> IO r
forall a b. (a -> b) -> a -> b
$ \p_qs :: Ptr Qual
p_qs ->
FW_Seq -> IO r
k (Ptr Nucleotides -> Ptr Qual -> Int -> FW_Seq
FW_Seq Ptr Nucleotides
p_ns Ptr Qual
p_qs (Int -> FW_Seq) -> Int -> FW_Seq
forall a b. (a -> b) -> a -> b
$ Vector Nucleotides -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Nucleotides
ns)
| Bool
otherwise
= LengthMismatch -> IO r
forall e a. Exception e => e -> IO a
throwIO (LengthMismatch -> IO r) -> LengthMismatch -> IO r
forall a b. (a -> b) -> a -> b
$ Bytes -> LengthMismatch
LengthMismatch "forward adapter"
{-# INLINE with_fw_seq #-}
with_rc_seq :: W.Vector Nucleotides -> W.Vector Qual -> (RC_Seq -> IO r) -> IO r
with_rc_seq :: Vector Nucleotides -> Vector Qual -> (RC_Seq -> IO r) -> IO r
with_rc_seq ns :: Vector Nucleotides
ns qs :: Vector Qual
qs k :: RC_Seq -> IO r
k
| Vector Nucleotides -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Nucleotides
ns Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Vector Qual -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Qual
qs
= Vector Nucleotides -> (Ptr Nucleotides -> IO r) -> IO r
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
W.unsafeWith (Vector Nucleotides -> Vector Nucleotides
forall a. Storable a => Vector a -> Vector a
W.reverse (Vector Nucleotides -> Vector Nucleotides)
-> Vector Nucleotides -> Vector Nucleotides
forall a b. (a -> b) -> a -> b
$ (Nucleotides -> Nucleotides)
-> Vector Nucleotides -> Vector Nucleotides
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
W.map Nucleotides -> Nucleotides
compls Vector Nucleotides
ns) ((Ptr Nucleotides -> IO r) -> IO r)
-> (Ptr Nucleotides -> IO r) -> IO r
forall a b. (a -> b) -> a -> b
$ \p_rns :: Ptr Nucleotides
p_rns ->
Vector Qual -> (Ptr Qual -> IO r) -> IO r
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
W.unsafeWith (Vector Qual -> Vector Qual
forall a. Storable a => Vector a -> Vector a
W.reverse Vector Qual
qs) ((Ptr Qual -> IO r) -> IO r) -> (Ptr Qual -> IO r) -> IO r
forall a b. (a -> b) -> a -> b
$ \p_rqs :: Ptr Qual
p_rqs ->
RC_Seq -> IO r
k (Ptr Nucleotides -> Ptr Qual -> Int -> RC_Seq
RC_Seq Ptr Nucleotides
p_rns Ptr Qual
p_rqs (Int -> RC_Seq) -> Int -> RC_Seq
forall a b. (a -> b) -> a -> b
$ Vector Nucleotides -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Nucleotides
ns)
| Bool
otherwise
= LengthMismatch -> IO r
forall e a. Exception e => e -> IO a
throwIO (LengthMismatch -> IO r) -> LengthMismatch -> IO r
forall a b. (a -> b) -> a -> b
$ Bytes -> LengthMismatch
LengthMismatch "reverse adapter"
{-# INLINE with_rc_seq #-}