module Bio.Bam.Rmdup(
rmdup, Collapse, cons_collapse, cheap_collapse,
cons_collapse_keep, cheap_collapse_keep,
check_sort, normalizeTo, wrapTo,
ECig(..), toECig, setMD, toCigar,
RmdupException(..), BrokenMD(..)
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude hiding ( left, right )
import Bio.Streaming
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as T
import qualified Data.HashMap.Strict as M
import qualified Data.Vector as VV ( fromList )
import qualified Data.Vector.Algorithms.Intro as VV ( sortBy )
import qualified Data.Vector.Generic as V
import qualified Data.Vector.Storable as W
import qualified Data.Vector.Unboxed as U
import qualified Streaming.Prelude as Q
data Collapse m = Collapse {
Collapse m -> [BamRec] -> m (Decision, [BamRec])
collapse :: [BamRec] -> m (Decision,[BamRec]),
Collapse m -> [BamRec] -> [BamRec]
originals :: [BamRec] -> [BamRec] }
data Decision = Consensus { Decision -> BamRec
fromDecision :: BamRec }
| Representative { fromDecision :: BamRec }
cons_collapse :: MonadLog m => Qual -> Collapse m
cons_collapse :: Qual -> Collapse m
cons_collapse maxq :: Qual
maxq = ([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
forall (m :: * -> *).
([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
Collapse (Qual -> [BamRec] -> m (Decision, [BamRec])
forall (m :: * -> *).
MonadLog m =>
Qual -> [BamRec] -> m (Decision, [BamRec])
do_collapse Qual
maxq) ([BamRec] -> [BamRec] -> [BamRec]
forall a b. a -> b -> a
const [])
cons_collapse_keep :: MonadLog m => Qual -> Collapse m
cons_collapse_keep :: Qual -> Collapse m
cons_collapse_keep maxq :: Qual
maxq = ([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
forall (m :: * -> *).
([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
Collapse (Qual -> [BamRec] -> m (Decision, [BamRec])
forall (m :: * -> *).
MonadLog m =>
Qual -> [BamRec] -> m (Decision, [BamRec])
do_collapse Qual
maxq) ((BamRec -> BamRec) -> [BamRec] -> [BamRec]
forall a b. (a -> b) -> [a] -> [b]
map (\b :: BamRec
b -> BamRec
b { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagDuplicate }))
cheap_collapse :: Monad m => Collapse m
cheap_collapse :: Collapse m
cheap_collapse = ([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
forall (m :: * -> *).
([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
Collapse [BamRec] -> m (Decision, [BamRec])
forall (m :: * -> *). Monad m => [BamRec] -> m (Decision, [BamRec])
do_cheap_collapse ([BamRec] -> [BamRec] -> [BamRec]
forall a b. a -> b -> a
const [])
cheap_collapse_keep :: Monad m => Collapse m
cheap_collapse_keep :: Collapse m
cheap_collapse_keep = ([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
forall (m :: * -> *).
([BamRec] -> m (Decision, [BamRec]))
-> ([BamRec] -> [BamRec]) -> Collapse m
Collapse [BamRec] -> m (Decision, [BamRec])
forall (m :: * -> *). Monad m => [BamRec] -> m (Decision, [BamRec])
do_cheap_collapse ((BamRec -> BamRec) -> [BamRec] -> [BamRec]
forall a b. (a -> b) -> [a] -> [b]
map (\b :: BamRec
b -> BamRec
b { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagDuplicate }))
rmdup :: (MonadLog m, MonadThrow m, Hashable l, Eq l)
=> Bool -> Collapse m -> Q.Stream (Of (l,BamRec)) m r -> Q.Stream (Of ((l,Int),BamRec)) m r
rmdup :: Bool
-> Collapse m
-> Stream (Of (l, BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r
rmdup strand_preserved :: Bool
strand_preserved collapse_cfg :: Collapse m
collapse_cfg =
(((l, Int), BamRec) -> (Refseq, Int))
-> RmdupException
-> Stream (Of ((l, Int), BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r
forall (m :: * -> *) b a r.
(MonadThrow m, Ord b) =>
(a -> b)
-> RmdupException -> Stream (Of a) m r -> Stream (Of a) m r
check_sort (BamRec -> (Refseq, Int)
b_cpos (BamRec -> (Refseq, Int))
-> (((l, Int), BamRec) -> BamRec)
-> ((l, Int), BamRec)
-> (Refseq, Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((l, Int), BamRec) -> BamRec
forall a b. (a, b) -> b
snd) RmdupException
InternalError (Stream (Of ((l, Int), BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r)
-> (Stream (Of (l, BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r)
-> Stream (Of (l, BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
([(l, BamRec)] -> m (Vector ((l, Int), BamRec)))
-> Stream (Of (l, BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r
forall (m :: * -> *) (f :: * -> *) a a c.
(Monad m, Foldable f) =>
([(a, BamRec)] -> m (f a))
-> Stream (Of (a, BamRec)) m c -> Stream (Of a) m c
mapGroups ( ([((l, Int), BamRec)] -> Vector ((l, Int), BamRec))
-> m [((l, Int), BamRec)] -> m (Vector ((l, Int), BamRec))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((forall s. Mutable Vector s ((l, Int), BamRec) -> ST s ())
-> Vector ((l, Int), BamRec) -> Vector ((l, Int), BamRec)
forall (v :: * -> *) a.
Vector v a =>
(forall s. Mutable v s a -> ST s ()) -> v a -> v a
V.modify (Comparison ((l, Int), BamRec)
-> MVector (PrimState (ST s)) ((l, Int), BamRec) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VV.sortBy ((((l, Int), BamRec) -> Int) -> Comparison ((l, Int), BamRec)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing ((((l, Int), BamRec) -> Int) -> Comparison ((l, Int), BamRec))
-> (((l, Int), BamRec) -> Int) -> Comparison ((l, Int), BamRec)
forall a b. (a -> b) -> a -> b
$ Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (Vector_Nucs_half Nucleotides -> Int)
-> (((l, Int), BamRec) -> Vector_Nucs_half Nucleotides)
-> ((l, Int), BamRec)
-> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Vector_Nucs_half Nucleotides
b_seq (BamRec -> Vector_Nucs_half Nucleotides)
-> (((l, Int), BamRec) -> BamRec)
-> ((l, Int), BamRec)
-> Vector_Nucs_half Nucleotides
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((l, Int), BamRec) -> BamRec
forall a b. (a, b) -> b
snd)) (Vector ((l, Int), BamRec) -> Vector ((l, Int), BamRec))
-> ([((l, Int), BamRec)] -> Vector ((l, Int), BamRec))
-> [((l, Int), BamRec)]
-> Vector ((l, Int), BamRec)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [((l, Int), BamRec)] -> Vector ((l, Int), BamRec)
forall a. [a] -> Vector a
VV.fromList)
(m [((l, Int), BamRec)] -> m (Vector ((l, Int), BamRec)))
-> ([(l, BamRec)] -> m [((l, Int), BamRec)])
-> [(l, BamRec)]
-> m (Vector ((l, Int), BamRec))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (HashMap l [(Int, BamRec)] -> [((l, Int), BamRec)])
-> m (HashMap l [(Int, BamRec)]) -> m [((l, Int), BamRec)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((l
-> [(Int, BamRec)] -> [((l, Int), BamRec)] -> [((l, Int), BamRec)])
-> [((l, Int), BamRec)]
-> HashMap l [(Int, BamRec)]
-> [((l, Int), BamRec)]
forall k v a. (k -> v -> a -> a) -> a -> HashMap k v -> a
M.foldrWithKey (\l :: l
l mrs :: [(Int, BamRec)]
mrs -> [((l, Int), BamRec)]
-> [((l, Int), BamRec)] -> [((l, Int), BamRec)]
forall a. [a] -> [a] -> [a]
(++) [ ((l
l,Int
n),BamRec
r) | (n :: Int
n,r :: BamRec
r) <- [(Int, BamRec)]
mrs ]) [])
(m (HashMap l [(Int, BamRec)]) -> m [((l, Int), BamRec)])
-> ([(l, BamRec)] -> m (HashMap l [(Int, BamRec)]))
-> [(l, BamRec)]
-> m [((l, Int), BamRec)]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ([BamRec] -> m [(Int, BamRec)])
-> HashMap l [BamRec] -> m (HashMap l [(Int, BamRec)])
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Bool -> Collapse m -> [BamRec] -> m [(Int, BamRec)]
forall (m :: * -> *).
Monad m =>
Bool -> Collapse m -> [BamRec] -> m [(Int, BamRec)]
do_rmdup Bool
strand_preserved Collapse m
collapse_cfg)
(HashMap l [BamRec] -> m (HashMap l [(Int, BamRec)]))
-> ([(l, BamRec)] -> HashMap l [BamRec])
-> [(l, BamRec)]
-> m (HashMap l [(Int, BamRec)])
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((l, BamRec) -> l)
-> ((l, BamRec) -> BamRec) -> [(l, BamRec)] -> HashMap l [BamRec]
forall k a v.
(Hashable k, Eq k) =>
(a -> k) -> (a -> v) -> [a] -> HashMap k [v]
accumMap (l, BamRec) -> l
forall a b. (a, b) -> a
fst (l, BamRec) -> BamRec
forall a b. (a, b) -> b
snd ) (Stream (Of (l, BamRec)) m r -> Stream (Of ((l, Int), BamRec)) m r)
-> (Stream (Of (l, BamRec)) m r -> Stream (Of (l, BamRec)) m r)
-> Stream (Of (l, BamRec)) m r
-> Stream (Of ((l, Int), BamRec)) m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
((l, BamRec) -> (Refseq, Int))
-> RmdupException
-> Stream (Of (l, BamRec)) m r
-> Stream (Of (l, BamRec)) m r
forall (m :: * -> *) b a r.
(MonadThrow m, Ord b) =>
(a -> b)
-> RmdupException -> Stream (Of a) m r -> Stream (Of a) m r
check_sort (BamRec -> (Refseq, Int)
b_cpos (BamRec -> (Refseq, Int))
-> ((l, BamRec) -> BamRec) -> (l, BamRec) -> (Refseq, Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (l, BamRec) -> BamRec
forall a b. (a, b) -> b
snd) RmdupException
UnsortedInput
where
b_cpos :: BamRec -> (Refseq, Int)
b_cpos = BamRec -> Refseq
b_rname (BamRec -> Refseq) -> (BamRec -> Int) -> BamRec -> (Refseq, Int)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& BamRec -> Int
b_pos
mapGroups :: ([(a, BamRec)] -> m (f a))
-> Stream (Of (a, BamRec)) m c -> Stream (Of a) m c
mapGroups f :: [(a, BamRec)] -> m (f a)
f = m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c)))
-> Stream
(Of a) m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c)))
-> Stream
(Of a) m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c))))
-> (Stream (Of (a, BamRec)) m c
-> m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c))))
-> Stream (Of (a, BamRec)) m c
-> Stream
(Of a) m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c)))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (Of (a, BamRec)) m c
-> m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect (Stream (Of (a, BamRec)) m c
-> Stream
(Of a) m (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c))))
-> (Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c))
-> Stream (Of a) m c)
-> Stream (Of (a, BamRec)) m c
-> Stream (Of a) m c
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (c -> Stream (Of a) m c)
-> (Of (a, BamRec) (Stream (Of (a, BamRec)) m c)
-> Stream (Of a) m c)
-> Either c (Of (a, BamRec) (Stream (Of (a, BamRec)) m c))
-> Stream (Of a) m c
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either c -> Stream (Of a) m c
forall (f :: * -> *) a. Applicative f => a -> f a
pure (\(a :: (a, BamRec)
a :> s :: Stream (Of (a, BamRec)) m c
s) -> ([(a, BamRec)] -> m (f a))
-> [(a, BamRec)]
-> (a, BamRec)
-> Stream (Of (a, BamRec)) m c
-> Stream (Of a) m c
forall (m :: * -> *) (f :: * -> *) a a b.
(Monad m, Foldable f) =>
([(a, BamRec)] -> m (f a))
-> [(a, BamRec)]
-> (a, BamRec)
-> Stream (Of (a, BamRec)) m b
-> Stream (Of a) m b
mg1 [(a, BamRec)] -> m (f a)
f [] (a, BamRec)
a Stream (Of (a, BamRec)) m c
s)
mg1 :: ([(a, BamRec)] -> m (f a))
-> [(a, BamRec)]
-> (a, BamRec)
-> Stream (Of (a, BamRec)) m b
-> Stream (Of a) m b
mg1 f :: [(a, BamRec)] -> m (f a)
f acc :: [(a, BamRec)]
acc a :: (a, BamRec)
a s :: Stream (Of (a, BamRec)) m b
s =
m (Either b (Of (a, BamRec) (Stream (Of (a, BamRec)) m b)))
-> Stream
(Of a) m (Either b (Of (a, BamRec) (Stream (Of (a, BamRec)) m b)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Stream (Of (a, BamRec)) m b
-> m (Either b (Of (a, BamRec) (Stream (Of (a, BamRec)) m b)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect Stream (Of (a, BamRec)) m b
s) Stream
(Of a) m (Either b (Of (a, BamRec) (Stream (Of (a, BamRec)) m b)))
-> (Either b (Of (a, BamRec) (Stream (Of (a, BamRec)) m b))
-> Stream (Of a) m b)
-> Stream (Of a) m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Left r :: b
r -> m (f a) -> Stream (Of a) m (f a)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ([(a, BamRec)] -> m (f a)
f ((a, BamRec)
a(a, BamRec) -> [(a, BamRec)] -> [(a, BamRec)]
forall a. a -> [a] -> [a]
:[(a, BamRec)]
acc)) Stream (Of a) m (f a)
-> (f a -> Stream (Of a) m ()) -> Stream (Of a) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= f a -> Stream (Of a) m ()
forall (m :: * -> *) (f :: * -> *) a.
(Monad m, Foldable f) =>
f a -> Stream (Of a) m ()
Q.each Stream (Of a) m () -> Stream (Of a) m b -> Stream (Of a) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> b -> Stream (Of a) m b
forall (f :: * -> *) a. Applicative f => a -> f a
pure b
r
Right (b :: (a, BamRec)
b :> s' :: Stream (Of (a, BamRec)) m b
s')
| BamRec -> (Refseq, Int)
b_cpos ((a, BamRec) -> BamRec
forall a b. (a, b) -> b
snd (a, BamRec)
a) (Refseq, Int) -> (Refseq, Int) -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> (Refseq, Int)
b_cpos ((a, BamRec) -> BamRec
forall a b. (a, b) -> b
snd (a, BamRec)
b) -> ([(a, BamRec)] -> m (f a))
-> [(a, BamRec)]
-> (a, BamRec)
-> Stream (Of (a, BamRec)) m b
-> Stream (Of a) m b
mg1 [(a, BamRec)] -> m (f a)
f ((a, BamRec)
b(a, BamRec) -> [(a, BamRec)] -> [(a, BamRec)]
forall a. a -> [a] -> [a]
:[(a, BamRec)]
acc) (a, BamRec)
a Stream (Of (a, BamRec)) m b
s'
| Bool
otherwise -> m (f a) -> Stream (Of a) m (f a)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ([(a, BamRec)] -> m (f a)
f ((a, BamRec)
a(a, BamRec) -> [(a, BamRec)] -> [(a, BamRec)]
forall a. a -> [a] -> [a]
:[(a, BamRec)]
acc)) Stream (Of a) m (f a)
-> (f a -> Stream (Of a) m ()) -> Stream (Of a) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= f a -> Stream (Of a) m ()
forall (m :: * -> *) (f :: * -> *) a.
(Monad m, Foldable f) =>
f a -> Stream (Of a) m ()
Q.each Stream (Of a) m () -> Stream (Of a) m b -> Stream (Of a) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ([(a, BamRec)] -> m (f a))
-> [(a, BamRec)]
-> (a, BamRec)
-> Stream (Of (a, BamRec)) m b
-> Stream (Of a) m b
mg1 [(a, BamRec)] -> m (f a)
f [] (a, BamRec)
b Stream (Of (a, BamRec)) m b
s'
data RmdupException = UnsortedInput | InternalError deriving (Typeable, Int -> RmdupException -> ShowS
[RmdupException] -> ShowS
RmdupException -> String
(Int -> RmdupException -> ShowS)
-> (RmdupException -> String)
-> ([RmdupException] -> ShowS)
-> Show RmdupException
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RmdupException] -> ShowS
$cshowList :: [RmdupException] -> ShowS
show :: RmdupException -> String
$cshow :: RmdupException -> String
showsPrec :: Int -> RmdupException -> ShowS
$cshowsPrec :: Int -> RmdupException -> ShowS
Show)
instance Exception RmdupException where
displayException :: RmdupException -> String
displayException UnsortedInput = "input must be sorted for rmdup to work"
displayException InternalError = "internal error, output isn't sorted anymore"
check_sort :: (MonadThrow m, Ord b) => (a -> b) -> RmdupException -> Stream (Of a) m r -> Stream (Of a) m r
check_sort :: (a -> b)
-> RmdupException -> Stream (Of a) m r -> Stream (Of a) m r
check_sort f :: a -> b
f msg :: RmdupException
msg = m (Either r (Of a (Stream (Of a) m r)))
-> Stream (Of a) m (Either r (Of a (Stream (Of a) m r)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either r (Of a (Stream (Of a) m r)))
-> Stream (Of a) m (Either r (Of a (Stream (Of a) m r))))
-> (Stream (Of a) m r -> m (Either r (Of a (Stream (Of a) m r))))
-> Stream (Of a) m r
-> Stream (Of a) m (Either r (Of a (Stream (Of a) m r)))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (Of a) m r -> m (Either r (Of a (Stream (Of a) m r)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect (Stream (Of a) m r
-> Stream (Of a) m (Either r (Of a (Stream (Of a) m r))))
-> (Either r (Of a (Stream (Of a) m r)) -> Stream (Of a) m r)
-> Stream (Of a) m r
-> Stream (Of a) m r
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (r -> Stream (Of a) m r)
-> (Of a (Stream (Of a) m r) -> Stream (Of a) m r)
-> Either r (Of a (Stream (Of a) m r))
-> Stream (Of a) m r
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either r -> Stream (Of a) m r
forall (f :: * -> *) a. Applicative f => a -> f a
pure Of a (Stream (Of a) m r) -> Stream (Of a) m r
forall (m :: * -> *) a.
MonadThrow m =>
Of a (Stream (Of a) m a) -> Stream (Of a) m a
step
where
step :: Of a (Stream (Of a) m a) -> Stream (Of a) m a
step (a :: a
a :> s :: Stream (Of a) m a
s) = m (Either a (Of a (Stream (Of a) m a)))
-> Stream (Of a) m (Either a (Of a (Stream (Of a) m a)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Stream (Of a) m a -> m (Either a (Of a (Stream (Of a) m a)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect Stream (Of a) m a
s) Stream (Of a) m (Either a (Of a (Stream (Of a) m a)))
-> (Either a (Of a (Stream (Of a) m a)) -> Stream (Of a) m a)
-> Stream (Of a) m a
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (a -> Stream (Of a) m a)
-> (Of a (Stream (Of a) m a) -> Stream (Of a) m a)
-> Either a (Of a (Stream (Of a) m a))
-> Stream (Of a) m a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (a -> Stream (Of a) m a -> Stream (Of a) m a
forall (m :: * -> *) a r.
Monad m =>
a -> Stream (Of a) m r -> Stream (Of a) m r
Q.cons a
a (Stream (Of a) m a -> Stream (Of a) m a)
-> (a -> Stream (Of a) m a) -> a -> Stream (Of a) m a
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. a -> Stream (Of a) m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure) (a -> Of a (Stream (Of a) m a) -> Stream (Of a) m a
step' a
a)
step' :: a -> Of a (Stream (Of a) m a) -> Stream (Of a) m a
step' a :: a
a (b :: a
b :> s :: Stream (Of a) m a
s)
| a -> b
f a
a b -> b -> Bool
forall a. Ord a => a -> a -> Bool
> a -> b
f a
b = m a -> Stream (Of a) m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m a -> Stream (Of a) m a) -> m a -> Stream (Of a) m a
forall a b. (a -> b) -> a -> b
$ RmdupException -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM RmdupException
msg
| Bool
otherwise = a -> Stream (Of a) m a -> Stream (Of a) m a
forall (m :: * -> *) a r.
Monad m =>
a -> Stream (Of a) m r -> Stream (Of a) m r
Q.cons a
a (Stream (Of a) m a -> Stream (Of a) m a)
-> Stream (Of a) m a -> Stream (Of a) m a
forall a b. (a -> b) -> a -> b
$ Of a (Stream (Of a) m a) -> Stream (Of a) m a
step (a
b a -> Stream (Of a) m a -> Of a (Stream (Of a) m a)
forall a b. a -> b -> Of a b
:> Stream (Of a) m a
s)
{-# INLINE check_sort #-}
do_rmdup :: Monad m => Bool -> Collapse m -> [BamRec] -> m [(Int,BamRec)]
do_rmdup :: Bool -> Collapse m -> [BamRec] -> m [(Int, BamRec)]
do_rmdup strand_preserved :: Bool
strand_preserved Collapse{..} rds :: [BamRec]
rds = do
let (raw_pairs :: [BamRec]
raw_pairs, raw_singles :: [BamRec]
raw_singles) = (BamRec -> Bool) -> [BamRec] -> ([BamRec], [BamRec])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition BamRec -> Bool
isPaired [BamRec]
rds
(merged :: [BamRec]
merged, true_singles :: [BamRec]
true_singles) = (BamRec -> Bool) -> [BamRec] -> ([BamRec], [BamRec])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition ((Bool -> Bool -> Bool)
-> (BamRec -> Bool) -> (BamRec -> Bool) -> BamRec -> Bool
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||) BamRec -> Bool
isMerged BamRec -> Bool
isTrimmed) [BamRec]
raw_singles
(pairs :: [BamRec]
pairs, raw_half_pairs :: [BamRec]
raw_half_pairs) = (BamRec -> Bool) -> [BamRec] -> ([BamRec], [BamRec])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition BamRec -> Bool
b_totally_aligned [BamRec]
raw_pairs
(half_unaligned :: [BamRec]
half_unaligned, half_aligned :: [BamRec]
half_aligned) = (BamRec -> Bool) -> [BamRec] -> ([BamRec], [BamRec])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition BamRec -> Bool
isUnmapped [BamRec]
raw_half_pairs
unaligned' :: HashMap Bool [BamRec]
unaligned' = (BamRec -> Bool)
-> (BamRec -> BamRec) -> [BamRec] -> HashMap Bool [BamRec]
forall k a v.
(Hashable k, Eq k) =>
(a -> k) -> (a -> v) -> [a] -> HashMap k [v]
accumMap BamRec -> Bool
b_strand BamRec -> BamRec
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id [BamRec]
half_unaligned
(pairs' :: HashMap ((Refseq, (Int, (Bool, Bool))), Bool, Bool) (Int, Decision)
pairs',r1 :: [BamRec]
r1) <- (BamRec -> ((Refseq, (Int, (Bool, Bool))), Bool, Bool))
-> [BamRec]
-> m (HashMap
((Refseq, (Int, (Bool, Bool))), Bool, Bool) (Int, Decision),
[BamRec])
forall k.
(Hashable k, Eq k) =>
(BamRec -> k)
-> [BamRec] -> m (HashMap k (Int, Decision), [BamRec])
mkMap (\b :: BamRec
b -> (BamRec -> (Refseq, (Int, (Bool, Bool)))
b_mate_pos BamRec
b, BamRec -> Bool
b_strand BamRec
b, BamRec -> Bool
b_mate BamRec
b)) [BamRec]
pairs
(merged' :: HashMap (Int, Bool) (Int, Decision)
merged',r2 :: [BamRec]
r2) <- (BamRec -> (Int, Bool))
-> [BamRec] -> m (HashMap (Int, Bool) (Int, Decision), [BamRec])
forall k.
(Hashable k, Eq k) =>
(BamRec -> k)
-> [BamRec] -> m (HashMap k (Int, Decision), [BamRec])
mkMap (\b :: BamRec
b -> (Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength (BamRec -> Vector Cigar
b_cigar BamRec
b), BamRec -> Bool
b_strand BamRec
b)) [BamRec]
merged
(singles' :: HashMap Bool (Int, Decision)
singles',r3 :: [BamRec]
r3) <- (BamRec -> Bool)
-> [BamRec] -> m (HashMap Bool (Int, Decision), [BamRec])
forall k.
(Hashable k, Eq k) =>
(BamRec -> k)
-> [BamRec] -> m (HashMap k (Int, Decision), [BamRec])
mkMap (\b :: BamRec
b -> BamRec -> Bool
b_strand BamRec
b) ([BamRec]
true_singles[BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++[BamRec]
half_aligned)
let (results :: [(Int, BamRec)]
results, leftovers :: [BamRec]
leftovers) = HashMap Bool (Int, Decision)
-> HashMap Bool [BamRec]
-> [(Bool, (Int, BamRec))]
-> ([(Int, BamRec)], [BamRec])
merge_singles HashMap Bool (Int, Decision)
singles' HashMap Bool [BamRec]
unaligned' ([(Bool, (Int, BamRec))] -> ([(Int, BamRec)], [BamRec]))
-> [(Bool, (Int, BamRec))] -> ([(Int, BamRec)], [BamRec])
forall a b. (a -> b) -> a -> b
$
[ (Bool
str, (Decision -> BamRec) -> (Int, Decision) -> (Int, BamRec)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Decision -> BamRec
fromDecision (Int, Decision)
b) | ((_,str :: Bool
str ),b :: (Int, Decision)
b) <- HashMap (Int, Bool) (Int, Decision)
-> [((Int, Bool), (Int, Decision))]
forall k v. HashMap k v -> [(k, v)]
M.toList HashMap (Int, Bool) (Int, Decision)
merged' ] [(Bool, (Int, BamRec))]
-> [(Bool, (Int, BamRec))] -> [(Bool, (Int, BamRec))]
forall a. [a] -> [a] -> [a]
++
[ (Bool
str, (Decision -> BamRec) -> (Int, Decision) -> (Int, BamRec)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Decision -> BamRec
fromDecision (Int, Decision)
b) | ((_,str :: Bool
str,_),b :: (Int, Decision)
b) <- HashMap ((Refseq, (Int, (Bool, Bool))), Bool, Bool) (Int, Decision)
-> [(((Refseq, (Int, (Bool, Bool))), Bool, Bool), (Int, Decision))]
forall k v. HashMap k v -> [(k, v)]
M.toList HashMap ((Refseq, (Int, (Bool, Bool))), Bool, Bool) (Int, Decision)
pairs' ]
[(Int, BamRec)] -> m [(Int, BamRec)]
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([(Int, BamRec)] -> m [(Int, BamRec)])
-> [(Int, BamRec)] -> m [(Int, BamRec)]
forall a b. (a -> b) -> a -> b
$ [(Int, BamRec)]
results [(Int, BamRec)] -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. [a] -> [a] -> [a]
++ (BamRec -> (Int, BamRec)) -> [BamRec] -> [(Int, BamRec)]
forall a b. (a -> b) -> [a] -> [b]
map ((,) 0) ([BamRec] -> [BamRec]
originals ([BamRec]
leftovers [BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++ [BamRec]
r1 [BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++ [BamRec]
r2 [BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++ [BamRec]
r3))
where
mkMap :: (BamRec -> k)
-> [BamRec] -> m (HashMap k (Int, Decision), [BamRec])
mkMap f :: BamRec -> k
f x :: [BamRec]
x = do HashMap k (Int, (Decision, [BamRec]))
m1 <- ([BamRec] -> m (Int, (Decision, [BamRec])))
-> HashMap k [BamRec] -> m (HashMap k (Int, (Decision, [BamRec])))
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse (\xs :: [BamRec]
xs -> (,) ([BamRec] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [BamRec]
xs) ((Decision, [BamRec]) -> (Int, (Decision, [BamRec])))
-> m (Decision, [BamRec]) -> m (Int, (Decision, [BamRec]))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [BamRec] -> m (Decision, [BamRec])
collapse [BamRec]
xs) (HashMap k [BamRec] -> m (HashMap k (Int, (Decision, [BamRec]))))
-> HashMap k [BamRec] -> m (HashMap k (Int, (Decision, [BamRec])))
forall a b. (a -> b) -> a -> b
$ (BamRec -> k)
-> (BamRec -> BamRec) -> [BamRec] -> HashMap k [BamRec]
forall k a v.
(Hashable k, Eq k) =>
(a -> k) -> (a -> v) -> [a] -> HashMap k [v]
accumMap BamRec -> k
f BamRec -> BamRec
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id [BamRec]
x
(HashMap k (Int, Decision), [BamRec])
-> m (HashMap k (Int, Decision), [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (((Int, (Decision, [BamRec])) -> (Int, Decision))
-> HashMap k (Int, (Decision, [BamRec]))
-> HashMap k (Int, Decision)
forall v1 v2 k. (v1 -> v2) -> HashMap k v1 -> HashMap k v2
M.map (((Decision, [BamRec]) -> Decision)
-> (Int, (Decision, [BamRec])) -> (Int, Decision)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (Decision, [BamRec]) -> Decision
forall a b. (a, b) -> a
fst) HashMap k (Int, (Decision, [BamRec]))
m1, ((Int, (Decision, [BamRec])) -> [BamRec])
-> [(Int, (Decision, [BamRec]))] -> [BamRec]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((Decision, [BamRec]) -> [BamRec]
forall a b. (a, b) -> b
snd((Decision, [BamRec]) -> [BamRec])
-> ((Int, (Decision, [BamRec])) -> (Decision, [BamRec]))
-> (Int, (Decision, [BamRec]))
-> [BamRec]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.(Int, (Decision, [BamRec])) -> (Decision, [BamRec])
forall a b. (a, b) -> b
snd) ([(Int, (Decision, [BamRec]))] -> [BamRec])
-> [(Int, (Decision, [BamRec]))] -> [BamRec]
forall a b. (a -> b) -> a -> b
$ HashMap k (Int, (Decision, [BamRec]))
-> [(Int, (Decision, [BamRec]))]
forall k v. HashMap k v -> [v]
M.elems HashMap k (Int, (Decision, [BamRec]))
m1)
b_strand :: BamRec -> Bool
b_strand b :: BamRec
b = Bool
strand_preserved Bool -> Bool -> Bool
&& BamRec -> Bool
isReversed BamRec
b
b_mate :: BamRec -> Bool
b_mate b :: BamRec
b = Bool
strand_preserved Bool -> Bool -> Bool
&& BamRec -> Bool
isFirstMate BamRec
b
b_mate_pos :: BamRec -> (Refseq, (Int, (Bool, Bool)))
b_mate_pos = BamRec -> Refseq
b_mrnm (BamRec -> Refseq)
-> (BamRec -> (Int, (Bool, Bool)))
-> BamRec
-> (Refseq, (Int, (Bool, Bool)))
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& BamRec -> Int
b_mpos (BamRec -> Int)
-> (BamRec -> (Bool, Bool)) -> BamRec -> (Int, (Bool, Bool))
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& BamRec -> Bool
isUnmapped (BamRec -> Bool) -> (BamRec -> Bool) -> BamRec -> (Bool, Bool)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& BamRec -> Bool
isMateUnmapped
b_totally_aligned :: BamRec -> Bool
b_totally_aligned = Bool -> Bool -> Bool
(&&) (Bool -> Bool -> Bool)
-> (BamRec -> Bool) -> BamRec -> Bool -> Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Bool -> Bool
not (Bool -> Bool) -> (BamRec -> Bool) -> BamRec -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Bool
isUnmapped (BamRec -> Bool -> Bool) -> (BamRec -> Bool) -> BamRec -> Bool
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Bool -> Bool
not (Bool -> Bool) -> (BamRec -> Bool) -> BamRec -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Bool
isMateUnmapped
merge_singles :: M.HashMap Bool (Int,Decision)
-> M.HashMap Bool [BamRec]
-> [ (Bool, (Int, BamRec)) ]
-> ([(Int,BamRec)],[BamRec])
merge_singles :: HashMap Bool (Int, Decision)
-> HashMap Bool [BamRec]
-> [(Bool, (Int, BamRec))]
-> ([(Int, BamRec)], [BamRec])
merge_singles singles :: HashMap Bool (Int, Decision)
singles unaligneds :: HashMap Bool [BamRec]
unaligneds = [(Bool, (Int, BamRec))] -> ([(Int, BamRec)], [BamRec])
go
where
go :: [(Bool, (Int, BamRec))] -> ([(Int, BamRec)], [BamRec])
go ( (str :: Bool
str, (m :: Int
m,v :: BamRec
v)) : paireds :: [(Bool, (Int, BamRec))]
paireds) =
let (r :: [(Int, BamRec)]
r,l :: [BamRec]
l) = HashMap Bool (Int, Decision)
-> HashMap Bool [BamRec]
-> [(Bool, (Int, BamRec))]
-> ([(Int, BamRec)], [BamRec])
merge_singles (Bool
-> HashMap Bool (Int, Decision) -> HashMap Bool (Int, Decision)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v
M.delete Bool
str HashMap Bool (Int, Decision)
singles) (Bool -> HashMap Bool [BamRec] -> HashMap Bool [BamRec]
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v
M.delete Bool
str HashMap Bool [BamRec]
unaligneds) [(Bool, (Int, BamRec))]
paireds
unal :: [BamRec]
unal = [BamRec] -> Bool -> HashMap Bool [BamRec] -> [BamRec]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault [] Bool
str HashMap Bool [BamRec]
unaligneds [BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++ [BamRec]
l
in case Bool -> HashMap Bool (Int, Decision) -> Maybe (Int, Decision)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup Bool
str HashMap Bool (Int, Decision)
singles of
Nothing -> ( (Int
m,BamRec
v) (Int, BamRec) -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. a -> [a] -> [a]
: [(Int, BamRec)]
r, [BamRec]
unal )
Just (n :: Int
n, Consensus w :: BamRec
w) -> ( (Int
n, BamRec -> BamRec -> BamRec
add_xp_of BamRec
w BamRec
v) (Int, BamRec) -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. a -> [a] -> [a]
: [(Int, BamRec)]
r, [BamRec]
unal )
Just (n :: Int
n, Representative w :: BamRec
w) -> ( (Int
n, BamRec -> BamRec -> BamRec
add_xp_of BamRec
w BamRec
v) (Int, BamRec) -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. a -> [a] -> [a]
: [(Int, BamRec)]
r, BamRec
w BamRec -> [BamRec] -> [BamRec]
forall a. a -> [a] -> [a]
: [BamRec]
unal )
go [] = HashMap Bool [BamRec]
-> [(Bool, (Int, Decision))] -> ([(Int, BamRec)], [BamRec])
merge_halves HashMap Bool [BamRec]
unaligneds (HashMap Bool (Int, Decision) -> [(Bool, (Int, Decision))]
forall k v. HashMap k v -> [(k, v)]
M.toList HashMap Bool (Int, Decision)
singles)
add_xp_of :: BamRec -> BamRec -> BamRec
add_xp_of w :: BamRec
w v :: BamRec
v = BamRec
v { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "XP" (Int -> Ext
Int (Int -> Ext) -> Int -> Ext
forall a b. (a -> b) -> a -> b
$ Int -> BamKey -> BamRec -> Int
extAsInt 1 "XP" BamRec
w Int -> Int -> Int
`oplus` Int -> BamKey -> BamRec -> Int
extAsInt 1 "XP" BamRec
v) (BamRec -> Extensions
b_exts BamRec
v) }
merge_halves :: M.HashMap Bool [BamRec]
-> [(Bool, (Int,Decision))]
-> ([(Int,BamRec)],[BamRec])
merge_halves :: HashMap Bool [BamRec]
-> [(Bool, (Int, Decision))] -> ([(Int, BamRec)], [BamRec])
merge_halves unaligneds :: HashMap Bool [BamRec]
unaligneds ((_, (n :: Int
n, Consensus v :: BamRec
v)) : singles :: [(Bool, (Int, Decision))]
singles) =
( (Int
n, BamRec
v { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
v Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement Int
pflags }) (Int, BamRec) -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. a -> [a] -> [a]
: [(Int, BamRec)]
r, [BamRec]
l )
where
(r :: [(Int, BamRec)]
r,l :: [BamRec]
l) = HashMap Bool [BamRec]
-> [(Bool, (Int, Decision))] -> ([(Int, BamRec)], [BamRec])
merge_halves HashMap Bool [BamRec]
unaligneds [(Bool, (Int, Decision))]
singles
pflags :: Int
pflags = Int
flagPaired Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagProperlyPaired Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagMateUnmapped Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagMateReversed Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate
merge_halves unaligneds :: HashMap Bool [BamRec]
unaligneds ((str :: Bool
str, (n :: Int
n, Representative v :: BamRec
v)) : singles :: [(Bool, (Int, Decision))]
singles) =
((Int
n,BamRec
v) (Int, BamRec) -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. a -> [a] -> [a]
: (BamRec -> (Int, BamRec)) -> [BamRec] -> [(Int, BamRec)]
forall a b. (a -> b) -> [a] -> [b]
map ((,)1) (Int -> [BamRec] -> [BamRec]
forall a. Int -> [a] -> [a]
take 1 [BamRec]
same) [(Int, BamRec)] -> [(Int, BamRec)] -> [(Int, BamRec)]
forall a. [a] -> [a] -> [a]
++ [(Int, BamRec)]
r, Int -> [BamRec] -> [BamRec]
forall a. Int -> [a] -> [a]
drop 1 [BamRec]
same [BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++ [BamRec]
diff [BamRec] -> [BamRec] -> [BamRec]
forall a. [a] -> [a] -> [a]
++ [BamRec]
l)
where
(r :: [(Int, BamRec)]
r,l :: [BamRec]
l) = HashMap Bool [BamRec]
-> [(Bool, (Int, Decision))] -> ([(Int, BamRec)], [BamRec])
merge_halves (Bool -> HashMap Bool [BamRec] -> HashMap Bool [BamRec]
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> HashMap k v
M.delete Bool
str HashMap Bool [BamRec]
unaligneds) [(Bool, (Int, Decision))]
singles
(same :: [BamRec]
same,diff :: [BamRec]
diff) = (BamRec -> Bool) -> [BamRec] -> ([BamRec], [BamRec])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (BamRec -> BamRec -> Bool
is_mate_of BamRec
v) ([BamRec] -> ([BamRec], [BamRec]))
-> [BamRec] -> ([BamRec], [BamRec])
forall a b. (a -> b) -> a -> b
$ [BamRec] -> Bool -> HashMap Bool [BamRec] -> [BamRec]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault [] Bool
str HashMap Bool [BamRec]
unaligneds
is_mate_of :: BamRec -> BamRec -> Bool
is_mate_of a :: BamRec
a b :: BamRec
b = BamRec -> Bytes
b_qname BamRec
a Bytes -> Bytes -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Bytes
b_qname BamRec
b Bool -> Bool -> Bool
&& BamRec -> Bool
isPaired BamRec
a Bool -> Bool -> Bool
&& BamRec -> Bool
isPaired BamRec
b Bool -> Bool -> Bool
&& BamRec -> Bool
isFirstMate BamRec
a Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Bool
isSecondMate BamRec
b
merge_halves unaligneds :: HashMap Bool [BamRec]
unaligneds [] =
( [], [[BamRec]] -> [BamRec]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[BamRec]] -> [BamRec]) -> [[BamRec]] -> [BamRec]
forall a b. (a -> b) -> a -> b
$ HashMap Bool [BamRec] -> [[BamRec]]
forall k v. HashMap k v -> [v]
M.elems HashMap Bool [BamRec]
unaligneds )
accumMap :: (Hashable k, Eq k) => (a -> k) -> (a -> v) -> [a] -> M.HashMap k [v]
accumMap :: (a -> k) -> (a -> v) -> [a] -> HashMap k [v]
accumMap f :: a -> k
f g :: a -> v
g = HashMap k [v] -> [a] -> HashMap k [v]
go HashMap k [v]
forall k v. HashMap k v
M.empty
where
go :: HashMap k [v] -> [a] -> HashMap k [v]
go m :: HashMap k [v]
m [ ] = HashMap k [v]
m
go m :: HashMap k [v]
m (a :: a
a:as :: [a]
as) = let ws :: [v]
ws = [v] -> k -> HashMap k [v] -> [v]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault [] (a -> k
f a
a) HashMap k [v]
m ; g' :: v
g' = a -> v
g a
a
in v
g' v -> HashMap k [v] -> HashMap k [v]
forall a b. a -> b -> b
`seq` HashMap k [v] -> [a] -> HashMap k [v]
go (k -> [v] -> HashMap k [v] -> HashMap k [v]
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert (a -> k
f a
a) (v
g'v -> [v] -> [v]
forall a. a -> [a] -> [a]
:[v]
ws) HashMap k [v]
m) [a]
as
do_collapse :: MonadLog m => Qual -> [BamRec] -> m (Decision, [BamRec])
do_collapse :: Qual -> [BamRec] -> m (Decision, [BamRec])
do_collapse maxq :: Qual
maxq [br :: BamRec
br]
| Bool -> (Vector Qual -> Bool) -> Maybe (Vector Qual) -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True ((Qual -> Bool) -> Vector Qual -> Bool
forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> Bool
V.all (Qual -> Qual -> Bool
forall a. Ord a => a -> a -> Bool
<= Qual
maxq)) (BamRec -> Maybe (Vector Qual)
b_qual BamRec
br) = (Decision, [BamRec]) -> m (Decision, [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ( BamRec -> Decision
Representative BamRec
br, [ ] )
| Bool
otherwise = (Decision, [BamRec]) -> m (Decision, [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ( BamRec -> Decision
Consensus BamRec
lq_br, [BamRec
br] )
where
lq_br :: BamRec
lq_br = BamRec
br { b_qual :: Maybe (Vector Qual)
b_qual = (Qual -> Qual) -> Vector Qual -> Vector Qual
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
V.map (Qual -> Qual -> Qual
forall a. Ord a => a -> a -> a
min Qual
maxq) (Vector Qual -> Vector Qual)
-> Maybe (Vector Qual) -> Maybe (Vector Qual)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BamRec -> Maybe (Vector Qual)
b_qual BamRec
br
, b_virtual_offset :: Int64
b_virtual_offset = 0
, b_qname :: Bytes
b_qname = BamRec -> Bytes
b_qname BamRec
br Bytes -> Word8 -> Bytes
`B.snoc` Char -> Word8
c2w 'c' }
do_collapse maxq :: Qual
maxq brs :: [BamRec]
brs = do
Maybe BrokenMD -> (BrokenMD -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ Maybe BrokenMD
ws ((BrokenMD -> m ()) -> m ()) -> (BrokenMD -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ Level -> BrokenMD -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Warning
(Decision, [BamRec]) -> m (Decision, [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ( BamRec -> Decision
Consensus BamRec
b0 { b_exts :: Extensions
b_exts = Extensions -> Extensions
modify_extensions (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
b0
, b_flag :: Int
b_flag = Int
failflag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement Int
flagDuplicate
, b_mapq :: Qual
b_mapq = Word8 -> Qual
Q (Word8 -> Qual) -> Word8 -> Qual
forall a b. (a -> b) -> a -> b
$ [Word8] -> Word8
forall (t :: * -> *) a b.
(Foldable t, Integral a, Integral b) =>
t a -> b
rmsq ([Word8] -> Word8) -> [Word8] -> Word8
forall a b. (a -> b) -> a -> b
$ (BamRec -> Word8) -> [BamRec] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (Qual -> Word8
unQ (Qual -> Word8) -> (BamRec -> Qual) -> BamRec -> Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Qual
b_mapq) ([BamRec] -> [Word8]) -> [BamRec] -> [Word8]
forall a b. (a -> b) -> a -> b
$ [BamRec] -> [BamRec]
good [BamRec]
brs
, b_cigar :: Vector Cigar
b_cigar = Vector Cigar
cigar'
, b_seq :: Vector_Nucs_half Nucleotides
b_seq = [Nucleotides] -> Vector_Nucs_half Nucleotides
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList ([Nucleotides] -> Vector_Nucs_half Nucleotides)
-> [Nucleotides] -> Vector_Nucs_half Nucleotides
forall a b. (a -> b) -> a -> b
$ ((Nucleotides, Qual) -> Nucleotides)
-> [(Nucleotides, Qual)] -> [Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides, Qual) -> Nucleotides
forall a b. (a, b) -> a
fst [(Nucleotides, Qual)]
cons_seq_qual
, b_qual :: Maybe (Vector Qual)
b_qual = Vector Qual -> Maybe (Vector Qual)
forall a. a -> Maybe a
Just (Vector Qual -> Maybe (Vector Qual))
-> Vector Qual -> Maybe (Vector Qual)
forall a b. (a -> b) -> a -> b
$ [Qual] -> Vector Qual
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList ([Qual] -> Vector Qual) -> [Qual] -> Vector Qual
forall a b. (a -> b) -> a -> b
$ ((Nucleotides, Qual) -> Qual) -> [(Nucleotides, Qual)] -> [Qual]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides, Qual) -> Qual
forall a b. (a, b) -> b
snd [(Nucleotides, Qual)]
cons_seq_qual
, b_qname :: Bytes
b_qname = BamRec -> Bytes
b_qname BamRec
b0 Bytes -> Word8 -> Bytes
`B.snoc` 99
, b_virtual_offset :: Int64
b_virtual_offset = 0 }, [BamRec]
brs )
where
!b0 :: BamRec
b0 = (BamRec -> BamRec -> Ordering) -> [BamRec] -> BamRec
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy ((BamRec -> Bytes) -> BamRec -> BamRec -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing BamRec -> Bytes
b_qname) [BamRec]
brs
!most_fail :: Bool
most_fail = 2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* [BamRec] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ((BamRec -> Bool) -> [BamRec] -> [BamRec]
forall a. (a -> Bool) -> [a] -> [a]
filter BamRec -> Bool
isFailsQC [BamRec]
brs) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> [BamRec] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [BamRec]
brs
!failflag :: Int
failflag | Bool
most_fail = BamRec -> Int
b_flag BamRec
b0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFailsQC
| Bool
otherwise = BamRec -> Int
b_flag BamRec
b0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement Int
flagFailsQC
rmsq :: t a -> b
rmsq xs :: t a
xs = case ((Double, Int) -> a -> (Double, Int))
-> (Double, Int) -> t a -> (Double, Int)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\(!Double
n,!Int
d) x :: a
x -> (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
+ a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
x, Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)) (0,0) t a
xs of
(!Double
n,!Int
d) -> Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> b) -> Double -> b
forall a b. (a -> b) -> a -> b
$ Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ (Double
n::Double) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
d::Int)
maj :: [t] -> t
maj xs :: [t]
xs = [t] -> t
forall a. [a] -> a
head ([t] -> t) -> ([t] -> [t]) -> [t] -> t
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ([t] -> [t] -> Ordering) -> [[t]] -> [t]
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy (([t] -> Int) -> [t] -> [t] -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing [t] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length) ([[t]] -> [t]) -> ([t] -> [[t]]) -> [t] -> [t]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [t] -> [[t]]
forall a. Eq a => [a] -> [[a]]
group ([t] -> [[t]]) -> ([t] -> [t]) -> [t] -> [[t]]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [t] -> [t]
forall a. Ord a => [a] -> [a]
sort ([t] -> t) -> [t] -> t
forall a b. (a -> b) -> a -> b
$ [t]
xs
nub' :: [[Bytes]] -> [Bytes]
nub' = ([[Bytes]] -> [Bytes]) -> [[[Bytes]]] -> [Bytes]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap [[Bytes]] -> [Bytes]
forall a. [a] -> a
head ([[[Bytes]]] -> [Bytes])
-> ([[Bytes]] -> [[[Bytes]]]) -> [[Bytes]] -> [Bytes]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [[Bytes]] -> [[[Bytes]]]
forall a. Eq a => [a] -> [[a]]
group ([[Bytes]] -> [[[Bytes]]])
-> ([[Bytes]] -> [[Bytes]]) -> [[Bytes]] -> [[[Bytes]]]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [[Bytes]] -> [[Bytes]]
forall a. Ord a => [a] -> [a]
sort
!cigar' :: Vector Cigar
cigar' = [Vector Cigar] -> Vector Cigar
forall t. Ord t => [t] -> t
maj ([Vector Cigar] -> Vector Cigar) -> [Vector Cigar] -> Vector Cigar
forall a b. (a -> b) -> a -> b
$ (BamRec -> Vector Cigar) -> [BamRec] -> [Vector Cigar]
forall a b. (a -> b) -> [a] -> [b]
map BamRec -> Vector Cigar
b_cigar [BamRec]
brs
good :: [BamRec] -> [BamRec]
good = (BamRec -> Bool) -> [BamRec] -> [BamRec]
forall a. (a -> Bool) -> [a] -> [a]
filter (Vector Cigar -> Vector Cigar -> Bool
forall a. Eq a => a -> a -> Bool
(==) Vector Cigar
cigar' (Vector Cigar -> Bool)
-> (BamRec -> Vector Cigar) -> BamRec -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Vector Cigar
b_cigar)
cons_seq_qual :: [(Nucleotides, Qual)]
cons_seq_qual = [ Qual -> [(Nucleotides, Qual)] -> (Nucleotides, Qual)
consensus Qual
maxq [ (Vector_Nucs_half Nucleotides -> Int -> Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
V.unsafeIndex (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b) Int
i, Qual
q)
| BamRec
b <- [BamRec] -> [BamRec]
good [BamRec]
brs
, let q :: Qual
q = Qual -> (Vector Qual -> Qual) -> Maybe (Vector Qual) -> Qual
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Word8 -> Qual
Q 23) (Vector Qual -> Int -> Qual
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
V.! Int
i) (BamRec -> Maybe (Vector Qual)
b_qual BamRec
b) ]
| Int
i <- [0 .. Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1] ]
where !len :: Int
len = Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (Vector_Nucs_half Nucleotides -> Int)
-> ([BamRec] -> Vector_Nucs_half Nucleotides) -> [BamRec] -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Vector_Nucs_half Nucleotides
b_seq (BamRec -> Vector_Nucs_half Nucleotides)
-> ([BamRec] -> BamRec) -> [BamRec] -> Vector_Nucs_half Nucleotides
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [BamRec] -> BamRec
forall a. [a] -> a
head ([BamRec] -> Int) -> [BamRec] -> Int
forall a b. (a -> b) -> a -> b
$ [BamRec] -> [BamRec]
good [BamRec]
brs
(md' :: [MdOp]
md',ws :: Maybe BrokenMD
ws) = case [ (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b,[MdOp]
md,BamRec
b) | BamRec
b <- [BamRec] -> [BamRec]
good [BamRec]
brs, Just md :: [MdOp]
md <- [ BamRec -> Maybe [MdOp]
getMd BamRec
b ] ] of
[ ] -> ([],Maybe BrokenMD
forall a. Maybe a
Nothing)
(seq1 :: Vector_Nucs_half Nucleotides
seq1, md1 :: [MdOp]
md1,b :: BamRec
b) : _ -> case [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [] (Vector Cigar -> [Cigar]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList Vector Cigar
cigar') [MdOp]
md1 (Vector_Nucs_half Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList Vector_Nucs_half Nucleotides
seq1) (((Nucleotides, Qual) -> Nucleotides)
-> [(Nucleotides, Qual)] -> [Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides, Qual) -> Nucleotides
forall a b. (a, b) -> a
fst [(Nucleotides, Qual)]
cons_seq_qual) of
Right x :: [MdOp]
x -> ([MdOp]
x,Maybe BrokenMD
forall a. Maybe a
Nothing)
Left (MdFail cigs :: [Cigar]
cigs ms :: [MdOp]
ms osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq) -> ([],BrokenMD -> Maybe BrokenMD
forall a. a -> Maybe a
Just BrokenMD
e)
where e :: BrokenMD
e = Bytes
-> Refseq
-> Int
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> BrokenMD
BrokenMD (BamRec -> Bytes
b_qname BamRec
b) (BamRec -> Refseq
b_rname BamRec
b) (BamRec -> Int
b_pos BamRec
b) [Cigar]
cigs [MdOp]
ms [Nucleotides]
osq [Nucleotides]
nsq
nm' :: Int
nm' = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ [ Int
n | Ins :* n :: Int
n <- Vector Cigar -> [Cigar]
forall a. Storable a => Vector a -> [a]
W.toList Vector Cigar
cigar' ] [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [ Int
n | Del :* n :: Int
n <- Vector Cigar -> [Cigar]
forall a. Storable a => Vector a -> [a]
W.toList Vector Cigar
cigar' ] [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [ 1 | MdRep _ <- [MdOp]
md' ]
xa' :: [Bytes]
xa' = [[Bytes]] -> [Bytes]
nub' [ Char -> Bytes -> [Bytes]
T.split ';' Bytes
xas | Just (Text xas :: Bytes
xas) <- (BamRec -> Maybe Ext) -> [BamRec] -> [Maybe Ext]
forall a b. (a -> b) -> [a] -> [b]
map (BamKey -> Extensions -> Maybe Ext
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "XA" (Extensions -> Maybe Ext)
-> (BamRec -> Extensions) -> BamRec -> Maybe Ext
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Extensions
b_exts) [BamRec]
brs ]
modify_extensions :: Extensions -> Extensions
modify_extensions es :: Extensions
es = ((Extensions -> Extensions) -> Extensions -> Extensions)
-> Extensions -> [Extensions -> Extensions] -> Extensions
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
($!) Extensions
es ([Extensions -> Extensions] -> Extensions)
-> [Extensions -> Extensions] -> Extensions
forall a b. (a -> b) -> a -> b
$
[ let vs :: [Ext]
vs = (BamRec -> Maybe Ext) -> [BamRec] -> [Ext]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (BamKey -> Extensions -> Maybe Ext
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup BamKey
k (Extensions -> Maybe Ext)
-> (BamRec -> Extensions) -> BamRec -> Maybe Ext
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Extensions
b_exts) [BamRec]
brs
in if [Ext] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Ext]
vs then Extensions -> Extensions
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else BamKey -> Ext -> Extensions -> Extensions
updateE BamKey
k (Ext -> Extensions -> Extensions)
-> Ext -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$! [Ext] -> Ext
forall t. Ord t => [t] -> t
maj [Ext]
vs | BamKey
k <- [BamKey]
do_maj ] [Extensions -> Extensions]
-> [Extensions -> Extensions] -> [Extensions -> Extensions]
forall a. [a] -> [a] -> [a]
++
[ let vs :: [Int]
vs = [ Int
v | Just (Int v :: Int
v) <- (BamRec -> Maybe Ext) -> [BamRec] -> [Maybe Ext]
forall a b. (a -> b) -> [a] -> [b]
map (BamKey -> Extensions -> Maybe Ext
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup BamKey
k (Extensions -> Maybe Ext)
-> (BamRec -> Extensions) -> BamRec -> Maybe Ext
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Extensions
b_exts) [BamRec]
brs ]
in if [Int] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
vs then Extensions -> Extensions
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else BamKey -> Ext -> Extensions -> Extensions
updateE BamKey
k (Ext -> Extensions -> Extensions)
-> Ext -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$! Int -> Ext
Int ([Int] -> Int
forall (t :: * -> *) a b.
(Foldable t, Integral a, Integral b) =>
t a -> b
rmsq [Int]
vs) | BamKey
k <- [BamKey]
do_rmsq ] [Extensions -> Extensions]
-> [Extensions -> Extensions] -> [Extensions -> Extensions]
forall a. [a] -> [a] -> [a]
++
(BamKey -> Extensions -> Extensions)
-> [BamKey] -> [Extensions -> Extensions]
forall a b. (a -> b) -> [a] -> [b]
map BamKey -> Extensions -> Extensions
deleteE [BamKey]
useless [Extensions -> Extensions]
-> [Extensions -> Extensions] -> [Extensions -> Extensions]
forall a. [a] -> [a] -> [a]
++
[ BamKey -> Ext -> Extensions -> Extensions
updateE "NM" (Ext -> Extensions -> Extensions)
-> Ext -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$! Int -> Ext
Int Int
nm'
, BamKey -> Ext -> Extensions -> Extensions
updateE "XP" (Ext -> Extensions -> Extensions)
-> Ext -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$! Int -> Ext
Int ((Int -> BamRec -> Int) -> Int -> [BamRec] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\a :: Int
a b :: BamRec
b -> Int
a Int -> Int -> Int
`oplus` Int -> BamKey -> BamRec -> Int
extAsInt 1 "XP" BamRec
b) 0 [BamRec]
brs)
, if [Bytes] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Bytes]
xa' then Extensions -> Extensions
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else BamKey -> Ext -> Extensions -> Extensions
updateE "XA" (Ext -> Extensions -> Extensions)
-> Ext -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$! (Bytes -> Ext
Text (Bytes -> Ext) -> Bytes -> Ext
forall a b. (a -> b) -> a -> b
$ Bytes -> [Bytes] -> Bytes
T.intercalate (Char -> Bytes
T.singleton ';') [Bytes]
xa')
, if [MdOp] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [MdOp]
md' then Extensions -> Extensions
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else BamKey -> Ext -> Extensions -> Extensions
updateE "MD" (Ext -> Extensions -> Extensions)
-> Ext -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$! (Bytes -> Ext
Text (Bytes -> Ext) -> Bytes -> Ext
forall a b. (a -> b) -> a -> b
$ [MdOp] -> Bytes
showMd [MdOp]
md') ]
useless :: [BamKey]
useless = (String -> BamKey) -> [String] -> [BamKey]
forall a b. (a -> b) -> [a] -> [b]
map String -> BamKey
forall a. IsString a => String -> a
fromString ([String] -> [BamKey]) -> [String] -> [BamKey]
forall a b. (a -> b) -> a -> b
$ String -> [String]
words "BQ CM FZ Q2 R2 XM XO XG YQ EN CQ CS E2 FS OQ OP OC U2 H0 H1 H2 HI NH IH ZQ"
do_rmsq :: [BamKey]
do_rmsq = (String -> BamKey) -> [String] -> [BamKey]
forall a b. (a -> b) -> [a] -> [b]
map String -> BamKey
forall a. IsString a => String -> a
fromString ([String] -> [BamKey]) -> [String] -> [BamKey]
forall a b. (a -> b) -> a -> b
$ String -> [String]
words "AM AS MQ PQ SM UQ"
do_maj :: [BamKey]
do_maj = (String -> BamKey) -> [String] -> [BamKey]
forall a b. (a -> b) -> [a] -> [b]
map String -> BamKey
forall a. IsString a => String -> a
fromString ([String] -> [BamKey]) -> [String] -> [BamKey]
forall a b. (a -> b) -> a -> b
$ String -> [String]
words "X0 X1 XT XS XF XE BC LB RG XI XJ YI YJ"
data BrokenMD = BrokenMD Bytes Refseq Int [Cigar] [MdOp] [Nucleotides] [Nucleotides] deriving (Typeable, Int -> BrokenMD -> ShowS
[BrokenMD] -> ShowS
BrokenMD -> String
(Int -> BrokenMD -> ShowS)
-> (BrokenMD -> String) -> ([BrokenMD] -> ShowS) -> Show BrokenMD
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BrokenMD] -> ShowS
$cshowList :: [BrokenMD] -> ShowS
show :: BrokenMD -> String
$cshow :: BrokenMD -> String
showsPrec :: Int -> BrokenMD -> ShowS
$cshowsPrec :: Int -> BrokenMD -> ShowS
Show)
instance Exception BrokenMD where
displayException :: BrokenMD -> String
displayException (BrokenMD qname :: Bytes
qname rname :: Refseq
rname pos :: Int
pos cigs :: [Cigar]
cigs ms :: [MdOp]
ms osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq)
= [String] -> String
unlines [ "Broken MD field when trying to construct new MD!"
, "QNAME: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bytes -> String
forall a. Show a => a -> String
show Bytes
qname
, "POS: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> ShowS
forall a. Show a => a -> ShowS
shows (Refseq -> Word32
unRefseq Refseq
rname) ":" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
pos
, "CIGAR: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Cigar] -> String
forall a. Show a => a -> String
show [Cigar]
cigs
, "MD: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [MdOp] -> String
forall a. Show a => a -> String
show [MdOp]
ms
, "refseq: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Nucleotides] -> String
forall a. Show a => a -> String
show [Nucleotides]
osq
, "readseq: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Nucleotides] -> String
forall a. Show a => a -> String
show [Nucleotides]
nsq ]
minViewBy :: (a -> a -> Ordering) -> a -> [a] -> (a,[a])
minViewBy :: (a -> a -> Ordering) -> a -> [a] -> (a, [a])
minViewBy cmp :: a -> a -> Ordering
cmp x :: a
x xs :: [a]
xs = a -> [a] -> [a] -> (a, [a])
go a
x [] [a]
xs
where
go :: a -> [a] -> [a] -> (a, [a])
go m :: a
m acc :: [a]
acc [ ] = (a
m,[a]
acc)
go m :: a
m acc :: [a]
acc (a :: a
a:as :: [a]
as) = case a
m a -> a -> Ordering
`cmp` a
a of GT -> a -> [a] -> [a] -> (a, [a])
go a
a (a
ma -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
acc) [a]
as
_ -> a -> [a] -> [a] -> (a, [a])
go a
m (a
aa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
acc) [a]
as
data MdFail = MdFail [Cigar] [MdOp] [Nucleotides] [Nucleotides]
mk_new_md' :: [MdOp] -> [Cigar] -> [MdOp] -> [Nucleotides] -> [Nucleotides] -> Either MdFail [MdOp]
mk_new_md' :: [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' acc :: [MdOp]
acc [] [] [] [] = [MdOp] -> Either MdFail [MdOp]
forall a b. b -> Either a b
Right ([MdOp] -> Either MdFail [MdOp]) -> [MdOp] -> Either MdFail [MdOp]
forall a b. (a -> b) -> a -> b
$ [MdOp] -> [MdOp] -> [MdOp]
normalize [] [MdOp]
acc
where
normalize :: [MdOp] -> [MdOp] -> [MdOp]
normalize a :: [MdOp]
a (MdNum 0:os :: [MdOp]
os) = [MdOp] -> [MdOp] -> [MdOp]
normalize [MdOp]
a [MdOp]
os
normalize (MdNum n :: Int
n:a :: [MdOp]
a) (MdNum m :: Int
m:os :: [MdOp]
os) = [MdOp] -> [MdOp] -> [MdOp]
normalize (Int -> MdOp
MdNum (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
m)MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
a) [MdOp]
os
normalize a :: [MdOp]
a (MdDel []:os :: [MdOp]
os) = [MdOp] -> [MdOp] -> [MdOp]
normalize [MdOp]
a [MdOp]
os
normalize (MdDel u :: [Nucleotides]
u:a :: [MdOp]
a) (MdDel v :: [Nucleotides]
v:os :: [MdOp]
os) = [MdOp] -> [MdOp] -> [MdOp]
normalize ([Nucleotides] -> MdOp
MdDel ([Nucleotides]
v[Nucleotides] -> [Nucleotides] -> [Nucleotides]
forall a. [a] -> [a] -> [a]
++[Nucleotides]
u)MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
a) [MdOp]
os
normalize a :: [MdOp]
a ( o :: MdOp
o:os :: [MdOp]
os) = [MdOp] -> [MdOp] -> [MdOp]
normalize (MdOp
oMdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
a) [MdOp]
os
normalize a :: [MdOp]
a [ ] = [MdOp]
a
mk_new_md' acc :: [MdOp]
acc ( _ :* 0 : cigs :: [Cigar]
cigs) mds :: [MdOp]
mds osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc cigs :: [Cigar]
cigs (MdNum 0 : mds :: [MdOp]
mds) osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc cigs :: [Cigar]
cigs (MdDel [] : mds :: [MdOp]
mds) osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Mat :* u :: Int
u : cigs :: [Cigar]
cigs) (MdRep b :: Nucleotides
b : mds :: [MdOp]
mds) (_:osq :: [Nucleotides]
osq) (n :: Nucleotides
n:nsq :: [Nucleotides]
nsq)
| Nucleotides
b Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
n = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' (Int -> MdOp
MdNum 1 MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Mat CigOp -> Int -> Cigar
:* (Int
uInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
cigs) [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
| Bool
otherwise = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' (Nucleotides -> MdOp
MdRep Nucleotides
b MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Mat CigOp -> Int -> Cigar
:* (Int
uInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
cigs) [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Mat :* u :: Int
u : cigs :: [Cigar]
cigs) (MdNum v :: Int
v : mds :: [MdOp]
mds) (o :: Nucleotides
o:osq :: [Nucleotides]
osq) (n :: Nucleotides
n:nsq :: [Nucleotides]
nsq)
| Nucleotides
o Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
n = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' (Int -> MdOp
MdNum 1 MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Mat CigOp -> Int -> Cigar
:* (Int
uInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
cigs) (Int -> MdOp
MdNum (Int
vInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds) [Nucleotides]
osq [Nucleotides]
nsq
| Bool
otherwise = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' (Nucleotides -> MdOp
MdRep Nucleotides
o MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Mat CigOp -> Int -> Cigar
:* (Int
uInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
:[Cigar]
cigs) (Int -> MdOp
MdNum (Int
vInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds) [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Del :* n :: Int
n : cigs :: [Cigar]
cigs) (MdDel bs :: [Nucleotides]
bs : mds :: [MdOp]
mds) osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [Nucleotides] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Nucleotides]
bs = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' ([Nucleotides] -> MdOp
MdDel [Nucleotides]
bs MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) [Cigar]
cigs [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Del :* n :: Int
n : cigs :: [Cigar]
cigs) (MdDel (b :: Nucleotides
b:bs :: [Nucleotides]
bs) : mds :: [MdOp]
mds) osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' ([Nucleotides] -> MdOp
MdDel [Nucleotides
b] MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Del CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cigs) ([Nucleotides] -> MdOp
MdDel [Nucleotides]
bsMdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
mds) [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Del :* n :: Int
n : cigs :: [Cigar]
cigs) (MdRep b :: Nucleotides
b : mds :: [MdOp]
mds) osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' ([Nucleotides] -> MdOp
MdDel [Nucleotides
b] MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Del CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cigs) [MdOp]
mds [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Del :* n :: Int
n : cigs :: [Cigar]
cigs) (MdNum m :: Int
m : mds :: [MdOp]
mds) osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' ([Nucleotides] -> MdOp
MdDel [Nucleotides
nucsN] MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
acc) (CigOp
Del CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cigs) (Int -> MdOp
MdNum (Int
mInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
mds) [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Ins :* n :: Int
n : cigs :: [Cigar]
cigs) md :: [MdOp]
md osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
md (Int -> [Nucleotides] -> [Nucleotides]
forall a. Int -> [a] -> [a]
drop Int
n [Nucleotides]
osq) (Int -> [Nucleotides] -> [Nucleotides]
forall a. Int -> [a] -> [a]
drop Int
n [Nucleotides]
nsq)
mk_new_md' acc :: [MdOp]
acc (SMa :* n :: Int
n : cigs :: [Cigar]
cigs) md :: [MdOp]
md osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
md (Int -> [Nucleotides] -> [Nucleotides]
forall a. Int -> [a] -> [a]
drop Int
n [Nucleotides]
osq) (Int -> [Nucleotides] -> [Nucleotides]
forall a. Int -> [a] -> [a]
drop Int
n [Nucleotides]
nsq)
mk_new_md' acc :: [MdOp]
acc (HMa :* _ : cigs :: [Cigar]
cigs) md :: [MdOp]
md osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
md [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Pad :* _ : cigs :: [Cigar]
cigs) md :: [MdOp]
md osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
md [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' acc :: [MdOp]
acc (Nop :* _ : cigs :: [Cigar]
cigs) md :: [MdOp]
md osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = [MdOp]
-> [Cigar]
-> [MdOp]
-> [Nucleotides]
-> [Nucleotides]
-> Either MdFail [MdOp]
mk_new_md' [MdOp]
acc [Cigar]
cigs [MdOp]
md [Nucleotides]
osq [Nucleotides]
nsq
mk_new_md' _acc :: [MdOp]
_acc cigs :: [Cigar]
cigs ms :: [MdOp]
ms osq :: [Nucleotides]
osq nsq :: [Nucleotides]
nsq = MdFail -> Either MdFail [MdOp]
forall a b. a -> Either a b
Left (MdFail -> Either MdFail [MdOp]) -> MdFail -> Either MdFail [MdOp]
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> [Nucleotides] -> [Nucleotides] -> MdFail
MdFail [Cigar]
cigs [MdOp]
ms [Nucleotides]
osq [Nucleotides]
nsq
consensus :: Qual -> [ (Nucleotides, Qual) ] -> (Nucleotides, Qual)
consensus :: Qual -> [(Nucleotides, Qual)] -> (Nucleotides, Qual)
consensus (Q maxq :: Word8
maxq) nqs :: [(Nucleotides, Qual)]
nqs =
let accs :: U.Vector Int
accs :: Vector Int
accs = (Int -> Int -> Int) -> Vector Int -> [(Int, Int)] -> Vector Int
forall a b.
Unbox a =>
(a -> b -> a) -> Vector a -> [(Int, b)] -> Vector a
U.accum Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) (Int -> Int -> Vector Int
forall a. Unbox a => Int -> a -> Vector a
U.replicate 16 0) [ (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n, Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
q) | (Ns n :: Word8
n,Q q :: Word8
q) <- [(Nucleotides, Qual)]
nqs ]
in case ((Nucleotides, Int) -> (Nucleotides, Int) -> Ordering)
-> [(Nucleotides, Int)] -> [(Nucleotides, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Nucleotides, Int) -> (Nucleotides, Int) -> Ordering)
-> (Nucleotides, Int) -> (Nucleotides, Int) -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip (((Nucleotides, Int) -> (Nucleotides, Int) -> Ordering)
-> (Nucleotides, Int) -> (Nucleotides, Int) -> Ordering)
-> ((Nucleotides, Int) -> (Nucleotides, Int) -> Ordering)
-> (Nucleotides, Int)
-> (Nucleotides, Int)
-> Ordering
forall a b. (a -> b) -> a -> b
$ ((Nucleotides, Int) -> Int)
-> (Nucleotides, Int) -> (Nucleotides, Int) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Nucleotides, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Nucleotides, Int)] -> [(Nucleotides, Int)])
-> [(Nucleotides, Int)] -> [(Nucleotides, Int)]
forall a b. (a -> b) -> a -> b
$ [Nucleotides] -> [Int] -> [(Nucleotides, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Word8 -> Nucleotides
Ns 0 ..] ([Int] -> [(Nucleotides, Int)]) -> [Int] -> [(Nucleotides, Int)]
forall a b. (a -> b) -> a -> b
$ Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
U.toList Vector Int
accs of
(n0 :: Nucleotides
n0,q0 :: Int
q0) : (_,q1 :: Int
q1) : _ | Word8
qr Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> 3 -> (Nucleotides
n0, Word8 -> Qual
Q Word8
qr)
where qr :: Word8
qr = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
q0Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
q1) Int -> Int -> Int
forall a. Ord a => a -> a -> a
`min` Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
maxq
_ -> (Nucleotides
nucsN, Word8 -> Qual
Q 0)
do_cheap_collapse :: Monad m => [BamRec] -> m ( Decision, [BamRec] )
do_cheap_collapse :: [BamRec] -> m (Decision, [BamRec])
do_cheap_collapse [ ] = (Decision, [BamRec]) -> m (Decision, [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ( BamRec -> Decision
Representative BamRec
nullBamRec, [] )
do_cheap_collapse [ b :: BamRec
b ] = (Decision, [BamRec]) -> m (Decision, [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ( BamRec -> Decision
Representative BamRec
b, [] )
do_cheap_collapse (b :: BamRec
b:bs :: [BamRec]
bs) = (Decision, [BamRec]) -> m (Decision, [BamRec])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ( BamRec -> Decision
Representative (BamRec -> Decision) -> BamRec -> Decision
forall a b. (a -> b) -> a -> b
$ Int -> BamRec -> BamRec
replaceXP Int
new_xp BamRec
b0, [BamRec]
bx )
where
(b0 :: BamRec
b0, bx :: [BamRec]
bx) = (BamRec -> BamRec -> Ordering)
-> BamRec -> [BamRec] -> (BamRec, [BamRec])
forall a. (a -> a -> Ordering) -> a -> [a] -> (a, [a])
minViewBy ((BamRec -> Bytes) -> BamRec -> BamRec -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing BamRec -> Bytes
b_qname) BamRec
b [BamRec]
bs
new_xp :: Int
new_xp = (Int -> BamRec -> Int) -> Int -> [BamRec] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\a :: Int
a -> Int -> Int -> Int
oplus Int
a (Int -> Int) -> (BamRec -> Int) -> BamRec -> Int
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Int -> BamKey -> BamRec -> Int
extAsInt 1 "XP") (Int -> BamKey -> BamRec -> Int
extAsInt 1 "XP" BamRec
b) [BamRec]
bs
replaceXP :: Int -> BamRec -> BamRec
replaceXP :: Int -> BamRec -> BamRec
replaceXP new_xp :: Int
new_xp b0 :: BamRec
b0 = BamRec
b0 { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "XP" (Int -> Ext
Int Int
new_xp) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamRec -> Extensions
b_exts BamRec
b0 }
oplus :: Int -> Int -> Int
_ oplus :: Int -> Int -> Int
`oplus` (-1) = -1
(-1) `oplus` _ = -1
a :: Int
a `oplus` b :: Int
b = Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b
normalizeTo :: Bytes -> Int -> BamRec -> Either BamRec BamRec
normalizeTo :: Bytes -> Int -> BamRec -> Either BamRec BamRec
normalizeTo nm :: Bytes
nm l :: Int
l b :: BamRec
b = BamRec -> Either BamRec BamRec
forall b. b -> Either b b
lr (BamRec -> Either BamRec BamRec) -> BamRec -> Either BamRec BamRec
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_pos :: Int
b_pos = BamRec -> Int
b_pos BamRec
b Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
l
, b_mpos :: Int
b_mpos = BamRec -> Int
b_mpos BamRec
b Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
l
, b_mapq :: Qual
b_mapq = if Bool
dups_are_fine then Word8 -> Qual
Q 37 else BamRec -> Qual
b_mapq BamRec
b
, b_exts :: Extensions
b_exts = if Bool
dups_are_fine then BamKey -> Extensions -> Extensions
deleteE "XA" (BamRec -> Extensions
b_exts BamRec
b) else BamRec -> Extensions
b_exts BamRec
b }
where
lr :: b -> Either b b
lr = if BamRec -> Int
b_pos BamRec
b Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l then b -> Either b b
forall a b. a -> Either a b
Left else b -> Either b b
forall a b. b -> Either a b
Right
dups_are_fine :: Bool
dups_are_fine = Bytes -> Bool
all_match_XA (BamKey -> BamRec -> Bytes
extAsString "XA" BamRec
b)
all_match_XA :: Bytes -> Bool
all_match_XA s :: Bytes
s = case Char -> Bytes -> [Bytes]
T.split ';' Bytes
s of [xa1 :: Bytes
xa1, xa2 :: Bytes
xa2] | Bytes -> Bool
T.null Bytes
xa2 -> Bytes -> Bool
one_match_XA Bytes
xa1
[xa1 :: Bytes
xa1] -> Bytes -> Bool
one_match_XA Bytes
xa1
_ -> Bool
False
one_match_XA :: Bytes -> Bool
one_match_XA s :: Bytes
s = case Char -> Bytes -> [Bytes]
T.split ',' Bytes
s of (sq :: Bytes
sq:pos :: Bytes
pos:_) | Bytes
sq Bytes -> Bytes -> Bool
forall a. Eq a => a -> a -> Bool
== Bytes
nm -> Bytes -> Bool
pos_match_XA Bytes
pos ; _ -> Bool
False
pos_match_XA :: Bytes -> Bool
pos_match_XA s :: Bytes
s = case Bytes -> Maybe (Int, Bytes)
T.readInt Bytes
s of Just (p :: Int
p,z :: Bytes
z) | Bytes -> Bool
T.null Bytes
z -> Int -> Bool
int_match_XA Int
p ; _ -> Bool
False
int_match_XA :: Int -> Bool
int_match_XA p :: Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0 = (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Int
b_pos BamRec
b Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
l Bool -> Bool -> Bool
&& Bool -> Bool
not (BamRec -> Bool
isReversed BamRec
b)
| Bool
otherwise = (-Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Int
b_pos BamRec
b Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
l Bool -> Bool -> Bool
&& BamRec -> Bool
isReversed BamRec
b
wrapTo :: Int -> BamRec -> [Either BamRec BamRec]
wrapTo :: Int -> BamRec -> [Either BamRec BamRec]
wrapTo l :: Int
l b :: BamRec
b = if Bool
overhangs then [Either BamRec BamRec]
do_wrap else [BamRec -> Either BamRec BamRec
forall a b. b -> Either a b
Right BamRec
b]
where
overhangs :: Bool
overhangs = Bool -> Bool
not (BamRec -> Bool
isUnmapped BamRec
b) Bool -> Bool -> Bool
&& BamRec -> Int
b_pos BamRec
b Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< BamRec -> Int
b_pos BamRec
b Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength (BamRec -> Vector Cigar
b_cigar BamRec
b)
do_wrap :: [Either BamRec BamRec]
do_wrap = case Int -> ECig -> (ECig, ECig)
split_ecig (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- BamRec -> Int
b_pos BamRec
b) (ECig -> (ECig, ECig)) -> ECig -> (ECig, ECig)
forall a b. (a -> b) -> a -> b
$ Vector Cigar -> [MdOp] -> ECig
toECig (BamRec -> Vector Cigar
b_cigar BamRec
b) ([MdOp] -> Maybe [MdOp] -> [MdOp]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [MdOp] -> [MdOp]) -> Maybe [MdOp] -> [MdOp]
forall a b. (a -> b) -> a -> b
$ BamRec -> Maybe [MdOp]
getMd BamRec
b) of
(left :: ECig
left,right :: ECig
right) -> [ BamRec -> Either BamRec BamRec
forall a b. b -> Either a b
Right (BamRec -> Either BamRec BamRec) -> BamRec -> Either BamRec BamRec
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_cigar :: Vector Cigar
b_cigar = ECig -> Vector Cigar
toCigar ECig
left } BamRec -> ECig -> BamRec
`setMD` ECig
left
, BamRec -> Either BamRec BamRec
forall a b. a -> Either a b
Left (BamRec -> Either BamRec BamRec) -> BamRec -> Either BamRec BamRec
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_cigar :: Vector Cigar
b_cigar = ECig -> Vector Cigar
toCigar ECig
right, b_pos :: Int
b_pos = 0 } BamRec -> ECig -> BamRec
`setMD` ECig
right ]
split_ecig :: Int -> ECig -> (ECig, ECig)
split_ecig :: Int -> ECig -> (ECig, ECig)
split_ecig _ WithMD = (ECig
WithMD, ECig
WithMD)
split_ecig _ WithoutMD = (ECig
WithoutMD, ECig
WithoutMD)
split_ecig 0 ecs :: ECig
ecs = (ECig -> ECig
mask_all ECig
ecs, ECig
ecs)
split_ecig i :: Int
i (Ins' n :: Int
n ecs :: ECig
ecs) = case Int -> ECig -> (ECig, ECig)
split_ecig Int
i ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Int -> ECig -> ECig
Ins' Int
n ECig
u, Int -> ECig -> ECig
SMa' Int
n ECig
v)
split_ecig i :: Int
i (SMa' n :: Int
n ecs :: ECig
ecs) = case Int -> ECig -> (ECig, ECig)
split_ecig Int
i ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Int -> ECig -> ECig
SMa' Int
n ECig
u, Int -> ECig -> ECig
SMa' Int
n ECig
v)
split_ecig i :: Int
i (HMa' n :: Int
n ecs :: ECig
ecs) = case Int -> ECig -> (ECig, ECig)
split_ecig Int
i ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Int -> ECig -> ECig
HMa' Int
n ECig
u, Int -> ECig -> ECig
HMa' Int
n ECig
v)
split_ecig i :: Int
i (Pad' n :: Int
n ecs :: ECig
ecs) = case Int -> ECig -> (ECig, ECig)
split_ecig Int
i ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Int -> ECig -> ECig
Pad' Int
n ECig
u, ECig
v)
split_ecig i :: Int
i (Mat' n :: Int
n ecs :: ECig
ecs)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = case Int -> ECig -> (ECig, ECig)
split_ecig (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
n) ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Int -> ECig -> ECig
Mat' Int
n ECig
u, Int -> ECig -> ECig
SMa' Int
n ECig
v)
| Bool
otherwise = (Int -> ECig -> ECig
Mat' Int
i (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
SMa' (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i) (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ ECig -> ECig
mask_all ECig
ecs, Int -> ECig -> ECig
SMa' Int
i (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ Int -> ECig -> ECig
Mat' (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i) ECig
ecs)
split_ecig i :: Int
i (Rep' x :: Nucleotides
x ecs :: ECig
ecs) = case Int -> ECig -> (ECig, ECig)
split_ecig (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Nucleotides -> ECig -> ECig
Rep' Nucleotides
x ECig
u, Int -> ECig -> ECig
SMa' 1 ECig
v)
split_ecig i :: Int
i (Del' x :: Nucleotides
x ecs :: ECig
ecs) = case Int -> ECig -> (ECig, ECig)
split_ecig (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Nucleotides -> ECig -> ECig
Del' Nucleotides
x ECig
u, ECig
v)
split_ecig i :: Int
i (Nop' n :: Int
n ecs :: ECig
ecs)
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = case Int -> ECig -> (ECig, ECig)
split_ecig (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
n) ECig
ecs of (u :: ECig
u,v :: ECig
v) -> (Int -> ECig -> ECig
Nop' Int
n ECig
u, ECig
v)
| Bool
otherwise = (Int -> ECig -> ECig
Nop' Int
i (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ ECig -> ECig
mask_all ECig
ecs, Int -> ECig -> ECig
Nop' (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i) ECig
ecs)
mask_all :: ECig -> ECig
mask_all :: ECig -> ECig
mask_all WithMD = ECig
WithMD
mask_all WithoutMD = ECig
WithoutMD
mask_all (Nop' _ ec :: ECig
ec) = ECig -> ECig
mask_all ECig
ec
mask_all (HMa' _ ec :: ECig
ec) = ECig -> ECig
mask_all ECig
ec
mask_all (Pad' _ ec :: ECig
ec) = ECig -> ECig
mask_all ECig
ec
mask_all (Del' _ ec :: ECig
ec) = ECig -> ECig
mask_all ECig
ec
mask_all (Rep' _ ec :: ECig
ec) = Int -> ECig -> ECig
SMa' 1 (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ ECig -> ECig
mask_all ECig
ec
mask_all (Mat' n :: Int
n ec :: ECig
ec) = Int -> ECig -> ECig
SMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ ECig -> ECig
mask_all ECig
ec
mask_all (Ins' n :: Int
n ec :: ECig
ec) = Int -> ECig -> ECig
SMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ ECig -> ECig
mask_all ECig
ec
mask_all (SMa' n :: Int
n ec :: ECig
ec) = Int -> ECig -> ECig
SMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ ECig -> ECig
mask_all ECig
ec
data ECig = WithMD
| WithoutMD
| Mat' Int ECig
| Rep' Nucleotides ECig
| Ins' Int ECig
| Del' Nucleotides ECig
| Nop' Int ECig
| SMa' Int ECig
| HMa' Int ECig
| Pad' Int ECig
toECig :: W.Vector Cigar -> [MdOp] -> ECig
toECig :: Vector Cigar -> [MdOp] -> ECig
toECig = [Cigar] -> [MdOp] -> ECig
go ([Cigar] -> [MdOp] -> ECig)
-> (Vector Cigar -> [Cigar]) -> Vector Cigar -> [MdOp] -> ECig
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Cigar -> [Cigar]
forall a. Storable a => Vector a -> [a]
W.toList
where
go :: [Cigar] -> [MdOp] -> ECig
go cs :: [Cigar]
cs (MdNum 0:mds :: [MdOp]
mds) = [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go cs :: [Cigar]
cs (MdDel []:mds :: [MdOp]
mds) = [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go (_:*0 :cs :: [Cigar]
cs) mds :: [MdOp]
mds = [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go [ ] [ ] = ECig
WithMD
go [ ] _ = ECig
WithoutMD
go (Mat :* n :: Int
n : cs :: [Cigar]
cs) (MdRep x :: Nucleotides
x:mds :: [MdOp]
mds) = Nucleotides -> ECig -> ECig
Rep' Nucleotides
x (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go (CigOp
Mat CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cs) [MdOp]
mds
go (Mat :* n :: Int
n : cs :: [Cigar]
cs) (MdNum m :: Int
m:mds :: [MdOp]
mds)
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
m = Int -> ECig -> ECig
Mat' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs (Int -> MdOp
MdNum (Int
mInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
n)MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
mds)
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
m = Int -> ECig -> ECig
Mat' Int
m (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go (CigOp
Mat CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cs) [MdOp]
mds
| Bool
otherwise = Int -> ECig -> ECig
Mat' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go (Mat :* n :: Int
n : cs :: [Cigar]
cs) _ = Int -> ECig -> ECig
Mat' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
go (Ins :* n :: Int
n : cs :: [Cigar]
cs) mds :: [MdOp]
mds = Int -> ECig -> ECig
Ins' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go (Del :* n :: Int
n : cs :: [Cigar]
cs) (MdDel (x :: Nucleotides
x:xs :: [Nucleotides]
xs):mds :: [MdOp]
mds) = Nucleotides -> ECig -> ECig
Del' Nucleotides
x (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go (CigOp
Del CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cs) ([Nucleotides] -> MdOp
MdDel [Nucleotides]
xsMdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
:[MdOp]
mds)
go (Del :* n :: Int
n : cs :: [Cigar]
cs) _ = Nucleotides -> ECig -> ECig
Del' Nucleotides
nucsN (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' (CigOp
Del CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cs)
go (Nop :* n :: Int
n : cs :: [Cigar]
cs) mds :: [MdOp]
mds = Int -> ECig -> ECig
Nop' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go (SMa :* n :: Int
n : cs :: [Cigar]
cs) mds :: [MdOp]
mds = Int -> ECig -> ECig
SMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go (HMa :* n :: Int
n : cs :: [Cigar]
cs) mds :: [MdOp]
mds = Int -> ECig -> ECig
HMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go (Pad :* n :: Int
n : cs :: [Cigar]
cs) mds :: [MdOp]
mds = Int -> ECig -> ECig
Pad' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> [MdOp] -> ECig
go [Cigar]
cs [MdOp]
mds
go' :: [Cigar] -> ECig
go' (_ :* 0 : cs :: [Cigar]
cs) = [Cigar] -> ECig
go' [Cigar]
cs
go' [ ] = ECig
WithoutMD
go' (Mat :* n :: Int
n : cs :: [Cigar]
cs) = Int -> ECig -> ECig
Mat' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
go' (Ins :* n :: Int
n : cs :: [Cigar]
cs) = Int -> ECig -> ECig
Ins' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
go' (Del :* n :: Int
n : cs :: [Cigar]
cs) = Nucleotides -> ECig -> ECig
Del' Nucleotides
nucsN (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' (CigOp
Del CigOp -> Int -> Cigar
:* (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: [Cigar]
cs)
go' (Nop :* n :: Int
n : cs :: [Cigar]
cs) = Int -> ECig -> ECig
Nop' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
go' (SMa :* n :: Int
n : cs :: [Cigar]
cs) = Int -> ECig -> ECig
SMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
go' (HMa :* n :: Int
n : cs :: [Cigar]
cs) = Int -> ECig -> ECig
HMa' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
go' (Pad :* n :: Int
n : cs :: [Cigar]
cs) = Int -> ECig -> ECig
Pad' Int
n (ECig -> ECig) -> ECig -> ECig
forall a b. (a -> b) -> a -> b
$ [Cigar] -> ECig
go' [Cigar]
cs
toCigar :: ECig -> W.Vector Cigar
toCigar :: ECig -> Vector Cigar
toCigar = [Cigar] -> Vector Cigar
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList ([Cigar] -> Vector Cigar)
-> (ECig -> [Cigar]) -> ECig -> Vector Cigar
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ECig -> [Cigar]
go
where
go :: ECig -> [Cigar]
go WithMD = []
go WithoutMD = []
go (Ins' n :: Int
n ecs :: ECig
ecs) = CigOp
Ins CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
go (Nop' n :: Int
n ecs :: ECig
ecs) = CigOp
Nop CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
go (HMa' n :: Int
n ecs :: ECig
ecs) = CigOp
HMa CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
go (Pad' n :: Int
n ecs :: ECig
ecs) = CigOp
Pad CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
go (SMa' n :: Int
n ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_sma Int
n ECig
ecs
go (Mat' n :: Int
n ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_mat Int
n ECig
ecs
go (Rep' _ ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_mat 1 ECig
ecs
go (Del' _ ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_del 1 ECig
ecs
go_sma :: Int -> ECig -> [Cigar]
go_sma !Int
n (SMa' m :: Int
m ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_sma (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
m) ECig
ecs
go_sma !Int
n ecs :: ECig
ecs = CigOp
SMa CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
go_mat :: Int -> ECig -> [Cigar]
go_mat !Int
n (Mat' m :: Int
m ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_mat (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
m) ECig
ecs
go_mat !Int
n (Rep' _ ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_mat (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) ECig
ecs
go_mat !Int
n ecs :: ECig
ecs = CigOp
Mat CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
go_del :: Int -> ECig -> [Cigar]
go_del !Int
n (Del' _ ecs :: ECig
ecs) = Int -> ECig -> [Cigar]
go_del (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) ECig
ecs
go_del !Int
n ecs :: ECig
ecs = CigOp
Del CigOp -> Int -> Cigar
:* Int
n Cigar -> [Cigar] -> [Cigar]
forall a. a -> [a] -> [a]
: ECig -> [Cigar]
go ECig
ecs
setMD :: BamRec -> ECig -> BamRec
setMD :: BamRec -> ECig -> BamRec
setMD b :: BamRec
b ec :: ECig
ec = case ECig -> Maybe [MdOp]
go ECig
ec of
Just md :: [MdOp]
md -> BamRec
b { b_exts :: Extensions
b_exts = BamKey -> Ext -> Extensions -> Extensions
updateE "MD" (Bytes -> Ext
Text (Bytes -> Ext) -> Bytes -> Ext
forall a b. (a -> b) -> a -> b
$ [MdOp] -> Bytes
showMd [MdOp]
md) (BamRec -> Extensions
b_exts BamRec
b) }
Nothing -> BamRec
b { b_exts :: Extensions
b_exts = BamKey -> Extensions -> Extensions
deleteE "MD" (BamRec -> Extensions
b_exts BamRec
b) }
where
go :: ECig -> Maybe [MdOp]
go WithMD = [MdOp] -> Maybe [MdOp]
forall a. a -> Maybe a
Just []
go WithoutMD = Maybe [MdOp]
forall a. Maybe a
Nothing
go (Ins' _ ecs :: ECig
ecs) = ECig -> Maybe [MdOp]
go ECig
ecs
go (Nop' _ ecs :: ECig
ecs) = ECig -> Maybe [MdOp]
go ECig
ecs
go (SMa' _ ecs :: ECig
ecs) = ECig -> Maybe [MdOp]
go ECig
ecs
go (HMa' _ ecs :: ECig
ecs) = ECig -> Maybe [MdOp]
go ECig
ecs
go (Pad' _ ecs :: ECig
ecs) = ECig -> Maybe [MdOp]
go ECig
ecs
go (Mat' n :: Int
n ecs :: ECig
ecs) = (if Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0 then Maybe [MdOp] -> Maybe [MdOp]
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else ([MdOp] -> [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> [MdOp] -> [MdOp]
addNum Int
n)) (Maybe [MdOp] -> Maybe [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall a b. (a -> b) -> a -> b
$ ECig -> Maybe [MdOp]
go ECig
ecs
go (Rep' x :: Nucleotides
x ecs :: ECig
ecs) = (if Nucleotides -> Bool
isGap Nucleotides
x then Maybe [MdOp] -> Maybe [MdOp]
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else ([MdOp] -> [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Nucleotides -> [MdOp] -> [MdOp]
addRep Nucleotides
x)) (Maybe [MdOp] -> Maybe [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall a b. (a -> b) -> a -> b
$ ECig -> Maybe [MdOp]
go ECig
ecs
go (Del' x :: Nucleotides
x ecs :: ECig
ecs) = (if Nucleotides -> Bool
isGap Nucleotides
x then Maybe [MdOp] -> Maybe [MdOp]
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else ([MdOp] -> [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Nucleotides -> [MdOp] -> [MdOp]
addDel Nucleotides
x)) (Maybe [MdOp] -> Maybe [MdOp]) -> Maybe [MdOp] -> Maybe [MdOp]
forall a b. (a -> b) -> a -> b
$ ECig -> Maybe [MdOp]
go ECig
ecs
addNum :: Int -> [MdOp] -> [MdOp]
addNum n :: Int
n (MdNum m :: Int
m : mds :: [MdOp]
mds) = Int -> MdOp
MdNum (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
m) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds
addNum n :: Int
n mds :: [MdOp]
mds = Int -> MdOp
MdNum Int
n MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds
addRep :: Nucleotides -> [MdOp] -> [MdOp]
addRep x :: Nucleotides
x mds :: [MdOp]
mds = Nucleotides -> MdOp
MdRep Nucleotides
x MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds
addDel :: Nucleotides -> [MdOp] -> [MdOp]
addDel x :: Nucleotides
x (MdDel y :: [Nucleotides]
y : mds :: [MdOp]
mds) = [Nucleotides] -> MdOp
MdDel (Nucleotides
xNucleotides -> [Nucleotides] -> [Nucleotides]
forall a. a -> [a] -> [a]
:[Nucleotides]
y) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds
addDel x :: Nucleotides
x mds :: [MdOp]
mds = [Nucleotides] -> MdOp
MdDel [Nucleotides
x] MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
mds