module Bio.Bam.Filter (
filterPairs, QualFilter,
complexSimple, complexEntropy,
qualityAverage, qualityMinimum,
qualityFromOldIllumina, qualityFromNewIllumina
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude
import Bio.Streaming
import qualified Data.Vector.Generic as V
filterPairs :: Monad m => (BamRec -> [BamRec])
-> (Maybe BamRec -> Maybe BamRec -> [BamRec])
-> Stream (Of BamRec) m r -> Stream (Of BamRec) m r
filterPairs ps pp = step
where
step = lift . inspect >=> either pure step'
step' (b :> s)
| isPaired b = lift (inspect s) >>= step'' b
| otherwise = each (ps b) >> step s
step'' b (Left r) = each (pp (Just b) Nothing) >> pure r
step'' b (Right (c :> s))
| b_rname b /= b_rname c || not (isPaired c) =
let b' = if isSecondMate b then pp Nothing (Just b) else pp (Just b) Nothing
in each b' >> step' (c :> s)
| isFirstMate c && isSecondMate b = step''' c b s
| otherwise = step''' b c s
step''' b c s = each (pp (Just b) (Just c)) >> step s
type QualFilter = BamRec -> BamRec
{-# INLINE count #-}
count :: (V.Vector v a, Eq a) => a -> v a -> Int
count x = V.foldl' (\acc y -> if x == y then acc+1 else acc) 0
{-# INLINE complexSimple #-}
complexSimple :: Double -> QualFilter
complexSimple r b = if p then b else b'
where
b' = setQualFlag 'C' $ b { b_flag = b_flag b .|. flagFailsQC }
p = let counts = [ count x $ b_seq b | x <- properBases ]
lim = floor $ r * fromIntegral (sum counts)
in all (<= lim) counts
{-# INLINE complexEntropy #-}
complexEntropy :: Double -> QualFilter
complexEntropy r b = if p then b else b'
where
b' = setQualFlag 'C' $ b { b_flag = b_flag b .|. flagFailsQC }
p = ent >= r * total
counts = [ count x $ b_seq b | x <- properBases ]
total = fromIntegral $ V.length $ b_seq b
ent = sum [ fromIntegral c * log (total / fromIntegral c) | c <- counts, c /= 0 ] / log 2
{-# INLINE qualityAverage #-}
qualityAverage :: Int -> QualFilter
qualityAverage q b = if p then b else b'
where
b' = setQualFlag 'Q' $ b { b_flag = b_flag b .|. flagFailsQC }
p = case b_qual b of Nothing -> True
Just qs -> let total = V.foldl' (\a x -> a + fromIntegral (unQ x)) 0 qs
in total >= q * V.length qs
{-# INLINE qualityMinimum #-}
qualityMinimum :: Int -> Qual -> QualFilter
qualityMinimum n (Q q) b = if p then b else b'
where
b' = setQualFlag 'Q' $ b { b_flag = b_flag b .|. flagFailsQC }
p = case b_qual b of Nothing -> True
Just qs -> V.length (V.filter (< Q q) qs) <= n
qualityFromOldIllumina :: BamRec -> BamRec
qualityFromOldIllumina b = b { b_qual = V.map conv <$> b_qual b }
where
conv (Q s) = let s' :: Double
s' = exp $ log 10 * (fromIntegral s - 31) / (-10)
p = s' / (1+s')
q = - 10 * log p / log 10
in Q (round q)
qualityFromNewIllumina :: BamRec -> BamRec
qualityFromNewIllumina b = b { b_qual = V.map (Q . subtract 31 . unQ) <$> b_qual b }