module Bio.Bam.Filter (
filterPairs, QualFilter,
complexSimple, complexEntropy,
qualityAverage, qualityMinimum,
qualityFromOldIllumina, qualityFromNewIllumina
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Base
import Bio.Iteratee
import Data.Bits
import Prelude
import qualified Data.Vector.Generic as V
filterPairs :: Monad m => (BamRec -> [BamRec])
-> (Maybe BamRec -> Maybe BamRec -> [BamRec])
-> Enumeratee [BamRec] [BamRec] m a
filterPairs ps pp = eneeCheckIfDone step
where
step k = tryHead >>= step' k
step' k Nothing = return $ liftI k
step' k (Just b)
| isPaired b = tryHead >>= step'' k b
| otherwise = case ps b of [] -> step k ; b' -> eneeCheckIfDone step . k $ Chunk b'
step'' k b Nothing = case pp (Just b) Nothing of
[] -> return $ liftI k
b' -> return $ k $ Chunk b'
step'' k b (Just c)
| b_rname b /= b_rname c || not (isPaired c) =
let b' = if isSecondMate b then pp Nothing (Just b) else pp (Just b) Nothing
in case b' of [] -> step' k (Just c)
_ -> eneeCheckIfDone (\k' -> step' k' (Just c)) . k $ Chunk b'
| isFirstMate c && isSecondMate b = step''' k c b
| otherwise = step''' k b c
step''' k b c = case pp (Just b) (Just c) of [] -> step k
b' -> eneeCheckIfDone step . k $ Chunk b'
type QualFilter = BamRec -> BamRec
{-# INLINE count #-}
count :: (V.Vector v a, Eq a) => a -> v a -> Int
count x = V.foldl' (\acc y -> if x == y then acc+1 else acc) 0
{-# INLINE complexSimple #-}
complexSimple :: Double -> QualFilter
complexSimple r b = if p then b else b'
where
b' = setQualFlag 'C' $ b { b_flag = b_flag b .|. flagFailsQC }
p = let counts = [ count x $ b_seq b | x <- properBases ]
lim = floor $ r * fromIntegral (sum counts)
in all (<= lim) counts
{-# INLINE complexEntropy #-}
complexEntropy :: Double -> QualFilter
complexEntropy r b = if p then b else b'
where
b' = setQualFlag 'C' $ b { b_flag = b_flag b .|. flagFailsQC }
p = ent >= r * total
counts = [ count x $ b_seq b | x <- properBases ]
total = fromIntegral $ V.length $ b_seq b
ent = sum [ fromIntegral c * log (total / fromIntegral c) | c <- counts, c /= 0 ] / log 2
{-# INLINE qualityAverage #-}
qualityAverage :: Int -> QualFilter
qualityAverage q b = if p then b else b'
where
b' = setQualFlag 'Q' $ b { b_flag = b_flag b .|. flagFailsQC }
p = let total = V.foldl' (\a x -> a + fromIntegral (unQ x)) 0 $ b_qual b
in total >= q * V.length (b_qual b)
{-# INLINE qualityMinimum #-}
qualityMinimum :: Int -> Qual -> QualFilter
qualityMinimum n (Q q) b = if p then b else b'
where
b' = setQualFlag 'Q' $ b { b_flag = b_flag b .|. flagFailsQC }
p = V.length (V.filter (< Q q) (b_qual b)) <= n
qualityFromOldIllumina :: BamRec -> BamRec
qualityFromOldIllumina b = b { b_qual = V.map conv $ b_qual b }
where
conv (Q s) = let s' :: Double
s' = exp $ log 10 * (fromIntegral s - 31) / (-10)
p = s' / (1+s')
q = - 10 * log p / log 10
in Q (round q)
qualityFromNewIllumina :: BamRec -> BamRec
qualityFromNewIllumina b = b { b_qual = V.map (Q . subtract 31 . unQ) $ b_qual b }