module Bio.ABI.Clean
( Cleanable (..)
, Thresholds (..)
) where
import Bio.Sequence (mean, meanInRange)
import qualified Bio.Sequence as S (drop, length, reverse, tail)
import Bio.Sequence.Basecalled (BasecalledSequence)
import Control.Monad (join)
-- | Types whose values can be cleaned.
--
-- Both methods return a 'Maybe'; each instance decides when the result is
-- 'Nothing'.
class Cleanable a where
  -- | Clean a value using the supplied 'Thresholds'.
  cleanWith :: Thresholds -> a -> Maybe a

  -- | Clean a value using 'defaultThresholds'.
  clean :: a -> Maybe a
  clean = cleanWith defaultThresholds

  {-# MINIMAL cleanWith #-}
-- | Parameters that control cleaning.
data Thresholds = Thresholds
  { frameSize      :: Int
    -- ^ Length of the window inspected at a sequence edge by @cutEdge@
    -- (the window is positions @0 .. frameSize - 1@).
  , edgeThreshold  :: Double
    -- ^ While the mean over the edge window is below this value, @cutEdge@
    -- keeps trimming the edge.
  , innerThreshold :: Double
    -- ^ The mean over the whole trimmed sequence must be strictly greater
    -- than this value for @checkInner@ to accept it.
  }
  deriving (Eq, Show)
-- | Thresholds used by 'clean': a frame of 10 positions, an edge threshold
-- of 20 and an inner threshold of 30.
defaultThresholds :: Thresholds
defaultThresholds =
  Thresholds
    { frameSize      = 10
    , edgeThreshold  = 20
    , innerThreshold = 30
    }
-- | Cleaning a 'BasecalledSequence':
--
-- 1. trim the left edge with 'cutEdge';
-- 2. trim the right edge by reversing, applying 'cutEdge' again and
--    reversing back;
-- 3. keep the result only if 'checkInner' accepts it.
--
-- The result is 'Nothing' if either trimming step yields 'Nothing' or the
-- inner check fails.
instance Cleanable BasecalledSequence where
  cleanWith thr input
    | fmap (checkInner thr) fromBoth == Just True = fromBoth
    | otherwise                                   = Nothing
    where
      -- Both edge passes must use the caller-supplied thresholds; using
      -- 'defaultThresholds' here would make 'cleanWith' ignore the caller's
      -- 'frameSize' and 'edgeThreshold'.
      fromLeft = cutEdge thr input
      fromBoth = fmap S.reverse
               . join
               $ cutEdge thr
               . S.reverse
             <$> fromLeft
-- | 'True' when the 'mean' of the whole sequence is strictly greater than
-- the 'innerThreshold' of the given 'Thresholds'.
checkInner :: Thresholds -> BasecalledSequence -> Bool
checkInner thr sequ = mean sequ > innerThreshold thr
-- | Trim the leading edge of a sequence.
--
-- The guards are tried in order:
--
-- * a sequence shorter than 'frameSize' is returned unchanged;
-- * while the mean over the leading window (positions @0 .. frameSize - 1@)
--   is below 'edgeThreshold' and more than one element remains, the first
--   element is removed and the check repeats;
-- * otherwise, a sequence longer than 'frameSize' has its first 'frameSize'
--   elements dropped;
-- * anything else yields 'Nothing'.
--
-- NOTE(review): the third case drops a whole frame once the window mean has
-- reached the threshold -- confirm this is the intended behaviour.
cutEdge :: Thresholds -> BasecalledSequence -> Maybe BasecalledSequence
cutEdge thr sequ
  | len < frame                    = Just sequ
  | windowMean < edgeThreshold thr
  , len > 1                        = cutEdge thr (S.tail sequ)
  | len > frame                    = Just (S.drop frame sequ)
  | otherwise                      = Nothing
  where
    len        = S.length sequ
    frame      = frameSize thr
    -- Only forced by the second guard, i.e. when len >= frame.
    windowMean = meanInRange sequ (0, frame - 1)