module Bio.ABI.Clean
( Cleanable (..)
, Thresholds (..)
, defaultThresholds
) where
import Control.Monad (guard, join)
import qualified Data.Vector as V
import Bio.Sequence (mean, meanInRange)
import qualified Bio.Sequence as S (drop, length, reverse, tail, take)
import Bio.Sequence.Basecalled (BasecalledSequence, BasecalledSequenceWithRawData (..))
-- | Types that can be cleaned according to a set of 'Thresholds'.
--
-- Only 'cleanWith' has to be implemented by an instance; 'clean' has a
-- default definition.
class Cleanable a where
  -- | Clean a value using 'defaultThresholds'.
  clean :: a -> Maybe a
  clean = cleanWith defaultThresholds

  -- | Clean a value using the supplied 'Thresholds'. The result is wrapped in
  -- 'Maybe', so an instance can report that no cleaned value is produced.
  cleanWith :: Thresholds -> a -> Maybe a
-- | Parameters that control cleaning.
data Thresholds
  = Thresholds
      { frameSize      :: Int
        -- ^ Length of the window at the start of a sequence whose mean is
        -- inspected while scanning an edge.
      , edgeThreshold  :: Double
        -- ^ The edge scan keeps advancing while the window mean is below
        -- this value.
      , innerThreshold :: Double
        -- ^ The mean of the whole trimmed sequence must be strictly greater
        -- than this value for cleaning to succeed.
      }
  deriving (Eq, Show)
-- | Thresholds used by 'clean': a frame of 10 positions, an edge threshold
-- of 20 and an inner threshold of 30.
defaultThresholds :: Thresholds
defaultThresholds =
  Thresholds
    { frameSize      = 10
    , edgeThreshold  = 20
    , innerThreshold = 30
    }
-- | Trims both edges of the sequence with 'doCutEdge' and keeps the result
-- only if it passes 'checkInner'.
instance Cleanable BasecalledSequence where
  cleanWith thr input = do
      cut <- fromBoth
      -- Reject the trimmed sequence when its overall mean is too low.
      guard $ checkInner thr cut
      return cut
    where
      -- The input with its left edge trimmed.
      fromLeft :: Maybe BasecalledSequence
      fromLeft = doCutEdge thr input

      -- The right edge is trimmed by reversing the left-trimmed sequence,
      -- trimming it from the left again and reversing the result back.
      fromBoth :: Maybe BasecalledSequence
      fromBoth = fromLeft >>= fmap S.reverse . doCutEdge thr . S.reverse
-- | Trims both edges of the basecalled sequence and drops the same number of
-- entries from the matching ends of the peak locations. Only 'bsSequence' and
-- 'bsPeakLocations' are replaced in the result; every other field of the
-- input record is returned as it was.
instance Cleanable BasecalledSequenceWithRawData where
  cleanWith thr input = do
      -- Left edge: drop the same count from the sequence and the peaks.
      toDropLeft <- cutEdge thr fullSequ
      let leftDroppedSequ = S.drop toDropLeft fullSequ
          leftDroppedPloc = V.drop toDropLeft fullPloc

      -- Right edge: scan the reversed sequence, then keep the matching prefix
      -- of the (non-reversed) left-trimmed data.
      toDropRight <- cutEdge thr $ S.reverse leftDroppedSequ
      let rightDroppedSequ =
            S.take (S.length leftDroppedSequ - toDropRight) leftDroppedSequ
          rightDroppedPloc =
            V.take (V.length leftDroppedPloc - toDropRight) leftDroppedPloc

      -- Reject the trimmed sequence when its overall mean is too low.
      guard $ checkInner thr rightDroppedSequ
      return input { bsSequence      = rightDroppedSequ
                   , bsPeakLocations = rightDroppedPloc
                   }
    where
      fullSequ = bsSequence input
      fullPloc = bsPeakLocations input
-- | 'True' when the mean of the whole sequence (as computed by 'mean') is
-- strictly greater than the 'innerThreshold'.
checkInner :: Thresholds -> BasecalledSequence -> Bool
checkInner thr sequ = mean sequ > innerThreshold thr
-- | Drop from the front of the sequence as many elements as 'cutEdge'
-- reports. Yields 'Nothing' exactly when 'cutEdge' does.
doCutEdge :: Thresholds -> BasecalledSequence -> Maybe BasecalledSequence
doCutEdge t sequ = dropFront <$> cutEdge t sequ
  where
    dropFront toDrop = S.drop toDrop sequ
-- | Compute how many elements should be dropped from the front of the
-- sequence.
--
-- The scan inspects the mean over positions @0 .. frameSize - 1@ (via
-- 'meanInRange'). While that mean is below 'edgeThreshold' it moves one
-- element forward and adds one to the result.
--
-- The guards are checked in this order:
--
-- 1. sequence shorter than 'frameSize': @Just 0@;
-- 2. window mean below 'edgeThreshold' and more than one element left:
--    one plus the result for the tail;
-- 3. sequence longer than 'frameSize': @Just frameSize@;
-- 4. anything else: 'Nothing'.
--
-- NOTE(review): cases 1 and 3 are reproduced exactly as found. Confirm that
-- a too-short sequence is meant to succeed with @0@, and that a passing
-- window is meant to add a whole 'frameSize' to the drop count.
cutEdge :: Thresholds -> BasecalledSequence -> Maybe Int
cutEdge t sequ
  | len < frame                          = Just 0
  | meanInR < edgeThreshold t && len > 1 = (1 +) <$> cutEdge t (S.tail sequ)
  | len > frame                          = Just frame
  | otherwise                            = Nothing
  where
    frame = frameSize t
    len   = S.length sequ

    -- Mean over the leading frame. Laziness means this is only evaluated
    -- once the first guard has established that the sequence is at least
    -- 'frameSize' long.
    meanInR :: Double
    meanInR = meanInRange sequ (0, frame - 1)