{-# LANGUAGE OverloadedStrings #-}
module SLynx.Filter.Filter
( filterRowsCmd,
filterColsCmd,
)
where
import Control.Monad (when)
import Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Maybe (fromMaybe)
import qualified ELynx.Sequence.Alignment as M
import ELynx.Sequence.Export.Fasta
import qualified ELynx.Sequence.Sequence as S
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import SLynx.Filter.Options
import SLynx.Tools
compose :: [a -> a] -> a -> a
compose :: forall a. [a -> a] -> a -> a
compose = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall b c a. (b -> c) -> (a -> b) -> a -> c
(.)) forall a. a -> a
id
filterRows :: Maybe Int -> Maybe Int -> Bool -> [S.Sequence] -> BL.ByteString
filterRows :: Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
ml Maybe Int
ms Bool
std [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta forall a b. (a -> b) -> a -> b
$ forall a. [a -> a] -> a -> a
compose [[Sequence] -> [Sequence]]
filters [Sequence]
ss
where
filters' :: [[Sequence] -> [Sequence]]
filters' =
forall a b. (a -> b) -> [a] -> [b]
map (forall a. a -> Maybe a -> a
fromMaybe forall a. a -> a
id) [Int -> [Sequence] -> [Sequence]
S.filterLongerThan forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ml, Int -> [Sequence] -> [Sequence]
S.filterShorterThan forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ms]
filters :: [[Sequence] -> [Sequence]]
filters = if Bool
std then [Sequence] -> [Sequence]
S.filterStandard forall a. a -> [a] -> [a]
: [[Sequence] -> [Sequence]]
filters' else [[Sequence] -> [Sequence]]
filters'
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd = do
(FilterRowsArguments Alphabet
al String
inFile Maybe Int
long Maybe Int
short Bool
std) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
(forall (m :: * -> *) a. Monad m => a -> m a
return ())
( \Int
val ->
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
" Keep sequences longer than " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
val forall a. Semigroup a => a -> a -> a
<> String
"."
)
Maybe Int
long
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
(forall (m :: * -> *) a. Monad m => a -> m a
return ())
( \Int
val ->
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
" Keep sequences shorter than " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Int
val forall a. Semigroup a => a -> a -> a
<> String
"."
)
Maybe Int
short
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
std forall a b. (a -> b) -> a -> b
$
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS
String
" Keep sequences containing at least one standard (i.e., non-IUPAC) character."
[Sequence]
ss <- forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
let result :: ByteString
result = Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
long Maybe Int
short Bool
std [Sequence]
ss
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"filtered sequences" ByteString
result String
".fasta"
filterCols :: Maybe Double -> [S.Sequence] -> BL.ByteString
filterCols :: Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
ms [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences forall a b. (a -> b) -> a -> b
$ forall a. [a -> a] -> a -> a
compose [Alignment -> Alignment]
filters Alignment
a
where
a :: Alignment
a = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id ([Sequence] -> Either String Alignment
M.fromSequences [Sequence]
ss)
filters :: [Alignment -> Alignment]
filters = forall a b. (a -> b) -> [a] -> [b]
map (forall a. a -> Maybe a -> a
fromMaybe forall a. a -> a
id) [Double -> Alignment -> Alignment
M.filterColsStd forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Double
ms]
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd = do
(FilterColsArguments Alphabet
al String
inFile Maybe Double
standard) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
case Maybe Double
standard of
Maybe Double
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just Double
p ->
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$
String
" Keep columns with a proportion of standard (non-IUPAC) characters larger than "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Double
p
forall a. [a] -> [a] -> [a]
++ String
"."
[Sequence]
ss <- forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
let result :: ByteString
result = Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
standard [Sequence]
ss
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"filtered sequences" ByteString
result String
".fasta"