{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      :  Analyze.Analyze
-- Description :  Parse sequence file formats and analyze them
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct  5 08:41:05 2018.
module SLynx.Filter.Filter
  ( filterRowsCmd,
    filterColsCmd,
  )
where

import Control.Monad (when)
import Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Maybe (fromMaybe)
import qualified ELynx.Data.Sequence.Alignment as M
import qualified ELynx.Data.Sequence.Sequence as S
import ELynx.Export.Sequence.Fasta
import ELynx.Tools
import SLynx.Filter.Options
import SLynx.Tools

filterRows :: Maybe Int -> Maybe Int -> Bool -> [S.Sequence] -> BL.ByteString
filterRows :: Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
ml Maybe Int
ms Bool
std [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString) -> [Sequence] -> ByteString
forall a b. (a -> b) -> a -> b
$ [[Sequence] -> [Sequence]] -> [Sequence] -> [Sequence]
forall a. [a -> a] -> a -> a
compose [[Sequence] -> [Sequence]]
filters [Sequence]
ss
  where
    filters' :: [[Sequence] -> [Sequence]]
filters' =
      (Maybe ([Sequence] -> [Sequence]) -> [Sequence] -> [Sequence])
-> [Maybe ([Sequence] -> [Sequence])] -> [[Sequence] -> [Sequence]]
forall a b. (a -> b) -> [a] -> [b]
map (([Sequence] -> [Sequence])
-> Maybe ([Sequence] -> [Sequence]) -> [Sequence] -> [Sequence]
forall a. a -> Maybe a -> a
fromMaybe [Sequence] -> [Sequence]
forall a. a -> a
id) [Int -> [Sequence] -> [Sequence]
S.filterLongerThan (Int -> [Sequence] -> [Sequence])
-> Maybe Int -> Maybe ([Sequence] -> [Sequence])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ml, Int -> [Sequence] -> [Sequence]
S.filterShorterThan (Int -> [Sequence] -> [Sequence])
-> Maybe Int -> Maybe ([Sequence] -> [Sequence])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ms]
    filters :: [[Sequence] -> [Sequence]]
filters = if Bool
std then [Sequence] -> [Sequence]
S.filterStandard ([Sequence] -> [Sequence])
-> [[Sequence] -> [Sequence]] -> [[Sequence] -> [Sequence]]
forall a. a -> [a] -> [a]
: [[Sequence] -> [Sequence]]
filters' else [[Sequence] -> [Sequence]]
filters'

-- | Filter sequences.
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd = do
  (FilterRowsArguments Alphabet
al FilePath
inFile Maybe Int
long Maybe Int
short Bool
std) <- Environment FilterRowsArguments -> FilterRowsArguments
forall a. Environment a -> a
localArguments (Environment FilterRowsArguments -> FilterRowsArguments)
-> ReaderT
     (Environment FilterRowsArguments)
     IO
     (Environment FilterRowsArguments)
-> ReaderT (Environment FilterRowsArguments) IO FilterRowsArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment FilterRowsArguments)
  IO
  (Environment FilterRowsArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  ELynx FilterRowsArguments ()
-> (Int -> ELynx FilterRowsArguments ())
-> Maybe Int
-> ELynx FilterRowsArguments ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    (() -> ELynx FilterRowsArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
    ( \Int
val ->
        FilePath -> ELynx FilterRowsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx FilterRowsArguments ())
-> FilePath -> ELynx FilterRowsArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath
"  Keep sequences longer than " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
val FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
"."
    )
    Maybe Int
long
  ELynx FilterRowsArguments ()
-> (Int -> ELynx FilterRowsArguments ())
-> Maybe Int
-> ELynx FilterRowsArguments ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
    (() -> ELynx FilterRowsArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
    ( \Int
val ->
        FilePath -> ELynx FilterRowsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx FilterRowsArguments ())
-> FilePath -> ELynx FilterRowsArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath
"  Keep sequences shorter than " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
val FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
"."
    )
    Maybe Int
short
  Bool
-> ELynx FilterRowsArguments () -> ELynx FilterRowsArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
std (ELynx FilterRowsArguments () -> ELynx FilterRowsArguments ())
-> ELynx FilterRowsArguments () -> ELynx FilterRowsArguments ()
forall a b. (a -> b) -> a -> b
$
    FilePath -> ELynx FilterRowsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS
      FilePath
"  Keep sequences containing at least one standard (i.e., non-IUPAC) character."
  [Sequence]
ss <- Alphabet
-> FilePath -> Logger (Environment FilterRowsArguments) [Sequence]
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> FilePath -> Logger e [Sequence]
readSeqs Alphabet
al FilePath
inFile
  let result :: ByteString
result = Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
long Maybe Int
short Bool
std [Sequence]
ss
  FilePath -> ByteString -> FilePath -> ELynx FilterRowsArguments ()
forall a.
Reproducible a =>
FilePath -> ByteString -> FilePath -> ELynx a ()
out FilePath
"filtered sequences" ByteString
result FilePath
".fasta"

filterCols :: Maybe Double -> [S.Sequence] -> BL.ByteString
filterCols :: Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
ms [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString)
-> (Alignment -> [Sequence]) -> Alignment -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences (Alignment -> ByteString) -> Alignment -> ByteString
forall a b. (a -> b) -> a -> b
$ [Alignment -> Alignment] -> Alignment -> Alignment
forall a. [a -> a] -> a -> a
compose [Alignment -> Alignment]
filters Alignment
a
  where
    a :: Alignment
a = (FilePath -> Alignment)
-> (Alignment -> Alignment)
-> Either FilePath Alignment
-> Alignment
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either FilePath -> Alignment
forall a. HasCallStack => FilePath -> a
error Alignment -> Alignment
forall a. a -> a
id ([Sequence] -> Either FilePath Alignment
M.fromSequences [Sequence]
ss)
    filters :: [Alignment -> Alignment]
filters = (Maybe (Alignment -> Alignment) -> Alignment -> Alignment)
-> [Maybe (Alignment -> Alignment)] -> [Alignment -> Alignment]
forall a b. (a -> b) -> [a] -> [b]
map ((Alignment -> Alignment)
-> Maybe (Alignment -> Alignment) -> Alignment -> Alignment
forall a. a -> Maybe a -> a
fromMaybe Alignment -> Alignment
forall a. a -> a
id) [Double -> Alignment -> Alignment
M.filterColsStd (Double -> Alignment -> Alignment)
-> Maybe Double -> Maybe (Alignment -> Alignment)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Double
ms]

-- | Filter columns.
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd = do
  (FilterColsArguments Alphabet
al FilePath
inFile Maybe Double
standard) <- Environment FilterColsArguments -> FilterColsArguments
forall a. Environment a -> a
localArguments (Environment FilterColsArguments -> FilterColsArguments)
-> ReaderT
     (Environment FilterColsArguments)
     IO
     (Environment FilterColsArguments)
-> ReaderT (Environment FilterColsArguments) IO FilterColsArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment FilterColsArguments)
  IO
  (Environment FilterColsArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  case Maybe Double
standard of
    Maybe Double
Nothing -> () -> ELynx FilterColsArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    Just Double
p ->
      FilePath -> ELynx FilterColsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx FilterColsArguments ())
-> FilePath -> ELynx FilterColsArguments ()
forall a b. (a -> b) -> a -> b
$
        FilePath
"  Keep columns with a proportion of standard (non-IUPAC) characters larger than "
          FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Double -> FilePath
forall a. Show a => a -> FilePath
show Double
p
          FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"."
  [Sequence]
ss <- Alphabet
-> FilePath -> Logger (Environment FilterColsArguments) [Sequence]
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> FilePath -> Logger e [Sequence]
readSeqs Alphabet
al FilePath
inFile
  let result :: ByteString
result = Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
standard [Sequence]
ss
  FilePath -> ByteString -> FilePath -> ELynx FilterColsArguments ()
forall a.
Reproducible a =>
FilePath -> ByteString -> FilePath -> ELynx a ()
out FilePath
"filtered sequences" ByteString
result FilePath
".fasta"