{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} {- | Module : Analyze.Analyze Description : Parse sequence file formats and analyze them Copyright : (c) Dominik Schrempf 2018 License : GPL-3 Maintainer : dominik.schrempf@gmail.com Stability : unstable Portability : portable Creation date: Fri Oct 5 08:41:05 2018. -} module Filter.Filter ( filterRowsCmd , filterColsCmd ) where import Control.Monad (when) import Control.Monad.Logger import Control.Monad.Trans.Class import Control.Monad.Trans.Reader import qualified Data.ByteString.Lazy.Char8 as L import Data.Maybe (fromMaybe) import qualified Data.Text as T import Filter.Options import Tools import qualified ELynx.Data.Sequence.Alignment as M import qualified ELynx.Data.Sequence.Sequence as S import ELynx.Export.Sequence.Fasta import ELynx.Tools.InputOutput import ELynx.Tools.Misc filterRows :: Maybe Int -> Maybe Int -> Bool -> [S.Sequence] -> L.ByteString filterRows ml ms std ss = sequencesToFasta $ compose filters ss where filters' = map (fromMaybe id) [S.filterLongerThan <$> ml, S.filterShorterThan <$> ms] filters = if std then S.filterStandard : filters' else filters' -- | Filter sequences. filterRowsCmd :: Maybe FilePath -> FilterRows () filterRowsCmd outFileBaseName = do $(logInfo) "Command: Filter sequences of a list of sequences." FilterRowsArguments al inFile long short std <- lift ask maybe (return ()) (\val -> $(logInfo) $ T.pack $ " Keep sequences longer than " <> show val <> ".") long maybe (return ()) (\val -> $(logInfo) $ T.pack $ " Keep sequences shorter than " <> show val <> ".") short when std $ $(logInfo) " Keep sequences containing at least one standard (i.e., non-IUPAC) character." ss <- readSeqs al inFile let result = filterRows long short std ss let outFilePath = (++ ".fasta") <$> outFileBaseName out "filtered sequences" result outFilePath filterCols :: Maybe Double -> [S.Sequence] -> L.ByteString filterCols ms ss = sequencesToFasta . M.toSequences $ compose filters a where a = either error id (M.fromSequences ss) filters = map (fromMaybe id) [ M.filterColsStd <$> ms ] -- | Filter columns. filterColsCmd :: Maybe FilePath -> FilterCols () filterColsCmd outFileBaseName = do $(logInfo) "Command: Filter columns of a multi sequence alignment." FilterColsArguments al inFile standard <- lift ask case standard of Nothing -> return () Just p -> $(logInfo) $ T.pack $ " Keep columns with a proportion of standard (non-IUPAC) characters larger than " ++ show p ++ "." ss <- readSeqs al inFile let result = filterCols standard ss let outFilePath = (++ ".fasta") <$> outFileBaseName out "filtered sequences" result outFilePath