{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      :  Analyze.Analyze
-- Description :  Parse sequence file formats and analyze them
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Fri Oct  5 08:41:05 2018.
module SLynx.SubSample.SubSample
  ( subSampleCmd,
  )
where

import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Logger
import Control.Monad.Trans.Reader (ask)
import qualified Data.Text as T
import qualified ELynx.Data.Sequence.Alignment as M
import ELynx.Export.Sequence.Fasta
import ELynx.Tools
import SLynx.SubSample.Options
import SLynx.Tools
import System.Random.MWC

-- | Sub sample sequences.
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd :: ELynx SubSampleArguments ()
subSampleCmd = do
  (SubSampleArguments Alphabet
al FilePath
inFile Int
nSites Int
nAlignments (Fixed Vector Word32
s)) <- Arguments SubSampleArguments -> SubSampleArguments
forall a. Arguments a -> a
local (Arguments SubSampleArguments -> SubSampleArguments)
-> ReaderT
     (Arguments SubSampleArguments)
     (LoggingT IO)
     (Arguments SubSampleArguments)
-> ReaderT
     (Arguments SubSampleArguments) (LoggingT IO) SubSampleArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Arguments SubSampleArguments)
  (LoggingT IO)
  (Arguments SubSampleArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  $(Int
FilePath
LogLevel
FilePath -> Text
FilePath -> FilePath -> FilePath -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SubSampleArguments ()
(Text -> ELynx SubSampleArguments ())
-> (Text -> Text) -> Text -> ELynx SubSampleArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: FilePath -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Command: Sub sample from a multi sequence alignment."
  $(Int
FilePath
LogLevel
FilePath -> Text
FilePath -> FilePath -> FilePath -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SubSampleArguments ()
(Text -> ELynx SubSampleArguments ())
-> (Text -> Text) -> Text -> ELynx SubSampleArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: FilePath -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx SubSampleArguments ())
-> Text -> ELynx SubSampleArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$ FilePath
"  Sample " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
nSites FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
" sites."
  $(Int
FilePath
LogLevel
FilePath -> Text
FilePath -> FilePath -> FilePath -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx SubSampleArguments ()
(Text -> ELynx SubSampleArguments ())
-> (Text -> Text) -> Text -> ELynx SubSampleArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: FilePath -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx SubSampleArguments ())
-> Text -> ELynx SubSampleArguments ()
forall a b. (a -> b) -> a -> b
$
    FilePath -> Text
T.pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$
      FilePath
"  Sample "
        FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
nAlignments
        FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
" multi sequence alignments."
  [Sequence]
ss <- Alphabet
-> FilePath
-> ReaderT (Arguments SubSampleArguments) (LoggingT IO) [Sequence]
forall (m :: * -> *).
(MonadIO m, MonadLogger m) =>
Alphabet -> FilePath -> m [Sequence]
readSeqs Alphabet
al FilePath
inFile
  Gen RealWorld
gen <- IO (Gen RealWorld)
-> ReaderT
     (Arguments SubSampleArguments) (LoggingT IO) (Gen RealWorld)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Gen RealWorld)
 -> ReaderT
      (Arguments SubSampleArguments) (LoggingT IO) (Gen RealWorld))
-> IO (Gen RealWorld)
-> ReaderT
     (Arguments SubSampleArguments) (LoggingT IO) (Gen RealWorld)
forall a b. (a -> b) -> a -> b
$ Vector Word32 -> IO (Gen (PrimState IO))
forall (m :: * -> *) (v :: * -> *).
(PrimMonad m, Vector v Word32) =>
v Word32 -> m (Gen (PrimState m))
initialize Vector Word32
s
  let a :: Alignment
a = (FilePath -> Alignment)
-> (Alignment -> Alignment)
-> Either FilePath Alignment
-> Alignment
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either FilePath -> Alignment
forall a. HasCallStack => FilePath -> a
error Alignment -> Alignment
forall a. a -> a
id ([Sequence] -> Either FilePath Alignment
M.fromSequences [Sequence]
ss)
  [Alignment]
samples <- IO [Alignment]
-> ReaderT (Arguments SubSampleArguments) (LoggingT IO) [Alignment]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Alignment]
 -> ReaderT
      (Arguments SubSampleArguments) (LoggingT IO) [Alignment])
-> IO [Alignment]
-> ReaderT (Arguments SubSampleArguments) (LoggingT IO) [Alignment]
forall a b. (a -> b) -> a -> b
$ Int -> IO Alignment -> IO [Alignment]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nAlignments (IO Alignment -> IO [Alignment]) -> IO Alignment -> IO [Alignment]
forall a b. (a -> b) -> a -> b
$ Int -> Alignment -> Gen (PrimState IO) -> IO Alignment
forall (m :: * -> *).
PrimMonad m =>
Int -> Alignment -> Gen (PrimState m) -> m Alignment
M.randomSubSample Int
nSites Alignment
a Gen RealWorld
Gen (PrimState IO)
gen
  let results :: [ByteString]
results = (Alignment -> ByteString) -> [Alignment] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ([Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString)
-> (Alignment -> [Sequence]) -> Alignment -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences) [Alignment]
samples
      sfxs :: [FilePath]
sfxs = Int -> FilePath -> [FilePath]
getOutSuffixes Int
nAlignments FilePath
"fasta"
  (ByteString -> FilePath -> ELynx SubSampleArguments ())
-> [ByteString] -> [FilePath] -> ELynx SubSampleArguments ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (FilePath -> ByteString -> FilePath -> ELynx SubSampleArguments ()
forall a.
Reproducible a =>
FilePath -> ByteString -> FilePath -> ELynx a ()
out FilePath
"sub sampled multi sequence alignments") [ByteString]
results [FilePath]
sfxs