{-# LANGUAGE DeriveGeneric #-}

-- |
-- Module      :  SLynx.Simulate.Options
-- Description :  ELynxSim argument parsing
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Sun Oct  7 17:29:45 2018.
--
-- Available options:
--   -h,--help                Show this help text
--   -v,--version             Show version
--   -t,--tree-file NAME      Specify tree file NAME
--   -s,--substitution-model MODEL
--                            Set the phylogenetic substitution model; available
--                            models are shown below
--   -m,--mixture-model MODEL Set the phylogenetic mixture model; available models
--                            are shown below
--   -l,--length NUMBER       Set alignment length to NUMBER
--   -e,--edm-file NAME       empirical distribution model file NAME in Phylobayes
--                            format
--   -w,--mixture-model-weights [DOUBLE,DOUBLE,...]
--                            weights of mixture model components
--   -g,--gamma-rate-heterogeneity (NCAT, SHAPE)
--                            number of gamma rate categories and shape parameter
--   -e,--seed [INT]          Set seed for the random number generator; list of 32
--                            bit integers with up to 256 elements (default: [0])
--   -q,--quiet               Be quiet
--   -o,--output-file NAME    Specify output file NAME
module SLynx.Simulate.Options
  ( GammaRateHeterogeneityParams,
    MixtureModelGlobalNormalization (..),
    SimulateArguments (..),
    simulateArguments,
    simulateFooter,
  )
where

import Data.Aeson
import Data.Maybe
  ( fromMaybe,
    maybeToList,
  )
import ELynx.Tools.Options
import ELynx.Tools.Reproduction
import GHC.Generics
import Options.Applicative

-- | Parameters of gamma rate heterogeneity: the number of gamma rate categories
-- and the shape parameter (alpha).
type GammaRateHeterogeneityParams = (Int, Double)

-- | Normalization mode of a mixture model.
--
-- The value is selected on the command line by the @--global-normalization@
-- flag, see 'globalNormalizationFlag'.
data MixtureModelGlobalNormalization
  = -- | Normalize the mixture model globally, that is, use one normalization
    -- constant for all components. Selected when the flag is given.
    GlobalNormalization
  | -- | The default; selected when the flag is absent.
    LocalNormalization
  deriving (Show, Read, Eq, Generic)

-- | JSON decoding. The instance body is empty, so the default method
-- implementations of the class are used (presumably the 'Generic'-based
-- defaults, since the type derives 'Generic').
instance FromJSON MixtureModelGlobalNormalization

-- | JSON encoding. The instance body is empty, so the default method
-- implementations of the class are used (presumably the 'Generic'-based
-- defaults, since the type derives 'Generic').
instance ToJSON MixtureModelGlobalNormalization

-- | Arguments needed to simulate sequences.
--
-- The fields are filled, in declaration order, by the option parsers combined
-- in 'simulateArguments'.
data SimulateArguments = SimulateArguments
  { -- | Newick tree file (@-t@, @--tree-file@).
    argsTreeFile :: FilePath,
    -- | Substitution model string (@-s@, @--substitution-model@), if given.
    argsSubstitutionModelString :: Maybe String,
    -- | Mixture model string (@-m@, @--mixture-model@), if given.
    argsMixtureModelString :: Maybe String,
    -- | Normalization mode of the mixture model (@-n@,
    -- @--global-normalization@).
    argsMixtureModelGlobalNormalization :: MixtureModelGlobalNormalization,
    -- | Empirical distribution model file in Phylobayes format (@-e@,
    -- @--edm-file@), if given.
    argsEDMFile :: Maybe FilePath,
    -- | Site profile files in Phylobayes format (@-p@, @--siteprofile-files@),
    -- if given.
    argsSiteprofilesFiles :: Maybe [FilePath],
    -- | Weights of the mixture model components (@-w@,
    -- @--mixture-model-weights@), if given.
    argsMixtureWeights :: Maybe [Double],
    -- | Number of gamma rate categories and shape parameter (@-g@,
    -- @--gamma-rate-heterogeneity@), if given.
    argsGammaParams :: Maybe GammaRateHeterogeneityParams,
    -- | Alignment length (@-l@, @--length@).
    argsLength :: Int,
    -- | Seed option; read by 'getSeed' and replaced by 'setSeed'.
    argsSeed :: SeedOpt
  }
  deriving (Eq, Show, Generic)

-- | Reproducibility information and command metadata of the @simulate@ sub
-- command.
instance Reproducible SimulateArguments where
  -- Input files: the tree file first, followed by the EDM file (if any) and the
  -- site profile files (if any).
  inFiles args = argsTreeFile args : (edmFiles ++ siteprofileFiles)
    where
      edmFiles = maybeToList (argsEDMFile args)
      siteprofileFiles = fromMaybe [] (argsSiteprofilesFiles args)

  -- The output suffixes do not depend on the arguments.
  outSuffixes _ = [".model.gz", ".fasta"]

  -- A seed option is always available for this command.
  getSeed = Just . argsSeed
  setSeed args seed = args {argsSeed = seed}

  parser = simulateArguments
  cmdName = "simulate"
  cmdDsc = ["Simulate multi sequence alignments."]
  cmdFtr = simulateFooter

-- | JSON decoding. The instance body is empty, so the default method
-- implementations of the class are used (presumably the 'Generic'-based
-- defaults, since the type derives 'Generic').
instance FromJSON SimulateArguments

-- | JSON encoding. The instance body is empty, so the default method
-- implementations of the class are used (presumably the 'Generic'-based
-- defaults, since the type derives 'Generic').
instance ToJSON SimulateArguments

-- | Sub command parser.
--
-- Combines the individual option parsers applicatively; their order matches the
-- field order of 'SimulateArguments'.
--
-- Note that the help texts describe @-s@ and @-m@ as mutually exclusive, but
-- this parser performs no such check and accepts both options.
simulateArguments :: Parser SimulateArguments
simulateArguments =
  SimulateArguments
    <$> treeFileOpt
    <*> phyloSubstitutionModelOpt
    <*> phyloMixtureModelOpt
    <*> globalNormalizationFlag
    <*> maybeEDMFileOpt
    <*> maybeSiteprofilesFilesOpt
    <*> maybeMixtureWeights
    <*> maybeGammaParams
    <*> lengthOpt
    <*> seedOpt

-- | Required option @-t@, @--tree-file@: name of the Newick tree file.
treeFileOpt :: Parser FilePath
treeFileOpt =
  strOption $
    long "tree-file"
      <> short 't'
      -- Upper case, so that the placeholder matches the NAME referred to in the
      -- help text (and the metavar of the other file options).
      <> metavar "NAME"
      <> help "Read tree from Newick file NAME"

-- | Optional option @-s@, @--substitution-model@: substitution model string.
--
-- The string is passed on verbatim; it is not interpreted here. See
-- 'simulateFooter' for the syntax shown to the user.
phyloSubstitutionModelOpt :: Parser (Maybe String)
phyloSubstitutionModelOpt =
  optional $
    strOption
      ( long "substitution-model"
          <> short 's'
          <> metavar "MODEL"
          <> help "Set the phylogenetic substitution model; available models are shown below (mutually exclusive with -m option)"
      )

-- | Optional option @-m@, @--mixture-model@: mixture model string.
--
-- The string is passed on verbatim; it is not interpreted here. See
-- 'simulateFooter' for the syntax shown to the user.
phyloMixtureModelOpt :: Parser (Maybe String)
phyloMixtureModelOpt =
  optional $
    strOption
      ( long "mixture-model"
          <> short 'm'
          <> metavar "MODEL"
          <> help "Set the phylogenetic mixture model; available models are shown below (mutually exclusive with -s option)"
      )

-- | Flag @-n@, @--global-normalization@.
--
-- Yields 'LocalNormalization' when the flag is absent, and
-- 'GlobalNormalization' when it is given.
globalNormalizationFlag :: Parser MixtureModelGlobalNormalization
globalNormalizationFlag =
  flag LocalNormalization GlobalNormalization $
    long "global-normalization"
      <> short 'n'
      <> help "Normalize mixture model globally (one normalization constant for all components)"

-- | Optional option @-e@, @--edm-file@: name of an empirical distribution model
-- file in Phylobayes format.
maybeEDMFileOpt :: Parser (Maybe FilePath)
maybeEDMFileOpt =
  optional . strOption $
    long "edm-file"
      <> short 'e'
      <> metavar "NAME"
      <> help "Empirical distribution model file NAME in Phylobayes format"

-- | Optional option @-p@, @--siteprofile-files@: names of site profile files in
-- Phylobayes format.
--
-- The option takes a single argument, which is split at white space using
-- 'words'. Consequently, the individual file names cannot contain white space.
maybeSiteprofilesFilesOpt :: Parser (Maybe [FilePath])
maybeSiteprofilesFilesOpt = optional (words <$> fileNames)
  where
    -- The raw, white space separated list of file names.
    fileNames :: Parser String
    fileNames =
      strOption $
        long "siteprofile-files"
          <> short 'p'
          <> metavar "NAMES"
          <> help "File names of site profiles in Phylobayes format"

-- | Optional option @-w@, @--mixture-model-weights@: weights of the mixture
-- model components.
--
-- The argument is parsed with 'auto', that is, with the 'Read' instance of
-- @[Double]@; it has to be given in list syntax, as indicated by the metavar.
maybeMixtureWeights :: Parser (Maybe [Double])
maybeMixtureWeights =
  optional . option auto $
    long "mixture-model-weights"
      <> short 'w'
      <> metavar "\"[DOUBLE,DOUBLE,...]\""
      <> help "Weights of mixture model components"

-- | Optional option @-g@, @--gamma-rate-heterogeneity@: number of gamma rate
-- categories and shape parameter.
--
-- The argument is parsed with 'auto', that is, with the 'Read' instance of
-- 'GammaRateHeterogeneityParams'; it has to be given in tuple syntax, as
-- indicated by the metavar.
maybeGammaParams :: Parser (Maybe GammaRateHeterogeneityParams)
maybeGammaParams =
  optional . option auto $
    long "gamma-rate-heterogeneity"
      <> short 'g'
      <> metavar "\"(NCAT,SHAPE)\""
      <> help "Number of gamma rate categories and shape parameter"

-- | Required option @-l@, @--length@: length of the simulated alignment.
--
-- Negative numbers are rejected with a parse error, because a negative
-- alignment length is meaningless.
lengthOpt :: Parser Int
lengthOpt =
  option
    nonNegativeInt
    ( long "length"
        <> short 'l'
        <> metavar "NUMBER"
        <> help "Set alignment length to NUMBER"
    )
  where
    -- Read an 'Int' like 'auto' does, but fail on negative values.
    nonNegativeInt :: ReadM Int
    nonNegativeInt = do
      n <- auto
      if n < 0
        then readerError ("Alignment length must not be negative, but got " ++ show n ++ ".")
        else pure n

-- | The model specification is somewhat complicated, so we need to provide
-- additional help.
--
-- The footer consists of the help on substitution models (@-s@) followed by the
-- help on mixture models (@-m@), one string per line of text.
simulateFooter :: [String]
simulateFooter = substitutionModelHelp ++ mixtureModelHelp
  where
    -- Syntax and examples for the @-s@ option.
    substitutionModelHelp :: [String]
    substitutionModelHelp =
      [ "Substitution models:",
        "-s \"MODEL[PARAMETER,PARAMETER,...]{STATIONARY_DISTRIBUTION}\"",
        "   Supported DNA models: JC, F81, HKY, GTR4.",
        "     For example,",
        "       -s HKY[KAPPA]{DOUBLE,DOUBLE,DOUBLE,DOUBLE}",
        "       -s GTR4[e_AC,e_AG,e_AT,e_CG,e_CT,e_GT]{DOUBLE,DOUBLE,DOUBLE,DOUBLE}",
        "          where the 'e_XY' are the exchangeabilities from nucleotide X to Y.",
        "   Supported Protein models: Poisson, Poisson-Custom, LG, LG-Custom, WAG, WAG-Custom, GTR20.",
        "     MODEL-Custom means that only the exchangeabilities of MODEL are used,",
        "     and a custom stationary distribution is provided.",
        "     For example,",
        "       -s LG",
        "       -s LG-Custom{...}",
        "       -s GTR20[e_AR,e_AN,...]{...}",
        "          the 'e_XY' are the exchangeabilities from amino acid X to Y (alphabetical order).",
        "   Notes: The F81 model for DNA is equivalent to the Poisson-Custom for proteins.",
        "          The GTR4 model for DNA is equivalent to the GTR20 for proteins."
      ]
    -- Syntax and examples for the @-m@ option; starts with an empty line that
    -- separates it from the substitution model help.
    mixtureModelHelp :: [String]
    mixtureModelHelp =
      [ "",
        "Mixture models:",
        "-m \"MIXTURE(SUBSTITUTION_MODEL_1,SUBSTITUTION_MODEL_2[PARAMETERS]{STATIONARY_DISTRIBUTION},...)\"",
        "   For example,",
        "     -m \"MIXTURE(JC,HKY[6.0]{0.3,0.2,0.2,0.3})\"",
        "Mixture weights have to be provided with the -w option.",
        "",
        "Special mixture models:",
        "-m CXX",
        "   where XX is 10, 20, 30, 40, 50, or 60; CXX models, Quang et al., 2008.",
        "-m \"EDM(EXCHANGEABILITIES)\"",
        "   Arbitrary empirical distribution mixture (EDM) models.",
        "   Stationary distributions have to be provided with the -e or -p option.",
        "   For example,",
        "     LG exchangeabilities with stationary distributions given in FILE.",
        "     -m \"EDM(LG-Custom)\" -e FILE",
        "     LG exchangeabilities with site profiles (Phylobayes) given in FILES.",
        "     -m \"EDM(LG-Custom)\" -p FILES",
        "For special mixture models, mixture weights are optional."
      ]