{-# Options_GHC -fno-cse #-}

module Main where

import           Control.Monad.IO.Class (liftIO, MonadIO)
import           Control.Monad
import           Control.Error
import           Data.FileEmbed
import qualified Data.ByteString.Char8 as BS
import           System.Console.CmdArgs
import           System.Directory (createDirectoryIfMissing)
import           System.Exit (exitSuccess, exitFailure)
import           System.FilePath

import BioInf.MutationOrder
import BioInf.MutationOrder.RNA
import BioInf.MutationOrder.SequenceDB

data Options
  = MutationOrder
    { infiles       :: [FilePath]
    , workdb        :: FilePath
    , temperature   :: Double
    , fillweight    :: FillWeight
    , fillstyle     :: FillStyle
    , cooptcount    :: Int
    , cooptprint    :: Int
    , outprefix     :: FilePath
    , scoretype     :: ScoreType
    , positivesquared :: Bool
    , onlypositive  :: Bool
    , equalStart    :: Bool
    , posscaled :: Maybe (Double,Double)
    , lkupfile :: Maybe FilePath
    , showmanual    :: Bool
    , everykth :: Int
    }
  | GenSequences
    { ancestralSequence ∷ FilePath
    -- ^ the (presumably) ancestral sequence from which to start the mutation
    -- order prediction.
    , extantSequence ∷ FilePath
    -- ^ the extant or target sequence to which to mutate.
    -- 
    , globalbackmutations ∷ Int
    -- ^ how many global back mutations to allow. For example, if set to @1@,
    -- then for each position in the sequence all "unobserved" nucleotides are
    -- possible with one switch to and one switch back. It is generally a bad
    -- idea to have more than @1@ here and we only allow exactly 1 backmutation
    -- with a specialized algorithm (that still takes quite a while to run).
    , backmutationcolumns ∷ [Int]
    -- ^ Additional columns for backmutations, may overlap with observed
    -- mutations.
    , sequenceLimit ∷ Int
    -- ^ Complain if the number of sequences is above this limit.
    , workdb ∷ FilePath
    -- ^ will write files to @workdb </> seqs@
    , prefixlength ∷ Int
    -- ^ how many characters as prefix for writing
    , seqsperfile ∷ Int
    -- ^ how many sequences to write into each file
    , alphabet ∷ String
    -- ^ the different characters that are allowed
    }
  | Backmutation
    { ancestralSequence ∷ FilePath
    -- ^ the (presumably) ancestral sequence from which to start the mutation
    -- order prediction.
    , extantSequence ∷ FilePath
    -- ^ the extant or target sequence to which to mutate.
    , workdb ∷ FilePath
    -- ^
    , position ∷ Int
    -- ^
    , alphabet ∷ String
    , scoretype     :: ScoreType
    , positivesquared :: Bool
    , onlypositive  :: Bool
    }
  deriving (Show,Data,Typeable)

oMutationOrder = MutationOrder
  { infiles       = def &= args
  , workdb        = "work.db" &= help "name of the database to store intermediates in"
  , temperature   = 1.0  &= help "lower temperatures favor the more optimal paths, defaults to 1.0"
  , fillweight    = FWlog &= help "scale options for probability plots: fwlog, fwlinear, fwfill"
  , fillstyle     = FSfull &= help "fill options for probability plots: fsopacitylog, fsopacitylinear, fsfull"
  , cooptcount    = 1000  &= help "how many co-optimals to count"
  , cooptprint    = 2   &= help "how many co-optimals to actually print out"
  , outprefix     = "tmp" &= help "prefix for output files"
  , scoretype     = Centroid &= help "choose 'mfe', 'centroid', 'pairdistmfe', or 'pairdistcen' for the evaluation of each mutational step"
  , positivesquared = False &= help "square positive energies to penalize worse structures"
  , onlypositive  = False &= help "minimize only over penalties, not energy gains"
  , equalStart    = False &= help "run mea with equal start probabilities"
  , posscaled     = Nothing &= help "--posscaled=x,y   scale all values >= x by using y as exponent"
  , lkupfile = Nothing  &= help "developer option: if an RNAfold file with foldings exists, then use it"
  , showmanual = False  &= help "shows the manual"
  , everykth = 1 &= help "for optimal history sampling, choose every kth item only"
  }

oGenSequences = GenSequences
  { ancestralSequence = def
  , extantSequence = def
  , globalbackmutations = 0
  , backmutationcolumns = []
  , sequenceLimit = 1000000
  , workdb = def
  , prefixlength = 4
  , seqsperfile = 10000
  , alphabet = ""
  }

oBackmutation = Backmutation
  { ancestralSequence = def
  , extantSequence = def
  , workdb = def
  , position = -1
  , alphabet = ""
  , scoretype     = Centroid &= help "choose 'mfe', 'centroid', 'pairdistmfe', or 'pairdistcen' for the evaluation of each mutational step"
  , positivesquared = False &= help "square positive energies to penalize worse structures"
  , onlypositive  = False &= help "minimize only over penalties, not energy gains"
  }

main :: IO ()
main = do
  o <- cmdArgs $ modes [oMutationOrder &= auto, oGenSequences, oBackmutation] &= verbosity
  case o of
    MutationOrder{} → mainProgram o
    GenSequences{}  → genSequences o
    Backmutation{}  → runBackmutation o

-- | This is a simple wrapper around the RNA landscape creation. Landscape
-- creation generates all sequences between ancestral and extant sequence. It
-- will take into account additional global backmutations and further
-- backmutation columns. Note that this can *very easily* lead to combinatorial
-- explosion.
--
-- Needs extra options on: (i) globally active backmutations. For each position
-- try all four nucleotides. (ii) Additional active columns. For each position,
-- try all four nucleotides.

genSequences o = do
  let GenSequences{..} = o
  e ← runExceptT $ do
    when (null ancestralSequence) $ throwE "ancestral sequence file?"
    when (null extantSequence) $ throwE "extant sequence file?"
    when (null workdb) $ throwE "work db directory?"
    when (null alphabet) $ throwE "use --alphabet=ACGT (or ACGU if your fasta files are RNA-based)"
    a ← liftIO $ stupidReader ancestralSequence
    e ← liftIO $ stupidReader extantSequence
    (numSeqs, origs, sqs) ← createRNAlandscape2
                              alphabet
                              (Left $ GlobalBackmutations globalbackmutations)
                              (map BackmutationCol backmutationcolumns)
                              (Ancestral a) (Extant e)
    unless (numSeqs <= sequenceLimit) $ throwE $ "combinatiorial explosion (" ++ show numSeqs ++ "): reduce search space or allow for higher --sequencelimit"
    -- write out sequences
    let sdb = workdb </> "sequences"
    liftIO $ createDirectoryIfMissing True sdb
    writeSequenceFiles sdb prefixlength seqsperfile
      $ origs ++ concatMap (\(_,_,xs) → xs) sqs
  case e of
    Left err → print err >> exitFailure
    Right () → return ()
  {-
  ancestral <- stupidReader $ infiles !! 0
  current   <- stupidReader $ infiles !! 1
  let ls = snd $ createRNAlandscape Nothing False ancestral current
  forM_ ls $ \(k,sq) -> BS.putStrLn sq
  return ()
  -}

embeddedManual = $(embedFile "README.md")

mainProgram oOptions = do
  let MutationOrder{..} = oOptions
  when showmanual $ do
    BS.putStrLn embeddedManual
    exitSuccess
  unless (length infiles == 2) $ do
    BS.putStrLn embeddedManual
    putStrLn "\n\n\nThis program expects exactly two equal-length fasta files as input"
    exitFailure
  isL <- isLoud
  runMutationOrder isL fillweight fillstyle scoretype positivesquared posscaled onlypositive cooptcount cooptprint lkupfile outprefix workdb temperature equalStart infiles everykth

runBackmutation ∷ Options → IO ()
runBackmutation Backmutation{..} = do
  e ← runExceptT $ do
    when (null ancestralSequence) $ throwE "ancestral sequence file?"
    when (null extantSequence) $ throwE "extant sequence file?"
    when (null workdb) $ throwE "work db directory?"
    when (null alphabet) $ throwE "use --alphabet=ACGT (or ACGU if your fasta files are RNA-based)"
    a ← liftIO $ Ancestral <$> stupidReader ancestralSequence
    e ← liftIO $ Extant    <$> stupidReader extantSequence
    let scaleFun = case scoretype of
                      Centroid → centroidDelta' onlypositive positivesquared
                      Mfe → mfeDelta' onlypositive positivesquared
                      PairDistMfe → mfebpdist' onlypositive positivesquared
                      PairDistCen → centroidbpdist' onlypositive positivesquared
    runBackmutationVariants scaleFun workdb alphabet a e position
  case e of
    Left err → print (err ∷ String) >> exitFailure
    Right () → return ()
  return ()