{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE RecordWildCards #-}

module BioInf.RNAdesign where

import qualified Data.Array.IArray as A
import System.IO.Unsafe
import Control.Monad.IO.Class
import Control.Monad.Primitive.Class
import Control.Monad.Primitive
import System.Random.MWC.Monad
import Control.Monad
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector as V
import Data.List (sort,group)
import qualified Data.Map as M
import qualified Data.Vector.Fusion.Stream.Monadic as SM
import Control.Arrow
import System.IO.Unsafe -- TODO remove
import Data.List
import Data.Tuple.Select

import Biobase.Primary
import Biobase.Secondary.Diagrams
import Biobase.Secondary
import Biobase.Vienna
import qualified BioInf.ViennaRNA.Bindings  as RNA     -- NOTE removes the ability to call into ghci!
import BioInf.ViennaRNA.Eval

import BioInf.RNAdesign.Graph
import BioInf.RNAdesign.OptParser
import BioInf.RNAdesign.Assignment
import BioInf.RNAdesign.LogMultinomial

import Debug.Trace


-- A single candidate, with its sequence and the score, this sequence receives.
-- Candidates are ordered by their scores.

data Candidate = Candidate
  { candidate :: Primary
  , score :: Score
  } deriving (Eq,Show)

instance Ord Candidate where
  (Candidate _ a) <= (Candidate _ b) = ropt a <= ropt b

-- | Create an initial, legal, candidate. Give it a really bad score.

mkInitial :: (MonadPrim m, PrimMonad m) => Int -> DesignProblem -> Rand m Candidate
mkInitial l dp = do
  let z = VU.replicate l nA
  c <- foldM mutateOneAssignment z $ assignments dp
  return $ Candidate c (Score [] 999999)

{-
-- | Sum probabilities over base pairs in the structural constraints

sumProbStructures :: Primary -> [D1Secondary] -> Double
sumProbStructures inp ss = s where
  s = sum $ map ((bp A.!) . first (+1) . second (+1)) ps
  ps = concatMap snd (map fromD1S ss :: [(Int,[PairIdx])])
  bp = let (_,_,bp') = unsafePerformIO (RNA.part $ concatMap show $ VU.toList inp) in bp'

sumProbNotStructures :: Primary -> [D1Secondary] -> Double
sumProbNotStructures inp ss = undefined

probabilityDefect inp str = s where
  s = sum (map (bp A.!) ps) - sum (map (bp A.!) ups)
  ups = [ (i,j) | i<-[1..l], j<-[i..l] ] \\ ps
  (l,ps) = second (map (first (+1) . second (+1))) $ fromD1S str :: (Int,[PairIdx])
  bp = let (_,_,bp') = unsafePerformIO (RNA.part $ concatMap show $ VU.toList inp) in bp'
-}

probabilityDefectAll inp ss = s where
  ca :: A.Array (Int,Int) Double
  ca = A.amap (\c -> c / n) . A.accumArray (+) 0 ((1,1),(l,l)) $ zip ps (repeat 1)
  n = genericLength ss
--  s = sum (map (abs . (n-) . (bp A.!)) ps) + sum (map (bp A.!) ups)
  s = sum (map (\ix -> abs $ ca A.! ix - bp A.! ix) ps) + sum (map (bp A.!) ups)
  l = VU.length inp
  ups = [ (i,j) | i<-[1..l], j<-[i..l] ] \\ ps
  ps = map (first (+1) . second (+1)) $ concatMap snd (map fromD1S ss :: [(Int,[PairIdx])])
  bp = let (_,_,bp') = unsafePerformIO (RNA.part $ concatMap show $ VU.toList inp) in bp'

ensembleDefect inp str = s where
  s = n - 2 * sps - sus
  n = fromIntegral $ VU.length inp
  sps = sum $ map (bp A.!) ps
  sus = sum $ [bp A.! (i,j) | i <- us, j <- [i..n]]
  ps = map (first (+1) . second (+1)) $ snd $ (fromD1S str :: (Int,[PairIdx]))
  us = [1..n] \\ (map fst ps ++ map snd ps)
  (_,_,bp) = unsafePerformIO (RNA.part $ concatMap show $ VU.toList inp)

-- | Resolve the optimization task. Each possible optimization function is
-- given here. Try to keep the functions defined here in sync with some
-- (non-existent ;-) documentation.

resolveOpt :: String -> t -> Primary -> [D1Secondary] -> Double
resolveOpt optfun ener inp secs = parseOptString l sops mops gops props optfun where
  l = length secs
  sops =
    [ ("eos"   , \k -> unsafePerformIO $ RNA.eos (concatMap show (VU.toList inp)) (fromD1S $ secs !! (k-1)))
    , ("ed"    , \k -> ensembleDefect inp (secs !! (k-1))) -- ensemble defect
--    , ("pdef"  , \k -> probabilityDefect inp (secs !! (k-1)))
--    [ ("EOS",\k -> let (Deka e) = fst $ rnaEval ener inp $ secs !! (k-1) in fromIntegral e / 100)
--    , ("PF" ...
    ]
  mops =
    [ ("sum",sum)
    , ("max",maximum)
    , ("min",minimum)
    ]
  gops =
    [ ("Ged"   , probabilityDefectAll inp secs) -- global ensemble defect a la ``me''
    , ("gibbs" , sel1 . unsafePerformIO $ RNA.part (concatMap show (VU.toList inp)))
    , ("mfe"   , fst  . unsafePerformIO $ RNA.mfe (concatMap show (VU.toList inp)))
--    , ("Pin" , sumProbStructures inp secs)
    ]
  props =
    [ ("logMN", \ps -> lmn ps inp)
    ]

lmn ps inp = logMultinomial l p c where
  l   = VU.length inp
  p   = VU.fromList ps
  cM  = M.fromList . map (\z -> (head z, length z)) . group . sort $ VU.toList inp
  c   = VU.fromList $ map (\z -> M.findWithDefault 0 z cM) acgu

data Score = Score
  { eoss :: [Deka]
  , ropt :: Double
  } deriving (Eq,Show,Read)

instance Ord Score where
  (Score _ a) <= (Score _ b) = a<=b

scoreSequence :: String -> Vienna2004 -> DesignProblem -> Primary -> Score
scoreSequence optfun ener DesignProblem{..} s = score where
  score = Score
    { eoss = error "don't call this" -- map (fst . rnaEval ener s) structures
    , ropt = resolveOpt optfun ener s structures
    }

-- | This structure defines a "design problem"

data DesignProblem = DesignProblem
  { structures  :: [D1Secondary]
  , assignments :: [Assignment]
  } deriving (Eq,Read,Show)

-- | Given a set of structures, create the set of independent graphs and
-- assignment possibilities.

mkDesignProblem :: Int -> [String] -> [String] -> DesignProblem
mkDesignProblem asnLimit xs scs = dp where
  dp = DesignProblem
        { structures  = map mkD1S xs
        , assignments = as
        }
  gs = independentGraphs xs
  as = map (allCandidates asnLimit sv) gs
  ss = M.map fixup . M.unionsWith (++) $ map (M.fromList . zip [0..] . (map ((:[]). mkNuc))) scs
  sv = V.fromList $ map (\k -> M.findWithDefault acgu k ss) [0 .. length (head xs) - 1]
  fixup zs = filter (/=nN) $ if (all (==nN) zs) then acgu else zs

unfoldStreamNew
  :: forall m . (MonadPrim m, PrimMonad m)
  => Int -> Int -> Int -> (Primary -> Score) -> (Candidate -> Candidate -> Rand m Bool) -> DesignProblem -> Candidate -> SM.Stream (Rand m) Candidate
unfoldStreamNew burnin number thin score f dp = go where
  go s  = SM.map snd                                                          -- remove remaining indices from stream
        . SM.take number                                                      -- take the number of sequences we want
        . SM.filter ((==0) . flip mod thin . fst)                             -- keep only every thin'th sequence
        . SM.indexed                                                          -- add index
        . SM.drop burnin                                                      -- drop the burnin sequences
        . SM.drop 1                                                           -- drop original input
        . SM.scanlM' (mutateOneAssignmentCandidateWith score f) s             -- starting with 's', mutate s further and further using cycled assignments
        $ SM.unfoldr (Just . first head . splitAt 1) (cycle $ assignments dp) -- create inifinite cycled assignments

unfoldStream
  :: forall m . (MonadPrim m, PrimMonad m)
  => Int -> Int -> Int -> (Primary -> Primary -> Rand m Bool) -> DesignProblem -> Primary -> SM.Stream (Rand m) Primary
unfoldStream burnin number thin f dp = go where
  go s  = SM.map snd                                                          -- remove remaining indices from stream
        . SM.take number                                                      -- take the number of sequences we want
        . SM.filter ((==0) . flip mod thin . fst)                             -- keep only every thin'th sequence
        . SM.indexed                                                          -- add index
        . SM.drop burnin                                                      -- drop the burnin sequences
        . SM.drop 1                                                           -- drop original input
        . SM.scanlM' (mutateOneAssignmentWith f) s                            -- starting with 's', mutate s further and further using cycled assignments
        $ SM.unfoldr (Just . first head . splitAt 1) (cycle $ assignments dp) -- create inifinite cycled assignments

-- | Mutate the sequence in a candidate

mutateOneAssignmentCandidateWith
  :: (MonadPrim m, PrimMonad m)
  => (Primary -> Score) -> (Candidate -> Candidate -> Rand m Bool) -> Candidate -> Assignment -> Rand m Candidate
mutateOneAssignmentCandidateWith score f old Assignment{..} = do
  i <- uniformR (0,V.length assignment -1) -- inclusive range for Int
  let cs = VU.zip columns (assignment V.! i)
  let nw = VU.update (candidate old) cs
  let new = Candidate nw (score nw)
  b <- f old new
  return $ if b then new else old

-- | Mutate the sequence using one assignment with evaluation function.

mutateOneAssignmentWith
  :: (MonadPrim m, PrimMonad m)
  => (Primary -> Primary -> Rand m Bool) -> Primary -> Assignment -> Rand m Primary
mutateOneAssignmentWith f old Assignment{..} = do
  i <- uniformR (0,V.length assignment -1) -- inclusive range for Int
  let cs = VU.zip columns (assignment V.! i)
  let new = VU.update old cs
  b <- f old new
  return $ if b then new else old

-- | Create a number of sequences, thinning the list of candidates to yield
-- more independent candidates. The optimization function is used to make the
-- choice between emitting the current candidate again and selecting a new one.

generateSequences
  :: (MonadPrim m, PrimMonad m)
  => Int -> Int -> (Primary -> Primary -> Rand m Bool) -> DesignProblem -> Primary -> Rand m [Primary]
generateSequences number thin f dp s = go number thin s where
  go n t s
    | n < 1 = return []
    | t == 0 = do s' <- mutateSequence f dp s
                  ss <- go (n-1) thin s'
                  return $ s' : ss
    | otherwise = mutateSequence f dp s >>= go n (t-1)

-- | Mutate a sequence using the possible assignments.

mutateSequence
  :: (MonadPrim m, PrimMonad m)
  => (Primary -> Primary -> Rand m Bool) -> DesignProblem -> Primary -> Rand m Primary
mutateSequence f dp old = do
  new <- foldM mutateOneAssignment old $ assignments dp
  b <- f old new
  return $ if b then new else old

-- | Mutate the sequence using one assignment.

mutateOneAssignment
  :: (MonadPrim m, PrimMonad m)
  => Primary -> Assignment -> Rand m Primary
mutateOneAssignment s Assignment{..} = do
  i <- uniformR (0,V.length assignment -1) -- inclusive range for Int
  let cs = VU.zip columns (assignment V.! i)
  return $ VU.update s cs