{-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances, RankNTypes, TupleSections #-}
module Game.Hanabi.Strategies.MCSearch(MCSearch(..), mcs) where
import System.Random
import Control.Monad(zipWithM, MonadPlus(..))
#ifdef DEBUG
import Debug.Trace
#endif
#ifdef TEST
import Test.QuickCheck hiding (shuffle)
import Data.List(maximumBy, delete, tails, permutations, sort)
#else
import Data.List(maximumBy, delete, tails, permutations)
#endif
import Data.Function(on)
import Data.Array hiding (index)
import Data.Maybe(isNothing)
import Game.Hanabi

type BeliefState s = [([State], [s], Probability)]

-- naive alternative
eval :: (Monad m, Strategy s m) => Bool -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability
eval False samples run = mapM (\ (sts, ps, _)    ->                run sts ps) samples >>= \tups -> return $ sum [ egToNum st eg | (eg,st:_,_) <- tups ]
eval True  samples run = mapM (\ (sts, ps, prob) -> fmap (,prob) $ run sts ps) samples >>= \tups -> return $ sum [ egToNum st eg * prob | ((eg,st:_,_), prob) <- tups ]

-- slightly naive alternative
{-
evalALittleQuick :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> Probability -> Probability -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability
evalALittleQuick weighted bestPossibleScore bestPossibleSum bestSumSoFar sumSoFar []                 run = return sumSoFar
evalALittleQuick weighted bestPossibleScore bestPossibleSum bestSumSoFar sumSoFar ((sts,ps,prob):ss) run = do
                                                                                  ~(eg, st:_, _) <- run sts ps
                                                                                  let weight | weighted  = prob
                                                                                             | otherwise = 1    -- Specializing to this case should not contribute to the efficiency, because the @run@ part IS the bottleneck.
                                                                                  let newSum = egToNum st eg * weight + sumSoFar
                                                                                      newRemainingSum = bestPossibleSum - bestPossibleScore * weight
                                                                                  if bestSumSoFar < newSum + newRemainingSum then evalALittleQuick weighted bestPossibleScore newRemainingSum bestSumSoFar newSum ss run else return (-1)
-}
evalALittleQuick :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> Probability -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability
evalALittleQuick weighted bestPossibleScore bestPossibleMargin sumSoFar []                 run = return sumSoFar
evalALittleQuick weighted bestPossibleScore bestPossibleMargin sumSoFar ((sts,ps,prob):ss) run = do
                                                                                  ~(eg, st:_, _) <- run sts ps
                                                                                  let weight | weighted  = prob
                                                                                             | otherwise = 1    -- Specializing to this case should not contribute to the efficiency, because the @run@ part IS the bottleneck.
                                                                                  let score  = egToNum st eg
                                                                                      newSum = score * weight + sumSoFar
                                                                                      newPossibleMargin = bestPossibleMargin - (bestPossibleScore - score) * weight
                                                                                  if newPossibleMargin>0 then evalALittleQuick weighted bestPossibleScore newPossibleMargin newSum ss run else return (-1)

evalQuick :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> Probability -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability
evalQuick weighted bestPossibleMargin bestPossibleScore sumSoFar []                 run = return sumSoFar
evalQuick weighted bestPossibleMargin bestPossibleScore sumSoFar ((sts,ps,prob):ss) run
  | bestPossibleMargin <= 0 = return (-1)
  | otherwise = do ~(eg, st:_, _) <- run sts ps
                   let score  = egToNum st eg
                       weight | weighted  = prob
                              | otherwise = 1    -- Specializing to this case should not contribute to the efficiency, because the @run@ part IS the bottleneck.
                   evalQuick weighted (bestPossibleMargin - (bestPossibleScore-score)*weight) bestPossibleScore (score*weight + sumSoFar) ss run

findBestMove :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> BeliefState s -> (Move -> [State]->[s]->m (EndGame, [State], [Move])) -> (Probability, Move) -> [Move] -> m (Probability, Move)
findBestMove weighted achievable bestPossibleScore samples run best              []     = return best
findBestMove weighted achievable bestPossibleScore samples run best@(bestSum, _) (m:ms) = do
                                                                                    -- sumScore <- evalALittleQuick weighted bestPossibleScore (achievable-bestSum) 0 samples (run m)  -- a little naive
                                                                                    sumScore <- evalQuick weighted (achievable-bestSum) bestPossibleScore 0 samples (run m)
                                                                                    findBestMove weighted achievable bestPossibleScore samples run (if sumScore > bestSum then (sumScore, m) else best) ms

enumerate :: BeliefState s -> BeliefState s
enumerate bs = [ (s{pile=cards}:ss, ps, n) | (s:ss,ps,n) <- bs, cards <- permutations $ pile s ]

-- The "Probability" in the returned "BeliefState" does not mean anything.
mkSamples :: (RandomGen g) => Int -> BeliefState s -> g -> (BeliefState s, g)
mkSamples numSamples beliefstate gen = samples numCases numSamples (listArray (1, length beliefAcc) beliefAcc) gen
  where beliefAcc = scanl1 (\(_,_,m) (ss,ps,n) -> (ss,ps,m+n)) beliefstate
        (_,_,numCases) = last beliefAcc
samples :: (RandomGen g) => Probability -> Int -> Array Int ([State], [s], Probability) -> g -> (BeliefState s, g)
samples numCases 0          _         gen =     ([], gen)
samples numCases numSamples beliefAcc gen =     let (r, g1) = randomR (1, numCases) gen
                                                    (sts,ps,n) = binSearchOn (\(_,_,m) -> m) beliefAcc r
                                                    (sts',g2) = case sts of [] -> ([], g1)  -- should not happen
                                                                            s:ss -> (s{pile=cards}:ss, g2)
                                                                              where (cards,g2) = shuffle (pile s) g1
                                                    (tups, g3) = samples numCases (pred numSamples) beliefAcc g2
                                                in ((sts',ps,n):tups, g3)

#ifdef TEST
prop_binSearchOn fun xs val = not (null xs) && val >= 1 ==> let ys = sort xs in val > fun (last ys) ||
                                binSearchOn fun (listArray (1, length ys) ys) val == head (dropWhile (<val) ys)
#endif
#ifdef DEBUG
#else
trace _ = id
#endif

binSearchOn :: (a -> Probability) -> Array Int a -> Probability -> a
binSearchOn fun arr val = bso (bounds arr)
  where bso (l,r) | l == r    = arr ! l
                  | otherwise = bso $ if fun (arr ! m) < val then (succ m, r) else (l, m)
          where m = (l+r) `div` 2


mcs f n g sp p ps = MCS f n g sp p ps []

-- | @'MCS' f c sp p [s] memory@ usually behaves based on @p@, but it conducts the exhaustive search assuming that others behave based on @[s]@ when @f@ returns True.
--   The strategy @sp@ suggests a desired move in order to prioritize a promising strategy, and is used as the rollout strategy, too.
--   @c@ or @rolloutCond@ is used to decide when to start rollout, such as @\start end -> turn end - turn start > 1@.
data MCSearch g sp s = MCS { fromWhenMC::PublicInfo->Bool
                           , rolloutSampleTurns :: Int        -- ^ (number of rollout samples) x ((number of cards at the deck) + (number of players))
                           , rolloutRNG :: g, suggestedStrategyMC::sp, rolloutStrategy::s, rolloutStrategies::[s], beliefState :: [([State],[s],Probability)]}

instance (RandomGen g, Monad m, Strategy sp m, Strategy s m) => Strategy (MCSearch g sp s) m where
  strategyName ms = return "MCSearch"
  move pvs@(pv:_) mvs str@(MCS f num gen sp p ps bs)
    | f pub     = do statess <- case splitAt (pred numP) mvs of
                                  (tk,m:_) | not $ null bs -> do
                                                 let moves = m : reverse tk

                                                 let filteredbs = [
                                                                    (tup, ({- reverse states ++ lststs -} states, lstps {- ここが違うので、statelessじゃないと正しく動かない -}, prob))
                                                              | tup@(lststs@(lastState:_), lstps, prob) <- bs

                                                              , states@(guessedSt:_) <- foldl (\statss mov -> statss >>= \stats@(stat:_) -> map (:stats) (map (rotate 1) $ proceeds stat mov)) [lststs] moves
                                                              , let guessedPV = view guessedSt
                                                              , isNothing $ checkEndGame $ publicView guessedPV    -- The game should not have ended, and          -- Note that earlyQuit will change this result.
                                                                                                                   -- guessedPV == pv, which can be checked just by checking if
                                                              , ((==) `on` (nonPublic . publicView)) guessedPV pv  -- the revealed sets agree,
                                                              , ((==) `on` (map (take 1) . handsPV)) guessedPV pv  -- the newest (or leftmost) cards agree, and
                                                              , ((==) `on` (map marks . head . annotations . publicView)) guessedPV pv      -- the marks on my hand agree.
                                                              ]
                                                 let mvss = reverse $ take (pred numP) $ tails mvs
                                                 resultss <- mapM (checkMoves numP mvss) filteredbs
                                                 let filtRes = concat resultss

                                                 return $ if null filtRes then
--                                                    trace "using filteredbs"
                                                    map snd filteredbs  -- error "null filtRes"
                                                    else -- trace "filtRes"
                                                         filtRes

                                  _           -> -- trace "bottom" $
                                                 return alternativeStatess
                     if null statess then do
                       (m,q) <- move pvs mvs p
                       return (m, MCS f num gen sp q ps alternativeStatess)
                       else do
                       (sq,(_i, m),g2) <- mcMove (num `div` (pileNum pub + numP)) gen statess p pvs' mvs sp
                       return (m, MCS f num g2 sq p ps statess)
    | otherwise = do (m,q) <- move pvs mvs p
                     return (m, MCS f num gen sp q ps alternativeStatess)
    where pvs'@(hdpv:tlpvs) = pvs -- [ pv{publicView=pub{gameSpec=gs{rule=r{earlyQuit=True}}}} | pv@PV{publicView=pub@PI{gameSpec=gs@GS{rule=r}}}<- pvs ]
          pub = publicView hdpv
          numP = numPlayers $ gameSpec pub
          alternativeStatess = [ (stateToStateHistory (map publicView tlpvs) mvs state, take (pred numP) ps, fromIntegral n)  | (state, n) <- possibleStates hdpv ]

-- seeIf theSt st = view st == view theSt && hands st == hands theSt -- これだとixDeckの違いを反映してしまう。
seeIf theSt@St{publicState=thePub} st@St{publicState=pub} = thePub{annotations=[]} == pub{annotations=[]} && hands st == hands theSt && ((==) `on` (map (map (\a -> (marks a, possibilities a))) . annotations)) pub thePub


mcMove :: (RandomGen g, Monad m, Strategy sp m, Strategy s m) =>
                  Int                  -- ^ number of samples for rollout
               -> g                    -- ^ RandomGen used for rollout
               -> [([State],[s],Probability)]   -- ^ possible pairs of the state history and the internal memory states of other players' strategies
               -> s             -- ^ rollout strategy
               -> [PrivateView] -- ^ view history
               -> [Move]        -- ^ move history
               -> sp             -- ^ default (recommended) strategy
               -> m (sp, (Probability, Move), g)             -- Probabilityはスコアだけどいらんといえばいらん。
mcMove num gen statess p pvs@(pv:_) mvs sp = do
  (defaultMove, sq) <- move pvs mvs sp
  let candidateMoves = defaultMove : delete defaultMove (validMoves pv)
  let pub = publicView pv
      numPermutations = product [1 .. fromIntegral $ pileNum pub]
      numStates = fromIntegral (length statess) * numPermutations
      exhaustive = numStates <= fromIntegral num    -- If the number of distinct states is less or equal to the number of samples, exhaustive enumeration should be done instead of Monte-Carlo evaluation.
      (smpls, g1) | exhaustive = (enumerate statess, g1)
                  | otherwise  = mkSamples num statess gen
      bestPossibleScore = fromIntegral $ moreStrictlyAchievableScore pub
      achievable | exhaustive = bestPossibleScore * sum [prob | (_,_,prob) <- statess] * numPermutations
                 | otherwise  = bestPossibleScore * fromIntegral num
        --  ToDo:  Also consider critical cards at the bottom deck.
#ifdef DEBUG
-- This is the naive alternative.
  scores <- mapM (\m -> eval exhaustive smpls (\sts ps -> fmap fst $ tryAMove sts mvs (ps++[p]) m)) candidateMoves
  let asc = zip scores candidateMoves :: [(Probability, Move)]
  if any ((>achievable) . fst) asc then error ("turn = "++show (turn pub) ++ "\n asc = "++show asc++"\n achievable = "++show achievable) else
    trace ("turn = "++show (turn pub) ++ "\n asc = "++show asc++"\n achievable = "++show achievable) $
    return $
           (sq,
            case lookup achievable asc of Nothing -> maximumBy (compare `on` fst) $ reverse asc
                                          Just k  -> (achievable, k) -- Stop search when the best possible score is found.
           , g1)
#else
  best <- findBestMove exhaustive achievable bestPossibleScore smpls (\m sts ps -> fmap fst $ tryAMove sts mvs (ps++[p]) m) (-1, error "findBestMove: not found") candidateMoves
  return (sq, best, g1)
#endif


-- | 'tryAMove' tries a 'Move' and then simulate the game to the end, using given 'Strategies'. Running this with empty history, such as @tryAMove [st] [] strs m@ is possible, but that assumes other strategies does not depend on the history.
tryAMove :: (Monad m, Strategy s m) => [State] -> [Move] -> [s] -> Move -> m ((EndGame, [State], [Move]),[s])
tryAMove states@(st:_) mvs strs mov = case proceed st mov of Nothing -> error $ show mov ++ ": invalid move in tryAMove"
                                                             Just st -> let nxt = rotate 1 st
                                                                        in case checkEndGame $ publicState nxt of Nothing -> runSilently (nxt:states) (mov:mvs) strs
                                                                                                                  Just eg -> return ((eg, nxt:states, mov:mvs), strs)

type Probability = Integer

checkMoves :: (Monad m, Strategy s m, MonadPlus l) => Int -> [[Move]] -> (([State], [s], p),([State], [s], p)) -> m (l ([State], [s], p))
checkMoves numP mvss ((_, ps, _), (states, _, x)) = do
  let statess = reverse $ take numP $ tails states
  mbqs <- checkMvs statess mvss ps
  return $ fmap (\qs -> (states, qs, x)) mbqs
checkMvs :: (Monad m, Strategy s m, MonadPlus l) => [[State]] -> [[Move]] -> [s] -> m (l [s])
checkMvs (sts:stss) ((m:ms):mss) (p:ps) = do (mv,q) <- move (viewStates sts) ms p
                                             if mv /= m then return mzero else do
                                                   mbqs <- checkMvs stss mss ps
                                                   return $ fmap (q:) mbqs
checkMvs _ _ _ = return $ return []