{-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances, RankNTypes, TupleSections #-} module Game.Hanabi.Strategies.MCSearch(MCSearch(..), mcs) where import System.Random import Control.Monad(zipWithM, MonadPlus(..)) #ifdef DEBUG import Debug.Trace #endif #ifdef TEST import Test.QuickCheck hiding (shuffle) import Data.List(maximumBy, delete, tails, permutations, sort) #else import Data.List(maximumBy, delete, tails, permutations) #endif import Data.Function(on) import Data.Array hiding (index) import Data.Maybe(isNothing) import Game.Hanabi type BeliefState s = [([State], [s], Probability)] -- naive alternative eval :: (Monad m, Strategy s m) => Bool -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability eval False samples run = mapM (\ (sts, ps, _) -> run sts ps) samples >>= \tups -> return $ sum [ egToNum st eg | (eg,st:_,_) <- tups ] eval True samples run = mapM (\ (sts, ps, prob) -> fmap (,prob) $ run sts ps) samples >>= \tups -> return $ sum [ egToNum st eg * prob | ((eg,st:_,_), prob) <- tups ] -- slightly naive alternative {- evalALittleQuick :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> Probability -> Probability -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability evalALittleQuick weighted bestPossibleScore bestPossibleSum bestSumSoFar sumSoFar [] run = return sumSoFar evalALittleQuick weighted bestPossibleScore bestPossibleSum bestSumSoFar sumSoFar ((sts,ps,prob):ss) run = do ~(eg, st:_, _) <- run sts ps let weight | weighted = prob | otherwise = 1 -- Specializing to this case should not contribute to the efficiency, because the @run@ part IS the bottleneck. let newSum = egToNum st eg * weight + sumSoFar newRemainingSum = bestPossibleSum - bestPossibleScore * weight if bestSumSoFar < newSum + newRemainingSum then evalALittleQuick weighted bestPossibleScore newRemainingSum bestSumSoFar newSum ss run else return (-1) -} evalALittleQuick :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> Probability -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability evalALittleQuick weighted bestPossibleScore bestPossibleMargin sumSoFar [] run = return sumSoFar evalALittleQuick weighted bestPossibleScore bestPossibleMargin sumSoFar ((sts,ps,prob):ss) run = do ~(eg, st:_, _) <- run sts ps let weight | weighted = prob | otherwise = 1 -- Specializing to this case should not contribute to the efficiency, because the @run@ part IS the bottleneck. let score = egToNum st eg newSum = score * weight + sumSoFar newPossibleMargin = bestPossibleMargin - (bestPossibleScore - score) * weight if newPossibleMargin>0 then evalALittleQuick weighted bestPossibleScore newPossibleMargin newSum ss run else return (-1) evalQuick :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> Probability -> BeliefState s -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Probability evalQuick weighted bestPossibleMargin bestPossibleScore sumSoFar [] run = return sumSoFar evalQuick weighted bestPossibleMargin bestPossibleScore sumSoFar ((sts,ps,prob):ss) run | bestPossibleMargin <= 0 = return (-1) | otherwise = do ~(eg, st:_, _) <- run sts ps let score = egToNum st eg weight | weighted = prob | otherwise = 1 -- Specializing to this case should not contribute to the efficiency, because the @run@ part IS the bottleneck. evalQuick weighted (bestPossibleMargin - (bestPossibleScore-score)*weight) bestPossibleScore (score*weight + sumSoFar) ss run findBestMove :: (Monad m, Strategy s m) => Bool -> Probability -> Probability -> BeliefState s -> (Move -> [State]->[s]->m (EndGame, [State], [Move])) -> (Probability, Move) -> [Move] -> m (Probability, Move) findBestMove weighted achievable bestPossibleScore samples run best [] = return best findBestMove weighted achievable bestPossibleScore samples run best@(bestSum, _) (m:ms) = do -- sumScore <- evalALittleQuick weighted bestPossibleScore (achievable-bestSum) 0 samples (run m) -- a little naive sumScore <- evalQuick weighted (achievable-bestSum) bestPossibleScore 0 samples (run m) findBestMove weighted achievable bestPossibleScore samples run (if sumScore > bestSum then (sumScore, m) else best) ms enumerate :: BeliefState s -> BeliefState s enumerate bs = [ (s{pile=cards}:ss, ps, n) | (s:ss,ps,n) <- bs, cards <- permutations $ pile s ] -- The "Probability" in the returned "BeliefState" does not mean anything. mkSamples :: (RandomGen g) => Int -> BeliefState s -> g -> (BeliefState s, g) mkSamples numSamples beliefstate gen = samples numCases numSamples (listArray (1, length beliefAcc) beliefAcc) gen where beliefAcc = scanl1 (\(_,_,m) (ss,ps,n) -> (ss,ps,m+n)) beliefstate (_,_,numCases) = last beliefAcc samples :: (RandomGen g) => Probability -> Int -> Array Int ([State], [s], Probability) -> g -> (BeliefState s, g) samples numCases 0 _ gen = ([], gen) samples numCases numSamples beliefAcc gen = let (r, g1) = randomR (1, numCases) gen (sts,ps,n) = binSearchOn (\(_,_,m) -> m) beliefAcc r (sts',g2) = case sts of [] -> ([], g1) -- should not happen s:ss -> (s{pile=cards}:ss, g2) where (cards,g2) = shuffle (pile s) g1 (tups, g3) = samples numCases (pred numSamples) beliefAcc g2 in ((sts',ps,n):tups, g3) #ifdef TEST prop_binSearchOn fun xs val = not (null xs) && val >= 1 ==> let ys = sort xs in val > fun (last ys) || binSearchOn fun (listArray (1, length ys) ys) val == head (dropWhile ( Probability) -> Array Int a -> Probability -> a binSearchOn fun arr val = bso (bounds arr) where bso (l,r) | l == r = arr ! l | otherwise = bso $ if fun (arr ! m) < val then (succ m, r) else (l, m) where m = (l+r) `div` 2 mcs f n g sp p ps = MCS f n g sp p ps [] -- | @'MCS' f c sp p [s] memory@ usually behaves based on @p@, but it conducts the exhaustive search assuming that others behave based on @[s]@ when @f@ returns True. -- The strategy @sp@ suggests a desired move in order to prioritize a promising strategy, and is used as the rollout strategy, too. -- @c@ or @rolloutCond@ is used to decide when to start rollout, such as @\start end -> turn end - turn start > 1@. data MCSearch g sp s = MCS { fromWhenMC::PublicInfo->Bool , rolloutSampleTurns :: Int -- ^ (number of rollout samples) x ((number of cards at the deck) + (number of players)) , rolloutRNG :: g, suggestedStrategyMC::sp, rolloutStrategy::s, rolloutStrategies::[s], beliefState :: [([State],[s],Probability)]} instance (RandomGen g, Monad m, Strategy sp m, Strategy s m) => Strategy (MCSearch g sp s) m where strategyName ms = return "MCSearch" move pvs@(pv:_) mvs str@(MCS f num gen sp p ps bs) | f pub = do statess <- case splitAt (pred numP) mvs of (tk,m:_) | not $ null bs -> do let moves = m : reverse tk let filteredbs = [ (tup, ({- reverse states ++ lststs -} states, lstps {- ここが違うので、statelessじゃないと正しく動かない -}, prob)) | tup@(lststs@(lastState:_), lstps, prob) <- bs , states@(guessedSt:_) <- foldl (\statss mov -> statss >>= \stats@(stat:_) -> map (:stats) (map (rotate 1) $ proceeds stat mov)) [lststs] moves , let guessedPV = view guessedSt , isNothing $ checkEndGame $ publicView guessedPV -- The game should not have ended, and -- Note that earlyQuit will change this result. -- guessedPV == pv, which can be checked just by checking if , ((==) `on` (nonPublic . publicView)) guessedPV pv -- the revealed sets agree, , ((==) `on` (map (take 1) . handsPV)) guessedPV pv -- the newest (or leftmost) cards agree, and , ((==) `on` (map marks . head . annotations . publicView)) guessedPV pv -- the marks on my hand agree. ] let mvss = reverse $ take (pred numP) $ tails mvs resultss <- mapM (checkMoves numP mvss) filteredbs let filtRes = concat resultss return $ if null filtRes then -- trace "using filteredbs" map snd filteredbs -- error "null filtRes" else -- trace "filtRes" filtRes _ -> -- trace "bottom" $ return alternativeStatess if null statess then do (m,q) <- move pvs mvs p return (m, MCS f num gen sp q ps alternativeStatess) else do (sq,(_i, m),g2) <- mcMove (num `div` (pileNum pub + numP)) gen statess p pvs' mvs sp return (m, MCS f num g2 sq p ps statess) | otherwise = do (m,q) <- move pvs mvs p return (m, MCS f num gen sp q ps alternativeStatess) where pvs'@(hdpv:tlpvs) = pvs -- [ pv{publicView=pub{gameSpec=gs{rule=r{earlyQuit=True}}}} | pv@PV{publicView=pub@PI{gameSpec=gs@GS{rule=r}}}<- pvs ] pub = publicView hdpv numP = numPlayers $ gameSpec pub alternativeStatess = [ (stateToStateHistory (map publicView tlpvs) mvs state, take (pred numP) ps, fromIntegral n) | (state, n) <- possibleStates hdpv ] -- seeIf theSt st = view st == view theSt && hands st == hands theSt -- これだとixDeckの違いを反映してしまう。 seeIf theSt@St{publicState=thePub} st@St{publicState=pub} = thePub{annotations=[]} == pub{annotations=[]} && hands st == hands theSt && ((==) `on` (map (map (\a -> (marks a, possibilities a))) . annotations)) pub thePub mcMove :: (RandomGen g, Monad m, Strategy sp m, Strategy s m) => Int -- ^ number of samples for rollout -> g -- ^ RandomGen used for rollout -> [([State],[s],Probability)] -- ^ possible pairs of the state history and the internal memory states of other players' strategies -> s -- ^ rollout strategy -> [PrivateView] -- ^ view history -> [Move] -- ^ move history -> sp -- ^ default (recommended) strategy -> m (sp, (Probability, Move), g) -- Probabilityはスコアだけどいらんといえばいらん。 mcMove num gen statess p pvs@(pv:_) mvs sp = do (defaultMove, sq) <- move pvs mvs sp let candidateMoves = defaultMove : delete defaultMove (validMoves pv) let pub = publicView pv numPermutations = product [1 .. fromIntegral $ pileNum pub] numStates = fromIntegral (length statess) * numPermutations exhaustive = numStates <= fromIntegral num -- If the number of distinct states is less or equal to the number of samples, exhaustive enumeration should be done instead of Monte-Carlo evaluation. (smpls, g1) | exhaustive = (enumerate statess, g1) | otherwise = mkSamples num statess gen bestPossibleScore = fromIntegral $ moreStrictlyAchievableScore pub achievable | exhaustive = bestPossibleScore * sum [prob | (_,_,prob) <- statess] * numPermutations | otherwise = bestPossibleScore * fromIntegral num -- ToDo: Also consider critical cards at the bottom deck. #ifdef DEBUG -- This is the naive alternative. scores <- mapM (\m -> eval exhaustive smpls (\sts ps -> fmap fst $ tryAMove sts mvs (ps++[p]) m)) candidateMoves let asc = zip scores candidateMoves :: [(Probability, Move)] if any ((>achievable) . fst) asc then error ("turn = "++show (turn pub) ++ "\n asc = "++show asc++"\n achievable = "++show achievable) else trace ("turn = "++show (turn pub) ++ "\n asc = "++show asc++"\n achievable = "++show achievable) $ return $ (sq, case lookup achievable asc of Nothing -> maximumBy (compare `on` fst) $ reverse asc Just k -> (achievable, k) -- Stop search when the best possible score is found. , g1) #else best <- findBestMove exhaustive achievable bestPossibleScore smpls (\m sts ps -> fmap fst $ tryAMove sts mvs (ps++[p]) m) (-1, error "findBestMove: not found") candidateMoves return (sq, best, g1) #endif -- | 'tryAMove' tries a 'Move' and then simulate the game to the end, using given 'Strategies'. Running this with empty history, such as @tryAMove [st] [] strs m@ is possible, but that assumes other strategies does not depend on the history. tryAMove :: (Monad m, Strategy s m) => [State] -> [Move] -> [s] -> Move -> m ((EndGame, [State], [Move]),[s]) tryAMove states@(st:_) mvs strs mov = case proceed st mov of Nothing -> error $ show mov ++ ": invalid move in tryAMove" Just st -> let nxt = rotate 1 st in case checkEndGame $ publicState nxt of Nothing -> runSilently (nxt:states) (mov:mvs) strs Just eg -> return ((eg, nxt:states, mov:mvs), strs) type Probability = Integer checkMoves :: (Monad m, Strategy s m, MonadPlus l) => Int -> [[Move]] -> (([State], [s], p),([State], [s], p)) -> m (l ([State], [s], p)) checkMoves numP mvss ((_, ps, _), (states, _, x)) = do let statess = reverse $ take numP $ tails states mbqs <- checkMvs statess mvss ps return $ fmap (\qs -> (states, qs, x)) mbqs checkMvs :: (Monad m, Strategy s m, MonadPlus l) => [[State]] -> [[Move]] -> [s] -> m (l [s]) checkMvs (sts:stss) ((m:ms):mss) (p:ps) = do (mv,q) <- move (viewStates sts) ms p if mv /= m then return mzero else do mbqs <- checkMvs stss mss ps return $ fmap (q:) mbqs checkMvs _ _ _ = return $ return []