-- The old LazyMC.hs is moved to NaiveLazyMC.hs. Also note that LazyMC.LotsOfComments was for NaiveLazyMC.hs {-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances, RankNTypes, TupleSections, FlexibleContexts #-} module Game.Hanabi.Strategies.LazyMC(LazyMC(..), lmcs, glmcs, flatlmcs, Priority(..), mkPool) where import System.Random import Control.Monad(MonadPlus(..)) import Data.Functor.Identity #ifdef TEST import Test.QuickCheck hiding (shuffle, (.&.)) import Data.List(partition, maximumBy, delete, tails, permutations, transpose, sort) #else import Data.List(partition, maximumBy, delete, tails, permutations, transpose) #endif import Data.Bits hiding (rotate) import Data.Function(on) import Data.Array hiding (index) import Data.Maybe(isNothing, isJust, fromJust, maybeToList) import qualified Data.IntMap as IM import Game.Hanabi -- only used by the fallback. import Game.Hanabi.Strategies.EndGameSearch import Game.Hanabi.Strategies.EndGameLite(egml, egl, EndGameMirrorLite(..), EndGameLite(..)) import Game.Hanabi.Strategies.Stateless #ifdef DEBUG import Debug.Trace #else trace _ = id #endif -- type FullState s = ([State], [s]) -- type BeliefState s = [(FullState s, Probability)] -- mkPool is a function that helps the user to create the pool of strategies based on the list of lists of strategies that are annotated with the probability with which each list of strategies is selected. mkPool :: [([s],Double)] -> [(Array Int s, Double)] mkPool ts = zip [ listArray (1, length ss) ss | (ss,_) <- ts ] ps where probs = map snd ts _:ps = scanl (-) 1 probs samplePairsOfStatesAndStrategies :: (RandomGen g, Strategy s Identity) => Either (Array Int [s]) [[(Array Int s, Double)]] -> [PrivateView] -> [Move] -> g -> [([State],[s])] samplePairsOfStatesAndStrategies pool pvs mvs gen = case split gen of (g1,g2) -> zip (sampleStatess pvs mvs g1) $ case pool of Left pss -> samplesFromArr pss g2 Right tss -> sampleStrategiess tss g2 samplesFromArr :: (RandomGen g) => Array Int ss -> g -> [ss] samplesFromArr arr g = [ arr ! ix | ix <- randomRs (bounds arr) g ] sampleStrategiess :: (RandomGen g, Strategy s Identity) => [[(Array Int s, Double)]] -> g -> [[s]] sampleStrategiess tss gen = transpose $ zipWith sampleStrategies tss $ map snd $ iterate (split . fst) $ split gen -- The Double here is not the probability of selecting the current array but that of going deeper. This decision is for efficiency. So, if the list is finite, it must end with @(some_array,0):[]@ sampleStrategies :: (RandomGen g, Strategy s Identity) => [(Array Int s, Double)] -> g -> [s] sampleStrategies [] _ = [] sampleStrategies ts gen = let (d,g1) = random gen (strs,_) = case dropWhile (\(_,prob) -> d t [] -> last ts in case randomR (bounds strs) g1 of (i,g2) -> (strs!i) : sampleStrategies ts g2 -- Make sure that @indices strs@ is not null when defining the pool! sampleStatess :: RandomGen g => [PrivateView] -> [Move] -> g -> [[State]] sampleStatess pvs@(pv:tlpvs) moves g = let (s,gen) = sampleState pv g in map (stateToStateHistory (map publicView tlpvs) moves) s ++ sampleStatess pvs moves gen -- | 'nSampleStatess' is an eager alternative of sampleStatess. -- 'nSampleStatess' is not desired because n can be very big and most of the samples would not be actually used, but there are RNGs without good split. -- @take n $ sampleStatess pvs moves g@ should be used instead in most cases. nSampleStatess :: RandomGen g => Int -- ^ number of samples -> [PrivateView] -> [Move] -> g -> ([[State]],g) nSampleStatess 0 pvs moves g = ([],g) nSampleStatess n pvs@(pv:tlpvs) moves g = let (s,g') = sampleState pv g (ss,gen) = nSampleStatess (pred n) pvs moves g' in (map (stateToStateHistory (map publicView tlpvs) moves) s ++ ss, gen) -- naive alternative {- eval :: (Monad m, Strategy s m) => [([State],[s])] -> ([State]->[s]->m (EndGame, [State], [Move])) -> m Int eval samples run = trace ("entered eval. length samples = " ++ show (length samples)) $ mapM (\ (sts, ps) -> trace "running" $ run sts ps) samples >>= \tups -> return $ sum [ egToNum st eg | (eg,st:_,_) <- tups ] -} evals :: [([State],[s])] -> ([State]->[s]->(EndGame, [State], [Move])) -> [Int] evals samples run = [ egToNum st eg | (sts,ps) <- samples, let (eg,st:_,_) = run sts ps ] evalALittleQuick :: Int -> Int -> Int -> [Int] -> Int evalALittleQuick bestPossibleScore bestPossibleMargin sumSoFar [] = sumSoFar evalALittleQuick bestPossibleScore bestPossibleMargin sumSoFar (score:ss) | newPossibleMargin <= 0 = -1 | otherwise = evalALittleQuick bestPossibleScore newPossibleMargin newSum ss where newSum = score + sumSoFar newPossibleMargin = bestPossibleMargin - bestPossibleScore + score evalQuick :: Int -> Int -> Int -> [Int] -> Int evalQuick bestPossibleMargin bestPossibleScore sumSoFar [] = sumSoFar evalQuick bestPossibleMargin bestPossibleScore sumSoFar (score:ss) | bestPossibleMargin <= 0 = -1 | otherwise = evalQuick (bestPossibleMargin - (bestPossibleScore-score)) bestPossibleScore (score + sumSoFar) ss -- findBestMove finds the best move based on the average score. findBestMove :: Int -> Int -> [([State], [s])] -> (Move -> [State]->[s]-> (EndGame, [State], [Move])) -> (Int, Move) -> [Move] -> (Int, Move) findBestMove achievable bestPossibleScore samples run best [] = best findBestMove achievable bestPossibleScore samples run best@(bestSum, _) (m:ms) = let scores = evals samples $ run m sumScore = evalQuick (achievable-bestSum) bestPossibleScore 0 scores in findBestMove achievable bestPossibleScore samples run (if sumScore > bestSum then (sumScore, m) else best) ms evalDecent :: Priority -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int, Int) evalDecent pri bestPossibleNumMargin bestPossibleMargin bestPossibleScore numBest sumSoFar [] = (numBest, sumSoFar) evalDecent AvoidZero bestPossibleNumMargin bestPossibleMargin bestPossibleScore numBest sumSoFar (0:ss) = (0, sumSoFar) -- Further computation is not worthy, but there has to be some valid value. evalDecent pri bestPossibleNumMargin bestPossibleMargin bestPossibleScore numBest sumSoFar (score:ss) | bestPossibleNumMargin < 0 || (bestPossibleNumMargin == 0 && bestPossibleMargin <= 0) = (-1, -1) | score == bestPossibleScore = evalDecent pri bestPossibleNumMargin (bestPossibleMargin - (bestPossibleScore-score)) bestPossibleScore (succ numBest) (score + sumSoFar) ss | otherwise = evalDecent pri (pred bestPossibleNumMargin) (bestPossibleMargin - (bestPossibleScore-score)) bestPossibleScore numBest (score + sumSoFar) ss -- findBestMoveDecently is a variant of findBestMove that optimizes ( - <# of failure>, <# of perfect>, ) in the lexicographical order. findBestMoveDecently :: Priority -> Int -> Int -> Int -> [([State], [s])] -> (Move -> [State]->[s]-> (EndGame, [State], [Move])) -> ((Int,Int), Move) -> [Move] -> ((Int,Int), Move) findBestMoveDecently pri numSmpls achievable bestPossibleScore samples run best [] = best findBestMoveDecently pri numSmpls achievable bestPossibleScore samples run best@(bestRes@(bestNum,bestSum), _) (m:ms) = let scores = evals samples $ run m sumScore = evalDecent pri (numSmpls-bestNum) (achievable-bestSum) bestPossibleScore 0 0 scores in findBestMoveDecently pri numSmpls achievable bestPossibleScore samples run (if sumScore > bestRes then (sumScore, m) else best) ms -- In effect, lmcs pri n rr g sp p ps === glmcs pri n rr g sp p [ [([p],1)] | p <- ps ] flatlmcs pri n rr g sp p pss = glmcs pri n rr g sp p [ [(ps,1)] | ps <- pss ] glmcs pri n rr g sp p ps = LMC pri n rr g sp p (Right $ map mkPool ps) [] IM.empty [] undefined lmcs pri n rr g sp p pss = LMC pri n rr g sp p (Left $ listArray (0, pred $ length pss) pss) [] IM.empty [] True --lmcs pri n rr g sp p ps = LMC pri n rr g sp p (Right [ mkPool [([p],1)] | p <- ps ]) [] -- This should be equivalent to the above. data LazyMC g sp s = LMC { priority :: Priority , numSamplesLMC :: Int , rollbackRounds :: Int -- ^ after resampling, how many past rounds to check the consistency with the rollout policy. This is reset to 1 when @move@ is called in the beginning of game, and is not a config value any longer. , rngLMC :: g , suggestedStrategyLMC :: sp , rolloutStrategyLMC :: s , rolloutStrategiesLMC :: Either (Array Int [s]) [[(Array Int s,Double)]] , consistentSampleFullStatesLMC :: [([State],[s])] -- ^ list of sample full states that are consistent with the game environment and the conjectured teammate policies. , ixToCard :: IM.IntMap Card , fixedStateHistory :: [State] , ready :: Bool -- ^ True when initialization is finished, ⊥ otherwise. } data Priority = AverageScore -- ^ evaluate only by the average score | BestScoreRate -- ^ firstly consider the number of cases where the best possible score is achieved, and then consider the average score | AvoidZero -- ^ firstly avoid Failure and score 0, and then follow BestScoreRate. This is the recommendation if self-play of the rollout policy does not achieve 24 on average. annotationToCT4 :: PrivateView -> Annotation -> (Int, CardTo4) annotationToCT4 pv ann = (ixDeck ann, case marks ann of (Just _, Just _) -> possibilities ann _ -> possibilities ann .&. invisibleBag pv) {- updateIxToCT4 :: Int -> [PrivateView] -> IM.IntMap CardTo4 -> IM.IntMap CardTo4 updateIxToCT4 numP pvs@(pv:_) i2ct4 = let lastResult = result $ pvs !! pred numP -} annotationToMbCard :: PrivateView -> Annotation -> [(Int, Card)] annotationToMbCard pv ann = case marks ann of (Just c, Just r) -> [(ixDeck ann, C c r)] _ | bit trz == pos -> [(ixDeck ann, qitPosToCard $ trz `div` 2)] | otherwise -> [] where pos = possibilities ann .&. invisibleBag pv trz = countTrailingZeros pos -- updateIxToCard :: Int -> [PrivateView] -> [Move] -> IM.IntMap Card -> IM.IntMap Card -- updateIxToCard numP pvs mvs i2c = roundToIxToCard numP pvs mvs `union` i2c roundToIxToCard :: Int -> [PrivateView] -> [Move] -> IM.IntMap Card roundToIxToCard numP pvs@(pv:_) mvs = let myRevealed = case drop (pred numP) pvs of PV{publicView=PI{result=lastResult}} : myLastPV : _ -> case lastResult of None -> [] _ -> [(ixDeck $ head (annotations $ publicView myLastPV) !! index myLastMove, revealed lastResult)] where myLastMove = mvs !! pred numP -- mvs is only used here, for extracting the index of the last move. _ -> [] myAnns = head $ annotations $ publicView pv in IM.fromList $ myRevealed ++ (myAnns >>= annotationToMbCard pv) -- fromAscList could be used for the second list with adequate specification. instance (RandomGen g, Monad m, Strategy sp m, Strategy s Identity) => Strategy (LazyMC g sp s) m where strategyName ms = return "LazyMC" initialize True p = return p initialize False p = ready p `seq` return p move pvs@(pv:_) mvs str@(LMC pri num rr gen sp p ps css i2c fsh ready) | turn pub < numP || null newcss = do let (g3,g4) = split g2 let (newrr, newPS) = case filteredPS of Right tss | turn pub >= numP -> (succ rr, Right $ take (pred numP) $ ( map (\ts -> case ts of ((a,d):(b,e):vs) | fst (randomR (bounds a) g3) * 2 < limit -- The random number is used to make sure roughly most of the programs are tried at least once. -> (listArray (1, snd (bounds a) + snd (bounds b)) $ elems b ++ elems a, e) : vs _ -> ts) tss) ++ error "nanka takusann tukawareteru") _ -> (1, filteredPS) -- let (m, EGML q) = trace "!!!!!!!!!!!!!Fall Back!!!!!!!!!!!!!!!!!!!!!" $ -- use this version with end game search for more fun. -- runIdentity $ move (sontakuColorHint pvs mvs) mvs $ egml (\pub -> pileNum pub == 0) p (egl (\pub -> pileNum pub <= 1) p p $ either (!0) (map ((!1).fst.head)) newPS) numP -- return (m, LMC pri num newrr g4 sp p newPS $ take num alternativeStatess) let (m, q) = -- trace "!!!!!!!!!!!!!Fall Back!!!!!!!!!!!!!!!!!!!!!" $ runIdentity $ move pvs mvs p return (m, LMC pri num newrr g4 sp q newPS (take num alternativeStatess) ni2c nfsh ready) | otherwise = do #ifdef DEBUG (sq,(_i, m)) <- trace ("mcMove") $ trace ("length samples = " ++ show (length $ take num newcss)) $ #else (sq, m) <- #endif mcMove pri num (take num newcss) p pvs' mvs sp return (m, LMC pri num (succ rr) g2 sq p filteredPS (take num newcss) ni2c nfsh ready) where (ni2c, fsh') | turn pub < numP = (IM.empty, []) | otherwise = (roundToIxToCard numP pvs mvs `IM.union` i2c, fsh) lenfsh = length fsh' rpvs = drop lenfsh $ reverse $ tail pvs cardss = map fromJust $ takeWhile isJust [ mapM (flip IM.lookup ni2c . ixDeck) $ head $ annotations $ publicView pv | pv <- rpvs ] nonrotatedPartialStates = zipWith (\(PV{publicView=pub, handsPV=hpv}) myHand -> St{publicState=pub, hands=myHand:hpv}) rpvs cardss nfsh = reverse nonrotatedPartialStates ++ fsh' lenFshDelta = length nonrotatedPartialStates deltas = map unzip $ take lenFshDelta $ tails $ zip nfsh $ drop (turn pub - lenFshDelta - lenfsh) mvs :: [([State], [Move])] arrOfsPvsMvs = accumArray (flip (:)) [] (0, pred numP) [ (offset, (map (view . rotate offset) sts, mvs)) | (sts@(st:_), mvs) <- deltas, let offset = (turn (publicState st) - turn pub) `mod` numP ] :: Array Int [([PrivateView], [Move])] filteredPS = case ps of Right tss -> Right $ zipWith filtPS tss [1..pred numP] _ -> ps -- filtPS :: [(Array Int s, Double)] -> Int -> [(Array Int s, Double)] filtPS ts offset = let hists = arrOfsPvsMvs ! offset in filter (not . null . elems . fst) $ map (fps hists) ts where fps hists (ar, prob) = let strs = filter (\str -> all (pred str) hists) $ elems ar in (listArray (1, length strs) strs, prob) where -- pred :: s -> ([PrivateView], [Move]) -> Bool pred str (pvs, mv:mvs) = fst (runIdentity $ move pvs mvs str) == mv newcss = map snd $ filteredCss ++ filter fst (take limit newlyCheckedCss) lenFCss = length filteredCss (tk,m:_) = splitAt (pred numP) mvs moves = m : reverse tk revPVs = reverse $ take numP pvs agree pv1 pv2 = ((==) `on` (nonPublic . publicView)) pv1 pv2 && ((==) `on` (map (take 1) . handsPV)) pv1 pv2 && ((==) `on` (map marks . head . annotations . publicView)) pv1 pv2 checkView samples = -- [(fst checkMoved, ({- states -}stateToStateHistory (map publicView tlpvs) mvs stNow, snd $ snd checkMoved)) | [ checkMoved | tup@(lststs@(lastState:_), lstps) <- samples, states@(stNow:_) <- take 1 $ foldl (\statss (i,rpv,mov) -> statss >>= \stats@(stat:_) -> map (:stats) (map (rotate 1) $ filter (\st -> isNothing (checkEndGame $ publicState st) && agree rpv (view $ rotate (-i) st)) $ proceeds stat mov)) [lststs] $ zip3 [0..] revPVs moves -- stNow <- take 1 $ foldl (\statss (i,rpv,mov) -> statss >>= \stat -> (map (rotate 1) $ filter (\st -> isNothing (checkEndGame $ publicState st) && agree rpv (view $ rotate (-i) st)) $ proceeds stat mov)) [lastState] $ zip3 [0..] revPVs moves , let checkMoved = checkMoves 1 numP tailsMoves lstps states ] tailsMoves = tails mvs checkedCss = checkView css filteredCss = filter fst $ checkedCss newlyCheckedCss = [ checkMoves rr numP tailsMoves strs states | (states, strs) <- samplePairsOfStatesAndStrategies filteredPS pvs mvs g1 ] -- This recedes rr rounds!!!!!!!!! {- a slightly inefficient version newlyCheckedCss = checkView [ (drop numP states, take (pred numP) ps) | states <- sampleStatess pvs mvs g1 ] -} pvs'@(hdpv:tlpvs) = pvs -- [ pv{publicView=pub{gameSpec=gs{rule=r{earlyQuit=True}}}} | pv@PV{publicView=pub@PI{gameSpec=gs@GS{rule=r}}}<- pvs ] pub = publicView hdpv numP = numPlayers $ gameSpec pub (g1,g2) = split gen alternativeStatess = [ (states, take (pred numP) strs) | (states, strs) <- samplePairsOfStatesAndStrategies filteredPS pvs mvs g1 ] limit = 10000 -- limit = 10000 -- product [1 .. sumCT4 $ invisibleBag pv] -- seeIf theSt st = view st == view theSt && hands st == hands theSt seeIf theSt@St{publicState=thePub} st@St{publicState=pub} = thePub{annotations=[]} == pub{annotations=[]} && hands st == hands theSt && ((==) `on` (map (map (\a -> (marks a, possibilities a))) . annotations)) pub thePub mcMove :: (Monad m, Strategy sp m, Strategy s Identity) => Priority -> Int -- ^ number of samples -> [([State],[s])] -- ^ possible sample pairs of the state history and the internal memory states of other players' strategies -> s -- ^ rollout strategy for the player -> [PrivateView] -- ^ view history -> [Move] -- ^ move history -> sp -- ^ default (recommended) lightweight strategy #ifdef DEBUG -> m (sp, ((Bool,Int,Int), Move)) #else -> m (sp, Move) #endif mcMove pri numSmpls smpls p pvs@(pv:_) mvs sp = do (defaultMove, sq) <- move pvs mvs sp let candidateMoves = defaultMove : delete defaultMove (validMoves pv) let pub = publicView pv bestPossibleScore = fromIntegral $ moreStrictlyAchievableScore pub achievable = bestPossibleScore * fromIntegral numSmpls #ifdef DEBUG -- This is the naive alternative. let asc = case pri of AvoidZero -> map (\m -> let scores = evals smpls (\sts ps -> runIdentity $ fmap fst $ tryAMove sts mvs (ps++[p]) m) in ((all (/=0) scores, length $ filter (==bestPossibleScore) scores, sum scores), m) ) candidateMoves :: [((Bool,Int,Int), Move)] BestScoreRate -> map (\m -> let scores = evals smpls (\sts ps -> runIdentity $ fmap fst $ tryAMove sts mvs (ps++[p]) m) in ((True, length $ filter (==bestPossibleScore) scores, sum scores), m) ) candidateMoves AverageScore -> map (\m -> let scores = evals smpls (\sts ps -> runIdentity $ fmap fst $ tryAMove sts mvs (ps++[p]) m) in ((True, fromIntegral numSmpls, sum scores), m) ) candidateMoves achi = (True, fromIntegral numSmpls, achievable) if trace "if any" $ any ((>achi) . fst) asc then error ("turn = "++show (turn pub) ++ "\n asc = "++show asc++"\n achi = "++show achi) else trace ("turn = "++show (turn pub) ++ "\n asc = "++show asc++"\n achi = "++show achi) $ return $ (sq, trace ("defaultMove = "++show defaultMove++", and the result seems "++show (maximumBy (compare `on` fst) $ reverse asc)) $ case lookup achi asc of Nothing -> maximumBy (compare `on` fst) $ reverse asc Just k -> (achi, k) -- Stop search when the best possible score is found. ) #else let best = case pri of AverageScore -> snd $ findBestMove achievable bestPossibleScore smpls (\m sts ps -> fst $ runIdentity $ tryAMove sts mvs (ps++[p]) m) (-1, error "findBestMove: not found: should be undefined" defaultMove) candidateMoves _ -> snd $ findBestMoveDecently pri numSmpls achievable bestPossibleScore smpls (\m sts ps -> fst $ runIdentity $ tryAMove sts mvs (ps++[p]) m) ((0,-1), error "findBestMoveDecently: not found: should be undefined" defaultMove) candidateMoves return (sq, best) #endif -- | 'tryAMove' tries a 'Move' and then simulate the game to the end, using given 'Strategies'. Running this with empty history, such as @tryAMove [st] [] strs m@ is possible, but that assumes other strategies does not depend on the history. tryAMove :: (Monad m, Strategy s m) => [State] -> [Move] -> [s] -> Move -> m ((EndGame, [State], [Move]),[s]) tryAMove states@(st:_) mvs strs mov = case proceed st mov of Nothing -> error $ show mov ++ ": invalid move in tryAMove" Just st -> let nxt = rotate 1 st in case checkEndGame $ publicState nxt of Nothing -> -- trace "runSilently" $ runSilently (nxt:states) (mov:mvs) strs Just eg -> return ((eg, nxt:states, mov:mvs), strs) checkMoves :: (Strategy s Identity) => Int -> Int -> [[Move]] -> [s] -> [State] -> (Bool, ([State], [s])) checkMoves rounds numP tailsMoves ps states = let turns = rounds * numP statess = reverse $ take turns $ tails states mvss = reverse $ take (pred turns) tailsMoves pnp = pred numP (b,qs) = checkMvs numP (length mvss) statess mvss ps -- !!! Exactly speaking, ps should be rotated when @length mvss /= pred turns@ !!!!!!!!! in (b, (states, take pnp qs)) checkMvs :: (Strategy s Identity) => Int -> Int -> [[State]] -> [[Move]] -> [s] -> (Bool, [s]) checkMvs _ 0 _ _ qs = (True, qs) checkMvs numP n (sts:stss) (ms:mss) (p:ps) | n `mod` numP == 0 || null ms = checkMvs numP (pred n) stss mss (p:ps) -- skip the check if it is in the player's own turn, or it is the first turn. | otherwise = let (mv,q) = runIdentity $ move (viewStates sts) (tail ms) p (restb, rest) = checkMvs numP (pred n) stss mss $ ps ++ [q] in (mv == (head ms) && restb, rest) -- Once mv /= head ms, the remaining part would not be computed.