{-| Sutton & Barto, "Reinforcement Learning: An Introduction", pg.145,
    Example 6.6: Cliff Walking -}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
module Examples.Ch6_Cliff.Rules where

import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set

import RL.Imports
import RL.Types

-- | Grid coordinate: (column, row), with (0,0) in the top-left corner.
type Point = (Int,Int)

-- | The four moves available to the agent.
data Action = L | R | U | D
  deriving (Show, Eq, Ord, Enum, Bounded, Generic, Hashable)

showAction :: Action -> String
showAction = \case
  L -> "<"
  R -> ">"
  U -> "^"
  D -> "v"

-- | The Cliff-Walking gridworld, parameterised by its (width, height).
data CW = CW { cw_size :: (Int,Int) }
  deriving (Show)

-- | The goal state sits in the bottom-right corner.
exits :: CW -> Point
exits (CW (sx,sy)) = (sx-1,sy-1)

-- | Apply an action to a state. Returns the next state together with a flag
-- telling whether the agent fell off the cliff (the bottom row between the
-- start and the goal); falling sends the agent back to the start (0,sy-1).
-- Moves that would leave the grid keep the agent in place.
transition :: CW -> Point -> Action -> (Point, Bool)
transition (CW (sx,sy)) (x,y) a =
  let
    check (x',y')
      | x' >= 0 && x' < sx && y' >= 0 && y' < sy =
          if y' == sy-1 && x' /= 0 && x' /= sx-1
            then ((0,sy-1), True)    -- stepped into the cliff: back to start
            else ((x',y'), False)
      | otherwise = ((x,y), False)   -- bumped into a wall: stay in place
  in
  case a of
    L -> check (x-1,y)
    R -> check (x+1,y)
    U -> check (x,y-1)
    D -> check (x,y+1)

-- | Reward is -1 per step and -100 for falling off the cliff. The successor
-- state argument is ignored; the outcome is recomputed from @s@ and @a@.
reward :: Num num => CW -> Point -> Action -> Point -> num
reward cw s a _s' = if fall then -100 else -1
  where
    (_s'', fall) = transition cw s a

showActionTable :: (MonadIO m, Fractional num, Real num)
                => CW -> [(Point, (Action, num))] -> m ()
showActionTable (CW (sx,sy)) at = liftIO $ do
  forM_ [0..sy-1] $ \y -> do
    forM_ [0..sx-1] $ \x -> do
      case List.lookup (x,y) at of
        Nothing -> printf "% 4s " "?"
        Just (a,_) -> printf "% 4s " (showAction a)
    printf "\n"

showV :: (MonadIO m, Real num) => CW -> [(Point, num)] -> m ()
showV (CW (sx,sy)) v = liftIO $ do
  forM_ [0..sy-1] $ \y -> do
    forM_ [0..sx-1] $ \x -> do
      case List.lookup (x,y) v of
        Just val -> printf "%-2.1f " (fromRational (toRational val) :: Double)
        Nothing -> printf " ? "
    printf "\n"

-- | Pick a random non-terminal state: the start state or any cell above the
-- bottom row (the cliff cells and the goal are excluded).
pickState :: MonadRnd g m => CW -> m Point
pickState (CW (sx,sy)) =
  uniform $ (0,sy-1) : [(x,y) | x <- [0..sx-1], y <- [0..sy-2]]
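
-- The smoke test below is an added sketch, not part of the original module:
-- it assumes the book's 12x4 grid and only exercises the 'transition' and
-- 'reward' functions defined above.
_demoCliff :: IO ()
_demoCliff = do
  let cw = CW { cw_size = (12,4) }
  -- Stepping right from the start (0,3) lands in the cliff row, so the agent
  -- is sent back to the start and the fall flag is set:
  print (transition cw (0,3) R)             -- ((0,3),True)
  -- Stepping up is an ordinary move:
  print (transition cw (0,3) U)             -- ((0,2),False)
  -- Falling off the cliff costs -100, any other step costs -1:
  print (reward cw (0,3) R (0,3) :: Int)    -- -100
  print (reward cw (0,3) U (0,2) :: Int)    -- -1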