rl-satton-0.1.2.4: Collection of Reinforcement Learning algorithms

Safe Haskell: None
Language: Haskell2010

RL.TDl

Documentation

type Q s a = M s a TD_Number Source #

type Z s a = M s a TD_Number Source #

type V s a = HashMap s (a, TD_Number) Source #

toV :: (Bounded a, Enum a, Eq a, Hashable a, Eq s, Hashable s) => Q s a -> HashMap s TD_Number Source #
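
None of these types carries a description, but the names track standard TD(λ) notation: Q s a reads as the action-value table and Z s a as the eligibility-trace table (both built on the undocumented M map type), while V s a pairs each state with an action and a value. Under that reading, toV collapses a Q table to its greedy state values. A self-contained sketch of that collapse over plain Data.HashMap.Strict, since M itself is not shown on this page; everything here except the Q-to-V idea is a stand-in:

import qualified Data.HashMap.Strict as HM
import Data.HashMap.Strict (HashMap)

-- Stand-in for the package's Q table: state -> (action -> value).
type QTable s a = HashMap s (HashMap a Double)

-- Greedy collapse: keep the best action value per state, which is what a
-- Q s a -> HashMap s TD_Number conversion would plausibly compute.
-- Assumes every state has at least one action entry.
toVSketch :: QTable s a -> HashMap s Double
toVSketch = HM.map (maximum . HM.elems)

-- >>> toVSketch (HM.fromList [(0 :: Int, HM.fromList [(False, 1.0), (True, 2.5)])])
-- fromList [(0,2.5)]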

data TDl_State s a Source #

Constructors

TDl_State 

Fields

_tdl_z :: Z s a
_tdl_q :: Q s a

tdl_z :: forall s a. Lens' (TDl_State s a) (Z s a) Source #

tdl_q :: forall s a. Lens' (TDl_State s a) (Q s a) Source #
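
Both lenses point into TDl_State, which the MonadState constraints on the helpers below suggest is the state threaded through a learning run. A minimal sketch of using them in that style, assuming Control.Lens, a Num-like TD_Number, and a hypothetical mapValues helper for the undocumented M type:

import Control.Lens (use, (%=))
import Control.Monad.State (MonadState)

-- Decay every eligibility trace by gamma * lambda, then read Q back.
-- mapValues :: (TD_Number -> TD_Number) -> Z s a -> Z s a is hypothetical;
-- the real M type's API is not shown on this page.
decayTraces :: MonadState (TDl_State s a) m => TD_Number -> m (Q s a)
decayTraces gl = do
  tdl_z %= mapValues (* gl)
  use tdl_q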

class (Eq s, Hashable s, Show s, Eq a, Hashable a, Enum a, Bounded a, Show a) => TDl_Problem pr m s a | pr -> m, pr -> s, pr -> a where Source #

Methods

td_is_terminal :: pr -> s -> Bool Source #

td_greedy :: pr -> Bool -> a -> a Source #

td_transition :: pr -> s -> a -> TDl_State s a -> m s Source #

td_reward :: pr -> s -> a -> s -> TD_Number Source #

td_modify :: pr -> s -> a -> TDl_State s a -> m () Source #
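
The class carries no prose beyond its signatures, so the following instance is a guess at the intended shape: a short corridor where True steps right, False steps left, and reaching the right end pays 1. The choice of Identity for m, the pass-through td_greedy (the meaning of its Bool argument is undocumented), the Num use of TD_Number, and the no-op td_modify are all assumptions layered on the signatures above, not the library's own semantics:

{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}

import Data.Functor.Identity (Identity)
-- plus an import of RL.TDl itself; rl-satton's module layout is assumed

newtype Corridor = Corridor { corridorLen :: Int }

instance TDl_Problem Corridor Identity Int Bool where
  td_is_terminal p s    = s <= 0 || s >= corridorLen p
  td_greedy _ _ a       = a         -- Bool flag's meaning is undocumented; pass through
  td_transition _ s a _ = pure (if a then s + 1 else s - 1)
  td_reward p _ _ s'    = if s' >= corridorLen p then 1 else 0
  td_modify _ _ _ _     = pure ()   -- hook for external effects; nothing to do here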

queryQ :: (Hashable a, Hashable s, MonadState (TDl_State s a) f, Eq a, Eq s, Enum a, Bounded a) => s -> f [(a, TD_Number)] Source #

modifyQ :: (Hashable a, Hashable s, MonadState (TDl_State s a) m, Eq a, Eq s, Enum a, Bounded a) => t -> s -> a -> (TD_Number -> TD_Number) -> m () Source #

listZ :: (TDl_Problem pr m s a, MonadTrans t, MonadState (TDl_State s a) (t m), Monad m) => pr -> s -> a -> ((s, a, TD_Number) -> t m b) -> t m () Source #

modifyZ :: (Hashable a, Hashable s, MonadState (TDl_State s a) m, Eq a, Eq s, Enum a, Bounded a) => t -> s -> a -> (TD_Number -> TD_Number) -> m () Source #

action :: (TDl_Problem pr m1 s1 a, MonadRnd g m, Hashable s, MonadState (TDl_State s a) m, Eq s) => pr -> s -> TD_Number -> m (a, TD_Number) Source #

transition :: (TDl_Problem pr m b a, MonadTrans t, MonadState (TDl_State b a) (t m), Monad m) => pr -> b -> a -> t m b Source #

getQ :: (Hashable s, Hashable a, MonadState (TDl_State s a) f, Eq s, Eq a, Enum a, Bounded a) => s -> a -> f TD_Number Source #
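
getQ, queryQ, modifyQ, and modifyZ read like the MonadState plumbing from which the two learners below are assembled. As an illustration, here is a textbook one-step Sarsa backup written against getQ and modifyQ alone; the update rule is the standard delta = r + gamma * Q(s',a') - Q(s,a), TD_Number is assumed numeric, and only the helper signatures come from this page:

-- One Sarsa-style backup using the helpers above. The first argument of
-- modifyQ is unconstrained (t), so the problem handle passes straight through.
sarsaBackup :: (Eq s, Eq a, Hashable s, Hashable a, Enum a, Bounded a,
                MonadState (TDl_State s a) m)
            => pr
            -> TD_Number -> TD_Number      -- alpha, gamma
            -> (s, a, TD_Number, s, a)     -- (s, a, r, s', a')
            -> m ()
sarsaBackup pr alpha gamma (s, a, r, s', a') = do
  q  <- getQ s  a
  q' <- getQ s' a'
  let delta = r + gamma * q' - q           -- TD error
  modifyQ pr s a (+ alpha * delta)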

tdl_learn :: (MonadRnd g m, TDl_Problem pr m s a) => TDl_Opts -> Q s a -> s -> pr -> m (s, Q s a) Source #

TD(λ) learning, a.k.a. Sarsa(λ); see Sutton & Barto, Reinforcement Learning: An Introduction, p. 171.
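
The type reads as one episode per call: it takes options, the current Q table, a start state, and the problem, and returns the final state together with the updated table. A sketch of an episode loop under that reading; the loop itself is an assumption, only tdl_learn's signature comes from this page:

-- Thread Q through n episodes from a fixed start state.
trainEpisodes :: (MonadRnd g m, TDl_Problem pr m s a)
              => Int -> TDl_Opts -> s -> pr -> Q s a -> m (Q s a)
trainEpisodes 0 _    _  _  q = pure q
trainEpisodes n opts s0 pr q = do
  (_final, q') <- tdl_learn opts q s0 pr
  trainEpisodes (n - 1) opts s0 pr q'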

qlw_learn :: (MonadRnd g m, TDl_Problem pr m s a) => TDl_Opts -> Q s a -> s -> pr -> m (s, Q s a) Source #

Watkins's Q(λ) learning algorithm; see Sutton & Barto, Reinforcement Learning: An Introduction, p. 174.
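
qlw_learn has the same type as tdl_learn, so the trainEpisodes sketch above applies verbatim with qlw_learn substituted. The textbook difference in Watkins's variant is that eligibility traces are cut to zero whenever an exploratory (non-greedy) action is taken.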