rl-satton-0.1.2.4: Collection of Reinforcement Learning algorithms

Safe Haskell: None
Language: Haskell2010

RL.TD


Documentation

data Q_Opts Source #

Constructors

Q_Opts 

Instances

type Q s a = M s a TD_Number Source #

toV :: (Bounded a, Enum a, Eq a, Hashable a, Eq s, Hashable s) => Q s a -> HashMap s TD_Number Source #
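The Q s a table holds one estimated value per state/action pair; toV collapses the action dimension into a per-state value map. As a small illustration, a sketch that prints those values, assuming the returned HashMap is the one from unordered-containers, that TD_Number has a Show instance, and that printValues is a hypothetical helper rather than a package function:

import qualified Data.HashMap.Strict as HM
import Data.Hashable (Hashable)
import RL.TD

-- Hypothetical helper (not part of the package): print the per-state
-- value estimates extracted from a learned Q table.
printValues :: (Show s, Eq s, Hashable s, Eq a, Hashable a, Enum a, Bounded a)
            => Q s a -> IO ()
printValues q = mapM_ print (HM.toList (toV q))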

class (Monad m, Eq s, Hashable s, Show s, Eq a, Hashable a, Enum a, Bounded a, Show a) => TD_Problem pr m s a | pr -> m, pr -> s, pr -> a where Source #

Methods

td_is_terminal :: pr -> s -> Bool Source #

td_greedy :: pr -> Bool -> a -> a Source #

td_reward :: pr -> s -> a -> s -> TD_Number Source #

td_transition :: pr -> s -> a -> Q s a -> m s Source #

td_modify :: pr -> s -> a -> Q s a -> m () Source #
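To show how the class fits together, here is a rough sketch of a problem instance. Everything in it is an assumption made for illustration: the Corridor and Move types, the reading of td_greedy's Bool flag, the choice of IO as the problem monad, and the Num instance of TD_Number; none of it comes from the package.

{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, DeriveGeneric #-}

import Data.Hashable (Hashable)
import GHC.Generics (Generic)
import RL.TD

-- A toy problem: a corridor of cells 0 .. corridorLen, terminating at both ends.
data Move = MoveLeft | MoveRight
  deriving (Show, Eq, Enum, Bounded, Generic)

instance Hashable Move

data Corridor = Corridor { corridorLen :: Int }

instance TD_Problem Corridor IO Int Move where
  -- An episode is over at either end of the corridor.
  td_is_terminal pr s = s <= 0 || s >= corridorLen pr
  -- Assumed reading of the Bool flag: keep the proposed action unchanged
  -- in both the greedy and the exploring case.
  td_greedy _ _ a = a
  -- Reward 1 for reaching the right end, 0 otherwise.
  td_reward pr _ _ s' = if s' >= corridorLen pr then 1 else 0
  -- Deterministic dynamics; the current Q table is available but unused here.
  td_transition _ s MoveLeft  _ = pure (s - 1)
  td_transition _ s MoveRight _ = pure (s + 1)
  -- Per-step hook (e.g. for tracing); a no-op in this toy problem.
  td_modify _ _ _ _ = pure ()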

queryQ :: (Hashable s, Hashable k, MonadState (M s k v) f, Eq s, Eq k, Enum k, Bounded k) => s -> f [(k, v)] Source #

modifyQ :: (TD_Problem pr m s a, MonadTrans t, MonadState (Q s a) (t m)) => pr -> s -> a -> (TD_Number -> TD_Number) -> t m () Source #

action :: (TD_Problem pr m1 s1 a, MonadRnd g m, Hashable s, MonadState (M s a num) m, Real num, Fractional num, Eq s) => pr -> s -> num -> m (a, num) Source #

transition :: (TD_Problem pr m b a, MonadTrans t, MonadState (Q b a) (t m)) => pr -> b -> a -> t m b Source #

q_learn :: (MonadRnd g m, TD_Problem pr m s a) => Q_Opts -> Q s a -> s -> pr -> m (s, Q s a) Source #

Q-Learning algorithm
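As a usage sketch: assuming a single q_learn call plays out one episode from the given start state (so the returned state is terminal) and that the surrounding monad already satisfies the MonadRnd and TD_Problem constraints, a training driver could look roughly like this. learnEpisodes is a hypothetical helper, not a package function; its type is left to be inferred.

-- Hypothetical driver: run n episodes of Q-learning from the same start
-- state s0, threading the updated Q table through each call.
learnEpisodes 0 _    q _  _  = pure q
learnEpisodes n opts q s0 pr = do
  (_sTerminal, q') <- q_learn opts q s0 pr     -- one learning episode
  learnEpisodes (n - 1 :: Int) opts q' s0 pr   -- continue with the updated table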

q_exec :: (MonadRnd g m, TD_Problem pr m s a) => Q_Opts -> Q s a -> s -> pr -> m s Source #

Q-Executive algorithm. Actions are taken greedily; no learning is performed.
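Combined with the training sketch above, a train-then-evaluate run could look like the following (again only a sketch; trainThenRun is a hypothetical name and the episode count is arbitrary):

-- Hypothetical: learn for a while, then replay the greedy policy once from s0
-- and return the state the episode ends in. q_exec performs no updates.
trainThenRun opts q0 s0 pr = do
  q <- learnEpisodes 1000 opts q0 s0 pr   -- training sketch above
  q_exec opts q s0 pr                     -- greedy execution only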