Safe Haskell | None |
---|---|
Language | Haskell2010 |
- type Probability = Rational
- type P s a = HashMap s (Set (a, Probability))
- type V s num = HashMap s num
- diffV :: (Eq s, Hashable s, Num num) => V s num -> V s num -> num
- class (Ord s, Ord a, Fractional num, Ord num, Hashable s) => DP_Problem pr s a num | pr -> s, pr -> a, pr -> num where
- action :: DP_Problem pr s a num => pr -> P s a -> s -> Set (a, Probability)
- initV :: DP_Problem pr s a num => pr -> num -> V s num
- invariant_probable_actions :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool
- invariant_closed_transition :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool
- invariant_no_dead_states :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool
- invariant_terminal :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool
- invariant_policy_actions :: (DP_Problem pr s a num, Ord a, Show s, Show a) => P s a -> pr -> Bool
- invariant_policy_prob :: (DP_Problem pr s a num, Ord a, Show s, Show a) => P s a -> pr -> Bool
- invariant :: (DP_Problem pr s a num, Show s, Show a, Ord a) => pr -> Bool
- policy_eq :: (Eq a, DP_Problem pr s a num) => pr -> P s a -> P s a -> Bool
- uniformPolicy :: (Ord a, DP_Problem pr s a num) => pr -> P s a
- data Opts num s a = Opts {
- eo_gamma :: num
- eo_etha :: num
- eo_max_iter :: Int
- defaultOpts :: Fractional num => Opts num s a
- data EvalState num s = EvalState {}
- es_v' :: forall num s. Lens' (EvalState num s) (V s num)
- es_v :: forall num s. Lens' (EvalState num s) (V s num)
- es_iter :: forall num s. Lens' (EvalState num s) Int
- es_delta :: forall num s. Lens' (EvalState num s) num
- initEvalState :: Fractional num => V s num -> EvalState num s
- policy_eval :: (Monad m, DP_Problem pr s a num) => Opts num s a -> P s a -> V s num -> DP pr m s a num -> m (V s num)
- policy_action_value :: DP_Problem pr s a num => Opts num s a -> s -> a -> V s num -> pr -> num
- policy_improve :: (Monad m, DP_Problem pr s a num) => Opts num s a -> V s num -> DP pr m s a num -> m (P s a)
- data DP pr m s a num = DP {}
- policy_iteration :: (Monad m, DP_Problem pr s a num, Ord a) => Opts num s a -> P s a -> V s num -> DP pr m s a num -> m (V s num, P s a)
Documentation
type Probability = Rational Source #
Probability, a value in the range [0..1].
class (Ord s, Ord a, Fractional num, Ord num, Hashable s) => DP_Problem pr s a num | pr -> s, pr -> a, pr -> num where Source #
Dynamic Programming Problem. The type parameters have the following meaning:
- num: type of numbers
- pr: the problem
- s: state
- a: action
dp_states :: pr -> Set s Source #
dp_actions :: pr -> s -> Set a Source #
dp_transitions :: pr -> s -> a -> Set (s, Probability) Source #
dp_reward :: pr -> s -> a -> s -> num Source #
dp_terminal_states :: pr -> Set s Source #
action :: DP_Problem pr s a num => pr -> P s a -> s -> Set (a, Probability) Source #
initV :: DP_Problem pr s a num => pr -> num -> V s num Source #
invariant_probable_actions :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool Source #
For a given state, the probabilities of all possible actions should sum to 1.
invariant_closed_transition :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool Source #
No action leads to an unlisted state.
invariant_no_dead_states :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool Source #
Terminal states are dead ends and non-terminal states are not
invariant_terminal :: (DP_Problem pr s a num, Show s, Show a) => pr -> Bool Source #
invariant_policy_actions :: (DP_Problem pr s a num, Ord a, Show s, Show a) => P s a -> pr -> Bool Source #
invariant_policy_prob :: (DP_Problem pr s a num, Ord a, Show s, Show a) => P s a -> pr -> Bool Source #
uniformPolicy :: (Ord a, DP_Problem pr s a num) => pr -> P s a Source #
Opts | |
|
defaultOpts :: Fractional num => Opts num s a Source #
initEvalState :: Fractional num => V s num -> EvalState num s Source #
policy_eval :: (Monad m, DP_Problem pr s a num) => Opts num s a -> P s a -> V s num -> DP pr m s a num -> m (V s num) Source #
Iterative policy evaluation algorithm (Figure 4.1, p. 86).
policy_action_value :: DP_Problem pr s a num => Opts num s a -> s -> a -> V s num -> pr -> num Source #
policy_improve :: (Monad m, DP_Problem pr s a num) => Opts num s a -> V s num -> DP pr m s a num -> m (P s a) Source #