module Algorithms.MDP.CTMDP
( CTMDP (..)
, mkCTMDP
, Rates
, uniformize
) where
import qualified Data.Vector as V
import Algorithms.MDP (MDP(MDP))
import Algorithms.MDP hiding (MDP (..))
data CTMDP a b t = CTMDP
{ _states :: V.Vector a
, _actions :: V.Vector b
, _fixedCosts :: V.Vector (V.Vector t)
, _rateCosts :: V.Vector (V.Vector t)
, _rates :: V.Vector (V.Vector t)
, _trans :: V.Vector (V.Vector (V.Vector t))
, _discount :: t
, _actionSet :: V.Vector (V.Vector Int)
}
type Rates a b t = b -> a -> t
mkCTMDP :: (Eq b) =>
[a]
-> [b]
-> Transitions a b t
-> Rates a b t
-> Costs a b t
-> Costs a b t
-> ActionSet a b
-> t
-> CTMDP a b t
mkCTMDP states actions trans rates fixedCost rateCost actionSet discount =
let
_states = V.fromList states
_actions = V.fromList actions
_states' = V.fromList [0..length states 1]
_actions' = V.fromList [0..length actions 1]
mkCostVecFor cf ac = V.fromList $ map (cf ac) states
_fixedCosts = V.fromList $ map (mkCostVecFor fixedCost) actions
_rateCosts = V.fromList $ map (mkCostVecFor rateCost) actions
mkProbAS a s = V.fromList $ map (trans a s) states
mkProbA a = V.fromList $ map (mkProbAS a) states
_trans = V.fromList $ map mkProbA actions
mkTransVec ac = V.fromList $ map (rates ac) states
_rates = V.fromList $ map mkTransVec actions
actionPairs = zip [0..] actions
actionSet' st = V.fromList $ map fst $ filter ((`elem` acs) . snd) actionPairs
where
acs = actionSet st
_actionSet = V.fromList $ map actionSet' states
in
CTMDP
{ _states = _states
, _actions = _actions
, _fixedCosts = _fixedCosts
, _rateCosts = _rateCosts
, _rates = _rates
, _trans = _trans
, _discount = discount
, _actionSet = _actionSet
}
uniformize :: (Ord t, Fractional t) => CTMDP a b t -> MDP a b t
uniformize ctmdc =
let
states = _states ctmdc
actions = _actions ctmdc
trans = _trans ctmdc
rateCosts = _rateCosts ctmdc
fixedCosts = _fixedCosts ctmdc
rates = _rates ctmdc
actionSet = _actionSet ctmdc
discount = _discount ctmdc
nStates = length states
nActions = length actions
nu = maximum (fmap maximum rates)
beta = nu * (1 / discount 1)
rescaleProb ac s v = V.imap (\t z -> newP t z) v
where
newP t z = if s == t
then (nu r + z * r) / (beta + nu)
else r * z / (beta + nu)
r = rates V.! ac V.! s
trans' = V.imap (\a vv -> V.imap (\s v -> rescaleProb a s v) vv) trans
costFor ac s = nu * ((beta + r) * f + rc) / (beta + nu)
where
f = fixedCosts V.! ac V.! s
rc = rateCosts V.! ac V.! s
r = rates V.! ac V.! s
costs' = V.generate nActions (\ac -> V.generate nStates (costFor ac))
discount' = nu / (beta + nu)
in
MDP states actions costs' trans' discount' actionSet