------------------------------------------------------------------------------- -- | -- Module : Environments.Gym.Internal -- Copyright : (c) Sentenai 2017 -- License : BSD3 -- Maintainer: sam@sentenai.com -- Stability : experimental -- Portability: non-portable -- -- Underlying implementation to run a Gym environment using the -- @gym-http-client@. ------------------------------------------------------------------------------- {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} module Environments.Gym.Internal ( GymEnvironmentT(..) , RunnerT , Runner , GymException(..) , runEnvironmentT , runEnvironment , runDefaultEnvironmentT , runDefaultEnvironment , _reset , _step ) where import Control.MonadEnv import Reinforce.Prelude import Data.Aeson import Control.MonadMWCRandom import Control.Monad.Except import qualified Data.Text as T (pack) import qualified OpenAI.Gym as OpenAI import Servant.Client import Data.Logger (Event) import qualified Data.Logger as Logger import Network.HTTP.Client (Manager, newManager, defaultManagerSettings) import OpenAI.Gym ( GymEnv(..) , InstID(..) , Observation(..) ) -- | A newtype wrapper around Servant so that 'GymEnvironmentT' doesn't require -- Servant to be at the bottom of the stack newtype ClientT t a = ClientT (ReaderT ClientEnv (ExceptT ServantError t) a) deriving ( Functor , Applicative , Monad , MonadError ServantError , MonadReader ClientEnv , MonadIO , MonadThrow ) instance MonadTrans ClientT where lift = ClientT . lift . lift -- | runner of a ClientT, which should really be in Servant, but since it's not -- there we will roll our own. This allows us to have other things at the botton -- of our transformer stack runClientT :: MonadIO t => ClientT t a -> ClientEnv -> t (Either ServantError a) runClientT (ClientT m) env = runExceptT $ runReaderT m env -- | Lift a ClientM action into ClientT liftClientM :: MonadIO t => ClientM a -> ClientT t a liftClientM m = ClientT . ReaderT $ \env -> ExceptT $ liftIO (runClientM m env) -- | The gym environment, which is shared through all openai/gym instances newtype GymEnvironmentT s a t x = GymEnvironmentT { getEnvironmentT :: RWST GymConfigs (DList (Event Reward s a)) (LastState s) (ClientT t) x } deriving ( Functor , Applicative , Monad , MonadIO , MonadThrow , MonadReader GymConfigs , MonadWriter (DList (Event Reward s a)) , MonadState (LastState s) , MonadRWS GymConfigs (DList (Event Reward s a)) (LastState s) ) instance MonadTrans (GymEnvironmentT s a) where lift = GymEnvironmentT . lift . lift -- | Use IO as the base monad in the transformer stack. type GymEnvironment s a = GymEnvironmentT s a IO instance (MonadIO t, MonadMWCRandom t) => MonadMWCRandom (GymEnvironmentT s a t) where getGen = liftIO getGen -- | run a servant action in 'GymEnvironmentT' inEnvironment :: MonadIO t => ClientM x -> GymEnvironmentT s a t x inEnvironment c = GymEnvironmentT $ lift $ liftClientM c -- | type alias for a runEnvironment type which executes an environment action type RunnerT s a t x = Bool -> GymEnvironmentT s a t x -> t (Either ServantError (DList (Event Reward s a))) -- | type alias of 'RunnerT' in IO type Runner s a x = RunnerT s a IO x -- ========================================================================= -- -- | Configurations for an environment -- TODO: there are more of these in the gym documentation data GymConfigs = GymConfigs InstID Bool -- ^ the instance id, as well as a flag of if we want to render the state -- | Remember the last state of the episode data LastState o = LastState Integer o -- ^ the episode number and last state | Uninitialized Integer -- ^ a flag that the state is no longer initialized, and the current episode deriving (Eq, Show) -- | Possible errors we might encounter while interacting with our environment data GymException = UnexpectedServerResponse String | TypeError String | EnvironmentRequiresReset deriving (Show) instance Exception GymException where -- | run an effectful gym to completion and return either an error, or the history of the agent. -- FIXME: move this into a History monad seperate from the Environment so that we don't blow up memory. runEnvironmentT :: forall o a t x . MonadIO t => GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a))) runEnvironmentT t m u mon env = runClientT action (ClientEnv m u) where action :: ClientT t (DList (Event Reward o a)) action = do i <- liftClientM $ OpenAI.envCreate t (_, w) <- execRWST (getEnvironmentT renderableEnv) (GymConfigs i mon) (Uninitialized 0) liftClientM $ OpenAI.envClose i return w renderableEnv :: GymEnvironmentT o a t () renderableEnv = if mon then withMonitor env else void env -- | same as 'runEnvironmentT', but with IO as the base monad runEnvironment :: GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a))) runEnvironment = runEnvironmentT -- | same as 'runEnvironmentT', however use http-client's default manager settings runDefaultEnvironmentT :: MonadIO t => GymEnv -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a))) runDefaultEnvironmentT t m e = do mngr <- liftIO (newManager defaultManagerSettings) runEnvironmentT t mngr (BaseUrl Http "localhost" 5000 "") m e -- | same as 'runEnvironment', however use http-client's default manager settings runDefaultEnvironment :: GymEnv -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a))) runDefaultEnvironment = runDefaultEnvironmentT -- | get the environment's id from the gym server getInstID :: Monad t => GymEnvironmentT o a t InstID getInstID = ask >>= \(GymConfigs i _) -> pure i -- | if a user wants to render the agent as it performs, this makes sure to properly -- start and stop the monitor withMonitor :: MonadIO t => GymEnvironmentT o a t x -> GymEnvironmentT o a t () withMonitor env = do i <- getInstID inEnvironment $ OpenAI.envMonitorStart i (m i) _ <- env inEnvironment $ OpenAI.envMonitorClose i return () where m :: InstID -> OpenAI.Monitor m (InstID t) = OpenAI.Monitor ("/tmp/"<> T.pack (show CartPoleV0) <>"-" <> t) True False False -- | ensure that the gym has reset before stepping, otherwise throw 'EnvironmentRequiresReset' stepCheck :: MonadThrow t => GymEnvironmentT o a t () stepCheck = get >>= \case Uninitialized _ -> throwM EnvironmentRequiresReset LastState _ _ -> return () -- | generic rest function which makes a call to the gym and returns the first observation _reset :: (MonadIO t, MonadThrow t, FromJSON o) => GymEnvironmentT o a t (Initial o) _reset = do i <- getInstID Observation o <- inEnvironment . OpenAI.envReset $ i s <- aesonToState o get >>= \case Uninitialized ep -> put $ LastState (ep+1) s LastState ep _ -> put $ LastState (ep+1) s return $ Initial s -- | generic step function which takes a ToJSONable action and returns a reward -- and a FromJSONable state _step :: (MonadIO t, MonadThrow t, ToJSON a, r ~ Reward, FromJSON o) => a -> GymEnvironmentT o a t (Obs r o) _step a = do stepCheck GymConfigs i mon <- ask out <- inEnvironment . OpenAI.envStep i $ renderStep mon let r = OpenAI.reward out if OpenAI.done out then do s <- aesonToMaybeState (OpenAI.observation out) LastState ep prior <- get tell . pure $ Logger.Event ep r prior a return $ Done r s else do s <- aesonToState (OpenAI.observation out) let n = Next r s LastState ep prior <- get put $ LastState ep s tell . pure $ Logger.Event ep r prior a return n where renderStep :: Bool -> OpenAI.Step renderStep = OpenAI.Step (toJSON a) -- | Convert an aeson value into an environment's state aesonToState :: forall o m . (FromJSON o, MonadThrow m) => Value -> m o aesonToState = aesonToMaybeState >=> \case Nothing -> throw $ UnexpectedServerResponse "observation returned was null" Just o -> pure o -- | Convert an aeson value into an environment's state, but safer aesonToMaybeState :: forall o m . (FromJSON o, MonadThrow m) => Value -> m (Maybe o) aesonToMaybeState Null = pure Nothing aesonToMaybeState o = case (fromJSON o :: Result o) of Error str -> throw $ UnexpectedServerResponse str Success o' -> pure $ Just o'