From 68ad76472732a6a98c03ef8442f12e95801f723b Mon Sep 17 00:00:00 2001
From: spencerfolk
Date: Mon, 20 Jan 2025 12:32:43 -0500
Subject: [PATCH] Made quadrotor gymnasium env compatible with Gymnasium 1.0.0 (Issue #11)

---
 examples/gymnasium_basic_usage.py          |  2 +-
 examples/ppo_hover_train.py                |  2 +-
 pyproject.toml                             |  2 +-
 rotorpy/learning/quadrotor_environments.py | 58 +++++++++++++----------
 4 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/examples/gymnasium_basic_usage.py b/examples/gymnasium_basic_usage.py
index e5f356b..fdf9ea5 100644
--- a/examples/gymnasium_basic_usage.py
+++ b/examples/gymnasium_basic_usage.py
@@ -48,7 +48,7 @@ env = gym.make("Quadrotor-v0",
 # Now reset the quadrotor.
 # Setting initial_state to 'random' will randomly place the vehicle in the map near the origin.
 # But you can also set the environment resetting to be deterministic.
-observation, info = env.reset(initial_state='random')
+observation, info = env.reset(options={'initial_state': 'random'})
 
 # Number of timesteps
 T = 300
diff --git a/examples/ppo_hover_train.py b/examples/ppo_hover_train.py
index c8401f7..fdc5d5c 100644
--- a/examples/ppo_hover_train.py
+++ b/examples/ppo_hover_train.py
@@ -59,7 +59,7 @@ env = gym.make("Quadrotor-v0",
 # check_env(env, warn=True)  # you can check the environment using built-in tools
 
 # Reset the environment
-observation, info = env.reset(initial_state='random', options={'pos_bound': 2, 'vel_bound': 0})
+observation, info = env.reset(options={'initial_state': 'random', 'pos_bound': 2, 'vel_bound': 0})
 
 # Create a new model
 model = PPO(MlpPolicy, env, verbose=1, ent_coef=0.01, tensorboard_log=log_dir)
diff --git a/pyproject.toml b/pyproject.toml
index 8922589..840fcee 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ dependencies = [
     'scipy',
     'pandas',
     'tqdm',
-    'gymnasium == 0.29.1',
+    'gymnasium',
 ]
 
 [project.optional-dependencies]
diff --git a/rotorpy/learning/quadrotor_environments.py b/rotorpy/learning/quadrotor_environments.py
index 8a02989..6b8338f 100644
--- a/rotorpy/learning/quadrotor_environments.py
+++ b/rotorpy/learning/quadrotor_environments.py
@@ -70,9 +70,9 @@ class QuadrotorEnv(gym.Env):
 
         self.metadata['render_fps'] = render_fps
 
-        self.initial_state = initial_state
+        self.initial_state = deepcopy(initial_state)
 
-        self.vehicle_state = initial_state
+        self.vehicle_state = deepcopy(initial_state)
 
         assert control_mode in self.metadata["control_modes"]  # Don't accept improper control modes
         self.control_mode = control_mode
@@ -184,34 +184,48 @@ class QuadrotorEnv(gym.Env):
         # Close the plots
         plt.close('all')
 
-    def reset(self, seed=None, initial_state='random', options={'pos_bound': 2, 'vel_bound': 0}):
+    def reset(self, seed=None, options=None):
         """ Reset the environment
         Inputs:
-            seed: the seed for any random number generation, mostly for reproducibility.
-            initial_state: determines how to set the quadrotor again. Options are...
-                'random': will randomly select the state of the quadrotor.
-                'deterministic': will set the state to the initial state selected by the user when creating
-                                the quadrotor environment (usually hover).
-                the user can also specify the state itself as a dictionary... e.g.
-                reset(options={'initial_state':
-                    {'x': np.array([0,0,0]),
-                     'v': np.zeros(3,),
-                     'q': np.array([0, 0, 0, 1]), # [i,j,k,w]
-                     'w': np.zeros(3,),
-                     'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
-                     'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])}
-                    })
+            seed: the seed for any random number generation, mostly for reproducibility.
+            options: dictionary of miscellaneous options for resetting the scene; missing keys (or options=None) default to {'initial_state': 'random', 'pos_bound': 2, 'vel_bound': 0}.
+                'initial_state': determines how the quadrotor's state is reset. Options are...
+                    'random': will randomly select the state of the quadrotor.
+                    'deterministic': will set the state to the initial state selected by the user when creating
+                                    the quadrotor environment (usually hover).
+                    the user can also specify the state itself as a dictionary... e.g.
+                        reset(options={'initial_state':
+                            {'x': np.array([0,0,0]),
+                             'v': np.zeros(3,),
+                             'q': np.array([0, 0, 0, 1]), # [i,j,k,w]
+                             'w': np.zeros(3,),
+                             'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
+                             'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])}
+                            })
             'pos_bound': the min/max position region for random placement.
             'vel_bound': the min/max velocity region for random placement
         """
+        # Gymnasium wrappers may call reset with options=None, so fall back to an empty dict.
+        if options is None:
+            options = {}
+
+        # If any options are not specified, set them to default values.
+        if 'pos_bound' not in options:
+            options['pos_bound'] = 2
+        if 'vel_bound' not in options:
+            options['vel_bound'] = 0
+        if 'initial_state' not in options:
+            options['initial_state'] = 'random'
+
+        # Assert that the bounds are greater than or equal to 0.
         assert options['pos_bound'] >= 0 and options['vel_bound'] >= 0 , "Bounds must be greater than or equal to 0."
 
+        # Reset the gym environment
         super().reset(seed=seed)
 
-        if initial_state == 'random':
+        if options['initial_state'] == 'random':
             # Randomly select an initial state for the quadrotor. At least assume it is level.
             pos = np.random.uniform(low=-options['pos_bound'], high=options['pos_bound'], size=(3,))
             vel = np.random.uniform(low=-options['vel_bound'], high=options['vel_bound'], size=(3,))
@@ -222,13 +236,13 @@ class QuadrotorEnv(gym.Env):
                      'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
                      'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])}
 
-        elif initial_state == 'deterministic':
+        elif options['initial_state'] == 'deterministic':
             state = self.initial_state
 
-        elif isinstance(initial_state, dict):
+        elif isinstance(options['initial_state'], dict):
             # Ensure the correct keys are in dict.
-            if all(key in initial_state for key in ('x', 'v', 'q', 'w', 'wind', 'rotor_speeds')):
-                state = initial_state
+            if all(key in options['initial_state'] for key in ('x', 'v', 'q', 'w', 'wind', 'rotor_speeds')):
+                state = options['initial_state']
             else:
                 raise KeyError("Missing state keys in your initial_state. You must specify values for ('x', 'v', 'q', 'w', 'wind', 'rotor_speeds')")
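
Reviewer's note: below is a minimal usage sketch of the new options-based reset API, not part of the patch. It assumes that importing rotorpy.learning.quadrotor_environments registers "Quadrotor-v0" (as the gym.make calls in the examples above imply) and that the environment's constructor defaults are acceptable for a bare gym.make; the option keys and the state dictionary mirror the docstring in the diff, while hover_state is an illustrative name.

# Minimal usage sketch (assumptions noted above, not part of the patch).
import gymnasium as gym
import numpy as np

import rotorpy.learning.quadrotor_environments  # noqa: F401  # assumed to register "Quadrotor-v0"

env = gym.make("Quadrotor-v0")  # constructor defaults assumed sufficient here

# Pre-patch call, now invalid: env.reset(initial_state='random')
# Post-patch, every reset knob routes through the options dict:
observation, info = env.reset(seed=42,
                              options={'initial_state': 'random',
                                       'pos_bound': 2,
                                       'vel_bound': 0})

# Missing keys (or no options at all) fall back to the defaults filled in by reset():
observation, info = env.reset()

# A fully specified initial state, mirroring the docstring example in the diff:
hover_state = {'x': np.zeros(3),
               'v': np.zeros(3),
               'q': np.array([0, 0, 0, 1]),   # [i,j,k,w]
               'w': np.zeros(3),
               'wind': np.zeros(3),           # overwritten by the wind profile
               'rotor_speeds': np.full(4, 1788.53)}
observation, info = env.reset(options={'initial_state': hover_state})

Routing everything through options keeps the method signature-compatible with gymnasium.Env.reset(seed=None, options=None); Gymnasium 1.0 wrappers such as TimeLimit and OrderEnforcing forward options=None when the caller omits it, which is why the None guard in the reset hunk above matters.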