Merge pull request #8 from spencerfolk/reinforcement_learning
v1.1.0 Reinforcement Learning
This commit is contained in:
6
rotorpy/data_out/.gitignore
vendored
6
rotorpy/data_out/.gitignore
vendored
@@ -1,5 +1,3 @@
|
||||
# Never commit output data.
|
||||
*.mp4
|
||||
*.json
|
||||
*.pdf
|
||||
*.csv
|
||||
*
|
||||
!.gitignore
|
||||
2
rotorpy/learning/.gitignore
vendored
Normal file
2
rotorpy/learning/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/logs/
|
||||
/policies/
|
||||
7
rotorpy/learning/README.md
Normal file
7
rotorpy/learning/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Learning Module
|
||||
|
||||
In the learning module, we provide a bridge between RotorPy's dynamic models with aerodynamics, wind, motor dynamics, and more with popular reinforcement learning (RL) APIs to develop and compare RL algorithms and control abstractions for UAV control.
|
||||
|
||||
An environment compatible with [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) is implemented as `QuadrotorEnv`. In this environment, the user can provide a reward function and choose an appropriate control abstraction ranging from high level (velocity vector) all the way down to low level individual motor speed control. For higher level abstractions, there are lower level controllers that run within the dynamics to track your commands (note, tuning may be necessary here to get desired performance).
|
||||
|
||||
In the current implementation, the observation space contains the entire state space of the UAV but the environment can easily be changed to suit your needs. All actions are assumed to be in the range $[-1,1]$.
|
||||
6
rotorpy/learning/__init__.py
Normal file
6
rotorpy/learning/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from gymnasium.envs.registration import register
|
||||
|
||||
register(
|
||||
id="Quadrotor-v0",
|
||||
entry_point="rotorpy.learning.quadrotor_environments:QuadrotorEnv",
|
||||
)
|
||||
441
rotorpy/learning/quadrotor_environments.py
Normal file
441
rotorpy/learning/quadrotor_environments.py
Normal file
@@ -0,0 +1,441 @@
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.colors as mcolors
|
||||
|
||||
from enum import Enum
|
||||
from rotorpy.world import World
|
||||
from rotorpy.vehicles.multirotor import Multirotor
|
||||
from rotorpy.vehicles.crazyflie_params import quad_params as crazyflie_params
|
||||
from rotorpy.learning.quadrotor_reward_functions import hover_reward
|
||||
from rotorpy.utils.shapes import Quadrotor
|
||||
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
|
||||
import math
|
||||
from copy import deepcopy
|
||||
|
||||
class QuadrotorEnv(gym.Env):
|
||||
"""
|
||||
|
||||
A quadrotor environment for reinforcement learning using Gymnasium.
|
||||
|
||||
Inputs:
|
||||
initial_state: the initial state of the quadrotor. The default is hover.
|
||||
control_mode: the appropriate control abstraction that is used by the controller, options are...
|
||||
'cmd_motor_speeds': the controller directly commands motor speeds.
|
||||
'cmd_motor_thrusts': the controller commands forces for each rotor.
|
||||
'cmd_ctbr': the controller commands a collective thrsut and body rates.
|
||||
'cmd_ctbm': the controller commands a collective thrust and moments on the x/y/z body axes
|
||||
'cmd_vel': the controller commands a velocity vector in the body frame.
|
||||
reward_fn: the reward function, default to hover, but the user can pass in any function that is used as a reward.
|
||||
quad_params: the parameters for the quadrotor.
|
||||
max_time: the maximum time of the session. After this time, the session will exit.
|
||||
world: the world for the quadrotor to operate within.
|
||||
sim_rate: the simulation rate (in Hz), i.e. the timestep.
|
||||
aero: boolean, determines whether or not aerodynamic wrenches are computed.
|
||||
render_mode: render the quadrotor.
|
||||
render_fps: rendering frames per second, lower this for faster visualization.
|
||||
ax: for plotting purposes, you can supply an axis object that the quadrotor will visualize on.
|
||||
color: choose the color of the quadrotor. If none, it will randomly select a color.
|
||||
"""
|
||||
|
||||
metadata = {"render_modes": ["None", "3D", "console"],
|
||||
"render_fps": 30,
|
||||
"control_modes": ['cmd_motor_speeds', 'cmd_motor_thrusts', 'cmd_ctbr', 'cmd_ctbm', 'cmd_vel']}
|
||||
|
||||
def __init__(self,
|
||||
initial_state = {'x': np.array([0,0,0]),
|
||||
'v': np.zeros(3,),
|
||||
'q': np.array([0, 0, 0, 1]), # [i,j,k,w]
|
||||
'w': np.zeros(3,),
|
||||
'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
|
||||
'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])},
|
||||
control_mode = 'cmd_vel',
|
||||
reward_fn = hover_reward,
|
||||
quad_params = crazyflie_params,
|
||||
max_time = 10, # Maximum time to run the simulation for in a single session.
|
||||
wind_profile = None, # wind profile object, if none is supplied it will choose no wind.
|
||||
world = None, # The world object
|
||||
sim_rate = 100, # The update frequency of the simulator in Hz
|
||||
aero = True, # Whether or not aerodynamic wrenches are computed.
|
||||
render_mode = "None", # The rendering mode
|
||||
render_fps = 30, # The rendering frames per second. Lower this for faster visualization.
|
||||
fig = None, # Figure for rendering. Optional.
|
||||
ax = None, # Axis for rendering. Optional.
|
||||
color = None, # The color of the quadrotor.
|
||||
):
|
||||
super(QuadrotorEnv, self).__init__()
|
||||
|
||||
self.metadata['render_fps'] = render_fps
|
||||
|
||||
self.initial_state = initial_state
|
||||
|
||||
self.vehicle_state = initial_state
|
||||
|
||||
assert control_mode in self.metadata["control_modes"] # Don't accept improper control modes
|
||||
self.control_mode = control_mode
|
||||
|
||||
self.sim_rate = sim_rate
|
||||
self.t_step = 1/self.sim_rate
|
||||
self.reward_fn = reward_fn
|
||||
|
||||
# Create quadrotor from quad params and control abstraction.
|
||||
self.quadrotor = Multirotor(quad_params=quad_params, initial_state=initial_state, control_abstraction=control_mode, aero=aero)
|
||||
|
||||
assert render_mode is None or render_mode in self.metadata["render_modes"]
|
||||
self.render_mode = render_mode
|
||||
|
||||
self.t = 0 # The current time of the instance
|
||||
self.max_time = max_time
|
||||
|
||||
############ OBSERVATION SPACE
|
||||
|
||||
# The observation state is the full state of the quadrotor.
|
||||
# position, x, observation_state[0:3]
|
||||
# velocity, v, observation_state[3:6]
|
||||
# orientation, q, observation_state[6:10]
|
||||
# body rates, w, observation_state[10:13]
|
||||
# wind, wind, observation_state[13:16]
|
||||
# motor_speeds, rotor_speeds, observation_state[16:20]
|
||||
# For simplicitly, we assume these observations can lie within -inf to inf.
|
||||
|
||||
self.observation_space = spaces.Box(low = -np.inf, high=np.inf, shape = (13,), dtype=np.float32)
|
||||
|
||||
############ ACTION SPACE
|
||||
|
||||
# For generalizability, we assume the controller outputs 4 numbers between -1 and 1. Depending on the control mode, we scale these appropriately.
|
||||
|
||||
if self.control_mode == 'cmd_vel':
|
||||
self.action_space = spaces.Box(low = -1, high = 1, shape = (3,), dtype=np.float32)
|
||||
else:
|
||||
self.action_space = spaces.Box(low = -1, high = 1, shape = (4,), dtype=np.float32)
|
||||
|
||||
###### Min/max values for scaling control outputs.
|
||||
|
||||
self.rotor_speed_max = self.quadrotor.rotor_speed_max
|
||||
self.rotor_speed_min = self.quadrotor.rotor_speed_min
|
||||
|
||||
# Compute the min/max thrust by assuming the rotor is spinning at min/max speed. (also generalizes to bidirectional rotors)
|
||||
self.max_thrust = self.quadrotor.k_eta * self.quadrotor.rotor_speed_max**2
|
||||
self.min_thrust = self.quadrotor.k_eta * self.quadrotor.rotor_speed_min**2
|
||||
|
||||
# Find the maximum moment on each axis, N-m
|
||||
self.max_roll_moment = self.max_thrust * np.abs(self.quadrotor.rotor_pos['r1'][1])
|
||||
self.max_pitch_moment = self.max_thrust * np.abs(self.quadrotor.rotor_pos['r1'][0])
|
||||
self.max_yaw_moment = self.quadrotor.k_m * self.quadrotor.rotor_speed_max**2
|
||||
|
||||
# Set the maximum body rate on each axis (this is hand selected), rad/s
|
||||
self.max_roll_br = 7.0
|
||||
self.max_pitch_br = 7.0
|
||||
self.max_yaw_br = 3.0
|
||||
|
||||
# Set the maximum speed command (this is hand selected), m/s
|
||||
self.max_vel = 3/math.sqrt(3) # Selected so that at most the max speed is 3 m/s
|
||||
|
||||
###################################################################################################
|
||||
|
||||
# Save the order of magnitude of the rotor speeds for later normalization
|
||||
self.rotor_speed_order_mag = math.floor(math.log(self.quadrotor.rotor_speed_max, 10))
|
||||
|
||||
if world is None:
|
||||
# If no world is specified, assume that it means that the intended world is free space.
|
||||
wbound = 4
|
||||
self.world = World.empty((-wbound, wbound, -wbound,
|
||||
wbound, -wbound, wbound))
|
||||
else:
|
||||
self.world = world
|
||||
|
||||
if wind_profile is None:
|
||||
# If wind is not specified, default to no wind.
|
||||
from rotorpy.wind.default_winds import NoWind
|
||||
self.wind_profile = NoWind()
|
||||
else:
|
||||
self.wind_profile = wind_profile
|
||||
|
||||
if self.render_mode == '3D':
|
||||
if fig is None and ax is None:
|
||||
self.fig = plt.figure('Visualization')
|
||||
self.ax = self.fig.add_subplot(projection='3d')
|
||||
else:
|
||||
self.fig = fig
|
||||
self.ax = ax
|
||||
if color is None:
|
||||
colors = list(mcolors.CSS4_COLORS)
|
||||
else:
|
||||
colors = [color]
|
||||
self.quad_obj = Quadrotor(self.ax, wind=True, color=np.random.choice(colors))
|
||||
self.world_artists = None
|
||||
self.title_artist = self.ax.set_title('t = {}'.format(self.t))
|
||||
|
||||
self.rendering = False # Bool for tracking when the renderer is actually rendering a frame.
|
||||
|
||||
return
|
||||
|
||||
def render(self):
|
||||
if self.render_mode == '3D':
|
||||
self._plot_quad()
|
||||
elif self.render_mode == 'console':
|
||||
self._print_quad()
|
||||
|
||||
def close(self):
|
||||
if self.fig is not None:
|
||||
# Close the plots
|
||||
plt.close('all')
|
||||
|
||||
def reset(self, seed=None, initial_state='random', options={'pos_bound': 2, 'vel_bound': 0}):
|
||||
"""
|
||||
Reset the environment
|
||||
Inputs:
|
||||
seed: the seed for any random number generation, mostly for reproducibility.
|
||||
initial_state: determines how to set the quadrotor again. Options are...
|
||||
'random': will randomly select the state of the quadrotor.
|
||||
'deterministic': will set the state to the initial state selected by the user when creating
|
||||
the quadrotor environment (usually hover).
|
||||
the user can also specify the state itself as a dictionary... e.g.
|
||||
reset(options={'initial_state':
|
||||
{'x': np.array([0,0,0]),
|
||||
'v': np.zeros(3,),
|
||||
'q': np.array([0, 0, 0, 1]), # [i,j,k,w]
|
||||
'w': np.zeros(3,),
|
||||
'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
|
||||
'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])}
|
||||
})
|
||||
options: dictionary for misc options for resetting the scene.
|
||||
'pos_bound': the min/max position region for random placement.
|
||||
'vel_bound': the min/max velocity region for random placement
|
||||
|
||||
"""
|
||||
assert options['pos_bound'] >= 0 and options['vel_bound'] >= 0 , "Bounds must be greater than or equal to 0."
|
||||
|
||||
super().reset(seed=seed)
|
||||
|
||||
if initial_state == 'random':
|
||||
# Randomly select an initial state for the quadrotor. At least assume it is level.
|
||||
pos = np.random.uniform(low=-options['pos_bound'], high=options['pos_bound'], size=(3,))
|
||||
vel = np.random.uniform(low=-options['vel_bound'], high=options['vel_bound'], size=(3,))
|
||||
state = {'x': pos,
|
||||
'v': vel,
|
||||
'q': np.array([0, 0, 0, 1]), # [i,j,k,w]
|
||||
'w': np.zeros(3,),
|
||||
'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
|
||||
'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])}
|
||||
|
||||
elif initial_state == 'deterministic':
|
||||
state = self.initial_state
|
||||
|
||||
elif isinstance(initial_state, dict):
|
||||
# Ensure the correct keys are in dict.
|
||||
if all(key in initial_state for key in ('x', 'v', 'q', 'w', 'wind', 'rotor_speeds')):
|
||||
state = initial_state
|
||||
else:
|
||||
raise KeyError("Missing state keys in your initial_state. You must specify values for ('x', 'v', 'q', 'w', 'wind', 'rotor_speeds')")
|
||||
|
||||
else:
|
||||
raise ValueError("You must either specify 'random', 'deterministic', or provide a dict containing your desired initial state.")
|
||||
|
||||
# Set the initial state.
|
||||
self.vehicle_state = state
|
||||
|
||||
# Reset the time
|
||||
self.t = 0.0
|
||||
|
||||
# Reset the reward
|
||||
self.reward = 0.0
|
||||
|
||||
# Now get observation and info using the new state
|
||||
observation = self._get_obs()
|
||||
info = self._get_info()
|
||||
|
||||
self.render()
|
||||
|
||||
return (observation, info)
|
||||
|
||||
|
||||
def step(self, action):
|
||||
|
||||
"""
|
||||
Step the quadrotor dynamics forward by one step based on the policy action.
|
||||
Inputs:
|
||||
action: The action is a 4x1 vector which depends on the control abstraction:
|
||||
|
||||
if control_mode == 'cmd_vel':
|
||||
action[0] (-1,1) := commanded velocity in x direction (will be rescaled to m/s)
|
||||
action[1] (-1,1) := commanded velocity in y direction (will be rescaled to m/s)
|
||||
action[2] (-1,1) := commanded velocity in z direction (will be rescaled to m/s)
|
||||
if control_mode == 'cmd_ctbr':
|
||||
action[0] (-1,1) := the thrust command (will be rescaled to Newtons)
|
||||
action[1] (-1,1) := the roll body rate (will be rescaled to rad/s)
|
||||
action[2] (-1,1) := the pitch body rate (will be rescaled to rad/s)
|
||||
action[3] (-1,1) := the yaw body rate (will be rescaled to rad/s)
|
||||
if control_mode == 'cmd_ctbm':
|
||||
action[0] (-1,1) := the thrust command (will be rescaled to Newtons)
|
||||
action[1] (-1,1) := the roll moment (will be rescaled to Newton-meters)
|
||||
action[2] (-1,1) := the pitch moment (will be rescaled to Newton-meters)
|
||||
action[3] (-1,1) := the yaw moment (will be rescaled to Newton-meters)
|
||||
if control_mode == 'cmd_motor_speeds':
|
||||
action[0] (-1,1) := motor 1 speed (will be rescaled to rad/s)
|
||||
action[1] (-1,1) := motor 2 speed (will be rescaled to rad/s)
|
||||
action[2] (-1,1) := motor 3 speed (will be rescaled to rad/s)
|
||||
action[3] (-1,1) := motor 4 speed (will be rescaled to rad/s)
|
||||
if control_mode == 'cmd_motor_forces':
|
||||
action[0] (-1,1) := motor 1 force (will be rescaled to Newtons)
|
||||
action[1] (-1,1) := motor 2 force (will be rescaled to Newtons)
|
||||
action[2] (-1,1) := motor 3 force (will be rescaled to Newtons)
|
||||
action[3] (-1,1) := motor 4 force (will be rescaled to Newtons)
|
||||
|
||||
"""
|
||||
|
||||
# First rescale the action and get the appropriate control dictionary given the control mode.
|
||||
self.control_dict = self.rescale_action(action)
|
||||
|
||||
# Now update the wind state using the wind profile
|
||||
self.vehicle_state['wind'] = self.wind_profile.update(0, self.vehicle_state['x'])
|
||||
|
||||
# Last perform forward integration using the commanded motor speed and the current state
|
||||
self.vehicle_state = self.quadrotor.step(self.vehicle_state, self.control_dict, self.t_step)
|
||||
observation = self._get_obs()
|
||||
|
||||
# Update t by t_step
|
||||
self.t += self.t_step
|
||||
|
||||
# Check for safety
|
||||
safe = self.safety_exit()
|
||||
|
||||
# Determine whether or not the session should terminate.
|
||||
terminated = (self.t >= self.max_time) or not safe
|
||||
|
||||
# Now compute the reward based on the current state
|
||||
self.reward = self._get_reward(observation, action) if safe else -500.0
|
||||
|
||||
# Finally get info
|
||||
info = self._get_info()
|
||||
|
||||
truncated = False
|
||||
|
||||
self.render()
|
||||
|
||||
return (observation, self.reward, terminated, truncated, info)
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close the environment
|
||||
"""
|
||||
return None
|
||||
|
||||
def rescale_action(self, action):
|
||||
"""
|
||||
Rescales the action to within the control limits and then assigns the appropriate dictionary.
|
||||
"""
|
||||
|
||||
control_dict = {}
|
||||
|
||||
if self.control_mode == 'cmd_ctbm':
|
||||
# Scale action[0] to (0,1) and then scale to the max thrust
|
||||
cmd_thrust = np.interp(action[0], [-1,1], [self.quadrotor.num_rotors*self.min_thrust, self.quadrotor.num_rotors*self.max_thrust])
|
||||
|
||||
# Scale the moments
|
||||
cmd_roll_moment = np.interp(action[1], [-1,1], [-self.max_roll_moment, self.max_roll_moment])
|
||||
cmd_pitch_moment = np.interp(action[2], [-1,1], [-self.max_pitch_moment, self.max_pitch_moment])
|
||||
cmd_yaw_moment = np.interp(action[3], [-1,1], [-self.max_yaw_moment, self.max_yaw_moment])
|
||||
|
||||
control_dict['cmd_thrust'] = cmd_thrust
|
||||
control_dict['cmd_moment'] = np.array([cmd_roll_moment, cmd_pitch_moment, cmd_yaw_moment])
|
||||
|
||||
elif self.control_mode == 'cmd_ctbr':
|
||||
# Scale action to min and max thrust.
|
||||
cmd_thrust = np.interp(action[0], [-1, 1], [self.quadrotor.num_rotors*self.min_thrust, self.quadrotor.num_rotors*self.max_thrust])
|
||||
|
||||
# Scale the body rates.
|
||||
cmd_roll_br = np.interp(action[1], [-1,1], [-self.max_roll_br, self.max_roll_br])
|
||||
cmd_pitch_br = np.interp(action[2], [-1,1], [-self.max_pitch_br, self.max_pitch_br])
|
||||
cmd_yaw_br = np.interp(action[3], [-1,1], [-self.max_yaw_br, self.max_yaw_br])
|
||||
|
||||
control_dict['cmd_thrust'] = cmd_thrust
|
||||
control_dict['cmd_w'] = np.array([cmd_roll_br, cmd_pitch_br, cmd_yaw_br])
|
||||
|
||||
elif self.control_mode == 'cmd_motor_speeds':
|
||||
# Scale the action to min and max motor speeds.
|
||||
control_dict['cmd_motor_speeds'] = np.interp(action, [-1,1], [self.rotor_speed_min, self.rotor_speed_max])
|
||||
|
||||
elif self.control_mode == 'cmd_motor_thrusts':
|
||||
# Scale the action to min and max rotor thrusts.
|
||||
control_dict['cmd_motor_thrusts'] = np.interp(action, [-1,1], [self.min_thrust, self.max_thrust])
|
||||
|
||||
elif self.control_mode == 'cmd_vel':
|
||||
# Scale the velcoity to min and max values.
|
||||
control_dict['cmd_v'] = np.interp(action, [-1,1], [-self.max_vel, self.max_vel])
|
||||
|
||||
return control_dict
|
||||
|
||||
def _get_reward(self, observation, action):
|
||||
"""
|
||||
Compute the reward for the current state and goal.
|
||||
Inputs:
|
||||
observation: The current state of the quadrotor
|
||||
action: The current action of the controller
|
||||
Outputs:
|
||||
the current reward.
|
||||
"""
|
||||
|
||||
return self.reward_fn(observation, action)
|
||||
|
||||
def safety_exit(self):
|
||||
"""
|
||||
Return exit status if any safety condition is violated, otherwise None.
|
||||
"""
|
||||
if np.any(np.abs(self.vehicle_state['v']) > 100):
|
||||
return False
|
||||
if np.any(np.abs(self.vehicle_state['w']) > 100):
|
||||
return False
|
||||
if self.vehicle_state['x'][0] < self.world.world['bounds']['extents'][0] or self.vehicle_state['x'][0] > self.world.world['bounds']['extents'][1]:
|
||||
return False
|
||||
if self.vehicle_state['x'][1] < self.world.world['bounds']['extents'][2] or self.vehicle_state['x'][1] > self.world.world['bounds']['extents'][3]:
|
||||
return False
|
||||
if self.vehicle_state['x'][2] < self.world.world['bounds']['extents'][4] or self.vehicle_state['x'][2] > self.world.world['bounds']['extents'][5]:
|
||||
return False
|
||||
|
||||
if len(self.world.world.get('blocks', [])) > 0:
|
||||
# If a world has objects in it we need to check for collisions.
|
||||
collision_pts = self.world.path_collisions(self.vehicle_state['x'], 0.25)
|
||||
no_collision = collision_pts.size == 0
|
||||
if not no_collision:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _get_obs(self):
|
||||
# Concatenate all the state variables into a single vector
|
||||
state_vec = np.concatenate([self.vehicle_state['x'], self.vehicle_state['v'], self.vehicle_state['q'], self.vehicle_state['w']], dtype=np.float32)
|
||||
|
||||
return state_vec
|
||||
|
||||
def _get_info(self):
|
||||
return {}
|
||||
|
||||
def _plot_quad(self):
|
||||
|
||||
if abs(self.t / (1/self.metadata['render_fps']) - round(self.t / (1/self.metadata['render_fps']))) > 5e-2:
|
||||
self.rendering = False # Set rendering bool to false.
|
||||
return
|
||||
|
||||
self.rendering = True # Set rendering bool to true.
|
||||
|
||||
plot_position = deepcopy(self.vehicle_state['x'])
|
||||
plot_rotation = Rotation.from_quat(self.vehicle_state['q']).as_matrix()
|
||||
plot_wind = deepcopy(self.vehicle_state['wind'])
|
||||
|
||||
if self.world_artists is None and not ('x' in self.ax.get_xlabel()):
|
||||
self.world_artists = self.world.draw(self.ax)
|
||||
self.ax.plot(0, 0, 0, 'go')
|
||||
|
||||
self.quad_obj.transform(position=plot_position, rotation=plot_rotation, wind=plot_wind)
|
||||
self.title_artist.set_text('t = {:.2f}'.format(self.t))
|
||||
|
||||
plt.pause(1e-9)
|
||||
|
||||
return
|
||||
|
||||
def _print_quad(self):
|
||||
|
||||
print("Time: %3.2f \t Position: (%3.2f, %3.2f, %3.2f) \t Reward: %3.2f" % (self.t, self.vehicle_state['x'][0], self.vehicle_state['x'][1], self.vehicle_state['x'][2], self.reward))
|
||||
31
rotorpy/learning/quadrotor_reward_functions.py
Normal file
31
rotorpy/learning/quadrotor_reward_functions.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import numpy as np
|
||||
from scipy.spatial.transform import Rotation
|
||||
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
|
||||
import math
|
||||
|
||||
"""
|
||||
Reward functions for quadrotor tasks.
|
||||
"""
|
||||
|
||||
def hover_reward(observation, action, weights={'x': 1, 'v': 0.1, 'w': 0, 'u': 1e-5}):
|
||||
"""
|
||||
Rewards hovering at (0, 0, 0). It is a combination of position error, velocity error, body rates, and
|
||||
action reward.
|
||||
"""
|
||||
|
||||
# Compute the distance to goal
|
||||
dist_reward = -weights['x']*np.linalg.norm(observation[0:3])
|
||||
|
||||
# Compute the velocity reward
|
||||
vel_reward = -weights['v']*np.linalg.norm(observation[3:6])
|
||||
|
||||
# Compute the angular rate reward
|
||||
ang_rate_reward = -weights['w']*np.linalg.norm(observation[10:13])
|
||||
|
||||
# Compute the action reward
|
||||
action_reward = -weights['u']*np.linalg.norm(action)
|
||||
|
||||
return dist_reward + vel_reward + action_reward + ang_rate_reward
|
||||
@@ -4,6 +4,8 @@ import scipy.integrate
|
||||
from scipy.spatial.transform import Rotation
|
||||
from rotorpy.vehicles.hummingbird_params import quad_params
|
||||
|
||||
import time
|
||||
|
||||
"""
|
||||
Multirotor models
|
||||
"""
|
||||
@@ -40,11 +42,13 @@ class Multirotor(object):
|
||||
quad_params: a dictionary containing relevant physical parameters for the multirotor.
|
||||
initial_state: the initial state of the vehicle.
|
||||
control_abstraction: the appropriate control abstraction that is used by the controller, options are...
|
||||
'cmd_motor_speed': the controller directly commands motor speeds.
|
||||
'cmd_motor_force': the controller commands forces for each rotor.
|
||||
'cmd_motor_speeds': the controller directly commands motor speeds.
|
||||
'cmd_motor_thrusts': the controller commands forces for each rotor.
|
||||
'cmd_ctbr': the controller commands a collective thrsut and body rates.
|
||||
'cmd_ctbm': the controller commands a collective thrust and moments on the x/y/z body axes
|
||||
'cmd_ctatt': the controller commands a collective thrust and attitude (as a quaternion).
|
||||
'cmd_vel': the controller commands a velocity vector in the body frame.
|
||||
aero: boolean, determines whether or not aerodynamic drag forces are computed.
|
||||
"""
|
||||
def __init__(self, quad_params, initial_state = {'x': np.array([0,0,0]),
|
||||
'v': np.zeros(3,),
|
||||
@@ -53,6 +57,7 @@ class Multirotor(object):
|
||||
'wind': np.array([0,0,0]), # Since wind is handled elsewhere, this value is overwritten
|
||||
'rotor_speeds': np.array([1788.53, 1788.53, 1788.53, 1788.53])},
|
||||
control_abstraction='cmd_motor_speeds',
|
||||
aero = True,
|
||||
):
|
||||
"""
|
||||
Initialize quadrotor physical parameters.
|
||||
@@ -126,6 +131,8 @@ class Multirotor(object):
|
||||
self.kp_att = 544 # The attitude P gain (for cmd_vel and cmd_ctatt)
|
||||
self.kd_att = 46.64 # The attitude D gain (for cmd_vel and cmd_ctatt)
|
||||
|
||||
self.aero = aero
|
||||
|
||||
def extract_geometry(self):
|
||||
"""
|
||||
Extracts the geometry in self.rotors for efficient use later on in the computation of
|
||||
@@ -209,9 +216,6 @@ class Multirotor(object):
|
||||
|
||||
R = Rotation.from_quat(state['q']).as_matrix()
|
||||
|
||||
# Compute airspeed vector in the body frame
|
||||
body_airspeed_vector = R.T@(inertial_velocity - wind_velocity)
|
||||
|
||||
# Rotor speed derivative
|
||||
rotor_accel = (1/self.tau_m)*(cmd_rotor_speeds - rotor_speeds)
|
||||
|
||||
@@ -221,8 +225,11 @@ class Multirotor(object):
|
||||
# Orientation derivative.
|
||||
q_dot = quat_dot(state['q'], state['w'])
|
||||
|
||||
# Compute airspeed vector in the body frame
|
||||
body_airspeed_vector = R.T@(inertial_velocity - wind_velocity)
|
||||
|
||||
# Compute total wrench in the body frame based on the current rotor speeds and their location w.r.t. CoM
|
||||
(FtotB, Mtot) = self.compute_body_wrench(state['w'], rotor_speeds, body_airspeed_vector)
|
||||
(FtotB, MtotB) = self.compute_body_wrench(state['w'], rotor_speeds, body_airspeed_vector)
|
||||
|
||||
# Rotate the force from the body frame to the inertial frame
|
||||
Ftot = R@FtotB
|
||||
@@ -233,7 +240,7 @@ class Multirotor(object):
|
||||
# Angular velocity derivative.
|
||||
w = state['w']
|
||||
w_hat = Multirotor.hat_map(w)
|
||||
w_dot = self.inv_inertia @ (Mtot - w_hat @ (self.inertia @ w))
|
||||
w_dot = self.inv_inertia @ (MtotB - w_hat @ (self.inertia @ w))
|
||||
|
||||
# NOTE: the wind dynamics are currently handled in the wind_profile object.
|
||||
# The line below doesn't do anything, as the wind state is assigned elsewhere.
|
||||
@@ -259,32 +266,32 @@ class Multirotor(object):
|
||||
The net moment Mtot is represented in the body frame.
|
||||
"""
|
||||
|
||||
FtotB = np.zeros((3,))
|
||||
MtotB = np.zeros((3,))
|
||||
# Get the local airspeeds for each rotor
|
||||
local_airspeeds = body_airspeed_vector[:, np.newaxis] + Multirotor.hat_map(body_rates)@(self.rotor_geometry.T)
|
||||
|
||||
for i in range(self.num_rotors):
|
||||
# Loop through each rotor, compute the forces
|
||||
# Compute the thrust of each rotor, assuming that the rotors all point in the body z direction!
|
||||
T = np.array([0, 0, self.k_eta])[:, np.newaxis]*rotor_speeds**2
|
||||
|
||||
# Add in aero wrenches (if applicable)
|
||||
if self.aero:
|
||||
# Parasitic drag force acting at the CoM
|
||||
D = -Multirotor._norm(body_airspeed_vector)*self.drag_matrix@body_airspeed_vector
|
||||
# Rotor drag (aka H force) acting at each propeller hub.
|
||||
H = -rotor_speeds*(self.rotor_drag_matrix@local_airspeeds)
|
||||
# Pitching flapping moment acting at each propeller hub.
|
||||
M_flap = -self.k_flap*rotor_speeds*((Multirotor.hat_map(local_airspeeds.T).transpose(2, 0, 1))@np.array([0,0,1])).T
|
||||
else:
|
||||
D = np.zeros(3,)
|
||||
H = np.zeros((3,self.num_rotors))
|
||||
M_flap = np.zeros((3,self.num_rotors))
|
||||
|
||||
r = self.rotor_geometry[i,:] # the position of rotor i relative to the CoM, in body coordinates
|
||||
|
||||
# Compute the local airspeed by adding on the rotational component to the airspeed.
|
||||
local_airspeed_vector = body_airspeed_vector + Multirotor.hat_map(body_rates)@r
|
||||
# Compute the moments due to the rotor thrusts, rotor drag (if applicable), and rotor drag torques
|
||||
M_force = -np.einsum('ijk, ik->j', Multirotor.hat_map(self.rotor_geometry), T+H)
|
||||
M_yaw = self.rotor_dir*(np.array([0, 0, self.k_m])[:, np.newaxis]*rotor_speeds**2)
|
||||
|
||||
T = np.array([0, 0, self.k_eta*rotor_speeds[i]**2]) # thrust vector in body frame
|
||||
H = -rotor_speeds[i]*self.rotor_drag_matrix@local_airspeed_vector # rotor drag force
|
||||
|
||||
# Compute the moments
|
||||
M_force = Multirotor.hat_map(r)@(T+H)
|
||||
M_yaw = self.rotor_dir[i]*np.array([0, 0, self.k_m*rotor_speeds[i]**2])
|
||||
M_flap = -rotor_speeds[i]*self.k_flap*Multirotor.hat_map(local_airspeed_vector)@np.array([0,0,1])
|
||||
|
||||
FtotB += (T+H)
|
||||
MtotB += (M_force + M_yaw + M_flap)
|
||||
|
||||
# Compute the drag force acting on the frame
|
||||
D = -Multirotor._norm(body_airspeed_vector)*self.drag_matrix@body_airspeed_vector
|
||||
|
||||
FtotB += D
|
||||
# Sum all elements to compute the total body wrench
|
||||
FtotB = np.sum(T + H, axis=1) + D
|
||||
MtotB = M_force + np.sum(M_yaw + M_flap, axis=1)
|
||||
|
||||
return (FtotB, MtotB)
|
||||
|
||||
@@ -395,10 +402,16 @@ class Multirotor(object):
|
||||
def hat_map(cls, s):
|
||||
"""
|
||||
Given vector s in R^3, return associate skew symmetric matrix S in R^3x3
|
||||
In the vectorized implementation, we assume that s is in the shape (N arrays, 3)
|
||||
"""
|
||||
return np.array([[ 0, -s[2], s[1]],
|
||||
[ s[2], 0, -s[0]],
|
||||
[-s[1], s[0], 0]])
|
||||
if len(s.shape) > 1: # Vectorized implementation
|
||||
return np.array([[ np.zeros(s.shape[0]), -s[:,2], s[:,1]],
|
||||
[ s[:,2], np.zeros(s.shape[0]), -s[:,0]],
|
||||
[-s[:,1], s[:,0], np.zeros(s.shape[0])]])
|
||||
else:
|
||||
return np.array([[ 0, -s[2], s[1]],
|
||||
[ s[2], 0, -s[0]],
|
||||
[-s[1], s[0], 0]])
|
||||
|
||||
@classmethod
|
||||
def _pack_state(cls, state):
|
||||
|
||||
Reference in New Issue
Block a user