Support MuJoCo 1.5 #767

Closed · wants to merge 3 commits
8 changes: 4 additions & 4 deletions gym/envs/mujoco/ant.py
@@ -14,7 +14,7 @@ def _step(self, a):
         forward_reward = (xposafter - xposbefore)/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()
@@ -30,9 +30,9 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
         ])

     def reset_model(self):
8 changes: 4 additions & 4 deletions gym/envs/mujoco/half_cheetah.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
Comment (Contributor Author): My understanding is that qpos lists joint angles. In half_cheetah.xml, it looks like the first joint is "rootx", a slide joint that should give displacement in the x direction, so qpos[0] should give the desired value.
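A quick way to sanity-check that claim (a sketch, not part of the PR; it assumes mujoco-py >= 1.5 is installed and gym's usual assets/ layout):

    import os
    import mujoco_py
    from gym.envs import mujoco as mujoco_envs

    xml = os.path.join(os.path.dirname(mujoco_envs.__file__),
                       "assets", "half_cheetah.xml")
    sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(xml))

    # get_joint_qpos_addr maps a joint name to its position in qpos;
    # for the single-dof "rootx" slide joint this should be address 0.
    print(sim.model.get_joint_qpos_addr("rootx"))  # expected: 0
    print(sim.data.qpos[0])                        # root x displacement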

         self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         ob = self._get_obs()
         reward_ctrl = - 0.1 * np.square(action).sum()
         reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ def _step(self, action):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
         ])

     def reset_model(self):
8 changes: 4 additions & 4 deletions gym/envs/mujoco/hopper.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = (posafter - posbefore) / self.dt
         reward += alive_bonus
@@ -23,8 +23,8 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
         ])

     def reset_model(self):
16 changes: 8 additions & 8 deletions gym/envs/mujoco/humanoid.py
@@ -2,9 +2,9 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
     return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
Expand All @@ -13,7 +13,7 @@ def __init__(self):
utils.EzPickle.__init__(self)

def _get_obs(self):
data = self.model.data
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
@@ -22,17 +22,17 @@ def _get_obs(self):
                                data.cfrc_ext.flat])

     def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
         self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
         alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
         lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
         quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
         quad_impact_cost = min(quad_impact_cost, 10)
         reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
         done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
         return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
11 changes: 3 additions & 8 deletions gym/envs/mujoco/humanoidstandup.py
@@ -2,18 +2,13 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
-
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -23,8 +18,8 @@ def _get_obs(self):

     def _step(self, a):
         self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
         uph_cost = (pos_after - 0) / self.model.opt.timestep

         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
16 changes: 8 additions & 8 deletions gym/envs/mujoco/inverted_double_pendulum.py
@@ -11,22 +11,22 @@ def __init__(self):
     def _step(self, action):
         self.do_simulation(action, self.frame_skip)
         ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
         dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
         vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
         alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
Comment (Contributor Author): Not sure why alive_bonus - dist_penalty - vel_penalty used to be an array. Please double-check that I didn't break something here.
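A plausible explanation, for the record: the old mujoco-py bindings exposed qvel as an (nv, 1) column array (the same reason the old code wrote qpos[0, 0] elsewhere), so v1 and v2 each unpacked to shape-(1,) arrays, the reward expression inherited that shape, and the old [0] index collapsed it to a scalar. In 1.5 the buffers are flat, so the expression is already scalar. A shape sketch under that assumption:

    import numpy as np

    qvel_old = np.zeros((6, 1))          # old bindings: column array
    v1, v2 = qvel_old[1:3]               # each is shape (1,)
    print(np.shape(10 - v1**2 - v2**2))  # (1,) -> needed the [0]

    qvel_new = np.zeros(6)               # mujoco-py 1.5: flat array
    v1, v2 = qvel_new[1:3]               # plain scalars
    print(np.shape(10 - v1**2 - v2**2))  # () -> already a scalar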

         done = bool(y <= 1)
         return ob, r, done, {}

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
         ]).ravel()

     def reset_model(self):
2 changes: 1 addition & 1 deletion gym/envs/mujoco/inverted_pendulum.py
@@ -22,7 +22,7 @@ def reset_model(self):
         return self._get_obs()

     def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

     def viewer_setup(self):
         v = self.viewer
49 changes: 19 additions & 30 deletions gym/envs/mujoco/mujoco_env.py
@@ -9,7 +9,6 @@

 try:
     import mujoco_py
-    from mujoco_py.mjlib import mjlib
 except ImportError as e:
     raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))

@@ -25,17 +24,18 @@ def __init__(self, model_path, frame_skip):
         if not path.exists(fullpath):
             raise IOError("File %s does not exist" % fullpath)
         self.frame_skip = frame_skip
-        self.model = mujoco_py.MjModel(fullpath)
-        self.data = self.model.data
+        self.model = mujoco_py.load_model_from_path(fullpath)
+        self.sim = mujoco_py.MjSim(self.model)
+        self.data = self.sim.data
         self.viewer = None

         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': int(np.round(1.0 / self.dt))
         }

-        self.init_qpos = self.model.data.qpos.ravel().copy()
-        self.init_qvel = self.model.data.qvel.ravel().copy()
+        self.init_qpos = self.sim.data.qpos.ravel().copy()
+        self.init_qvel = self.sim.data.qvel.ravel().copy()
         observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
         assert not done
         self.obs_dim = observation.size
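For reference, the bare mujoco-py 1.5 loading pattern that this __init__ now mirrors (a sketch, assuming mujoco-py >= 1.5 and a valid MJCF file path of your own; the filename is hypothetical):

    import mujoco_py

    model = mujoco_py.load_model_from_path("my_model.xml")  # hypothetical path
    sim = mujoco_py.MjSim(model)
    sim.step()                    # advance the physics one timestep
    print(sim.data.qpos.shape)    # flat (nq,) array, no trailing axis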
@@ -76,33 +76,33 @@ def viewer_setup(self):
     # -----------------------------

     def _reset(self):
-        mjlib.mj_resetData(self.model.ptr, self.data.ptr)
+        self.sim.reset()
         ob = self.reset_model()
         if self.viewer is not None:
-            self.viewer.autoscale()
             self.viewer_setup()
         return ob

     def set_state(self, qpos, qvel):
         assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
-        self.model.data.qpos = qpos
-        self.model.data.qvel = qvel
-        self.model._compute_subtree() # pylint: disable=W0212
-        self.model.forward()
+        old_state = self.sim.get_state()
+        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
+                                         old_state.act, old_state.udd_state)
+        self.sim.set_state(new_state)
+        self.sim.forward()

     @property
     def dt(self):
         return self.model.opt.timestep * self.frame_skip

     def do_simulation(self, ctrl, n_frames):
-        self.model.data.ctrl = ctrl
+        self.sim.data.ctrl[:] = ctrl
         for _ in range(n_frames):
-            self.model.step()
+            self.sim.step()

     def _render(self, mode='human', close=False):
         if close:
             if self.viewer is not None:
-                self._get_viewer().finish()
+                self._get_viewer()
                 self.viewer = None
             return
@@ -111,30 +111,19 @@ def _render(self, mode='human', close=False):
             data, width, height = self._get_viewer().get_image()
             return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
         elif mode == 'human':
-            self._get_viewer().loop_once()
+            self._get_viewer().render()

     def _get_viewer(self):
         if self.viewer is None:
-            self.viewer = mujoco_py.MjViewer()
-            self.viewer.start()
-            self.viewer.set_model(self.model)
+            self.viewer = mujoco_py.MjViewer(self.sim)
             self.viewer_setup()
         return self.viewer

     def get_body_com(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.com_subtree[idx]
-
-    def get_body_comvel(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.body_comvels[idx]
-
-    def get_body_xmat(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.xmat[idx].reshape((3, 3))
+        return self.data.get_body_xpos(body_name)
Comment (Contributor Author): These methods don't have obvious implementations in MuJoCo 1.5, and they aren't used anywhere, so I deleted them.
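If one of them were needed again, mujoco-py 1.5's named accessors on sim.data would likely make it a one-liner, in the same style as the new get_body_com above. A sketch of a restored get_body_xmat (assuming the 1.5 accessor names):

    def get_body_xmat(self, body_name):
        # body frame rotation matrix, shape (3, 3), looked up by name
        return self.data.get_body_xmat(body_name)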

     def state_vector(self):
         return np.concatenate([
-            self.model.data.qpos.flat,
-            self.model.data.qvel.flat
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat
         ])
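The reworked set_state builds a full MjSimState so that time, actuator activations (act), and udd_state are preserved while qpos and qvel are replaced. A usage sketch of the resulting API (the env id and zero action are illustrative; assumes a registered gym MuJoCo env and mujoco-py >= 1.5):

    import gym
    import numpy as np

    env = gym.make("Hopper-v1").unwrapped    # the MujocoEnv instance
    env.reset()
    qpos = env.sim.data.qpos.ravel().copy()
    qvel = env.sim.data.qvel.ravel().copy()
    env.do_simulation(np.zeros(env.model.nu), env.frame_skip)
    env.set_state(qpos, qvel)                # rewind via MjSimState
    assert np.allclose(env.sim.data.qpos, qpos)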
5 changes: 2 additions & 3 deletions gym/envs/mujoco/pusher.py
@@ -3,7 +3,6 @@
 from gym.envs.mujoco import mujoco_env

 import mujoco_py
-from mujoco_py.mjlib import mjlib

 class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
@@ -50,8 +49,8 @@ def reset_model(self):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("tips_arm"),
             self.get_body_com("object"),
             self.get_body_com("goal"),
6 changes: 3 additions & 3 deletions gym/envs/mujoco/reacher.py
@@ -33,11 +33,11 @@ def reset_model(self):
         return self._get_obs()

     def _get_obs(self):
-        theta = self.model.data.qpos.flat[:2]
+        theta = self.sim.data.qpos.flat[:2]
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat[:2],
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
             self.get_body_com("fingertip") - self.get_body_com("target")
         ])
4 changes: 2 additions & 2 deletions gym/envs/mujoco/striker.py
@@ -67,8 +67,8 @@ def reset_model(self):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("tips_arm"),
             self.get_body_com("object"),
             self.get_body_com("goal"),
8 changes: 4 additions & 4 deletions gym/envs/mujoco/swimmer.py
@@ -9,18 +9,18 @@ def __init__(self):

     def _step(self, a):
         ctrl_cost_coeff = 0.0001
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         reward_fwd = (xposafter - xposbefore) / self.dt
         reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
         reward = reward_fwd + reward_ctrl
         ob = self._get_obs()
         return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)

     def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
         return np.concatenate([qpos.flat[2:], qvel.flat])

     def reset_model(self):
4 changes: 2 additions & 2 deletions gym/envs/mujoco/thrower.py
@@ -52,8 +52,8 @@ def reset_model(self):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("r_wrist_roll_link"),
             self.get_body_com("ball"),
             self.get_body_com("goal"),
8 changes: 4 additions & 4 deletions gym/envs/mujoco/walker2d.py
@@ -9,9 +9,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = ((posafter - posbefore) / self.dt)
         reward += alive_bonus
@@ -22,8 +22,8 @@ def _step(self, a):
         return ob, reward, done, {}

     def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
         return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()

     def reset_model(self):