Migrate to mujoco-py 1.50 (openai#834)
* all envs run offscreen

* render works

* changed mujoco-py version

* Bump versions

* Update version and README

* Same versioning for all mujoco envs

* Fix typo

* Fix version

* Bump version again

* Revert "Fix version"

This reverts commit decc577.
matthiasplappert authored Jan 24, 2018
1 parent 8db9eff commit 921169b
Showing 18 changed files with 89 additions and 102 deletions.
8 changes: 6 additions & 2 deletions README.rst
@@ -4,7 +4,7 @@ OpenAI Gym
**OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to a standardized set of environments.

.. image:: https://travis-ci.org/openai/gym.svg?branch=master
:target: https://travis-ci.org/openai/gym
:target: https://travis-ci.org/openai/gym

`See What's New section below <#what-s-new>`_

@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under

.. code:: shell
xvfb-run -s "-screen 0 1400x900x24" bash
xvfb-run -s "-screen 0 1400x900x24" bash
Installing dependencies for specific environments
-------------------------------------------------
@@ -261,6 +261,10 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
What's new
==========

- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
should be similar (see https://github.com/openai/gym/pull/834) but there are likely
some differences due to changes in MuJoCo.
- 2017-06-16: Make env.spec into a property to fix a bug that occurs
when you try to print out an unregistered Env.
- 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
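A minimal usage sketch of the renamed environments described in the What's new entry above (assuming gym with mujoco-py >= 1.50 installed; HalfCheetah-v2 stands in for any of the new -v2 ids):

.. code:: python

    import gym

    # Sketch only: the MuJoCo environment ids now carry a -v2 suffix,
    # but the gym interface itself is unchanged.
    env = gym.make('HalfCheetah-v2')
    observation = env.reset()
    for _ in range(1000):
        action = env.action_space.sample()  # random policy, for illustration
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()
    env.close()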
26 changes: 13 additions & 13 deletions gym/envs/__init__.py
@@ -204,89 +204,89 @@
# 2D

register(
id='Reacher-v1',
id='Reacher-v2',
entry_point='gym.envs.mujoco:ReacherEnv',
max_episode_steps=50,
reward_threshold=-3.75,
)

register(
id='Pusher-v0',
id='Pusher-v2',
entry_point='gym.envs.mujoco:PusherEnv',
max_episode_steps=100,
reward_threshold=0.0,
)

register(
id='Thrower-v0',
id='Thrower-v2',
entry_point='gym.envs.mujoco:ThrowerEnv',
max_episode_steps=100,
reward_threshold=0.0,
)

register(
id='Striker-v0',
id='Striker-v2',
entry_point='gym.envs.mujoco:StrikerEnv',
max_episode_steps=100,
reward_threshold=0.0,
)

register(
id='InvertedPendulum-v1',
id='InvertedPendulum-v2',
entry_point='gym.envs.mujoco:InvertedPendulumEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)

register(
id='InvertedDoublePendulum-v1',
id='InvertedDoublePendulum-v2',
entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)

register(
id='HalfCheetah-v1',
id='HalfCheetah-v2',
entry_point='gym.envs.mujoco:HalfCheetahEnv',
max_episode_steps=1000,
reward_threshold=4800.0,
)

register(
id='Hopper-v1',
id='Hopper-v2',
entry_point='gym.envs.mujoco:HopperEnv',
max_episode_steps=1000,
reward_threshold=3800.0,
)

register(
id='Swimmer-v1',
id='Swimmer-v2',
entry_point='gym.envs.mujoco:SwimmerEnv',
max_episode_steps=1000,
reward_threshold=360.0,
)

register(
id='Walker2d-v1',
id='Walker2d-v2',
max_episode_steps=1000,
entry_point='gym.envs.mujoco:Walker2dEnv',
)

register(
id='Ant-v1',
id='Ant-v2',
entry_point='gym.envs.mujoco:AntEnv',
max_episode_steps=1000,
reward_threshold=6000.0,
)

register(
id='Humanoid-v1',
id='Humanoid-v2',
entry_point='gym.envs.mujoco:HumanoidEnv',
max_episode_steps=1000,
)

register(
id='HumanoidStandup-v1',
id='HumanoidStandup-v2',
entry_point='gym.envs.mujoco:HumanoidStandupEnv',
max_episode_steps=1000,
)
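Custom environments can follow the same registration pattern as the entries above; a hypothetical sketch (the id, entry point, and numbers are placeholders, not part of this change):

.. code:: python

    from gym.envs.registration import register

    # Hypothetical example: registering a custom MuJoCo-style environment
    # under the same -v2 versioning scheme. 'MyCheetah-v2' and the
    # entry_point are made up for illustration.
    register(
        id='MyCheetah-v2',
        entry_point='my_package.envs:MyCheetahEnv',
        max_episode_steps=1000,
        reward_threshold=4800.0,
    )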
8 changes: 4 additions & 4 deletions gym/envs/mujoco/ant.py
@@ -14,7 +14,7 @@ def _step(self, a):
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
@@ -30,9 +30,9 @@ def _step(self, a):

def _get_obs(self):
return np.concatenate([
self.model.data.qpos.flat[2:],
self.model.data.qvel.flat,
np.clip(self.model.data.cfrc_ext, -1, 1).flat,
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
])

def reset_model(self):
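The recurring change in these files is that simulation state is read from self.sim.data (an MjSim) instead of self.model.data. A rough standalone sketch of that mujoco-py 1.50 access pattern, assuming the usual load_model_from_path/MjSim entry points (the XML path is illustrative):

.. code:: python

    import mujoco_py

    # Rough sketch, not part of this diff: in mujoco-py >= 1.50 the dynamic
    # state hangs off an MjSim object rather than off the model.
    model = mujoco_py.load_model_from_path('ant.xml')  # illustrative path
    sim = mujoco_py.MjSim(model)
    sim.step()
    qpos = sim.data.qpos          # joint positions, exposed as a flat array
    cfrc_ext = sim.data.cfrc_ext  # external contact forces, as used by AntEnv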
8 changes: 4 additions & 4 deletions gym/envs/mujoco/half_cheetah.py
@@ -8,9 +8,9 @@ def __init__(self):
utils.EzPickle.__init__(self)

def _step(self, action):
xposbefore = self.model.data.qpos[0, 0]
xposbefore = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
xposafter = self.model.data.qpos[0, 0]
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = - 0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ def _step(self, action):

def _get_obs(self):
return np.concatenate([
self.model.data.qpos.flat[1:],
self.model.data.qvel.flat,
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
])

def reset_model(self):
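Besides the move to sim.data, the indexing change (qpos[0, 0] becoming qpos[0]) suggests the state arrays are flat 1-D in mujoco-py 1.50 rather than (n, 1) columns; a small numpy illustration of the equivalence (the size is made up):

.. code:: python

    import numpy as np

    # Illustration only: pre-1.50 qpos behaved like an (nq, 1) column, so the
    # forward position was qpos[0, 0]; in 1.50 it reads as a flat (nq,) array.
    old_qpos = np.arange(9.0).reshape(9, 1)  # nq = 9 chosen arbitrarily
    new_qpos = old_qpos.ravel()
    assert old_qpos[0, 0] == new_qpos[0]     # same value, simpler indexing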
8 changes: 4 additions & 4 deletions gym/envs/mujoco/hopper.py
@@ -8,9 +8,9 @@ def __init__(self):
utils.EzPickle.__init__(self)

def _step(self, a):
posbefore = self.model.data.qpos[0, 0]
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.model.data.qpos[0:3, 0]
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
@@ -23,8 +23,8 @@ def _step(self, a):

def _get_obs(self):
return np.concatenate([
self.model.data.qpos.flat[1:],
np.clip(self.model.data.qvel.flat, -10, 10)
self.sim.data.qpos.flat[1:],
np.clip(self.sim.data.qvel.flat, -10, 10)
])

def reset_model(self):
16 changes: 8 additions & 8 deletions gym/envs/mujoco/humanoid.py
@@ -2,9 +2,9 @@
from gym.envs.mujoco import mujoco_env
from gym import utils

def mass_center(model):
mass = model.body_mass
xpos = model.data.xipos
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@@ -13,7 +13,7 @@ def __init__(self):
utils.EzPickle.__init__(self)

def _get_obs(self):
data = self.model.data
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
@@ -22,17 +22,17 @@ def _get_obs(self):
data.cfrc_ext.flat])

def _step(self, a):
pos_before = mass_center(self.model)
pos_before = mass_center(self.model, self.sim)
self.do_simulation(a, self.frame_skip)
pos_after = mass_center(self.model)
pos_after = mass_center(self.model, self.sim)
alive_bonus = 5.0
data = self.model.data
data = self.sim.data
lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.model.data.qpos
qpos = self.sim.data.qpos
done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)

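The added np.expand_dims in mass_center suggests model.body_mass is a flat (nbody,) array in mujoco-py 1.50 while sim.data.xipos is (nbody, 3), so the mass needs an explicit column axis to broadcast; a small numpy sketch of the computation (the body count and values are illustrative):

.. code:: python

    import numpy as np

    # Sketch of the updated mass_center computation; only the broadcasting
    # pattern matters, the numbers are illustrative.
    nbody = 14
    body_mass = np.random.rand(nbody)   # flat (nbody,) array
    xipos = np.random.rand(nbody, 3)    # per-body center-of-mass positions
    center = np.sum(np.expand_dims(body_mass, 1) * xipos, 0) / np.sum(body_mass)
    x_center = center[0]                # forward coordinate used for the reward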
11 changes: 3 additions & 8 deletions gym/envs/mujoco/humanoidstandup.py
@@ -2,18 +2,13 @@
from gym.envs.mujoco import mujoco_env
from gym import utils

def mass_center(model):
mass = model.body_mass
xpos = model.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
utils.EzPickle.__init__(self)

def _get_obs(self):
data = self.model.data
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
@@ -23,8 +18,8 @@ def _get_obs(self):

def _step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.model.data.qpos[2][0]
data = self.model.data
pos_after = self.sim.data.qpos[2]
data = self.sim.data
uph_cost = (pos_after - 0) / self.model.opt.timestep

quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
16 changes: 8 additions & 8 deletions gym/envs/mujoco/inverted_double_pendulum.py
@@ -11,22 +11,22 @@ def __init__(self):
def _step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.model.data.site_xpos[0]
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
v1, v2 = self.model.data.qvel[1:3]
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = (alive_bonus - dist_penalty - vel_penalty)[0]
r = alive_bonus - dist_penalty - vel_penalty
done = bool(y <= 1)
return ob, r, done, {}

def _get_obs(self):
return np.concatenate([
self.model.data.qpos[:1], # cart x pos
np.sin(self.model.data.qpos[1:]), # link angles
np.cos(self.model.data.qpos[1:]),
np.clip(self.model.data.qvel, -10, 10),
np.clip(self.model.data.qfrc_constraint, -10, 10)
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10)
]).ravel()

def reset_model(self):
2 changes: 1 addition & 1 deletion gym/envs/mujoco/inverted_pendulum.py
@@ -22,7 +22,7 @@ def reset_model(self):
return self._get_obs()

def _get_obs(self):
return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

def viewer_setup(self):
v = self.viewer