Migrate to mujoco-py 1.5 #834

Merged: 10 commits, Jan 24, 2018

8 changes: 6 additions & 2 deletions README.rst
@@ -4,7 +4,7 @@ OpenAI Gym
 **OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to an ever-growing variety of environments.

 .. image:: https://travis-ci.org/openai/gym.svg?branch=master
-    :target: https://travis-ci.org/openai/gym
+    :target: https://travis-ci.org/openai/gym

 `See What's New section below <#what-s-new>`_
@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under

 .. code:: shell

-    xvfb-run -s "-screen 0 1400x900x24" bash
+    xvfb-run -s "-screen 0 1400x900x24" bash

 Installing dependencies for specific environments
 -------------------------------------------------
@@ -262,6 +262,10 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
 What's new
 ==========

+- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
+  Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
+  should be similar (see https://github.com/openai/gym/pull/834) but there are likely
+  some differences due to changes in MuJoCo.
 - 2017-06-16: Make env.spec into a property to fix a bug that occurs
   when you try to print out an unregistered Env.
 - 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
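
The new "What's new" entry above renames every MuJoCo task to a ``-v2`` id. A minimal usage sketch, assuming MuJoCo and mujoco_py >= 1.50 are installed (the environment id is one of those registered in this PR; the random-action loop is only for illustration):

.. code:: python

    import gym

    env = gym.make('HalfCheetah-v2')  # the old 'HalfCheetah-v1' id no longer exists
    obs = env.reset()
    for _ in range(1000):
        # step with a random action; a real agent would pick actions from obs
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()
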
26 changes: 13 additions & 13 deletions gym/envs/__init__.py
@@ -204,89 +204,89 @@
 # 2D

 register(
-    id='Reacher-v1',
+    id='Reacher-v2',
     entry_point='gym.envs.mujoco:ReacherEnv',
     max_episode_steps=50,
     reward_threshold=-3.75,
 )

 register(
-    id='Pusher-v0',
+    id='Pusher-v2',
     entry_point='gym.envs.mujoco:PusherEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='Thrower-v0',
+    id='Thrower-v2',
     entry_point='gym.envs.mujoco:ThrowerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='Striker-v0',
+    id='Striker-v2',
     entry_point='gym.envs.mujoco:StrikerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='InvertedPendulum-v1',
+    id='InvertedPendulum-v2',
     entry_point='gym.envs.mujoco:InvertedPendulumEnv',
     max_episode_steps=1000,
     reward_threshold=950.0,
 )

 register(
-    id='InvertedDoublePendulum-v1',
+    id='InvertedDoublePendulum-v2',
     entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
     max_episode_steps=1000,
     reward_threshold=9100.0,
 )

 register(
-    id='HalfCheetah-v1',
+    id='HalfCheetah-v2',
     entry_point='gym.envs.mujoco:HalfCheetahEnv',
     max_episode_steps=1000,
     reward_threshold=4800.0,
 )

 register(
-    id='Hopper-v1',
+    id='Hopper-v2',
     entry_point='gym.envs.mujoco:HopperEnv',
     max_episode_steps=1000,
     reward_threshold=3800.0,
 )

 register(
-    id='Swimmer-v1',
+    id='Swimmer-v2',
     entry_point='gym.envs.mujoco:SwimmerEnv',
     max_episode_steps=1000,
     reward_threshold=360.0,
 )

 register(
-    id='Walker2d-v1',
+    id='Walker2d-v2',
     max_episode_steps=1000,
     entry_point='gym.envs.mujoco:Walker2dEnv',
 )

 register(
-    id='Ant-v1',
+    id='Ant-v2',
     entry_point='gym.envs.mujoco:AntEnv',
     max_episode_steps=1000,
     reward_threshold=6000.0,
 )

 register(
-    id='Humanoid-v1',
+    id='Humanoid-v2',
     entry_point='gym.envs.mujoco:HumanoidEnv',
     max_episode_steps=1000,
 )

 register(
-    id='HumanoidStandup-v1',
+    id='HumanoidStandup-v2',
     entry_point='gym.envs.mujoco:HumanoidStandupEnv',
     max_episode_steps=1000,
 )
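
Each entry above maps a versioned id to an entry_point plus episode metadata. For anyone extending the same pattern, a hedged sketch of registering a custom MuJoCo environment (the id, module path, and class are hypothetical, not part of this PR):

.. code:: python

    from gym.envs.registration import register

    register(
        id='MyCheetah-v0',                            # hypothetical id
        entry_point='my_package.envs:MyCheetahEnv',   # hypothetical module:Class path
        max_episode_steps=1000,                       # episode cap applied when the env is made
        reward_threshold=4800.0,                      # score at which the task counts as solved
    )
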
8 changes: 4 additions & 4 deletions gym/envs/mujoco/ant.py
@@ -14,7 +14,7 @@ def _step(self, a):
         forward_reward = (xposafter - xposbefore)/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()
@@ -30,9 +30,9 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
         ])

     def reset_model(self):
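
The only change in this file is the attribute path: with mujoco_py >= 1.50 the live simulation state hangs off an MjSim object (exposed here as self.sim), while self.model keeps the static model description. A small sketch of the new layout, assuming MuJoCo and mujoco_py >= 1.50 are installed:

.. code:: python

    import numpy as np
    import gym

    env = gym.make('Ant-v2').unwrapped   # unwrap any wrappers added by gym.make to reach the raw MujocoEnv
    print(env.sim.data.qpos.shape)                        # joint positions now live on sim.data
    print(np.clip(env.sim.data.cfrc_ext, -1, 1).shape)    # contact forces, clipped as in _step above
    print(env.model.body_mass.shape)                      # static quantities stay on model
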
8 changes: 4 additions & 4 deletions gym/envs/mujoco/half_cheetah.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         ob = self._get_obs()
         reward_ctrl = - 0.1 * np.square(action).sum()
         reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ def _step(self, action):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
         ])

     def reset_model(self):
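
Besides the model.data to sim.data rename, the indexing changes because the state buffers are no longer column vectors. A standalone numpy sketch of why qpos[0, 0] becomes qpos[0] (shapes are illustrative, not taken from mujoco_py):

.. code:: python

    import numpy as np

    qpos_old = np.zeros((9, 1))   # mujoco-py 0.5.x exposed qpos as an (nq, 1) column
    qpos_new = np.zeros(9)        # mujoco_py >= 1.50 exposes sim.data.qpos as a flat (nq,) array

    x_old = qpos_old[0, 0]        # two indices were needed before
    x_new = qpos_new[0]           # a single index is enough now
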
8 changes: 4 additions & 4 deletions gym/envs/mujoco/hopper.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = (posafter - posbefore) / self.dt
         reward += alive_bonus
@@ -23,8 +23,8 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
         ])

     def reset_model(self):
16 changes: 8 additions & 8 deletions gym/envs/mujoco/humanoid.py
@@ -2,9 +2,9 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
     return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@@ -13,7 +13,7 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -22,17 +22,17 @@ def _get_obs(self):
                                data.cfrc_ext.flat])

     def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
         self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
         alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
         lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
         quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
         quad_impact_cost = min(quad_impact_cost, 10)
         reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
         done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
         return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
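
mass_center now takes both the model and the sim, and adds np.expand_dims so the flat body_mass array broadcasts against the (nbody, 3) xipos positions. A standalone numpy sketch of that computation (array contents are made up for illustration):

.. code:: python

    import numpy as np

    body_mass = np.array([1.0, 2.0, 3.0])         # mujoco_py >= 1.50: flat (nbody,) array
    xipos = np.arange(9.0).reshape(3, 3)          # per-body COM positions, shape (nbody, 3)

    mass = np.expand_dims(body_mass, 1)           # shape (nbody, 1), broadcasts over the 3 coordinates
    com = np.sum(mass * xipos, 0) / np.sum(mass)  # mass-weighted mean position, shape (3,)
    x_com = com[0]                                # forward (x) coordinate used for lin_vel_cost
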
11 changes: 3 additions & 8 deletions gym/envs/mujoco/humanoidstandup.py
@@ -2,18 +2,13 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
-
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -23,8 +18,8 @@ def _get_obs(self):

     def _step(self, a):
         self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
         uph_cost = (pos_after - 0) / self.model.opt.timestep

         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
16 changes: 8 additions & 8 deletions gym/envs/mujoco/inverted_double_pendulum.py
@@ -11,22 +11,22 @@ def __init__(self):
     def _step(self, action):
         self.do_simulation(action, self.frame_skip)
         ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
         dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
         vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
         alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
         done = bool(y <= 1)
         return ob, r, done, {}

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
         ]).ravel()

     def reset_model(self):
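
This file also drops a trailing [0] from the reward: with the old column-shaped qvel, v1 and v2 were length-1 arrays and the penalty expression was an array, whereas the flat qvel in mujoco_py >= 1.50 yields plain scalars. A small numpy sketch of that difference (shapes illustrative only):

.. code:: python

    import numpy as np

    v1, v2 = np.zeros((3, 1))[1:3]     # mujoco-py 0.5.x style: each is a (1,) array
    r_old = (10 - v1**2 - v2**2)[0]    # the reward needed [0] to become a scalar

    v1, v2 = np.zeros(3)[1:3]          # mujoco_py >= 1.50 style: plain scalars
    r_new = 10 - v1**2 - v2**2         # already a scalar
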
2 changes: 1 addition & 1 deletion gym/envs/mujoco/inverted_pendulum.py
@@ -22,7 +22,7 @@ def reset_model(self):
         return self._get_obs()

     def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

     def viewer_setup(self):
         v = self.viewer