import subprocess
import os

import gym
import pytest
import numpy as np

from stable_baselines import A2C, ACKTR, SAC, DDPG, PPO1, PPO2, TRPO, TD3
# TODO: add support for continuous actions
# from stable_baselines.acer import ACER
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.identity_env import IdentityEnvBox
from stable_baselines.ddpg import AdaptiveParamNoiseSpec, NormalActionNoise
from stable_baselines.common.evaluation import evaluate_policy
from tests.test_common import _assert_eq

N_EVAL_EPISODES = 20
NUM_TIMESTEPS = 15000

MODEL_LIST = [
    A2C,
    # ACER,
    ACKTR,
    DDPG,
    PPO1,
    PPO2,
    SAC,
    TD3,
    TRPO
]
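
# Illustrative sketch (not part of the original test suite): the parametrized test below
# runs the full save/load check for every algorithm in MODEL_LIST; this hypothetical helper
# shows the same train-and-evaluate loop for a single algorithm, using only APIs already
# imported above.
def _demo_single_model(model_class=PPO2, total_timesteps=1000):
    """Train one continuous-action algorithm on IdentityEnvBox and return its mean reward."""
    demo_env = DummyVecEnv([lambda: IdentityEnvBox(eps=0.5)])
    demo_model = model_class(policy="MlpPolicy", env=demo_env, seed=0)
    demo_model.learn(total_timesteps=total_timesteps)
    mean_reward, _ = evaluate_policy(demo_model, demo_env, n_eval_episodes=5)
    return mean_reward
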
@pytest.mark.slow
@pytest.mark.parametrize("model_class", MODEL_LIST)
def test_model_manipulation(request, model_class):
    """
    Test that the algorithm can be saved and loaded without any issue, that switching
    environments works, and that action prediction works.

    :param request: (pytest.FixtureRequest) the pytest request fixture, used to name the saved model file
    :param model_class: (BaseRLModel) A model
    """
    model_fname = './test_model_{}.zip'.format(request.node.name)
    try:
        env = DummyVecEnv([lambda: IdentityEnvBox(eps=0.5)])

        # create and train
        model = model_class(policy="MlpPolicy", env=env, seed=0)
        model.learn(total_timesteps=NUM_TIMESTEPS)

        acc_reward, _ = evaluate_policy(model, env, n_eval_episodes=N_EVAL_EPISODES)

        # saving
        model.save(model_fname)

        del model, env

        # loading
        model = model_class.load(model_fname)

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnvBox(eps=0.5)])
        model.set_env(env)

        loaded_acc_reward, _ = evaluate_policy(model, env, n_eval_episodes=N_EVAL_EPISODES)

        obs = env.reset()
        with pytest.warns(None) as record:
            act_prob = model.action_probability(obs)

        if model_class in [DDPG, SAC, TD3]:
            # check that exactly one warning was raised
            assert len(record) == 1, "Expected exactly one warning for {}".format(model_class)
            assert act_prob is None, "Error: action_probability should be None for {}".format(model_class)
        else:
            assert act_prob[0].shape == (1, 1) and act_prob[1].shape == (1, 1), \
                "Error: action_probability not returning correct shape"

        # test action probability for given (obs, action) pair
        # must return zero and raise a warning or raise an exception if not defined
        env = model.get_env()
        obs = env.reset()
        observations = np.array([obs for _ in range(10)])
        observations = np.squeeze(observations)
        observations = observations.reshape((-1, 1))
        actions = np.array([env.action_space.sample() for _ in range(10)])

        if model_class in [DDPG, SAC, TD3]:
            with pytest.raises(ValueError):
                model.action_probability(observations, actions=actions)
        else:
            actions_probas = model.action_probability(observations, actions=actions)
            assert actions_probas.shape == (len(actions), 1), actions_probas.shape
            assert np.all(actions_probas >= 0), actions_probas
            actions_logprobas = model.action_probability(observations, actions=actions, logp=True)
            assert np.allclose(actions_probas, np.exp(actions_logprobas)), (actions_probas, actions_logprobas)

        # assert <15% diff
        assert abs(acc_reward - loaded_acc_reward) / max(acc_reward, loaded_acc_reward) < 0.15, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=100)

        # validate no reset post learning
        # This test was failing from time to time for no good reason
        # other than bad luck
        # We should change this test
        # loaded_acc_reward = 0
        # set_global_seeds(0)
        # obs = env.reset()
        # for _ in range(N_EVAL_EPISODES):
        #     action, _ = model.predict(obs)
        #     obs, reward, _, _ = env.step(action)
        #     loaded_acc_reward += reward
        # loaded_acc_reward = sum(loaded_acc_reward) / N_EVAL_EPISODES
        # # assert <10% diff
        # assert abs(acc_reward - loaded_acc_reward) / max(acc_reward, loaded_acc_reward) < 0.1, \
        #     "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        evaluate_policy(model, env, n_eval_episodes=N_EVAL_EPISODES)

        # Free memory
        del model, env
    finally:
        if os.path.exists(model_fname):
            os.remove(model_fname)

def test_ddpg():
    """
    Test the DDPG command-line script on Pendulum-v0 for a small number of timesteps.
    """
    args = ['--env-id', 'Pendulum-v0', '--num-timesteps', 1000, '--noise-type', 'ou_0.01']
    args = list(map(str, args))
    return_code = subprocess.call(['python', '-m', 'stable_baselines.ddpg.main'] + args)
    _assert_eq(return_code, 0)

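
# Programmatic sketch (an assumption, not asserted by the CLI test above): roughly what the
# `stable_baselines.ddpg.main` invocation with `--noise-type ou_0.01` does, i.e. training
# DDPG on Pendulum-v0 with Ornstein-Uhlenbeck action noise. The helper name and the local
# import of OrnsteinUhlenbeckActionNoise are only used for this illustration.
def _demo_ddpg_ou_noise(total_timesteps=1000):
    """Train DDPG briefly on Pendulum-v0 with OU action noise and return the model."""
    from stable_baselines.ddpg import OrnsteinUhlenbeckActionNoise
    noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(1), sigma=0.01 * np.ones(1))
    demo_model = DDPG('MlpPolicy', 'Pendulum-v0', action_noise=noise, verbose=0)
    demo_model.learn(total_timesteps=total_timesteps)
    return demo_model
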
def test_ddpg_eval_env():
    """
    Additional test to check that everything is working when passing
    an eval env.
    """
    eval_env = gym.make("Pendulum-v0")
    model = DDPG("MlpPolicy", "Pendulum-v0", nb_rollout_steps=5,
                 nb_train_steps=2, nb_eval_steps=10,
                 eval_env=eval_env, verbose=0)
    model.learn(1000)

def test_ddpg_normalization():
    """
    Test that observation and return normalization statistics are properly saved and loaded.
    """
    param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.05, desired_action_stddev=0.05)
    model = DDPG('MlpPolicy', 'Pendulum-v0', memory_limit=50000, normalize_observations=True,
                 normalize_returns=True, nb_rollout_steps=128, nb_train_steps=1,
                 batch_size=64, param_noise=param_noise)
    model.learn(1000)

    obs_rms_params = model.sess.run(model.obs_rms_params)
    ret_rms_params = model.sess.run(model.ret_rms_params)
    model.save('./test_ddpg.zip')

    loaded_model = DDPG.load('./test_ddpg.zip')
    obs_rms_params_2 = loaded_model.sess.run(loaded_model.obs_rms_params)
    ret_rms_params_2 = loaded_model.sess.run(loaded_model.ret_rms_params)

    for param, param_loaded in zip(obs_rms_params + ret_rms_params,
                                   obs_rms_params_2 + ret_rms_params_2):
        assert np.allclose(param, param_loaded)

    del model, loaded_model

    if os.path.exists("./test_ddpg.zip"):
        os.remove("./test_ddpg.zip")

def test_ddpg_popart():
    """
    Test DDPG with Pop-Art normalization.
    """
    n_actions = 1
    action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))
    model = DDPG('MlpPolicy', 'Pendulum-v0', memory_limit=50000, normalize_observations=True,
                 normalize_returns=True, nb_rollout_steps=128, nb_train_steps=1,
                 batch_size=64, action_noise=action_noise, enable_popart=True)
    model.learn(1000)

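
# Optional convenience sketch (an assumption, not in the original file): allow running this
# module directly with `python test_continuous.py` instead of invoking the pytest CLI.
if __name__ == '__main__':
    pytest.main(['-v', __file__])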