
Commit 90da731

Author: Ervin T
[tests] Make end-to-end tests more stable (#3697)
Parent: 1161d33

2 files changed: 17 additions, 9 deletions

ml-agents/mlagents/trainers/tests/simple_test_envs.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 VIS_OBS_SIZE = (20, 20, 3)
 STEP_SIZE = 0.1

-TIME_PENALTY = 0.001
+TIME_PENALTY = 0.01
 MIN_STEPS = int(1.0 / STEP_SIZE) + 1
 SUCCESS_REWARD = 1.0 + MIN_STEPS * TIME_PENALTY

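For orientation (an editor's sketch, not part of the diff): the constants around the changed line determine the environment's success reward directly, so the effect of raising TIME_PENALTY can be checked with a few lines of Python.

STEP_SIZE = 0.1
MIN_STEPS = int(1.0 / STEP_SIZE) + 1        # 11
OLD_SUCCESS = 1.0 + MIN_STEPS * 0.001       # roughly 1.011 with the old TIME_PENALTY
NEW_SUCCESS = 1.0 + MIN_STEPS * 0.01        # roughly 1.11 with the new TIME_PENALTY
print(MIN_STEPS, OLD_SUCCESS, NEW_SUCCESS)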
ml-agents/mlagents/trainers/tests/test_simple_rl.py

Lines changed: 16 additions & 8 deletions
@@ -36,7 +36,7 @@
 lambd: 0.95
 learning_rate: 5.0e-3
 learning_rate_schedule: constant
-max_steps: 2000
+max_steps: 3000
 memory_size: 16
 normalize: false
 num_epoch: 3
@@ -96,6 +96,9 @@ def generate_config(
 # Custom reward processors shuld be built within the test function and passed to _check_environment_trains
 # Default is average over the last 5 final rewards
 def default_reward_processor(rewards, last_n_rewards=5):
+    rewards_to_use = rewards[-last_n_rewards:]
+    # For debugging tests
+    print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
     return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()
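The hunk above only adds a debugging print before the existing mean; for reference, a self-contained copy of the updated helper behaves like this (the NumPy import and the sample reward list below are illustrative):

import numpy as np

def default_reward_processor(rewards, last_n_rewards=5):
    rewards_to_use = rewards[-last_n_rewards:]
    # For debugging tests
    print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
    return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()

# Only the last 5 entries contribute to the reported score.
print(default_reward_processor([0.2, 0.4, 0.6, 1.0, 1.0, 1.0, 1.0, 1.0]))  # 1.0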
@@ -124,7 +127,7 @@ def _check_environment_trains(
     trainer_config,
     reward_processor=default_reward_processor,
     meta_curriculum=None,
-    success_threshold=0.99,
+    success_threshold=0.9,
     env_manager=None,
 ):
     # Create controller and begin training.
@@ -168,7 +171,6 @@ def _check_environment_trains(
     if (
         success_threshold is not None
     ):  # For tests where we are just checking setup and not reward
-
         processed_rewards = [
             reward_processor(rewards) for rewards in env.final_rewards.values()
         ]
@@ -253,11 +255,11 @@ def test_simple_sac(use_discrete):
 @pytest.mark.parametrize("use_discrete", [True, False])
 def test_2d_sac(use_discrete):
     env = SimpleEnvironment(
-        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
+        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
     )
-    override_vals = {"buffer_init_steps": 2000, "max_steps": 3000}
+    override_vals = {"buffer_init_steps": 2000, "max_steps": 4000}
     config = generate_config(SAC_CONFIG, override_vals)
-    _check_environment_trains(env, config)
+    _check_environment_trains(env, config, success_threshold=0.8)


 @pytest.mark.parametrize("use_discrete", [True, False])
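generate_config itself is not part of this diff; a minimal sketch of the override pattern these tests rely on, assuming the base config is a YAML string keyed by brain name and that override_vals simply replaces individual hyperparameters, might look like the following (the brain name and config values here are illustrative):

import yaml

BRAIN_NAME = "test_brain"  # hypothetical brain name for this sketch

BASE_CONFIG = f"""
{BRAIN_NAME}:
    trainer: sac
    buffer_init_steps: 0
    max_steps: 2000
"""

def generate_config(config_yaml, override_vals=None):
    # Assumed behavior: parse the YAML, then overlay the per-test overrides.
    config = yaml.safe_load(config_yaml)
    if override_vals:
        config[BRAIN_NAME].update(override_vals)
    return config

config = generate_config(BASE_CONFIG, {"buffer_init_steps": 2000, "max_steps": 4000})
print(config[BRAIN_NAME]["max_steps"])  # 4000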
@@ -301,7 +303,13 @@ def test_visual_advanced_sac(vis_encode_type, num_visual):
 @pytest.mark.parametrize("use_discrete", [True, False])
 def test_recurrent_sac(use_discrete):
     env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
-    override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000}
+    override_vals = {
+        "batch_size": 64,
+        "use_recurrent": True,
+        "max_steps": 3000,
+        "learning_rate": 1e-3,
+        "buffer_init_steps": 500,
+    }
     config = generate_config(SAC_CONFIG, override_vals)
     _check_environment_trains(env, config)
@@ -343,7 +351,7 @@ def test_simple_ghost_fails(use_discrete):
     processed_rewards = [
         default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
-    success_threshold = 0.99
+    success_threshold = 0.9
     assert any(reward > success_threshold for reward in processed_rewards) and any(
         reward < success_threshold for reward in processed_rewards
     )

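The assertion in test_simple_ghost_fails is two-sided: with the threshold lowered to 0.9, the test still requires at least one brain to finish above it and at least one below it. A tiny illustration with made-up processed rewards:

processed_rewards = [0.95, 0.3]  # illustrative values, not real training output
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
    reward < success_threshold for reward in processed_rewards
)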