Skip to content

Commit fd0e092

Browse files
Merge pull request #4872 from Unity-Technologies/fix-numti-env-delayed-spawn
[Bug Fix] Fix crash if spawn is delayed in multi-env
2 parents 78c3f31 + 9a99962 commit fd0e092

File tree

3 files changed

+33
-2
lines changed

3 files changed

+33
-2
lines changed

com.unity.ml-agents/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ removed when training with a player. The Editor still requires it to be clamped
3232
- Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
3333
#### ml-agents / ml-agents-envs / gym-unity (Python)
3434
- Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)
35+
- Fixed a bug that could cause a crash in multi-environment training when a behavior first appeared after training had already started. (#4872)
3536
- Fixed the computation of entropy for continuous actions. (#4869)
3637

3738

ml-agents/mlagents/trainers/subprocess_env_manager.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,11 @@ def set_env_parameters(self, config: Dict = None) -> None:
310310

311311
@property
312312
def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
313-
self.env_workers[0].send(EnvironmentCommand.BEHAVIOR_SPECS)
314-
return self.env_workers[0].recv().payload
313+
result: Dict[BehaviorName, BehaviorSpec] = {}
314+
for worker in self.env_workers:
315+
worker.send(EnvironmentCommand.BEHAVIOR_SPECS)
316+
result.update(worker.recv().payload)
317+
return result
315318

316319
def close(self) -> None:
317320
logger.debug("SubprocessEnvManager closing.")

ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,33 @@ def test_reset_collects_results_from_all_envs(self, mock_create_worker):
102102
)
103103
assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))
104104

105+
@mock.patch(
106+
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
107+
)
108+
def test_training_behaviors_collects_results_from_all_envs(
109+
self, mock_create_worker
110+
):
111+
def create_worker_mock(worker_id, step_queue, env_factor, engine_c):
112+
return MockEnvWorker(
113+
worker_id,
114+
EnvironmentResponse(
115+
EnvironmentCommand.RESET, worker_id, {f"key{worker_id}": worker_id}
116+
),
117+
)
118+
119+
mock_create_worker.side_effect = create_worker_mock
120+
manager = SubprocessEnvManager(
121+
mock_env_factory, EngineConfig.default_config(), 4
122+
)
123+
124+
res = manager.training_behaviors
125+
for env in manager.env_workers:
126+
env.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
127+
env.recv.assert_called()
128+
for worker_id in range(4):
129+
assert f"key{worker_id}" in res
130+
assert res[f"key{worker_id}"] == worker_id
131+
105132
@mock.patch(
106133
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
107134
)

0 commit comments

Comments
 (0)