Skip to content

Commit 68779b4

Browse files
author
Chris Elion
authored
handle mismatch between brain and metacurriculum (#3034)
* handle mismatch between brain and metacurriculum
* add unit tests
* use os.path.splitext in metacurriculum
* fix type
1 parent d5c6ff8 commit 68779b4

File tree

6 files changed

+87
-19
lines changed

6 files changed

+87
-19
lines changed

ml-agents/mlagents/trainers/meta_curriculum.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ def __init__(self, curriculum_folder: str):
3232
try:
3333
for curriculum_filename in os.listdir(curriculum_folder):
3434
# This process requires JSON files
35-
if not curriculum_filename.lower().endswith(".json"):
35+
brain_name, extension = os.path.splitext(curriculum_filename)
36+
if extension.lower() != ".json":
3637
continue
37-
brain_name = curriculum_filename.split(".")[0]
3838
curriculum_filepath = os.path.join(
3939
curriculum_folder, curriculum_filename
4040
)
@@ -78,7 +78,9 @@ def lesson_nums(self, lesson_nums):
7878
for brain_name, lesson in lesson_nums.items():
7979
self.brains_to_curriculums[brain_name].lesson_num = lesson
8080

81-
def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
81+
def _lesson_ready_to_increment(
82+
self, brain_name: str, reward_buff_size: int
83+
) -> bool:
8284
"""Determines whether the curriculum of a specified brain is ready
8385
to attempt an increment.
8486
@@ -92,6 +94,9 @@ def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
9294
Whether the curriculum of the specified brain should attempt to
9395
increment its lesson.
9496
"""
97+
if brain_name not in self.brains_to_curriculums:
98+
return False
99+
95100
return reward_buff_size >= (
96101
self.brains_to_curriculums[brain_name].min_lesson_length
97102
)

ml-agents/mlagents/trainers/tests/test_curriculum.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from mlagents.trainers.exception import CurriculumConfigError, CurriculumLoadingError
77
from mlagents.trainers.curriculum import Curriculum
88

9-
109
dummy_curriculum_json_str = """
1110
{
1211
"measure" : "reward",

ml-agents/mlagents/trainers/tests/test_meta_curriculum.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
import pytest
2-
from unittest.mock import patch, call
2+
from unittest.mock import patch, call, mock_open
33

44
from mlagents.trainers.meta_curriculum import MetaCurriculum
5+
from mlagents.trainers.curriculum import Curriculum
56
from mlagents.trainers.exception import MetaCurriculumError
67

8+
from mlagents.trainers.tests.test_simple_rl import (
9+
Simple1DEnvironment,
10+
_check_environment_trains,
11+
BRAIN_NAME,
12+
)
13+
from mlagents.trainers.tests.test_curriculum import dummy_curriculum_json_str
14+
715

816
class MetaCurriculumTest(MetaCurriculum):
917
"""This class allows us to test MetaCurriculum objects without calling
@@ -36,7 +44,7 @@ def reward_buff_sizes():
3644

3745
@patch("mlagents.trainers.curriculum.Curriculum.get_config", return_value={})
3846
@patch("mlagents.trainers.curriculum.Curriculum.__init__", return_value=None)
39-
@patch("os.listdir", return_value=["Brain1.json", "Brain2.json"])
47+
@patch("os.listdir", return_value=["Brain1.json", "Brain2.test.json"])
4048
def test_init_meta_curriculum_happy_path(
4149
listdir, mock_curriculum_init, mock_curriculum_get_config, default_reset_parameters
4250
):
@@ -45,9 +53,9 @@ def test_init_meta_curriculum_happy_path(
4553
assert len(meta_curriculum.brains_to_curriculums) == 2
4654

4755
assert "Brain1" in meta_curriculum.brains_to_curriculums
48-
assert "Brain2" in meta_curriculum.brains_to_curriculums
56+
assert "Brain2.test" in meta_curriculum.brains_to_curriculums
4957

50-
calls = [call("test/Brain1.json"), call("test/Brain2.json")]
58+
calls = [call("test/Brain1.json"), call("test/Brain2.test.json")]
5159

5260
mock_curriculum_init.assert_has_calls(calls)
5361

@@ -133,3 +141,40 @@ def test_get_config(
133141
new_reset_parameters.update(more_reset_parameters)
134142

135143
assert meta_curriculum.get_config() == new_reset_parameters
144+
145+
146+
META_CURRICULUM_CONFIG = """
147+
default:
148+
trainer: ppo
149+
batch_size: 16
150+
beta: 5.0e-3
151+
buffer_size: 64
152+
epsilon: 0.2
153+
hidden_units: 128
154+
lambd: 0.95
155+
learning_rate: 5.0e-3
156+
max_steps: 100
157+
memory_size: 256
158+
normalize: false
159+
num_epoch: 3
160+
num_layers: 2
161+
time_horizon: 64
162+
sequence_length: 64
163+
summary_freq: 50
164+
use_recurrent: false
165+
reward_signals:
166+
extrinsic:
167+
strength: 1.0
168+
gamma: 0.99
169+
"""
170+
171+
172+
@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"])
173+
def test_simple_metacurriculum(curriculum_brain_name):
174+
env = Simple1DEnvironment(use_discrete=False)
175+
with patch(
176+
"builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str
177+
):
178+
curriculum = Curriculum("TestBrain.json")
179+
mc = MetaCurriculumTest({curriculum_brain_name: curriculum})
180+
_check_environment_trains(env, META_CURRICULUM_CONFIG, mc, -100.0)

ml-agents/mlagents/trainers/tests/test_simple_rl.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,9 @@ def close(self):
183183
"""
184184

185185

186-
def _check_environment_trains(env, config):
186+
def _check_environment_trains(
187+
env, config, meta_curriculum=None, success_threshold=0.99
188+
):
187189
# Create controller and begin training.
188190
with tempfile.TemporaryDirectory() as dir:
189191
run_id = "id"
@@ -201,7 +203,7 @@ def _check_environment_trains(env, config):
201203
train_model=True,
202204
load_model=False,
203205
seed=seed,
204-
meta_curriculum=None,
206+
meta_curriculum=meta_curriculum,
205207
multi_gpu=False,
206208
)
207209

@@ -210,7 +212,7 @@ def _check_environment_trains(env, config):
210212
summaries_dir=dir,
211213
model_path=dir,
212214
run_id=run_id,
213-
meta_curriculum=None,
215+
meta_curriculum=meta_curriculum,
214216
train=True,
215217
training_seed=seed,
216218
sampler_manager=SamplerManager(None),
@@ -223,7 +225,7 @@ def _check_environment_trains(env, config):
223225
print(tc._get_measure_vals())
224226
for brain_name, mean_reward in tc._get_measure_vals().items():
225227
assert not math.isnan(mean_reward)
226-
assert mean_reward > 0.99
228+
assert mean_reward > success_threshold
227229

228230

229231
@pytest.mark.parametrize("use_discrete", [True, False])

ml-agents/mlagents/trainers/trainer_controller.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ def _get_measure_vals(self):
7171
brain_name,
7272
curriculum,
7373
) in self.meta_curriculum.brains_to_curriculums.items():
74+
# Skip brains that are in the metacurriculum but do not have a trainer yet.
75+
if brain_name not in self.trainers:
76+
continue
7477
if curriculum.measure == "progress":
7578
measure_val = (
7679
self.trainers[brain_name].get_step
@@ -168,7 +171,10 @@ def write_to_tensorboard(self, global_step: int) -> None:
168171
for brain_name, trainer in self.trainers.items():
169172
# Write training statistics to Tensorboard.
170173
delta_train_start = time() - self.training_start_time
171-
if self.meta_curriculum is not None:
174+
if (
175+
self.meta_curriculum
176+
and brain_name in self.meta_curriculum.brains_to_curriculums
177+
):
172178
trainer.write_summary(
173179
global_step,
174180
delta_train_start,

ml-agents/mlagents/trainers/trainer_util.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import yaml
22
from typing import Any, Dict, TextIO
3+
import logging
34

45
from mlagents.trainers.meta_curriculum import MetaCurriculum
56
from mlagents.trainers.exception import TrainerConfigError
@@ -8,6 +9,8 @@
89
from mlagents.trainers.ppo.trainer import PPOTrainer
910
from mlagents.trainers.sac.trainer import SACTrainer
1011

12+
logger = logging.getLogger("mlagents.trainers")
13+
1114

1215
class TrainerFactory:
1316
def __init__(
@@ -101,6 +104,18 @@ def initialize_trainer(
101104
_brain_key = trainer_config[_brain_key]
102105
trainer_parameters.update(trainer_config[_brain_key])
103106

107+
min_lesson_length = 1
108+
if meta_curriculum:
109+
if brain_name in meta_curriculum.brains_to_curriculums:
110+
min_lesson_length = meta_curriculum.brains_to_curriculums[
111+
brain_name
112+
].min_lesson_length
113+
else:
114+
logger.warning(
115+
f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
116+
f"Brains with curricula: {meta_curriculum.brains_to_curriculums.keys()}. "
117+
)
118+
104119
trainer: Trainer = None # type: ignore # will be set to one of these, or raise
105120
if "trainer" not in trainer_parameters:
106121
raise TrainerConfigError(
@@ -117,9 +132,7 @@ def initialize_trainer(
117132
elif trainer_type == "ppo":
118133
trainer = PPOTrainer(
119134
brain_parameters,
120-
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
121-
if meta_curriculum
122-
else 1,
135+
min_lesson_length,
123136
trainer_parameters,
124137
train_model,
125138
load_model,
@@ -130,9 +143,7 @@ def initialize_trainer(
130143
elif trainer_type == "sac":
131144
trainer = SACTrainer(
132145
brain_parameters,
133-
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
134-
if meta_curriculum
135-
else 1,
146+
min_lesson_length,
136147
trainer_parameters,
137148
train_model,
138149
load_model,

0 commit comments

Comments
 (0)