Skip to content

Fix issue with different decision intervals for different brains #3181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ml-agents/mlagents/trainers/agent_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def add_experiences(
del self.episode_rewards[agent_id]
elif not next_info.local_done[next_idx]:
self.episode_steps[agent_id] += 1
self.policy.save_previous_action(
curr_info.agents, take_action_outputs["action"]
)
if "action" in take_action_outputs:
self.policy.save_previous_action(
curr_info.agents, take_action_outputs["action"]
)
9 changes: 4 additions & 5 deletions ml-agents/mlagents/trainers/env_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple
from typing import List, Dict, NamedTuple, Iterable
from mlagents.trainers.brain import AllBrainInfo, BrainParameters
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo
Expand All @@ -10,10 +10,9 @@ class EnvironmentStep(NamedTuple):
current_all_brain_info: AllBrainInfo
brain_name_to_action_info: Dict[str, ActionInfo]

def has_actions_for_brain(self, brain_name: str) -> bool:
return brain_name in self.brain_name_to_action_info and bool(
self.brain_name_to_action_info[brain_name].outputs
)
@property
def name_behavior_ids(self) -> Iterable[str]:
return self.brain_name_to_action_info.keys()


class EnvManager(ABC):
Expand Down
23 changes: 13 additions & 10 deletions ml-agents/mlagents/trainers/trainer_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,17 +294,20 @@ def advance(self, env: EnvManager) -> int:
with hierarchical_timer("env_step"):
new_step_infos = env.step()
for step_info in new_step_infos:
for brain_name in self.trainers.keys():
for name_behavior_id in self.brain_name_to_identifier[brain_name]:
if step_info.has_actions_for_brain(name_behavior_id):
_processor = self.managers[name_behavior_id].processor
_processor.add_experiences(
step_info.previous_all_brain_info[name_behavior_id],
step_info.current_all_brain_info[name_behavior_id],
step_info.brain_name_to_action_info[
name_behavior_id
].outputs,
for name_behavior_id in step_info.name_behavior_ids:
if name_behavior_id not in self.managers:
self.logger.warning(
"Agent manager was not created for behavior id {}.".format(
name_behavior_id
)
)
continue
[Inline review thread]
Contributor (reviewer): Should we warn and/or throw an exception here?
Contributor (author): Added a warning.

_processor = self.managers[name_behavior_id].processor
_processor.add_experiences(
step_info.previous_all_brain_info[name_behavior_id],
step_info.current_all_brain_info[name_behavior_id],
step_info.brain_name_to_action_info[name_behavior_id].outputs,
)

for brain_name, trainer in self.trainers.items():
if self.train_model and trainer.get_step <= trainer.get_max_steps:
Expand Down