Release 2 cherry pick #3971

Merged · 5 commits · May 15, 2020

1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to

## [1.0.1-preview] - 2020-05-19
### Bug Fixes
- An issue was fixed where using `--initialize-from` would resume from the past step count. (#3962)
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

20 changes: 18 additions & 2 deletions com.unity.ml-agents/Editor/BrainParametersDrawer.cs
@@ -124,7 +124,14 @@ static void DrawVectorAction(Rect position, SerializedProperty property)
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = 1;

// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
EditorGUI.PropertyField(
@@ -142,8 +149,17 @@ static void DrawContinuousVectorAction(Rect position, SerializedProperty propert
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = EditorGUI.IntField(
var newSize = EditorGUI.IntField(
position, "Branches Size", vecActionSize.arraySize);

// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (newSize != vecActionSize.arraySize)
{
vecActionSize.arraySize = newSize;
}

position.y += k_LineHeight;
position.x += 20;
position.width -= 20;
18 changes: 11 additions & 7 deletions com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
@@ -26,10 +26,11 @@ public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
{
var agentInfoProto = ai.ToAgentInfoProto();

var agentActionProto = new AgentActionProto
var agentActionProto = new AgentActionProto();
if(ai.storedVectorActions != null)
{
VectorActions = { ai.storedVectorActions }
};
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
}

return new AgentInfoActionPairProto
{
@@ -95,12 +96,14 @@ public static BrainParametersProto ToProto(this BrainParameters bp, string name,
var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType =
(SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
BrainName = name,
IsTraining = isTraining
};
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
if(bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

@@ -128,13 +131,14 @@ public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
/// </summary>
public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
{
var demonstrationName = dm.demonstrationName ?? "";
var demoProto = new DemonstrationMetaProto
{
ApiVersion = DemonstrationMetaData.ApiVersion,
MeanReward = dm.meanReward,
NumberSteps = dm.numberSteps,
NumberEpisodes = dm.numberEpisodes,
DemonstrationName = dm.demonstrationName
DemonstrationName = demonstrationName
};
return demoProto;
}
@@ -0,0 +1,37 @@
using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Sensors;

namespace Unity.MLAgents.Tests
{
[TestFixture]
public class GrpcExtensionsTests
{
[Test]
public void TestDefaultBrainParametersToProto()
{
// Should be able to convert a default instance to proto.
var brain = new BrainParameters();
brain.ToProto("foo", false);
}

[Test]
public void TestDefaultAgentInfoToProto()
{
// Should be able to convert a default instance to proto.
var agentInfo = new AgentInfo();
agentInfo.ToInfoActionPairProto();
agentInfo.ToAgentInfoProto();
}

[Test]
public void TestDefaultDemonstrationMetaDataToProto()
{
// Should be able to convert a default instance to proto.
var demoMetaData = new DemonstrationMetaData();
demoMetaData.ToProto();
}
}
}
13 changes: 8 additions & 5 deletions com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -13,8 +13,10 @@ internal class TestPolicy : IPolicy
{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)
{
sensor.GetObservationProto(m_ObsWriter);
}
OnRequestDecision?.Invoke();
@@ -517,8 +519,10 @@ public void AssertStackingReset()
agent1.SetPolicy(policy);

StackingSensor sensor = null;
foreach(ISensor s in agent1.sensors){
if (s is StackingSensor){
foreach (ISensor s in agent1.sensors)
{
if (s is StackingSensor)
{
sensor = s as StackingSensor;
}
}
@@ -529,7 +533,6 @@ public void AssertStackingReset()
{
agent1.RequestDecision();
aca.EnvironmentStep();

}

policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
4 changes: 2 additions & 2 deletions docs/Training-ML-Agents.md
@@ -445,8 +445,8 @@ Below is a list of included `sampler-type` as part of the toolkit.
`interval_2_max`], ...]
- **sub-arguments** - `intervals`

The implementation of the samplers can be found at
`ml-agents-envs/mlagents_envs/sampler_class.py`.
The implementation of the samplers can be found in the
[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).

#### Defining a New Sampler Type

6 changes: 2 additions & 4 deletions docs/Using-Tensorboard.md
@@ -21,10 +21,8 @@ session running on port 6006 a new session can be launched on an open port using
the --port option.

**Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
default string, "ppo". All the statistics will be saved to the same sub-folder
and displayed as one session in TensorBoard. After a few runs, the displays can
become difficult to interpret in this situation. You can delete the folders
under the `summaries` directory to clear out old statistics.
default string, "ppo". You can delete the folders under the `results` directory
to clear out old statistics.

On the left side of the TensorBoard window, you can select which of the training
runs you want to display. You can select multiple run-ids to compare statistics.
13 changes: 5 additions & 8 deletions gym-unity/gym_unity/envs/__init__.py
@@ -53,7 +53,6 @@ def __init__(
self._env.step()

self.visual_obs = None
self._n_agents = -1

# Save the step result from the last time all Agents requested decisions.
self._previous_decision_step: DecisionSteps = None
@@ -172,6 +171,7 @@ def step(self, action: List[Any]) -> GymStepResult:

self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)
self._check_agents(max(len(decision_step), len(terminal_step)))
if len(terminal_step) != 0:
# The agent is done
self.game_over = True
@@ -264,10 +264,11 @@ def seed(self, seed: Any = None) -> None:
logger.warning("Could not seed environment %s", self.name)
return

def _check_agents(self, n_agents: int) -> None:
if self._n_agents > 1:
@staticmethod
def _check_agents(n_agents: int) -> None:
if n_agents > 1:
raise UnityGymException(
"There can only be one Agent in the environment but {n_agents} were detected."
f"There can only be one Agent in the environment but {n_agents} were detected."
)

@property
@@ -290,10 +291,6 @@ def action_space(self):
def observation_space(self):
return self._observation_space

@property
def number_agents(self):
return self._n_agents


class ActionFlattener:
"""
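One of the gym wrapper fixes above is easy to miss: the original exception message was a plain string, so the `{n_agents}` placeholder was printed literally rather than interpolated. A minimal, standalone sketch of the difference (illustrative only, not part of the diff):

```python
n_agents = 3

# Plain string (the old code): the placeholder is emitted verbatim.
plain = "There can only be one Agent in the environment but {n_agents} were detected."

# f-string (the fixed code): the value is interpolated.
formatted = f"There can only be one Agent in the environment but {n_agents} were detected."

print(plain)      # ... but {n_agents} were detected.
print(formatted)  # ... but 3 were detected.
```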
6 changes: 3 additions & 3 deletions ml-agents-envs/mlagents_envs/environment.py
@@ -415,9 +415,9 @@ def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
behavior_name, expected_shape, action.shape
)
"The behavior {0} needs an input of dimension {1} for "
"(<number of agents>, <action size>) but received input of "
"dimension {2}".format(behavior_name, expected_shape, action.shape)
)
if action.dtype != expected_type:
action = action.astype(expected_type)
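The reworded exception above spells out the shape `set_actions` expects: one row per agent, one column per action dimension. A hedged usage sketch — `env` and `behavior_name` are assumed to come from an already-connected `UnityEnvironment`, and the action size is illustrative:

```python
import numpy as np

# Assumed setup (not shown): env is a connected, reset UnityEnvironment and
# behavior_name names one of its behaviors.
decision_steps, terminal_steps = env.get_steps(behavior_name)

num_agents = len(decision_steps)
action_size = 2  # illustrative; in practice read from the behavior's action spec

# Expected shape is (<number of agents>, <action size>); anything else now
# raises the clearer UnityActionException message above.
actions = np.zeros((num_agents, action_size), dtype=np.float32)
env.set_actions(behavior_name, actions)
env.step()
```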
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -137,6 +137,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
)
)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
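This one-line addition is the fix the CHANGELOG entry refers to: the `--initialize-from` path loads the checkpoint with `reset_global_steps=True`, and the policy now zeroes its step counter instead of inheriting the old run's count. A hedged sketch of the intended behavior — `_load_graph` and `get_current_step` are taken from this diff and the tests below; the call sequence and other names are illustrative:

```python
# Resuming the same run keeps the saved step count.
policy._load_graph(model_path, reset_global_steps=False)
assert policy.get_current_step() == previously_saved_step

# Initializing a new run from that checkpoint (the --initialize-from case)
# now restarts the counter at zero.
policy._load_graph(model_path, reset_global_steps=True)
assert policy.get_current_step() == 0
```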
4 changes: 4 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_nn_policy.py
@@ -86,6 +86,7 @@ def test_load_save(dummy_config, tmp_path):
trainer_params["model_path"] = path1
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy._set_step(2000)
policy.save_model(2000)

assert len(os.listdir(tmp_path)) > 0
@@ -94,6 +95,7 @@ def test_load_save(dummy_config, tmp_path):
policy2 = create_policy_mock(trainer_params, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000

# Try initialize from path 1
trainer_params["model_path"] = path2
@@ -102,6 +104,8 @@ def test_load_save(dummy_config, tmp_path):
policy3.initialize_or_load()

_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0


def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None: