
Commit 4058e95

Author: Ervin T
Split Policy and Optimizer, common Policy for PPO and SAC (#3345)
1 parent: b4e8ba1

50 files changed: +2932 additions, −3586 deletions
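
The heart of the commit, in skeleton form: one Policy class shared by PPO and SAC, with an algorithm-specific Optimizer split out beside it that owns the losses and the learning rate. The sketch below uses hypothetical class names, not the actual ML-Agents code.

```python
# Schematic sketch of the split (hypothetical names, not ML-Agents code).
class Policy:
    """Maps observations to actions; owns the (recurrent) policy network."""

    def __init__(self, memory_size: int = 128):
        # After the split, the recurrent memory serves only the policy,
        # consistent with the default memory_size halving to 128 below.
        self.memory_size = memory_size

    def evaluate(self, observations):
        raise NotImplementedError  # run the policy network, return actions


class PPOOptimizer:
    """Owns the PPO value head, clipped-surrogate loss, and learning rate."""

    def __init__(self, policy: Policy):
        self.policy = policy

    def update(self, batch):
        raise NotImplementedError  # PPO update against self.policy


class SACOptimizer:
    """Owns the SAC Q-networks, entropy coefficient, and learning rate."""

    def __init__(self, policy: Policy):
        self.policy = policy

    def update(self, batch):
        raise NotImplementedError  # SAC update against self.policy
```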

com.unity.ml-agents/CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Agent.CollectObservations now takes a VectorSensor argument. It was also overloaded to optionally take an ActionMasker argument. (#3352, #3389)
 - Beta support for ONNX export was added. If the `tf2onnx` python package is installed, models will be saved to `.onnx` as well as `.nn` format.
   Note that Barracuda 0.6.0 or later is required to import the `.onnx` files properly
+- Multi-GPU training and the `--multi-gpu` option has been removed temporarily. (#3345)

 ### Minor Changes
 - Monitor.cs was moved to Examples. (#3372)

config/sac_trainer_config.yaml

Lines changed: 5 additions & 4 deletions

@@ -8,7 +8,7 @@ default:
     learning_rate: 3.0e-4
     learning_rate_schedule: constant
     max_steps: 5.0e5
-    memory_size: 256
+    memory_size: 128
     normalize: false
     num_update: 1
     train_interval: 1
@@ -214,7 +214,7 @@ Hallway:
     sequence_length: 32
     num_layers: 2
     hidden_units: 128
-    memory_size: 256
+    memory_size: 128
     init_entcoef: 0.1
     max_steps: 1.0e7
     summary_freq: 10000
@@ -225,10 +225,11 @@ VisualHallway:
     sequence_length: 32
     num_layers: 1
     hidden_units: 128
-    memory_size: 256
+    memory_size: 128
     gamma: 0.99
     batch_size: 64
     max_steps: 1.0e7
+    summary_freq: 10000
     time_horizon: 64
     use_recurrent: true

@@ -237,7 +238,7 @@ VisualPushBlock:
     sequence_length: 32
     num_layers: 1
     hidden_units: 128
-    memory_size: 256
+    memory_size: 128
     gamma: 0.99
     buffer_size: 1024
     batch_size: 64

config/trainer_config.yaml

Lines changed: 4 additions & 4 deletions

@@ -9,7 +9,7 @@ default:
     learning_rate: 3.0e-4
     learning_rate_schedule: linear
     max_steps: 5.0e5
-    memory_size: 256
+    memory_size: 128
     normalize: false
     num_epoch: 3
     num_layers: 2
@@ -219,7 +219,7 @@ Hallway:
     sequence_length: 64
     num_layers: 2
     hidden_units: 128
-    memory_size: 256
+    memory_size: 128
     beta: 1.0e-2
     num_epoch: 3
     buffer_size: 1024
@@ -233,7 +233,7 @@ VisualHallway:
     sequence_length: 64
     num_layers: 1
     hidden_units: 128
-    memory_size: 256
+    memory_size: 128
     beta: 1.0e-2
     num_epoch: 3
     buffer_size: 1024
@@ -247,7 +247,7 @@ VisualPushBlock:
     sequence_length: 32
     num_layers: 1
     hidden_units: 128
-    memory_size: 256
+    memory_size: 128
     beta: 1.0e-2
     num_epoch: 3
     buffer_size: 1024
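
Both config files make the same substantive edit: every recurrent behavior's memory_size drops from 256 to 128. A minimal validation sketch (assuming PyYAML and the repo-relative path below; the helper itself is not part of ML-Agents) for the divisibility constraint described in the updated docs:

```python
# Minimal sketch, not part of ML-Agents: check that every behavior's
# memory_size is a positive multiple of 2, the constraint the updated
# Training-PPO.md / Training-SAC.md docs describe.
import yaml

with open("config/trainer_config.yaml") as f:
    config = yaml.safe_load(f)

for behavior, settings in config.items():
    memory_size = settings.get("memory_size")
    if memory_size is not None and (memory_size <= 0 or memory_size % 2 != 0):
        raise ValueError(
            f"{behavior}: memory_size={memory_size} must be a positive multiple of 2"
        )
```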

docs/Migrating.md

Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ The versions can be found in
 * The interface for `RayPerceptionSensor.PerceiveStatic()` was changed to take an input class and write to an output class.
 * The `SetActionMask` method must now be called on the optional `ActionMasker` argument of the `CollectObservations` method. (We now consider an action mask as a type of observation)
 * The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`
+* The `--multi-gpu` option has been removed temporarily.

 ### Steps to Migrate
 * Replace your Agent's implementation of `CollectObservations()` with `CollectObservations(VectorSensor sensor)`. In addition, replace all calls to `AddVectorObs()` with `sensor.AddObservation()` or `sensor.AddOneHotObservation()` on the `VectorSensor` passed as argument.

docs/Training-ML-Agents.md

Lines changed: 0 additions & 1 deletion

@@ -151,7 +151,6 @@ environment, you can set the following command line options when invoking
 [here](https://docs.unity3d.com/Manual/CommandLineArguments.html) for more
 details.
 * `--debug`: Specify this option to enable debug-level logging for some parts of the code.
-* `--multi-gpu`: Setting this flag enables the use of multiple GPU's (if available) during training.
 * `--cpu`: Forces training using CPU only.
 * Engine Configuration :
 * `--width' : The width of the executable window of the environment(s) in pixels
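
With `--multi-gpu` gone, an invocation that previously used it simply drops the flag. A hedged example of launching training programmatically (the config path and run id are placeholders; `--run-id` and `--cpu` are documented mlagents-learn options):

```python
# Hedged example: launch mlagents-learn without the removed --multi-gpu
# flag. Config path and run id are placeholders.
import subprocess

subprocess.run(
    ["mlagents-learn", "config/trainer_config.yaml", "--run-id=ppo_run", "--cpu"],
    check=True,
)
```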

docs/Training-PPO.md

Lines changed: 3 additions & 3 deletions

@@ -218,11 +218,11 @@ Typical Range: `4` - `128`
 ### Memory Size

 `memory_size` corresponds to the size of the array of floating point numbers
-used to store the hidden state of the recurrent neural network. This value must
-be a multiple of 4, and should scale with the amount of information you expect
+used to store the hidden state of the recurrent neural network of the policy. This value must
+be a multiple of 2, and should scale with the amount of information you expect
 the agent will need to remember in order to successfully complete the task.

-Typical Range: `64` - `512`
+Typical Range: `32` - `256`

 ## (Optional) Behavioral Cloning Using Demonstrations
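
The divisibility requirement dropping from 4 to 2 follows from the split: the memory vector now backs only the policy's recurrent network, whose state divides into a hidden half and a cell half. An illustrative sketch of that assumption (the equal-halves split is how the docs motivate the rule; the code is not from ML-Agents):

```python
import numpy as np

# Illustrative only: treat the memory vector as the concatenation of the
# LSTM hidden state and cell state, so each half is memory_size // 2
# floats -- hence memory_size must be a multiple of 2.
memory_size = 128
memory = np.zeros(memory_size, dtype=np.float32)

hidden_state, cell_state = np.split(memory, 2)
assert hidden_state.shape == cell_state.shape == (memory_size // 2,)
```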

docs/Training-SAC.md

Lines changed: 3 additions & 3 deletions

@@ -223,11 +223,11 @@ Typical Range: `4` - `128`
 ### Memory Size

 `memory_size` corresponds to the size of the array of floating point numbers
-used to store the hidden state of the recurrent neural network. This value must
-be a multiple of 4, and should scale with the amount of information you expect
+used to store the hidden state of the recurrent neural network in the policy.
+This value must be a multiple of 2, and should scale with the amount of information you expect
 the agent will need to remember in order to successfully complete the task.

-Typical Range: `64` - `512`
+Typical Range: `32` - `256`

 ### (Optional) Save Replay Buffer

ml-agents/mlagents/trainers/agent_processor.py

Lines changed: 0 additions & 3 deletions

@@ -65,9 +65,6 @@ def add_experiences(
         if take_action_outputs:
             for _entropy in take_action_outputs["entropy"]:
                 self.stats_reporter.add_stat("Policy/Entropy", _entropy)
-            self.stats_reporter.add_stat(
-                "Policy/Learning Rate", take_action_outputs["learning_rate"]
-            )

         terminated_agents: Set[str] = set()
         # Make unique agent_ids that are global across workers
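
The `Policy/Learning Rate` stat is dropped here because, after the split, the learning rate belongs to the optimizer rather than to the policy's action outputs. A hedged sketch (hypothetical names; the commit's actual plumbing may differ) of reporting it from the optimizer's update step instead:

```python
# Hypothetical sketch: after the split, the optimizer knows the current
# learning rate, so it reports the stat itself during update().
class PPOOptimizer:
    def __init__(self, policy, stats_reporter):
        self.policy = policy
        self.stats_reporter = stats_reporter
        self.learning_rate = 3.0e-4  # placeholder initial value

    def update(self, batch):
        ...  # run the PPO update; a schedule may decay self.learning_rate
        self.stats_reporter.add_stat("Policy/Learning Rate", self.learning_rate)
```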

ml-agents/mlagents/trainers/common/__init__.py

Whitespace-only changes.
