[perf] Optimizations for performance (#5192)

Ervin T · web-flow · commit e5395a35f7bd · 2021-04-01T16:45:26.000-04:00
* Lazy init the buffer when sampling

* Rebind tensor references rather than copying data in place

* Don't create unneeded numpy arrays

* Hoist the self[key] lookup out of the loop into a local variable
diff --git a/ml-agents/mlagents/trainers/buffer.py b/ml-agents/mlagents/trainers/buffer.py
@@ -394,10 +394,11 @@ def shuffle(
         s = np.arange(len(self[key_list[0]]) // sequence_length)
         np.random.shuffle(s)
         for key in key_list:
+            buffer_field = self[key]
             tmp: List[np.ndarray] = []
             for i in s:
-                tmp += self[key][i * sequence_length : (i + 1) * sequence_length]
-            self[key][:] = tmp
+                tmp += buffer_field[i * sequence_length : (i + 1) * sequence_length]
+            buffer_field.set(tmp)
 
     def make_mini_batch(self, start: int, end: int) -> "AgentBuffer":
         """
@@ -430,7 +431,8 @@ def sample_mini_batch(
             * sequence_length
         )  # Sample random sequence starts
         for key in self:
-            mb_list = [self[key][i : i + sequence_length] for i in start_idxes]
+            buffer_field = self[key]
+            mb_list = (buffer_field[i : i + sequence_length] for i in start_idxes)
             # See comparison of ways to make a list from a list of lists here:
             # https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists
             mini_batch[key].set(list(itertools.chain.from_iterable(mb_list)))
diff --git a/ml-agents/mlagents/trainers/torch/encoders.py b/ml-agents/mlagents/trainers/torch/encoders.py
@@ -23,20 +23,23 @@ def forward(self, inputs: torch.Tensor) -> torch.Tensor:
         return normalized_state
 
     def update(self, vector_input: torch.Tensor) -> None:
-        steps_increment = vector_input.size()[0]
-        total_new_steps = self.normalization_steps + steps_increment
-
-        input_to_old_mean = vector_input - self.running_mean
-        new_mean = self.running_mean + (input_to_old_mean / total_new_steps).sum(0)
-
-        input_to_new_mean = vector_input - new_mean
-        new_variance = self.running_variance + (
-            input_to_new_mean * input_to_old_mean
-        ).sum(0)
-        # Update in-place
-        self.running_mean.data.copy_(new_mean.data)
-        self.running_variance.data.copy_(new_variance.data)
-        self.normalization_steps.data.copy_(total_new_steps.data)
+        with torch.no_grad():
+            steps_increment = vector_input.size()[0]
+            total_new_steps = self.normalization_steps + steps_increment
+
+            input_to_old_mean = vector_input - self.running_mean
+            new_mean: torch.Tensor = self.running_mean + (
+                input_to_old_mean / total_new_steps
+            ).sum(0)
+
+            input_to_new_mean = vector_input - new_mean
+            new_variance = self.running_variance + (
+                input_to_new_mean * input_to_old_mean
+            ).sum(0)
+            # Update references. This is much faster than in-place data update.
+            self.running_mean: torch.Tensor = new_mean
+            self.running_variance: torch.Tensor = new_variance
+            self.normalization_steps: torch.Tensor = total_new_steps
 
     def copy_from(self, other_normalizer: "Normalizer") -> None:
         self.normalization_steps.data.copy_(other_normalizer.normalization_steps.data)
diff --git a/ml-agents/mlagents/trainers/trajectory.py b/ml-agents/mlagents/trainers/trajectory.py
@@ -246,13 +246,13 @@ def to_agentbuffer(self) -> AgentBuffer:
                 exp.action.discrete
             )
 
-            cont_next_actions = np.zeros_like(exp.action.continuous)
-            disc_next_actions = np.zeros_like(exp.action.discrete)
-
             if not is_last_step:
                 next_action = self.steps[step + 1].action
                 cont_next_actions = next_action.continuous
                 disc_next_actions = next_action.discrete
+            else:
+                cont_next_actions = np.zeros_like(exp.action.continuous)
+                disc_next_actions = np.zeros_like(exp.action.discrete)
 
             agent_buffer_trajectory[BufferKey.NEXT_CONT_ACTION].append(
                 cont_next_actions