[MLA-427] make pyupgrade convert f-strings too #4244

Merged: 3 commits, Jul 17, 2020
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -43,7 +43,7 @@ repos:
rev: v2.7.0
hooks:
- id: pyupgrade
args: [--py3-plus]
args: [--py3-plus, --py36-plus]
exclude: .*barracuda.py

- repo: https://github.com/pre-commit/pre-commit-hooks
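For context: the added --py36-plus flag is what lets pyupgrade rewrite simple str.format() calls into f-strings, which is exactly what most of the hunks below are. A minimal before/after sketch of that rewrite, using illustrative names rather than code from this PR:

    # before running pyupgrade --py36-plus
    message = "agent {} is done".format(agent_id)
    # after: the hook rewrites the call into an equivalent f-string
    message = f"agent {agent_id} is done"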
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)
Contributor Author


Python 3.6.1 was released on March 21, 2017; I think it's safe enough to require it now.
(Note that we already had this requirement for mlagents)
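If a script outside pip's dependency resolution wants to fail fast on an older interpreter, a guard along these lines would work; this is just a sketch for illustration, not something added in this PR:

    import sys

    # ml-agents-envs now declares python_requires>=3.6.1, so refuse to run on anything older
    if sys.version_info < (3, 6, 1):
        raise RuntimeError("ml-agents-envs requires Python 3.6.1 or later")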


### Minor Changes
#### com.unity.ml-agents (C#)
2 changes: 1 addition & 1 deletion gym-unity/setup.py
@@ -38,6 +38,6 @@ def run(self):
author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
install_requires=["gym", f"mlagents_envs=={VERSION}"],
cmdclass={"verify": VerifyVersionCommand},
)
8 changes: 2 additions & 6 deletions ml-agents-envs/mlagents_envs/base_env.py
@@ -109,9 +109,7 @@ def __getitem__(self, agent_id: AgentId) -> DecisionStep:
:returns: The DecisionStep
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the DecisionSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:
@@ -214,9 +212,7 @@ def __getitem__(self, agent_id: AgentId) -> TerminalStep:
specific agent
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the TerminalSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:
4 changes: 2 additions & 2 deletions ml-agents-envs/mlagents_envs/env_utils.py
@@ -27,7 +27,7 @@ def validate_environment_path(env_path: str) -> Optional[str]:
.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
get_logger(__name__).debug("The true file name is {}".format(true_filename))
get_logger(__name__).debug(f"The true file name is {true_filename}")

if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None
@@ -86,7 +86,7 @@ def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen:
f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
get_logger(__name__).debug("This is the launch string {}".format(launch_string))
get_logger(__name__).debug(f"This is the launch string {launch_string}")
# Launch Unity environment
subprocess_args = [launch_string] + args
try:
2 changes: 1 addition & 1 deletion ml-agents-envs/setup.py
@@ -53,6 +53,6 @@ def run(self):
"protobuf>=3.6",
"pyyaml>=3.1.0",
],
python_requires=">=3.5",
python_requires=">=3.6.1",
cmdclass={"verify": VerifyVersionCommand},
)
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/buffer.py
@@ -275,7 +275,7 @@ def resequence_and_append(
key_list = list(self.keys())
if not self.check_length(key_list):
raise BufferException(
"The length of the fields {} were not of same length".format(key_list)
f"The length of the fields {key_list} were not of same length"
)
for field_key in key_list:
target_buffer[field_key].extend(
@@ -51,7 +51,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
self.encoding_size,
ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
f"curiosity_stream_{i}_visual_obs_encoder",
False,
)

@@ -60,7 +60,7 @@ def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
self.encoding_size,
ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
f"curiosity_stream_{i}_visual_obs_encoder",
True,
)
visual_encoders.append(encoded_visual)
@@ -124,7 +124,7 @@ def make_inputs(self) -> None:
self.encoding_size,
ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
f"gail_stream_{i}_visual_obs_encoder",
False,
)

@@ -133,7 +133,7 @@ def make_inputs(self) -> None:
self.encoding_size,
ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
f"gail_stream_{i}_visual_obs_encoder",
True,
)
visual_policy_encoders.append(encoded_policy_visual)
@@ -31,7 +31,7 @@ def create_reward_signal(
"""
rcls = NAME_TO_CLASS.get(name)
if not rcls:
raise UnityTrainerException("Unknown reward signal type {}".format(name))
raise UnityTrainerException(f"Unknown reward signal type {name}")

class_inst = rcls(policy, settings)
return class_inst
4 changes: 1 addition & 3 deletions ml-agents/mlagents/trainers/ghost/controller.py
@@ -69,9 +69,7 @@ def change_training_team(self, step: int) -> None:
"""
self._queue.append(self._learning_team)
self._learning_team = self._queue.popleft()
logger.debug(
"Learning team {} swapped on step {}".format(self._learning_team, step)
)
logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
self._changed_training_team = True

# Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
6 changes: 3 additions & 3 deletions ml-agents/mlagents/trainers/models.py
@@ -82,7 +82,7 @@ def create_schedule(
parameter, global_step, max_step, min_value, power=1.0
)
else:
raise UnityTrainerException("The schedule {} is invalid.".format(schedule))
raise UnityTrainerException(f"The schedule {schedule} is invalid.")
return parameter_rate

@staticmethod
@@ -290,7 +290,7 @@ def create_vector_observation_encoder(
h_size,
activation=activation,
reuse=reuse,
name="hidden_{}".format(i),
name=f"hidden_{i}",
kernel_initializer=tf.initializers.variance_scaling(1.0),
)
return hidden
@@ -656,7 +656,7 @@ def create_value_heads(
"""
value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
value_heads[name] = value
value = tf.reduce_mean(list(value_heads.values()), 0)
return value_heads, value
4 changes: 1 addition & 3 deletions ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -182,9 +182,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
)
)
else:
logger.info(
"Resuming training from step {}.".format(self.get_current_step())
)
logger.info(f"Resuming training from step {self.get_current_step()}.")

def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.
12 changes: 4 additions & 8 deletions ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -229,10 +229,10 @@ def _create_losses(
self.old_values = {}
for name in value_heads.keys():
returns_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_returns".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_returns"
)
old_value = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_value_estimate".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_value_estimate"
)
self.returns_holders[name] = returns_holder
self.old_values[name] = old_value
@@ -334,12 +334,8 @@ def _construct_feed_dict(
self.all_old_log_probs: mini_batch["action_probs"],
}
for name in self.reward_signals:
feed_dict[self.returns_holders[name]] = mini_batch[
"{}_returns".format(name)
]
feed_dict[self.old_values[name]] = mini_batch[
"{}_value_estimates".format(name)
]
feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]

if self.policy.output_pre is not None and "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
14 changes: 6 additions & 8 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -75,7 +75,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
trajectory.done_reached and not trajectory.interrupted,
)
for name, v in value_estimates.items():
agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
@@ -88,7 +88,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
agent_buffer_trajectory["{}_rewards".format(name)].extend(evaluate_result)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

@@ -98,11 +98,9 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
for name in self.optimizer.reward_signals:
bootstrap_value = value_next[name]

local_rewards = agent_buffer_trajectory[
"{}_rewards".format(name)
].get_batch()
local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
local_value_estimates = agent_buffer_trajectory[
"{}_value_estimates".format(name)
f"{name}_value_estimates"
].get_batch()
local_advantage = get_gae(
rewards=local_rewards,
@@ -113,8 +111,8 @@ def _process_trajectory(self, trajectory: Trajectory) -> None:
)
local_return = local_advantage + local_value_estimates
# This is later use as target for the different value estimates
agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
agent_buffer_trajectory[f"{name}_returns"].set(local_return)
agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
tmp_advantages.append(local_advantage)
tmp_returns.append(local_return)

6 changes: 3 additions & 3 deletions ml-agents/mlagents/trainers/sac/network.py
@@ -99,7 +99,7 @@ def create_value_heads(self, stream_names, hidden_input):
"""
self.value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

@@ -244,7 +244,7 @@ def create_q_heads(

q1_heads = {}
for name in stream_names:
_q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
_q1 = tf.layers.dense(q1_hidden, num_outputs, name=f"{name}_q1")
q1_heads[name] = _q1

q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
@@ -263,7 +263,7 @@ def create_q_heads(

q2_heads = {}
for name in stream_names:
_q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2

q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/sac/optimizer.py
@@ -271,7 +271,7 @@ def _create_losses(
)

rewards_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_rewards".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_rewards"
)
self.rewards_holders[name] = rewards_holder

@@ -607,7 +607,7 @@ def _construct_feed_dict(
self.policy.mask_input: batch["masks"] * burn_in_mask,
}
for name in self.reward_signals:
feed_dict[self.rewards_holders[name]] = batch["{}_rewards".format(name)]
feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]

if self.policy.use_continuous_act:
feed_dict[self.policy_network.external_action_in] = batch["actions"]
14 changes: 7 additions & 7 deletions ml-agents/mlagents/trainers/sac/trainer.py
@@ -100,7 +100,7 @@ def save_replay_buffer(self) -> None:
Save the training buffer's update buffer to a pickle file.
"""
filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
logger.info("Saving Experience Replay Buffer to {}".format(filename))
logger.info(f"Saving Experience Replay Buffer to {filename}")
with open(filename, "wb") as file_object:
self.update_buffer.save_to_file(file_object)

@@ -109,7 +109,7 @@ def load_replay_buffer(self) -> None:
Loads the last saved replay buffer from a file.
"""
filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
logger.info("Loading Experience Replay Buffer from {}".format(filename))
logger.info(f"Loading Experience Replay Buffer from {filename}")
with open(filename, "rb+") as file_object:
self.update_buffer.load_from_file(file_object)
logger.info(
@@ -239,7 +239,7 @@ def _update_sac_policy(self) -> bool:
while (
self.step - self.hyperparameters.buffer_init_steps
) / self.update_steps > self.steps_per_update:
logger.debug("Updating SAC policy at step {}".format(self.step))
logger.debug(f"Updating SAC policy at step {self.step}")
buffer = self.update_buffer
if self.update_buffer.num_experiences >= self.hyperparameters.batch_size:
sampled_minibatch = buffer.sample_mini_batch(
@@ -248,9 +248,9 @@
)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[
"{}_rewards".format(name)
] = signal.evaluate_batch(sampled_minibatch).scaled_reward
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward

update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():
@@ -296,7 +296,7 @@ def _update_reward_signals(self) -> None:
# Get minibatches for reward signal update if needed
reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug("Updating {} at step {}".format(name, self.step))
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
6 changes: 3 additions & 3 deletions ml-agents/mlagents/trainers/stats.py
@@ -114,7 +114,7 @@ def write_stats(
)
if self.self_play and "Self-play/ELO" in values:
elo_stats = values["Self-play/ELO"]
logger.info("{} ELO: {:0.3f}. ".format(category, elo_stats.mean))
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
else:
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(
@@ -177,7 +177,7 @@ def write_stats(
self._maybe_create_summary_writer(category)
for key, value in values.items():
summary = tf.Summary()
summary.value.add(tag="{}".format(key), simple_value=value.mean)
summary.value.add(tag=f"{key}", simple_value=value.mean)
self.summary_writers[category].add_summary(summary, step)
self.summary_writers[category].flush()

@@ -195,7 +195,7 @@ def _delete_all_events_files(self, directory_name: str) -> None:
for file_name in os.listdir(directory_name):
if file_name.startswith("events.out"):
logger.warning(
"{} was left over from a previous run. Deleting.".format(file_name)
f"{file_name} was left over from a previous run. Deleting."
)
full_fname = os.path.join(directory_name, file_name)
try:
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/tests/test_simple_rl.py
@@ -80,7 +80,7 @@
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()


2 changes: 1 addition & 1 deletion ml-agents/setup.py
@@ -58,7 +58,7 @@ def run(self):
# Test-only dependencies should go in test_requirements.txt, not here.
"grpcio>=1.11.0",
"h5py>=2.9.0",
"mlagents_envs=={}".format(VERSION),
f"mlagents_envs=={VERSION}",
"numpy>=1.13.3,<2.0",
"Pillow>=4.2.1",
"protobuf>=3.6",
2 changes: 1 addition & 1 deletion ml-agents/tests/yamato/check_coverage_percent.py
@@ -17,7 +17,7 @@ def check_coverage(root_dir, min_percentage):
summary_xml = os.path.join(dirpath, SUMMARY_XML_FILENAME)
break
if not summary_xml:
print("Couldn't find {} in root directory".format(SUMMARY_XML_FILENAME))
print(f"Couldn't find {SUMMARY_XML_FILENAME} in root directory")
sys.exit(1)

with open(summary_xml) as f: