
Commit d05cc0a

[IBR-2068] Merge with master branch
2 parents: 8856de0 + b3df31e

20 files changed: +102, -41 lines

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 pre-commit
 
 # formatting
-black == 19.10b0
+black == 21.6b0
 isort == 5.6.4
 
 # testing
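
The bump from black 19.10b0 to 21.6b0 explains most of the hunks below: since black 20.8b0, a trailing ("magic") comma in a call makes black keep one argument per line instead of collapsing the call. A minimal sketch of that behaviour; the helper function is illustrative, not from this repository:

def make_buffer(buffer_size: int, batch_size: int) -> tuple:
    """Toy stand-in for the constructor calls reformatted throughout this commit."""
    return (buffer_size, batch_size)


# black 19.10b0 would collapse this call onto a single line; black 21.6b0 keeps
# one argument per line because the call ends with a magic trailing comma.
memory = make_buffer(
    100_000,
    64,
)
print(memory)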

rl_algorithms/common/abstract/distributed_logger.py

Lines changed: 2 additions & 1 deletion
@@ -125,7 +125,8 @@ def set_wandb(self):
             name=f"{self.log_cfg.agent}/{self.log_cfg.curr_time}",
         )
         additional_log = dict(
-            episode_num=self.episode_num, max_episode_steps=self.max_episode_steps,
+            episode_num=self.episode_num,
+            max_episode_steps=self.max_episode_steps,
         )
         wandb.config.update(additional_log)
         shutil.copy(self.log_cfg.cfg_path, os.path.join(wandb.run.dir, "config.py"))

rl_algorithms/common/abstract/reward_fn.py

Lines changed: 1 addition & 1 deletion (whitespace-only docstring change; removed and added lines have identical text)
@@ -11,7 +11,7 @@
 
 
 class RewardFn(ABC):
     """Abstract class for computing reward.
-    New compute_reward class should redefine __call__()
+    New compute_reward class should redefine __call__()
 
     """

rl_algorithms/common/apex/architecture.py

Lines changed: 2 additions & 1 deletion
@@ -99,7 +99,8 @@ def __init__(
     def _spawn(self):
         """Intialize distributed worker, learner and centralized replay buffer."""
         replay_buffer = ReplayBuffer(
-            self.hyper_params.buffer_size, self.hyper_params.batch_size,
+            self.hyper_params.buffer_size,
+            self.hyper_params.batch_size,
         )
         per_buffer = PrioritizedBufferWrapper(
             replay_buffer, alpha=self.hyper_params.per_alpha
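
For context, the pattern here (repeated in the DQN and fD agents further down) is a plain FIFO ReplayBuffer wrapped by PrioritizedBufferWrapper, which layers priority bookkeeping on top. A toy sketch of that composition; the classes below are stand-ins, not the library's actual API:

class ToyReplayBuffer:
    """Stand-in for ReplayBuffer: plain FIFO storage plus a sampling batch size."""

    def __init__(self, buffer_size: int, batch_size: int):
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.storage = []


class ToyPrioritizedWrapper:
    """Stand-in for PrioritizedBufferWrapper: keeps priorities, delegates storage."""

    def __init__(self, buffer: ToyReplayBuffer, alpha: float = 0.6):
        self.buffer = buffer  # underlying FIFO buffer
        self.alpha = alpha  # priority exponent applied when sampling


per_buffer = ToyPrioritizedWrapper(ToyReplayBuffer(100_000, 64), alpha=0.6)
print(per_buffer.buffer.batch_size)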

rl_algorithms/common/apex/learner.py

Lines changed: 3 additions & 1 deletion
@@ -80,7 +80,9 @@ def publish_params(self, update_step: int, np_state_dict: Dict[str, np.ndarray])
         self.pub_socket.send(new_params_id)
 
     def send_info_to_logger(
-        self, np_state_dict: List[np.ndarray], step_info: list,
+        self,
+        np_state_dict: List[np.ndarray],
+        step_info: list,
     ):
         """Send new params and log info to logger."""
         log_value = dict(update_step=self.update_step, step_info=step_info)

rl_algorithms/common/buffer/distillation_buffer.py

Lines changed: 4 additions & 2 deletions (the second hunk is a whitespace-only docstring change)
@@ -28,7 +28,9 @@ class DistillationBuffer:
     """
 
     def __init__(
-        self, batch_size: int, dataset_path: List[str],
+        self,
+        batch_size: int,
+        dataset_path: List[str],
     ):
         """Initialize a DistillationBuffer object.
 
@@ -47,7 +49,7 @@ def __init__(
 
     def reset_dataloader(self):
        """Initialize and reset DataLoader class.
-        DataLoader class must be reset for every epoch.
+        DataLoader class must be reset for every epoch.
        """
        dataset = DistillationDataset(self.dataset_path)
        self.is_contain_q = dataset.is_contain_q

rl_algorithms/common/buffer/segment_tree.py

Lines changed: 3 additions & 3 deletions
@@ -6,7 +6,7 @@
 
 
 class SegmentTree:
-    """ Create SegmentTree.
+    """Create SegmentTree.
 
     Taken from OpenAI baselines github repository:
     https://github.com/openai/baselines/blob/master/baselines/common/segment_tree.py
@@ -78,7 +78,7 @@ def __getitem__(self, idx: int) -> float:
 
 
 class SumSegmentTree(SegmentTree):
-    """ Create SumSegmentTree.
+    """Create SumSegmentTree.
 
     Taken from OpenAI baselines github repository:
     https://github.com/openai/baselines/blob/master/baselines/common/segment_tree.py
@@ -119,7 +119,7 @@ def retrieve(self, upperbound: float) -> int:
 
 
 class MinSegmentTree(SegmentTree):
-    """ Create SegmentTree.
+    """Create SegmentTree.
 
     Taken from OpenAI baselines github repository:
     https://github.com/openai/baselines/blob/master/baselines/common/segment_tree.py
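
As background (not part of this diff): SumSegmentTree.retrieve(upperbound) returns the first index whose prefix sum of priorities reaches the bound, which is how prioritized replay samples transitions proportionally to priority. A linear-scan sketch of the same idea, purely illustrative:

import random
from typing import List


def retrieve_proportional(priorities: List[float], upperbound: float) -> int:
    """Return the first index whose running prefix sum reaches `upperbound`.

    A SumSegmentTree answers the same query in O(log n) instead of O(n).
    """
    running = 0.0
    for idx, priority in enumerate(priorities):
        running += priority
        if upperbound <= running:
            return idx
    return len(priorities) - 1


priorities = [0.1, 0.4, 0.2, 0.3]
print(retrieve_proportional(priorities, random.uniform(0.0, sum(priorities))))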

rl_algorithms/common/env/atari_wrappers.py

Lines changed: 2 additions & 3 deletions
@@ -50,7 +50,7 @@ def __init__(self, env, noop_max=30):
 
     # pylint: disable=method-hidden
     def reset(self, **kwargs):
-        """ Do no-op action for a number of steps in [1, noop_max]."""
+        """Do no-op action for a number of steps in [1, noop_max]."""
         self.env.reset(**kwargs)
         if self.override_num_noops is not None:
             noops = self.override_num_noops
@@ -317,8 +317,7 @@ def make_atari(env_id, max_episode_steps=None):
 def wrap_deepmind(
     env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False
 ):
-    """Configure environment for DeepMind-style Atari.
-    """
+    """Configure environment for DeepMind-style Atari."""
     if episode_life:
         env = EpisodicLifeEnv(env)
     if "FIRE" in env.unwrapped.get_action_meanings():

rl_algorithms/common/helper_functions.py

Lines changed: 5 additions & 5 deletions (whitespace-only docstring changes in both hunks)
@@ -100,8 +100,8 @@ def numpy2floattensor(
     arrays: Union[np.ndarray, Tuple[np.ndarray]], device_: torch.device
 ) -> Tuple[torch.Tensor]:
     """Convert numpy type to torch FloatTensor.
-    - Convert numpy array to torch float tensor.
-    - Convert numpy array with Tuple type to torch FloatTensor with Tuple.
+    - Convert numpy array to torch float tensor.
+    - Convert numpy array with Tuple type to torch FloatTensor with Tuple.
     """
 
     if isinstance(arrays, tuple):  # check Tuple or not
@@ -127,9 +127,9 @@ def state_dict2numpy(state_dict) -> Dict[str, np.ndarray]:
 def smoothen_graph(scalars: List[float], weight: float = 0.6) -> List[float]:
     """Smoothen result graph using exponential moving average formula as TensorBoard.
 
-    Reference:
-        https://docs.wandb.com/library/technical-faq#what-formula-do-you-use-for-
-        your-smoothing-algorithm
+    Reference:
+        https://docs.wandb.com/library/technical-faq#what-formula-do-you-use-for-
+        your-smoothing-algorithm
     """
     last = scalars[0]  # First value in the plot (first timestep)
     smoothed = list()
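
The docstring above points at the TensorBoard/W&B smoothing formula. A minimal sketch of that exponential moving average, assuming the usual recurrence smoothed_t = weight * smoothed_(t-1) + (1 - weight) * scalar_t; the repository's implementation details may differ:

from typing import List


def ema_smooth(scalars: List[float], weight: float = 0.6) -> List[float]:
    """Exponentially smooth a curve the way TensorBoard's smoothing slider does."""
    last = scalars[0]  # start from the first point
    smoothed = []
    for point in scalars:
        value = last * weight + (1.0 - weight) * point
        smoothed.append(value)
        last = value
    return smoothed


print(ema_smooth([0.0, 1.0, 0.5, 2.0]))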

rl_algorithms/common/networks/backbones/resnet.py

Lines changed: 24 additions & 5 deletions
@@ -29,7 +29,12 @@ def __init__(
 
         self.expansion = expansion
         self.conv1 = nn.Conv2d(
-            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False,
+            in_planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
         )
         self.bn1 = nn.BatchNorm2d(planes)
         self.conv2 = nn.Conv2d(
@@ -74,14 +79,27 @@ def __init__(
         super(Bottleneck, self).__init__()
 
         self.expansion = expansion
-        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False,)
+        self.conv1 = nn.Conv2d(
+            in_planes,
+            planes,
+            kernel_size=1,
+            bias=False,
+        )
         self.bn1 = nn.BatchNorm2d(planes)
         self.conv2 = nn.Conv2d(
-            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False,
+            planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
         )
         self.bn2 = nn.BatchNorm2d(planes)
         self.conv3 = nn.Conv2d(
-            planes, self.expansion * planes, kernel_size=1, bias=False,
+            planes,
+            self.expansion * planes,
+            kernel_size=1,
+            bias=False,
         )
         self.bn3 = nn.BatchNorm2d(self.expansion * planes)
 
@@ -114,7 +132,8 @@ class ResNet(nn.Module):
     """Baseline of ResNet(https://arxiv.org/pdf/1512.03385.pdf)."""
 
     def __init__(
-        self, configs: ConfigDict,
+        self,
+        configs: ConfigDict,
     ):
         super(ResNet, self).__init__()
         block = Bottleneck if configs.use_bottleneck else BasicBlock
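
For orientation (the change above is purely black reformatting): a Bottleneck block stacks a 1x1, a 3x3, and a 1x1 convolution, expanding the channel count on the last one. A stripped-down sketch with made-up sizes:

import torch
import torch.nn as nn

in_planes, planes, expansion, stride = 64, 64, 4, 1

# 1x1 reduce -> 3x3 spatial -> 1x1 expand, mirroring the three convs in the diff.
conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
conv3 = nn.Conv2d(planes, expansion * planes, kernel_size=1, bias=False)

x = torch.zeros(1, in_planes, 32, 32)
print(conv3(conv2(conv1(x))).shape)  # torch.Size([1, 256, 32, 32])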

rl_algorithms/common/networks/brain.py

Lines changed: 8 additions & 2 deletions
@@ -75,12 +75,18 @@ class GRUBrain(Brain):
     """Class for holding backbone, GRU, and head networks."""
 
     def __init__(
-        self, backbone_cfg: ConfigDict, head_cfg: ConfigDict, gru_cfg: ConfigDict,
+        self,
+        backbone_cfg: ConfigDict,
+        head_cfg: ConfigDict,
+        gru_cfg: ConfigDict,
     ):
         self.action_size = head_cfg.configs.output_size
         """Initialize. Generate different structure whether it has CNN module or not."""
         Brain.__init__(self, backbone_cfg, head_cfg)
-        self.fc = nn.Linear(head_cfg.configs.input_size, gru_cfg.rnn_hidden_size,)
+        self.fc = nn.Linear(
+            head_cfg.configs.input_size,
+            gru_cfg.rnn_hidden_size,
+        )
         self.gru = nn.GRU(
             gru_cfg.rnn_hidden_size + self.action_size + 1,  # 1 is for prev_reward
             gru_cfg.rnn_hidden_size,
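
A toy illustration of the GRU input layout implied by this constructor: per-step features of size rnn_hidden_size, concatenated with a one-hot previous action and the previous reward (the extra +1). The sizes below are made up:

import torch
import torch.nn as nn

rnn_hidden_size, action_size = 64, 4
gru = nn.GRU(rnn_hidden_size + action_size + 1, rnn_hidden_size, batch_first=True)

features = torch.zeros(2, 5, rnn_hidden_size)  # (batch, seq, features)
prev_action = torch.zeros(2, 5, action_size)  # one-hot previous actions
prev_reward = torch.zeros(2, 5, 1)  # scalar previous rewards
out, hidden = gru(torch.cat([features, prev_action, prev_reward], dim=-1))
print(out.shape)  # torch.Size([2, 5, 64])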

rl_algorithms/common/networks/heads.py

Lines changed: 6 additions & 2 deletions
@@ -207,11 +207,15 @@ class CategoricalDist(MLP):
     """Multilayer perceptron with Categorical distribution output."""
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
     ):
         """Initialize."""
         super().__init__(
-            configs=configs, hidden_activation=hidden_activation, use_output_layer=True,
+            configs=configs,
+            hidden_activation=hidden_activation,
+            use_output_layer=True,
         )
 
     def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, ...]:

rl_algorithms/dqn/agent.py

Lines changed: 2 additions & 1 deletion
@@ -114,7 +114,8 @@ def _initialize(self):
         if not self.is_test:
             # replay memory for a single step
             self.memory = ReplayBuffer(
-                self.hyper_params.buffer_size, self.hyper_params.batch_size,
+                self.hyper_params.buffer_size,
+                self.hyper_params.batch_size,
             )
             self.memory = PrioritizedBufferWrapper(
                 self.memory, alpha=self.hyper_params.per_alpha

rl_algorithms/dqn/networks.py

Lines changed: 9 additions & 3 deletions
@@ -29,7 +29,9 @@ class DuelingMLP(MLP, NoisyMLPHandler):
     """Multilayer perceptron with dueling construction."""
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
     ):
         """Initialize."""
         if configs.use_noisy_net:
@@ -83,7 +85,9 @@ class C51DuelingMLP(MLP, NoisyMLPHandler):
     """Multilayered perceptron for C51 with dueling construction."""
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
     ):
         """Initialize."""
         if configs.use_noisy_net:
@@ -151,7 +155,9 @@ class IQNMLP(MLP, NoisyMLPHandler):
     """
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
    ):
        """Initialize."""
        if configs.use_noisy_net:
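
As a reminder of what the "dueling construction" in these class names refers to (Wang et al., 2016), independent of this formatting change: the Q-value is assembled from a state-value stream and a mean-centred advantage stream. A minimal sketch with random tensors:

import torch

value = torch.randn(32, 1)  # state-value stream, shape (batch, 1)
advantage = torch.randn(32, 6)  # advantage stream, shape (batch, n_actions)

# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
q_values = value + advantage - advantage.mean(dim=-1, keepdim=True)
print(q_values.shape)  # torch.Size([32, 6])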

rl_algorithms/fd/ddpg_agent.py

Lines changed: 3 additions & 1 deletion
@@ -60,7 +60,9 @@ def _initialize(self):
 
         # replay memory for a single step
         self.memory = ReplayBuffer(
-            self.hyper_params.buffer_size, self.hyper_params.batch_size, demo=demos,
+            self.hyper_params.buffer_size,
+            self.hyper_params.batch_size,
+            demo=demos,
        )
        self.memory = PrioritizedBufferWrapper(
            self.memory,

rl_algorithms/fd/dqn_agent.py

Lines changed: 3 additions & 1 deletion
@@ -51,7 +51,9 @@ def _initialize(self):
 
         # replay memory
         self.memory = ReplayBuffer(
-            self.hyper_params.buffer_size, self.hyper_params.batch_size, demo=demos,
+            self.hyper_params.buffer_size,
+            self.hyper_params.batch_size,
+            demo=demos,
        )
        self.memory = PrioritizedBufferWrapper(
            self.memory,

rl_algorithms/ppo/agent.py

Lines changed: 4 additions & 1 deletion
@@ -183,7 +183,10 @@ def decay_epsilon(self, t: int = 0):
             1.0, t / (epsilon_decay_period + 1e-7)
         )
 
-    def write_log(self, log_value: tuple):
+    def write_log(
+        self,
+        log_value: tuple,
+    ):
         i_episode, n_step, score, actor_loss, critic_loss, total_loss = log_value
         print(
             "[INFO] episode %d\tepisode steps: %d\ttotal score: %d\n"

rl_algorithms/recurrent/utils.py

Lines changed: 15 additions & 4 deletions
@@ -76,7 +76,9 @@ def valid_from_done(done: torch.Tensor) -> torch.Tensor:
 
 
 def slice_r2d1_arguments(
-    experiences: Tuple[Any, ...], burn_in_step: int, output_size: int,
+    experiences: Tuple[Any, ...],
+    burn_in_step: int,
+    output_size: int,
 ) -> tuple:
     """Get mini-batch sequence-size transitions and slice
     in accordance with R2D1 agent loss calculating process.
@@ -92,11 +94,20 @@ def slice_r2d1_arguments(
     agent_states = states[:, burn_in_step:-1]
     target_states = states[:, burn_in_step + 1 :]
 
-    burnin_prev_actions = make_one_hot(actions[:, : burn_in_step - 1], output_size,)
+    burnin_prev_actions = make_one_hot(
+        actions[:, : burn_in_step - 1],
+        output_size,
+    )
     target_burnin_prev_actions = make_one_hot(actions[:, :burn_in_step], output_size)
     agent_actions = actions[:, burn_in_step:-1].long().unsqueeze(-1)
-    prev_actions = make_one_hot(actions[:, burn_in_step - 1 : -2], output_size,)
-    target_prev_actions = make_one_hot(actions[:, burn_in_step:-1].long(), output_size,)
+    prev_actions = make_one_hot(
+        actions[:, burn_in_step - 1 : -2],
+        output_size,
+    )
+    target_prev_actions = make_one_hot(
+        actions[:, burn_in_step:-1].long(),
+        output_size,
+    )
 
     burnin_prev_rewards = rewards[:, : burn_in_step - 1].unsqueeze(-1)
     target_burnin_prev_rewards = rewards[:, :burn_in_step].unsqueeze(-1)
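
A hypothetical stand-in for make_one_hot(actions, output_size) as used above: R2D1 feeds the previous action as a one-hot vector (plus the previous reward) into the recurrent core. Shapes are illustrative:

import torch
import torch.nn.functional as F

actions = torch.tensor([[0, 2, 1], [1, 1, 3]])  # (batch, seq) integer actions
output_size = 4

one_hot = F.one_hot(actions.long(), num_classes=output_size).float()
print(one_hot.shape)  # torch.Size([2, 3, 4])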

run_pong_no_frameskip_v4.py

Lines changed: 3 additions & 1 deletion
@@ -76,7 +76,9 @@ def parse_args() -> argparse.Namespace:
         help="turn off framestack",
     )
     parser.add_argument(
-        "--saliency-map", action="store_true", help="save saliency map",
+        "--saliency-map",
+        action="store_true",
+        help="save saliency map",
    )
 
    return parser.parse_args()
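
For completeness, the flag added above is a plain boolean switch: action="store_true" makes --saliency-map default to False and flip to True when passed. A self-contained sketch:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--saliency-map",
    action="store_true",
    help="save saliency map",
)

args = parser.parse_args(["--saliency-map"])
print(args.saliency_map)  # True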

tests/buffer/test_prioritized_buffer.py

Lines changed: 2 additions & 2 deletions (whitespace-only docstring change; removed and added lines have identical text)
@@ -44,8 +44,8 @@ def sample_dummy(prioritized_buffer: PrioritizedBufferWrapper, times: int) -> Li
 def check_prioritized(prop_lst: List, sampled_lst: List) -> bool:
     """Check two input lists have same distribution by kstest.
 
-    Reference:
-        https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
+    Reference:
+        https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
     """
     res = ks_2samp(prop_lst, sampled_lst)
     return res[1] >= 0.05
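
The check above relies on scipy's two-sample Kolmogorov-Smirnov test: a p-value of at least 0.05 means the test fails to reject the hypothesis that the sampled distribution matches the priority-proportional one. A small sketch of the same call on synthetic data:

import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(0)
a = rng.normal(size=1000)
b = rng.normal(size=1000)

statistic, p_value = ks_2samp(a, b)
# Same acceptance rule as check_prioritized(): treat p >= 0.05 as "same distribution".
print(p_value >= 0.05)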
