
Commit d05cc0a

[IBR-2068] Merge with master branch
2 parents: 8856de0 + b3df31e

20 files changed: +102, -41 lines

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 pre-commit
 
 # formatting
-black == 19.10b0
+black == 21.6b0
 isort == 5.6.4
 
 # testing
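
The bump from black 19.10b0 to 21.6b0 explains most of the hunks below: since black 20.8b0, a trailing ("magic") comma in a call makes black keep one argument per line instead of collapsing the call. A minimal sketch of that behaviour; the helper function is illustrative, not from this repository:

def make_buffer(buffer_size: int, batch_size: int) -> tuple:
    """Toy stand-in for the constructor calls reformatted throughout this commit."""
    return (buffer_size, batch_size)


# black 19.10b0 would collapse this call onto a single line; black 21.6b0 keeps
# one argument per line because the call ends with a magic trailing comma.
memory = make_buffer(
    100_000,
    64,
)
print(memory)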

rl_algorithms/common/abstract/distributed_logger.py

Lines changed: 2 additions & 1 deletion
@@ -125,7 +125,8 @@ def set_wandb(self):
             name=f"{self.log_cfg.agent}/{self.log_cfg.curr_time}",
         )
         additional_log = dict(
-            episode_num=self.episode_num, max_episode_steps=self.max_episode_steps,
+            episode_num=self.episode_num,
+            max_episode_steps=self.max_episode_steps,
         )
         wandb.config.update(additional_log)
         shutil.copy(self.log_cfg.cfg_path, os.path.join(wandb.run.dir, "config.py"))

rl_algorithms/common/abstract/reward_fn.py

Lines changed: 1 addition & 1 deletion (whitespace-only docstring change; removed and added lines have identical text)
@@ -11,7 +11,7 @@
 
 
 class RewardFn(ABC):
     """Abstract class for computing reward.
-    New compute_reward class should redefine __call__()
+    New compute_reward class should redefine __call__()
 
     """

rl_algorithms/common/apex/architecture.py

Lines changed: 2 additions & 1 deletion
@@ -99,7 +99,8 @@ def __init__(
     def _spawn(self):
         """Intialize distributed worker, learner and centralized replay buffer."""
         replay_buffer = ReplayBuffer(
-            self.hyper_params.buffer_size, self.hyper_params.batch_size,
+            self.hyper_params.buffer_size,
+            self.hyper_params.batch_size,
         )
         per_buffer = PrioritizedBufferWrapper(
             replay_buffer, alpha=self.hyper_params.per_alpha
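
For context, the pattern here (repeated in the DQN and fD agents further down) is a plain FIFO ReplayBuffer wrapped by PrioritizedBufferWrapper, which layers priority bookkeeping on top. A toy sketch of that composition; the classes below are stand-ins, not the library's actual API:

class ToyReplayBuffer:
    """Stand-in for ReplayBuffer: plain FIFO storage plus a sampling batch size."""

    def __init__(self, buffer_size: int, batch_size: int):
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.storage = []


class ToyPrioritizedWrapper:
    """Stand-in for PrioritizedBufferWrapper: keeps priorities, delegates storage."""

    def __init__(self, buffer: ToyReplayBuffer, alpha: float = 0.6):
        self.buffer = buffer  # underlying FIFO buffer
        self.alpha = alpha  # priority exponent applied when sampling


per_buffer = ToyPrioritizedWrapper(ToyReplayBuffer(100_000, 64), alpha=0.6)
print(per_buffer.buffer.batch_size)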

rl_algorithms/common/apex/learner.py

Lines changed: 3 additions & 1 deletion
@@ -80,7 +80,9 @@ def publish_params(self, update_step: int, np_state_dict: Dict[str, np.ndarray])
         self.pub_socket.send(new_params_id)
 
     def send_info_to_logger(
-        self, np_state_dict: List[np.ndarray], step_info: list,
+        self,
+        np_state_dict: List[np.ndarray],
+        step_info: list,
     ):
         """Send new params and log info to logger."""
         log_value = dict(update_step=self.update_step, step_info=step_info)

rl_algorithms/common/buffer/distillation_buffer.py

Lines changed: 4 additions & 2 deletions (the second hunk is a whitespace-only docstring change)
@@ -28,7 +28,9 @@ class DistillationBuffer:
     """
 
     def __init__(
-        self, batch_size: int, dataset_path: List[str],
+        self,
+        batch_size: int,
+        dataset_path: List[str],
     ):
         """Initialize a DistillationBuffer object.
 
@@ -47,7 +49,7 @@ def __init__(
 
     def reset_dataloader(self):
        """Initialize and reset DataLoader class.
-        DataLoader class must be reset for every epoch.
+        DataLoader class must be reset for every epoch.
        """
        dataset = DistillationDataset(self.dataset_path)
        self.is_contain_q = dataset.is_contain_q

rl_algorithms/common/buffer/segment_tree.py

Lines changed: 3 additions & 3 deletions
@@ -6,7 +6,7 @@
 
 
 class SegmentTree:
-    """ Create SegmentTree.
+    """Create SegmentTree.
 
     Taken from OpenAI baselines github repository:
     https://github.com/openai/baselines/blob/master/baselines/common/segment_tree.py
@@ -78,7 +78,7 @@ def __getitem__(self, idx: int) -> float:
 
 
 class SumSegmentTree(SegmentTree):
-    """ Create SumSegmentTree.
+    """Create SumSegmentTree.
 
     Taken from OpenAI baselines github repository:
     https://github.com/openai/baselines/blob/master/baselines/common/segment_tree.py
@@ -119,7 +119,7 @@ def retrieve(self, upperbound: float) -> int:
 
 
 class MinSegmentTree(SegmentTree):
-    """ Create SegmentTree.
+    """Create SegmentTree.
 
     Taken from OpenAI baselines github repository:
     https://github.com/openai/baselines/blob/master/baselines/common/segment_tree.py
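
As background (not part of this diff): SumSegmentTree.retrieve(upperbound) returns the first index whose prefix sum of priorities reaches the bound, which is how prioritized replay samples transitions proportionally to priority. A linear-scan sketch of the same idea, purely illustrative:

import random
from typing import List


def retrieve_proportional(priorities: List[float], upperbound: float) -> int:
    """Return the first index whose running prefix sum reaches `upperbound`.

    A SumSegmentTree answers the same query in O(log n) instead of O(n).
    """
    running = 0.0
    for idx, priority in enumerate(priorities):
        running += priority
        if upperbound <= running:
            return idx
    return len(priorities) - 1


priorities = [0.1, 0.4, 0.2, 0.3]
print(retrieve_proportional(priorities, random.uniform(0.0, sum(priorities))))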

rl_algorithms/common/env/atari_wrappers.py

Lines changed: 2 additions & 3 deletions
@@ -50,7 +50,7 @@ def __init__(self, env, noop_max=30):
 
     # pylint: disable=method-hidden
     def reset(self, **kwargs):
-        """ Do no-op action for a number of steps in [1, noop_max]."""
+        """Do no-op action for a number of steps in [1, noop_max]."""
         self.env.reset(**kwargs)
         if self.override_num_noops is not None:
             noops = self.override_num_noops
@@ -317,8 +317,7 @@ def make_atari(env_id, max_episode_steps=None):
 def wrap_deepmind(
     env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False
 ):
-    """Configure environment for DeepMind-style Atari.
-    """
+    """Configure environment for DeepMind-style Atari."""
     if episode_life:
         env = EpisodicLifeEnv(env)
     if "FIRE" in env.unwrapped.get_action_meanings():

rl_algorithms/common/helper_functions.py

Lines changed: 5 additions & 5 deletions (whitespace-only docstring changes in both hunks)
@@ -100,8 +100,8 @@ def numpy2floattensor(
     arrays: Union[np.ndarray, Tuple[np.ndarray]], device_: torch.device
 ) -> Tuple[torch.Tensor]:
     """Convert numpy type to torch FloatTensor.
-    - Convert numpy array to torch float tensor.
-    - Convert numpy array with Tuple type to torch FloatTensor with Tuple.
+    - Convert numpy array to torch float tensor.
+    - Convert numpy array with Tuple type to torch FloatTensor with Tuple.
     """
 
     if isinstance(arrays, tuple):  # check Tuple or not
@@ -127,9 +127,9 @@ def state_dict2numpy(state_dict) -> Dict[str, np.ndarray]:
 def smoothen_graph(scalars: List[float], weight: float = 0.6) -> List[float]:
     """Smoothen result graph using exponential moving average formula as TensorBoard.
 
-    Reference:
-        https://docs.wandb.com/library/technical-faq#what-formula-do-you-use-for-
-        your-smoothing-algorithm
+    Reference:
+        https://docs.wandb.com/library/technical-faq#what-formula-do-you-use-for-
+        your-smoothing-algorithm
     """
     last = scalars[0]  # First value in the plot (first timestep)
     smoothed = list()
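
The docstring above points at the TensorBoard/W&B smoothing formula. A minimal sketch of that exponential moving average, assuming the usual recurrence smoothed_t = weight * smoothed_(t-1) + (1 - weight) * scalar_t; the repository's implementation details may differ:

from typing import List


def ema_smooth(scalars: List[float], weight: float = 0.6) -> List[float]:
    """Exponentially smooth a curve the way TensorBoard's smoothing slider does."""
    last = scalars[0]  # start from the first point
    smoothed = []
    for point in scalars:
        value = last * weight + (1.0 - weight) * point
        smoothed.append(value)
        last = value
    return smoothed


print(ema_smooth([0.0, 1.0, 0.5, 2.0]))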

rl_algorithms/common/networks/backbones/resnet.py

Lines changed: 24 additions & 5 deletions
@@ -29,7 +29,12 @@ def __init__(
 
         self.expansion = expansion
         self.conv1 = nn.Conv2d(
-            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False,
+            in_planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
         )
         self.bn1 = nn.BatchNorm2d(planes)
         self.conv2 = nn.Conv2d(
@@ -74,14 +79,27 @@ def __init__(
         super(Bottleneck, self).__init__()
 
         self.expansion = expansion
-        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False,)
+        self.conv1 = nn.Conv2d(
+            in_planes,
+            planes,
+            kernel_size=1,
+            bias=False,
+        )
         self.bn1 = nn.BatchNorm2d(planes)
         self.conv2 = nn.Conv2d(
-            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False,
+            planes,
+            planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
         )
         self.bn2 = nn.BatchNorm2d(planes)
         self.conv3 = nn.Conv2d(
-            planes, self.expansion * planes, kernel_size=1, bias=False,
+            planes,
+            self.expansion * planes,
+            kernel_size=1,
+            bias=False,
         )
         self.bn3 = nn.BatchNorm2d(self.expansion * planes)
 
@@ -114,7 +132,8 @@ class ResNet(nn.Module):
     """Baseline of ResNet(https://arxiv.org/pdf/1512.03385.pdf)."""
 
     def __init__(
-        self, configs: ConfigDict,
+        self,
+        configs: ConfigDict,
     ):
         super(ResNet, self).__init__()
         block = Bottleneck if configs.use_bottleneck else BasicBlock
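
For orientation (the change above is purely black reformatting): a Bottleneck block stacks a 1x1, a 3x3, and a 1x1 convolution, expanding the channel count on the last one. A stripped-down sketch with made-up sizes:

import torch
import torch.nn as nn

in_planes, planes, expansion, stride = 64, 64, 4, 1

# 1x1 reduce -> 3x3 spatial -> 1x1 expand, mirroring the three convs in the diff.
conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
conv3 = nn.Conv2d(planes, expansion * planes, kernel_size=1, bias=False)

x = torch.zeros(1, in_planes, 32, 32)
print(conv3(conv2(conv1(x))).shape)  # torch.Size([1, 256, 32, 32])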

rl_algorithms/common/networks/brain.py

Lines changed: 8 additions & 2 deletions
@@ -75,12 +75,18 @@ class GRUBrain(Brain):
     """Class for holding backbone, GRU, and head networks."""
 
     def __init__(
-        self, backbone_cfg: ConfigDict, head_cfg: ConfigDict, gru_cfg: ConfigDict,
+        self,
+        backbone_cfg: ConfigDict,
+        head_cfg: ConfigDict,
+        gru_cfg: ConfigDict,
     ):
         self.action_size = head_cfg.configs.output_size
         """Initialize. Generate different structure whether it has CNN module or not."""
         Brain.__init__(self, backbone_cfg, head_cfg)
-        self.fc = nn.Linear(head_cfg.configs.input_size, gru_cfg.rnn_hidden_size,)
+        self.fc = nn.Linear(
+            head_cfg.configs.input_size,
+            gru_cfg.rnn_hidden_size,
+        )
         self.gru = nn.GRU(
             gru_cfg.rnn_hidden_size + self.action_size + 1,  # 1 is for prev_reward
             gru_cfg.rnn_hidden_size,
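
A toy illustration of the GRU input layout implied by this constructor: per-step features of size rnn_hidden_size, concatenated with a one-hot previous action and the previous reward (the extra +1). The sizes below are made up:

import torch
import torch.nn as nn

rnn_hidden_size, action_size = 64, 4
gru = nn.GRU(rnn_hidden_size + action_size + 1, rnn_hidden_size, batch_first=True)

features = torch.zeros(2, 5, rnn_hidden_size)  # (batch, seq, features)
prev_action = torch.zeros(2, 5, action_size)  # one-hot previous actions
prev_reward = torch.zeros(2, 5, 1)  # scalar previous rewards
out, hidden = gru(torch.cat([features, prev_action, prev_reward], dim=-1))
print(out.shape)  # torch.Size([2, 5, 64])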

rl_algorithms/common/networks/heads.py

Lines changed: 6 additions & 2 deletions
@@ -207,11 +207,15 @@ class CategoricalDist(MLP):
     """Multilayer perceptron with Categorical distribution output."""
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
     ):
         """Initialize."""
         super().__init__(
-            configs=configs, hidden_activation=hidden_activation, use_output_layer=True,
+            configs=configs,
+            hidden_activation=hidden_activation,
+            use_output_layer=True,
         )
 
     def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, ...]:

rl_algorithms/dqn/agent.py

Lines changed: 2 additions & 1 deletion
@@ -114,7 +114,8 @@ def _initialize(self):
         if not self.is_test:
             # replay memory for a single step
             self.memory = ReplayBuffer(
-                self.hyper_params.buffer_size, self.hyper_params.batch_size,
+                self.hyper_params.buffer_size,
+                self.hyper_params.batch_size,
             )
             self.memory = PrioritizedBufferWrapper(
                 self.memory, alpha=self.hyper_params.per_alpha

rl_algorithms/dqn/networks.py

Lines changed: 9 additions & 3 deletions
@@ -29,7 +29,9 @@ class DuelingMLP(MLP, NoisyMLPHandler):
     """Multilayer perceptron with dueling construction."""
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
     ):
         """Initialize."""
         if configs.use_noisy_net:
@@ -83,7 +85,9 @@ class C51DuelingMLP(MLP, NoisyMLPHandler):
     """Multilayered perceptron for C51 with dueling construction."""
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
     ):
         """Initialize."""
         if configs.use_noisy_net:
@@ -151,7 +155,9 @@ class IQNMLP(MLP, NoisyMLPHandler):
     """
 
     def __init__(
-        self, configs: ConfigDict, hidden_activation: Callable = F.relu,
+        self,
+        configs: ConfigDict,
+        hidden_activation: Callable = F.relu,
    ):
        """Initialize."""
        if configs.use_noisy_net:
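
As a reminder of what the "dueling construction" in these class names refers to (Wang et al., 2016), independent of this formatting change: the Q-value is assembled from a state-value stream and a mean-centred advantage stream. A minimal sketch with random tensors:

import torch

value = torch.randn(32, 1)  # state-value stream, shape (batch, 1)
advantage = torch.randn(32, 6)  # advantage stream, shape (batch, n_actions)

# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
q_values = value + advantage - advantage.mean(dim=-1, keepdim=True)
print(q_values.shape)  # torch.Size([32, 6])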

rl_algorithms/fd/ddpg_agent.py

Lines changed: 3 additions & 1 deletion
@@ -60,7 +60,9 @@ def _initialize(self):
 
         # replay memory for a single step
         self.memory = ReplayBuffer(
-            self.hyper_params.buffer_size, self.hyper_params.batch_size, demo=demos,
+            self.hyper_params.buffer_size,
+            self.hyper_params.batch_size,
+            demo=demos,
        )
        self.memory = PrioritizedBufferWrapper(
            self.memory,

rl_algorithms/fd/dqn_agent.py

Lines changed: 3 additions & 1 deletion
@@ -51,7 +51,9 @@ def _initialize(self):
 
         # replay memory
         self.memory = ReplayBuffer(
-            self.hyper_params.buffer_size, self.hyper_params.batch_size, demo=demos,
+            self.hyper_params.buffer_size,
+            self.hyper_params.batch_size,
+            demo=demos,
        )
        self.memory = PrioritizedBufferWrapper(
            self.memory,

rl_algorithms/ppo/agent.py

Lines changed: 4 additions & 1 deletion
@@ -183,7 +183,10 @@ def decay_epsilon(self, t: int = 0):
             1.0, t / (epsilon_decay_period + 1e-7)
         )
 
-    def write_log(self, log_value: tuple):
+    def write_log(
+        self,
+        log_value: tuple,
+    ):
         i_episode, n_step, score, actor_loss, critic_loss, total_loss = log_value
         print(
             "[INFO] episode %d\tepisode steps: %d\ttotal score: %d\n"

rl_algorithms/recurrent/utils.py

Lines changed: 15 additions & 4 deletions
@@ -76,7 +76,9 @@ def valid_from_done(done: torch.Tensor) -> torch.Tensor:
 
 
 def slice_r2d1_arguments(
-    experiences: Tuple[Any, ...], burn_in_step: int, output_size: int,
+    experiences: Tuple[Any, ...],
+    burn_in_step: int,
+    output_size: int,
 ) -> tuple:
     """Get mini-batch sequence-size transitions and slice
     in accordance with R2D1 agent loss calculating process.
@@ -92,11 +94,20 @@ def slice_r2d1_arguments(
     agent_states = states[:, burn_in_step:-1]
     target_states = states[:, burn_in_step + 1 :]
 
-    burnin_prev_actions = make_one_hot(actions[:, : burn_in_step - 1], output_size,)
+    burnin_prev_actions = make_one_hot(
+        actions[:, : burn_in_step - 1],
+        output_size,
+    )
     target_burnin_prev_actions = make_one_hot(actions[:, :burn_in_step], output_size)
     agent_actions = actions[:, burn_in_step:-1].long().unsqueeze(-1)
-    prev_actions = make_one_hot(actions[:, burn_in_step - 1 : -2], output_size,)
-    target_prev_actions = make_one_hot(actions[:, burn_in_step:-1].long(), output_size,)
+    prev_actions = make_one_hot(
+        actions[:, burn_in_step - 1 : -2],
+        output_size,
+    )
+    target_prev_actions = make_one_hot(
+        actions[:, burn_in_step:-1].long(),
+        output_size,
+    )
 
     burnin_prev_rewards = rewards[:, : burn_in_step - 1].unsqueeze(-1)
     target_burnin_prev_rewards = rewards[:, :burn_in_step].unsqueeze(-1)
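
A hypothetical stand-in for make_one_hot(actions, output_size) as used above: R2D1 feeds the previous action as a one-hot vector (plus the previous reward) into the recurrent core. Shapes are illustrative:

import torch
import torch.nn.functional as F

actions = torch.tensor([[0, 2, 1], [1, 1, 3]])  # (batch, seq) integer actions
output_size = 4

one_hot = F.one_hot(actions.long(), num_classes=output_size).float()
print(one_hot.shape)  # torch.Size([2, 3, 4])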

run_pong_no_frameskip_v4.py

Lines changed: 3 additions & 1 deletion
@@ -76,7 +76,9 @@ def parse_args() -> argparse.Namespace:
         help="turn off framestack",
     )
     parser.add_argument(
-        "--saliency-map", action="store_true", help="save saliency map",
+        "--saliency-map",
+        action="store_true",
+        help="save saliency map",
    )
 
    return parser.parse_args()
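
For completeness, the flag added above is a plain boolean switch: action="store_true" makes --saliency-map default to False and flip to True when passed. A self-contained sketch:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--saliency-map",
    action="store_true",
    help="save saliency map",
)

args = parser.parse_args(["--saliency-map"])
print(args.saliency_map)  # True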

tests/buffer/test_prioritized_buffer.py

Lines changed: 2 additions & 2 deletions (whitespace-only docstring change; removed and added lines have identical text)
@@ -44,8 +44,8 @@ def sample_dummy(prioritized_buffer: PrioritizedBufferWrapper, times: int) -> Li
 def check_prioritized(prop_lst: List, sampled_lst: List) -> bool:
     """Check two input lists have same distribution by kstest.
 
-    Reference:
-        https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
+    Reference:
+        https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
     """
     res = ks_2samp(prop_lst, sampled_lst)
     return res[1] >= 0.05
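
The check above relies on scipy's two-sample Kolmogorov-Smirnov test: a p-value of at least 0.05 means the test fails to reject the hypothesis that the sampled distribution matches the priority-proportional one. A small sketch of the same call on synthetic data:

import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(0)
a = rng.normal(size=1000)
b = rng.normal(size=1000)

statistic, p_value = ks_2samp(a, b)
# Same acceptance rule as check_prioritized(): treat p >= 0.05 as "same distribution".
print(p_value >= 0.05)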
