-
Notifications
You must be signed in to change notification settings - Fork 6.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RLlib] Fix Atari learning test regressions (2 bugs) and 1 minor attention net bug. #18306
Changes from 1 commit
8c111b0
ad3e309
75cf702
e83540a
6843b6b
a8e69cf
e841883
54c8c00
0833489
077c9f7
f389810
dc4c0e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -942,21 +942,28 @@ def get_single_step_input_dict(self, view_requirements, index="last"): | |
data_col = last_mappings.get(data_col, data_col) | ||
# Range needed. | ||
if view_req.shift_from is not None: | ||
data = self[view_col][-1] | ||
traj_len = len(self[data_col]) | ||
missing_at_end = traj_len % view_req.batch_repeat_value | ||
obs_shift = -1 if data_col in [ | ||
SampleBatch.OBS, SampleBatch.NEXT_OBS | ||
] else 0 | ||
from_ = view_req.shift_from + obs_shift | ||
to_ = view_req.shift_to + obs_shift + 1 | ||
if to_ == 0: | ||
to_ = None | ||
input_dict[view_col] = np.array([ | ||
np.concatenate( | ||
[data, | ||
self[data_col][-missing_at_end:]])[from_:to_] | ||
]) | ||
# Batch repeat value > 1: We have single frames in the | ||
# batch at each timestep. | ||
if view_req.batch_repeat_value > 1: | ||
data = self[view_col][-1] | ||
traj_len = len(self[data_col]) | ||
missing_at_end = traj_len % view_req.batch_repeat_value | ||
obs_shift = -1 if data_col in [ | ||
SampleBatch.OBS, SampleBatch.NEXT_OBS | ||
] else 0 | ||
from_ = view_req.shift_from + obs_shift | ||
to_ = view_req.shift_to + obs_shift + 1 | ||
if to_ == 0: | ||
to_ = None | ||
input_dict[view_col] = np.array([ | ||
np.concatenate( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Makes sense. |
||
[self[data_col][-missing_at_end:], | ||
data])[from_:to_] | ||
]) | ||
# Batch repeat value = 1: We already have framestacks | ||
# at each timestep. | ||
else: | ||
input_dict[view_col] = self[data_col][-1][None] | ||
# Single index. | ||
else: | ||
data = self[data_col][-1] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any way to say this is deprecated in the logs?