Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "[RLlib] Add option for APPO/IMPALA to change number of GPU-lo… #50063

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions rllib/algorithms/impala/impala.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,8 @@ def setup(self, config: AlgorithmConfig):

# Queue of data to be sent to the Learner.
self.data_to_place_on_learner = []
# The local mixin buffer (if required).
self.local_mixin_buffer = None
self._batch_being_built = [] # @OldAPIStack

# Create extra aggregation workers and assign each rollout worker to
Expand All @@ -563,17 +565,18 @@ def setup(self, config: AlgorithmConfig):
i: [] for i in range(self.config.num_learners or 1)
}

# Create our local mixin buffer.
# Create our local mixin buffer if the number of aggregation workers is 0.
if not self.config.enable_rl_module_and_learner:
self.local_mixin_buffer = MixInMultiAgentReplayBuffer(
capacity=(
self.config.replay_buffer_num_slots
if self.config.replay_buffer_num_slots > 0
else 1
),
replay_ratio=self.config.replay_ratio,
replay_mode=ReplayMode.LOCKSTEP,
)
if self.config.replay_proportion > 0.0:
self.local_mixin_buffer = MixInMultiAgentReplayBuffer(
capacity=(
self.config.replay_buffer_num_slots
if self.config.replay_buffer_num_slots > 0
else 1
),
replay_ratio=self.config.replay_ratio,
replay_mode=ReplayMode.LOCKSTEP,
)

# This variable is used to keep track of the statistics from the most recent
# update of the learner group
Expand Down Expand Up @@ -1078,8 +1081,11 @@ def _process_experiences_old_api_stack(
batch = batch.decompress_if_needed()
# Only make a pass through the buffer if the replay proportion is > 0.0 (and
# we actually have one).
self.local_mixin_buffer.add(batch)
batch = self.local_mixin_buffer.replay(_ALL_POLICIES)
if self.local_mixin_buffer:
self.local_mixin_buffer.add(batch)
batch = self.local_mixin_buffer.replay(_ALL_POLICIES)
else:
batch = batch.copy()
if batch:
processed_batches.append(batch)

Expand Down