[RLlib] Rename instances of Impala to IMPALA and Appo to APPO. (
sven1977 authored Jul 16, 2024
1 parent 1b0af29 commit 5e16fd0
Showing 26 changed files with 110 additions and 76 deletions.
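
For downstream users, the practical effect is that the canonical class names are now fully uppercased (IMPALA, IMPALAConfig, APPOLearner, ...) while the old mixed-case names are kept as module-level aliases. A minimal migration sketch, assuming ray[rllib] at or after this commit is installed (building and training are omitted):

    # New, preferred names introduced by this commit.
    from ray.rllib.algorithms.impala import IMPALA, IMPALAConfig

    config = (
        IMPALAConfig()
        .environment("CartPole-v1")
        .env_runners(num_env_runners=2)
    )

    # The old mixed-case names remain importable and point at the same objects,
    # so existing user code keeps working unchanged.
    from ray.rllib.algorithms.impala import Impala, ImpalaConfig
    assert Impala is IMPALA
    assert ImpalaConfig is IMPALAConfig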
9 changes: 8 additions & 1 deletion rllib/algorithms/__init__.py
@@ -4,7 +4,12 @@
from ray.rllib.algorithms.bc.bc import BC, BCConfig
from ray.rllib.algorithms.cql.cql import CQL, CQLConfig
from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig
from ray.rllib.algorithms.impala.impala import Impala, ImpalaConfig
from ray.rllib.algorithms.impala.impala import (
IMPALA,
IMPALAConfig,
Impala,
ImpalaConfig,
)
from ray.rllib.algorithms.marwil.marwil import MARWIL, MARWILConfig
from ray.rllib.algorithms.ppo.ppo import PPO, PPOConfig
from ray.rllib.algorithms.sac.sac import SAC, SACConfig
@@ -21,6 +26,8 @@
"CQLConfig",
"DQN",
"DQNConfig",
"IMPALA",
"IMPALAConfig",
"Impala",
"ImpalaConfig",
"MARWIL",
20 changes: 10 additions & 10 deletions rllib/algorithms/appo/appo.py
@@ -14,7 +14,7 @@
import logging

from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided
from ray.rllib.algorithms.impala.impala import Impala, ImpalaConfig
from ray.rllib.algorithms.impala.impala import IMPALA, IMPALAConfig
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.annotations import override
@@ -38,7 +38,7 @@
OLD_ACTION_DIST_LOGITS_KEY = "old_action_dist_logits"


class APPOConfig(ImpalaConfig):
class APPOConfig(IMPALAConfig):
"""Defines a configuration class from which an APPO Algorithm can be built.
.. testcode::
@@ -98,7 +98,7 @@ def __init__(self, algo_class=None):
self.kl_coeff = 1.0
self.kl_target = 0.01

# Override some of ImpalaConfig's default values with APPO-specific values.
# Override some of IMPALAConfig's default values with APPO-specific values.
self.num_env_runners = 2
self.min_time_s_per_iteration = 10
self.num_gpus = 0
@@ -142,7 +142,7 @@ def __init__(self, algo_class=None):
# __sphinx_doc_end__
# fmt: on

@override(ImpalaConfig)
@override(IMPALAConfig)
def training(
self,
*,
@@ -217,7 +217,7 @@ def training(

return self

@override(ImpalaConfig)
@override(IMPALAConfig)
def get_default_learner_class(self):
if self.framework_str == "torch":
from ray.rllib.algorithms.appo.torch.appo_torch_learner import (
@@ -235,7 +235,7 @@ def get_default_learner_class(self):
"Use either 'torch' or 'tf2'."
)

@override(ImpalaConfig)
@override(IMPALAConfig)
def get_default_rl_module_spec(self) -> SingleAgentRLModuleSpec:
if self.framework_str == "torch":
from ray.rllib.algorithms.appo.torch.appo_torch_rl_module import (
@@ -261,7 +261,7 @@ def _model_config_auto_includes(self):
return super()._model_config_auto_includes | {"vf_share_layers": False}


class APPO(Impala):
class APPO(IMPALA):
def __init__(self, config, *args, **kwargs):
"""Initializes an APPO instance."""
super().__init__(config, *args, **kwargs)
@@ -275,7 +275,7 @@ def __init__(self, config, *args, **kwargs):
lambda p, _: p.update_target()
)

@override(Impala)
@override(IMPALA)
def training_step(self) -> ResultDict:
train_results = super().training_step()

@@ -336,12 +336,12 @@ def update(pi, pi_id):
return train_results

@classmethod
@override(Impala)
@override(IMPALA)
def get_default_config(cls) -> AlgorithmConfig:
return APPOConfig()

@classmethod
@override(Impala)
@override(IMPALA)
def get_default_policy_class(
cls, config: AlgorithmConfig
) -> Optional[Type[Policy]]:
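Since APPOConfig now derives from IMPALAConfig and APPO from IMPALA, while the old names stay in place as aliases, inheritance-based checks behave exactly as before. A small sketch of the relationship, assuming ray[rllib] is installed:

    from ray.rllib.algorithms.appo import APPO, APPOConfig
    from ray.rllib.algorithms.impala import IMPALA, IMPALAConfig, Impala, ImpalaConfig

    # APPO still specializes IMPALA; the uppercase and mixed-case names refer to
    # the same classes, so both pairs of checks hold.
    assert issubclass(APPO, IMPALA) and issubclass(APPO, Impala)
    assert issubclass(APPOConfig, IMPALAConfig) and issubclass(APPOConfig, ImpalaConfig)
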
13 changes: 8 additions & 5 deletions rllib/algorithms/appo/appo_learner.py
@@ -2,7 +2,7 @@
from typing import Any, Dict

from ray.rllib.algorithms.appo.appo import APPOConfig
from ray.rllib.algorithms.impala.impala_learner import ImpalaLearner
from ray.rllib.algorithms.impala.impala_learner import IMPALALearner
from ray.rllib.core.learner.learner import Learner
from ray.rllib.utils.annotations import override
from ray.rllib.utils.lambda_defaultdict import LambdaDefaultDict
@@ -16,13 +16,13 @@
from ray.rllib.utils.typing import ModuleID


class AppoLearner(ImpalaLearner):
"""Adds KL coeff updates via `after_gradient_based_update()` to Impala logic.
class APPOLearner(IMPALALearner):
"""Adds KL coeff updates via `after_gradient_based_update()` to IMPALA logic.
Framework-specific subclasses must override `_update_module_kl_coeff()`.
"""

@override(ImpalaLearner)
@override(IMPALALearner)
def build(self):
super().build()

@@ -41,7 +41,7 @@ def build(self):
)
)

@override(ImpalaLearner)
@override(IMPALALearner)
def remove_module(self, module_id: str):
super().remove_module(module_id)
self.curr_kl_coeffs_per_module.pop(module_id)
@@ -102,3 +102,6 @@ def _update_module_kl_coeff(self, module_id: ModuleID, config: APPOConfig) -> No
module_id: The module whose KL loss coefficient to update.
config: The AlgorithmConfig specific to the given `module_id`.
"""


AppoLearner = APPOLearner
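
The same backward-compatibility pattern is used for the learner classes: the old name is kept as a plain module-level alias of the renamed class, so existing code written against AppoLearner keeps working. A minimal sketch, assuming ray[rllib] is installed; the subclass name is hypothetical:

    from ray.rllib.algorithms.appo.appo_learner import APPOLearner, AppoLearner

    # The alias points at the very same class object.
    assert AppoLearner is APPOLearner

    # A hypothetical user subclass written against the old name still resolves
    # to a subclass of the renamed APPOLearner.
    class MyAppoLearner(AppoLearner):
        pass

    assert issubclass(MyAppoLearner, APPOLearner)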
12 changes: 6 additions & 6 deletions rllib/algorithms/appo/tf/appo_tf_learner.py
@@ -6,8 +6,8 @@
LEARNER_RESULTS_KL_KEY,
OLD_ACTION_DIST_LOGITS_KEY,
)
from ray.rllib.algorithms.appo.appo_learner import AppoLearner
from ray.rllib.algorithms.impala.tf.impala_tf_learner import ImpalaTfLearner
from ray.rllib.algorithms.appo.appo_learner import APPOLearner
from ray.rllib.algorithms.impala.tf.impala_tf_learner import IMPALATfLearner
from ray.rllib.algorithms.impala.tf.vtrace_tf_v2 import make_time_major, vtrace_tf2
from ray.rllib.core.columns import Columns
from ray.rllib.core.learner.learner import POLICY_LOSS_KEY, VF_LOSS_KEY, ENTROPY_KEY
@@ -20,10 +20,10 @@
_, tf, _ = try_import_tf()


class APPOTfLearner(AppoLearner, ImpalaTfLearner):
"""Implements APPO loss / update logic on top of ImpalaTfLearner."""
class APPOTfLearner(APPOLearner, IMPALATfLearner):
"""Implements APPO loss / update logic on top of IMPALATfLearner."""

@override(ImpalaTfLearner)
@override(IMPALATfLearner)
def compute_loss_for_module(
self,
*,
@@ -180,7 +180,7 @@ def compute_loss_for_module(
# Return the total loss.
return total_loss

@override(AppoLearner)
@override(APPOLearner)
def _update_module_kl_coeff(self, module_id: ModuleID, config: APPOConfig) -> None:
# Update the current KL value based on the recently measured value.
# Increase.
12 changes: 6 additions & 6 deletions rllib/algorithms/appo/torch/appo_torch_learner.py
@@ -6,8 +6,8 @@
LEARNER_RESULTS_KL_KEY,
OLD_ACTION_DIST_LOGITS_KEY,
)
from ray.rllib.algorithms.appo.appo_learner import AppoLearner
from ray.rllib.algorithms.impala.torch.impala_torch_learner import ImpalaTorchLearner
from ray.rllib.algorithms.appo.appo_learner import APPOLearner
from ray.rllib.algorithms.impala.torch.impala_torch_learner import IMPALATorchLearner
from ray.rllib.algorithms.impala.torch.vtrace_torch_v2 import (
make_time_major,
vtrace_torch,
@@ -32,10 +32,10 @@
torch, nn = try_import_torch()


class APPOTorchLearner(AppoLearner, ImpalaTorchLearner):
"""Implements APPO loss / update logic on top of ImpalaTorchLearner."""
class APPOTorchLearner(APPOLearner, IMPALATorchLearner):
"""Implements APPO loss / update logic on top of IMPALATorchLearner."""

@override(ImpalaTorchLearner)
@override(IMPALATorchLearner)
def compute_loss_for_module(
self,
*,
@@ -219,7 +219,7 @@ def _make_modules_ddp_if_necessary(self) -> None:
override=True,
)

@override(AppoLearner)
@override(APPOLearner)
def _update_module_kl_coeff(self, module_id: ModuleID, config: APPOConfig) -> None:
# Update the current KL value based on the recently measured value.
# Increase.
14 changes: 11 additions & 3 deletions rllib/algorithms/impala/__init__.py
@@ -1,14 +1,22 @@
from ray.rllib.algorithms.impala.impala import Impala, ImpalaConfig
from ray.rllib.algorithms.impala.impala import (
IMPALA,
IMPALAConfig,
Impala,
ImpalaConfig,
)
from ray.rllib.algorithms.impala.impala_tf_policy import (
ImpalaTF1Policy,
ImpalaTF2Policy,
)
from ray.rllib.algorithms.impala.impala_torch_policy import ImpalaTorchPolicy

__all__ = [
"ImpalaConfig",
"Impala",
"IMPALAConfig",
"IMPALA",
"ImpalaTF1Policy",
"ImpalaTF2Policy",
"ImpalaTorchPolicy",
# Deprecated names (lowercase)
"ImpalaConfig",
"Impala",
]
18 changes: 12 additions & 6 deletions rllib/algorithms/impala/impala.py
@@ -78,7 +78,7 @@
LEARNER_RESULTS_CURR_ENTROPY_COEFF_KEY = "curr_entropy_coeff"


class ImpalaConfig(AlgorithmConfig):
class IMPALAConfig(AlgorithmConfig):
"""Defines a configuration class from which an Impala can be built.
.. testcode::
@@ -492,14 +492,14 @@ def minibatch_size(self):
def get_default_learner_class(self):
if self.framework_str == "torch":
from ray.rllib.algorithms.impala.torch.impala_torch_learner import (
ImpalaTorchLearner,
IMPALATorchLearner,
)

return ImpalaTorchLearner
return IMPALATorchLearner
elif self.framework_str == "tf2":
from ray.rllib.algorithms.impala.tf.impala_tf_learner import ImpalaTfLearner
from ray.rllib.algorithms.impala.tf.impala_tf_learner import IMPALATfLearner

return ImpalaTfLearner
return IMPALATfLearner
else:
raise ValueError(
f"The framework {self.framework_str} is not supported. "
@@ -531,7 +531,10 @@ def get_default_rl_module_spec(self) -> SingleAgentRLModuleSpec:
)


class Impala(Algorithm):
ImpalaConfig = IMPALAConfig


class IMPALA(Algorithm):
"""Importance weighted actor/learner architecture (IMPALA) Algorithm
== Overview of data flow in IMPALA ==
@@ -1448,6 +1451,9 @@ def _compile_iteration_results_old_and_hybrid_api_stacks(self, *args, **kwargs):
return result


Impala = IMPALA


@DeveloperAPI
@ray.remote(num_cpus=0, max_restarts=-1)
class AggregationWorker(FaultAwareApply):
5 changes: 4 additions & 1 deletion rllib/algorithms/impala/impala_learner.py
@@ -42,7 +42,7 @@
QUEUE_SIZE_RESULTS_QUEUE = "queue_size_results_queue"


class ImpalaLearner(Learner):
class IMPALALearner(Learner):
@override(Learner)
def build(self) -> None:
super().build()
@@ -174,6 +174,9 @@ def remove_module(self, module_id: str):
self.entropy_coeff_schedulers_per_module.pop(module_id)


ImpalaLearner = IMPALALearner


class _GPULoaderThread(threading.Thread):
def __init__(
self,
4 changes: 2 additions & 2 deletions rllib/algorithms/impala/impala_torch_policy.py
@@ -231,11 +231,11 @@ class ImpalaTorchPolicy(
ValueNetworkMixin,
TorchPolicyV2,
):
"""PyTorch policy class used with Impala."""
"""PyTorch policy class used with IMPALA."""

def __init__(self, observation_space, action_space, config):
config = dict(
ray.rllib.algorithms.impala.impala.ImpalaConfig().to_dict(), **config
ray.rllib.algorithms.impala.impala.IMPALAConfig().to_dict(), **config
)

# If Learner API is used, we don't need any loss-specific mixins.
6 changes: 3 additions & 3 deletions rllib/algorithms/impala/tests/test_impala.py
@@ -25,9 +25,9 @@ def tearDownClass(cls) -> None:
ray.shutdown()

def test_impala_compilation(self):
"""Test whether Impala can be built with both frameworks."""
"""Test whether IMPALA can be built with both frameworks."""
config = (
impala.ImpalaConfig()
impala.IMPALAConfig()
.environment("CartPole-v1")
.resources(num_gpus=0)
.env_runners(num_env_runners=2)
@@ -68,7 +68,7 @@ def test_impala_lr_schedule(self):
# Test whether we correctly ignore the "lr" setting.
# The first lr should be 0.05.
config = (
impala.ImpalaConfig()
impala.IMPALAConfig()
.resources(num_gpus=0)
.training(
lr=0.1,
11 changes: 7 additions & 4 deletions rllib/algorithms/impala/tf/impala_tf_learner.py
@@ -1,8 +1,8 @@
from typing import Dict

import tree
from ray.rllib.algorithms.impala.impala import ImpalaConfig
from ray.rllib.algorithms.impala.impala_learner import ImpalaLearner
from ray.rllib.algorithms.impala.impala import IMPALAConfig
from ray.rllib.algorithms.impala.impala_learner import IMPALALearner
from ray.rllib.algorithms.impala.tf.vtrace_tf_v2 import make_time_major, vtrace_tf2
from ray.rllib.core import DEFAULT_MODULE_ID
from ray.rllib.core.columns import Columns
@@ -17,15 +17,15 @@
_, tf, _ = try_import_tf()


class ImpalaTfLearner(ImpalaLearner, TfLearner):
class IMPALATfLearner(IMPALALearner, TfLearner):
"""Implements the IMPALA loss function in tensorflow."""

@override(TfLearner)
def compute_loss_for_module(
self,
*,
module_id: ModuleID,
config: ImpalaConfig,
config: IMPALAConfig,
batch: NestedDict,
fwd_out: Dict[str, TensorType],
) -> TensorType:
@@ -151,3 +151,6 @@ def _compute_values(self, batch):
vf_out = module.vf(encoder_outs[ENCODER_OUT][CRITIC])
# Squeeze out last dimension (single node value head).
return tf.squeeze(vf_out, -1)


ImpalaTfLearner = IMPALATfLearner