diff --git a/rllib/algorithms/sac/torch/sac_torch_learner.py b/rllib/algorithms/sac/torch/sac_torch_learner.py
index a44685f3cd7d..62c4a03ecadb 100644
--- a/rllib/algorithms/sac/torch/sac_torch_learner.py
+++ b/rllib/algorithms/sac/torch/sac_torch_learner.py
@@ -1,4 +1,4 @@
-from typing import Dict, Mapping
+from typing import Dict
 
 from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
 from ray.rllib.algorithms.dqn.torch.dqn_rainbow_torch_learner import (
@@ -108,7 +108,7 @@ def compute_loss_for_module(
         module_id: ModuleID,
         config: SACConfig,
         batch: NestedDict,
-        fwd_out: Mapping[str, TensorType]
+        fwd_out: Dict[str, TensorType]
     ) -> TensorType:
         # Only for debugging.
         deterministic = config._deterministic_loss
diff --git a/rllib/core/models/specs/checker.py b/rllib/core/models/specs/checker.py
index ea0b43350d4a..19f9aa9146d2 100644
--- a/rllib/core/models/specs/checker.py
+++ b/rllib/core/models/specs/checker.py
@@ -1,7 +1,7 @@
 import functools
 import logging
 from collections import abc
-from typing import Union, Mapping, Any, Callable
+from typing import Any, Callable, Dict, Union
 
 from ray.rllib.core.models.specs.specs_base import Spec, TypeSpec
 from ray.rllib.core.models.specs.specs_dict import SpecDict
@@ -127,7 +127,7 @@ def _validate(
     *,
     cls_instance: object,
     method: Callable,
-    data: Mapping[str, Any],
+    data: Dict[str, Any],
     spec: Spec,
     filter: bool = False,
     tag: str = "input",
diff --git a/rllib/core/models/specs/specs_dict.py b/rllib/core/models/specs/specs_dict.py
index 769fbd91aae3..3defd89770bd 100644
--- a/rllib/core/models/specs/specs_dict.py
+++ b/rllib/core/models/specs/specs_dict.py
@@ -1,4 +1,4 @@
-from typing import Union, Mapping, Any
+from typing import Any, Dict, Union
 
 from ray.rllib.utils.annotations import ExperimentalAPI, override
 from ray.rllib.utils.nested_dict import NestedDict
@@ -18,7 +18,7 @@
     "{} has type {} (expected type {})."
 )
 
-DATA_TYPE = Union[NestedDict[Any], Mapping[str, Any]]
+DATA_TYPE = Union[NestedDict[Any], Dict[str, Any]]
 
 IS_NOT_PROPERTY = "Spec {} must be a property of the class {}."
 
diff --git a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py
index f8695e04b6a9..7005966651c3 100644
--- a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py
+++ b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py
@@ -1,6 +1,5 @@
 import tempfile
 import unittest
-from typing import Mapping
 
 import gymnasium as gym
 import tensorflow as tf
@@ -53,7 +52,7 @@ def test_forward_train(self):
             )
             loss = -tf.math.reduce_mean(action_dist.logp(actions))
 
-            self.assertIsInstance(output, Mapping)
+            self.assertIsInstance(output, dict)
 
             grads = tape.gradient(loss, module.trainable_variables)
 
diff --git a/rllib/core/rl_module/torch/tests/test_torch_rl_module.py b/rllib/core/rl_module/torch/tests/test_torch_rl_module.py
index 67fe4fbb875d..08646ae66e7f 100644
--- a/rllib/core/rl_module/torch/tests/test_torch_rl_module.py
+++ b/rllib/core/rl_module/torch/tests/test_torch_rl_module.py
@@ -1,6 +1,5 @@
 import tempfile
 import unittest
-from typing import Mapping
 import gc
 
 import gymnasium as gym
@@ -48,7 +47,7 @@ def test_forward_train(self):
         )
 
         output = module.forward_train({"obs": obs})
-        self.assertIsInstance(output, Mapping)
+        self.assertIsInstance(output, dict)
         self.assertIn(Columns.ACTION_DIST_INPUTS, output)
 
         action_dist_inputs = output[Columns.ACTION_DIST_INPUTS]
diff --git a/rllib/core/testing/tf/bc_learner.py b/rllib/core/testing/tf/bc_learner.py
index 8276e7212fd6..cef9033fe5a1 100644
--- a/rllib/core/testing/tf/bc_learner.py
+++ b/rllib/core/testing/tf/bc_learner.py
@@ -1,5 +1,5 @@
 import tensorflow as tf
-from typing import Mapping, TYPE_CHECKING
+from typing import Dict, TYPE_CHECKING
 
 from ray.rllib.core.columns import Columns
 from ray.rllib.core.learner.tf.tf_learner import TfLearner
@@ -18,7 +18,7 @@ def compute_loss_for_module(
         module_id: ModuleID,
         config: "AlgorithmConfig",
         batch: NestedDict,
-        fwd_out: Mapping[str, TensorType],
+        fwd_out: Dict[str, TensorType],
     ) -> TensorType:
         BaseTestingLearner.compute_loss_for_module(
             self,
diff --git a/rllib/core/testing/tf/bc_module.py b/rllib/core/testing/tf/bc_module.py
index b7a98ae22431..a5bbd7f0e9f1 100644
--- a/rllib/core/testing/tf/bc_module.py
+++ b/rllib/core/testing/tf/bc_module.py
@@ -1,5 +1,5 @@
 import tensorflow as tf
-from typing import Any, Mapping
+from typing import Any, Dict
 
 from ray.rllib.core.columns import Columns
 from ray.rllib.core.models.specs.typing import SpecType
@@ -54,30 +54,30 @@ def output_specs_inference(self) -> SpecType:
     def output_specs_train(self) -> SpecType:
         return [Columns.ACTION_DIST_INPUTS]
 
-    def _forward_shared(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_shared(self, batch: NestedDict) -> Dict[str, Any]:
         # We can use a shared forward method because BC does not need to distinguish
         # between train, inference, and exploration.
         action_logits = self.policy(batch["obs"])
         return {Columns.ACTION_DIST_INPUTS: action_logits}
 
     @override(RLModule)
-    def _forward_inference(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_inference(self, batch: NestedDict) -> Dict[str, Any]:
         return self._forward_shared(batch)
 
     @override(RLModule)
-    def _forward_exploration(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_exploration(self, batch: NestedDict) -> Dict[str, Any]:
         return self._forward_shared(batch)
 
     @override(RLModule)
-    def _forward_train(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_train(self, batch: NestedDict) -> Dict[str, Any]:
         return self._forward_shared(batch)
 
     @override(RLModule)
-    def get_state(self, inference_only: bool = False) -> Mapping[str, Any]:
+    def get_state(self, inference_only: bool = False) -> Dict[str, Any]:
         return {"policy": self.policy.get_weights()}
 
     @override(RLModule)
-    def set_state(self, state: Mapping[str, Any]) -> None:
+    def set_state(self, state: Dict[str, Any]) -> None:
         self.policy.set_weights(state["policy"])
 
 
diff --git a/rllib/core/testing/torch/bc_learner.py b/rllib/core/testing/torch/bc_learner.py
index 070e6f74b8b7..125e28e65e2f 100644
--- a/rllib/core/testing/torch/bc_learner.py
+++ b/rllib/core/testing/torch/bc_learner.py
@@ -1,5 +1,5 @@
 import torch
-from typing import Mapping, TYPE_CHECKING
+from typing import Dict, TYPE_CHECKING
 
 from ray.rllib.core.columns import Columns
 from ray.rllib.core.learner.torch.torch_learner import TorchLearner
@@ -18,7 +18,7 @@ def compute_loss_for_module(
         module_id: ModuleID,
         config: "AlgorithmConfig",
         batch: NestedDict,
-        fwd_out: Mapping[str, TensorType],
+        fwd_out: Dict[str, TensorType],
     ) -> TensorType:
         BaseTestingLearner.compute_loss_for_module(
             self,
diff --git a/rllib/core/testing/torch/bc_module.py b/rllib/core/testing/torch/bc_module.py
index a5d543d47549..542a7347c356 100644
--- a/rllib/core/testing/torch/bc_module.py
+++ b/rllib/core/testing/torch/bc_module.py
@@ -1,4 +1,4 @@
-from typing import Any, Mapping
+from typing import Any, Dict
 
 from ray.rllib.core.columns import Columns
 from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleConfig
@@ -55,17 +55,17 @@ def output_specs_train(self) -> SpecType:
         return [Columns.ACTION_DIST_INPUTS]
 
     @override(RLModule)
-    def _forward_inference(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_inference(self, batch: NestedDict) -> Dict[str, Any]:
         with torch.no_grad():
             return self._forward_train(batch)
 
     @override(RLModule)
-    def _forward_exploration(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_exploration(self, batch: NestedDict) -> Dict[str, Any]:
         with torch.no_grad():
             return self._forward_train(batch)
 
     @override(RLModule)
-    def _forward_train(self, batch: NestedDict) -> Mapping[str, Any]:
+    def _forward_train(self, batch: NestedDict) -> Dict[str, Any]:
         action_logits = self.policy(batch["obs"])
         return {Columns.ACTION_DIST_INPUTS: action_logits}
 
diff --git a/rllib/examples/learners/train_w_bc_finetune_w_ppo.py b/rllib/examples/learners/train_w_bc_finetune_w_ppo.py
index def8f0b4597e..d4186f6b23cb 100644
--- a/rllib/examples/learners/train_w_bc_finetune_w_ppo.py
+++ b/rllib/examples/learners/train_w_bc_finetune_w_ppo.py
@@ -2,12 +2,12 @@
 This example shows how to pretrain an RLModule using behavioral cloning from offline
 data and, thereafter, continue training it online with PPO (fine-tuning).
 """
+from typing import Dict
 import gymnasium as gym
 import shutil
 import tempfile
 import torch
-from typing import Mapping
 
 import ray
 from ray import tune
 
@@ -49,7 +49,7 @@ def __init__(
         self.distribution_cls = distribution_cls
 
     def forward(
-        self, batch: Mapping[str, torch.Tensor]
+        self, batch: Dict[str, torch.Tensor]
     ) -> torch.distributions.Distribution:
         """Return an action distribution output by the policy network.
 
diff --git a/rllib/examples/rl_modules/classes/random_rlm.py b/rllib/examples/rl_modules/classes/random_rlm.py
index 05af1df92a1f..994485307df6 100644
--- a/rllib/examples/rl_modules/classes/random_rlm.py
+++ b/rllib/examples/rl_modules/classes/random_rlm.py
@@ -1,5 +1,5 @@
 import pathlib
-from typing import Mapping, Any
+from typing import Any, Dict
 
 import gymnasium as gym
 import numpy as np
@@ -59,7 +59,7 @@ def from_model_config(
         observation_space: gym.Space,
         action_space: gym.Space,
         *,
-        model_config_dict: Mapping[str, Any],
+        model_config_dict: Dict[str, Any],
     ) -> "RLModule":
         return cls(action_space)
 
diff --git a/rllib/models/tf/tf_distributions.py b/rllib/models/tf/tf_distributions.py
index 1a9df392d6c1..a99898f53e7f 100644
--- a/rllib/models/tf/tf_distributions.py
+++ b/rllib/models/tf/tf_distributions.py
@@ -6,7 +6,7 @@
 import gymnasium as gym
 import tree
 import numpy as np
-from typing import Optional, List, Mapping, Iterable, Dict
+from typing import Dict, Iterable, List, Optional
 
 import abc
 
@@ -499,7 +499,7 @@ def required_input_dim(space: gym.Space, input_lens: List[int], **kwargs) -> int
     def from_logits(
         cls,
         logits: tf.Tensor,
-        child_distribution_cls_struct: Union[Mapping, Iterable],
+        child_distribution_cls_struct: Union[Dict, Iterable],
         input_lens: Union[Dict, List[int]],
         space: gym.Space,
         **kwargs,
diff --git a/rllib/models/torch/torch_distributions.py b/rllib/models/torch/torch_distributions.py
index 45af69c89b27..4cda3d7f4f61 100644
--- a/rllib/models/torch/torch_distributions.py
+++ b/rllib/models/torch/torch_distributions.py
@@ -5,7 +5,7 @@
 """
 import gymnasium as gym
 import numpy as np
-from typing import Optional, List, Mapping, Iterable, Dict
+from typing import Dict, Iterable, List, Optional
 import tree
 import abc
 
@@ -613,7 +613,7 @@ def required_input_dim(
     def from_logits(
         cls,
         logits: torch.Tensor,
-        child_distribution_cls_struct: Union[Mapping, Iterable],
+        child_distribution_cls_struct: Union[Dict, Iterable],
         input_lens: Union[Dict, List[int]],
         space: gym.Space,
         **kwargs,
diff --git a/rllib/policy/policy.py b/rllib/policy/policy.py
index 4d4b6d5d3ad6..21987234484a 100644
--- a/rllib/policy/policy.py
+++ b/rllib/policy/policy.py
@@ -10,7 +10,6 @@
     Collection,
     Dict,
     List,
-    Mapping,
     Optional,
     Tuple,
     Type,
@@ -1811,7 +1810,7 @@ def __repr__(self):
 
 @OldAPIStack
 def get_gym_space_from_struct_of_tensors(
-    value: Union[Mapping, Tuple, List, TensorType],
+    value: Union[Dict, Tuple, List, TensorType],
     batched_input=True,
 ) -> gym.Space:
     start_idx = 1 if batched_input else 0
@@ -1827,7 +1826,7 @@ def get_gym_space_from_struct_of_tensors(
 
 @OldAPIStack
 def get_gym_space_from_struct_of_spaces(value: Union[Dict, Tuple]) -> gym.spaces.Dict:
-    if isinstance(value, Mapping):
+    if isinstance(value, dict):
         return gym.spaces.Dict(
             {k: get_gym_space_from_struct_of_spaces(v) for k, v in value.items()}
         )
diff --git a/rllib/policy/sample_batch.py b/rllib/policy/sample_batch.py
index 10ebbe96b0f0..811612c4da6f 100644
--- a/rllib/policy/sample_batch.py
+++ b/rllib/policy/sample_batch.py
@@ -1315,8 +1315,8 @@ class MultiAgentBatch:
     """A batch of experiences from multiple agents in the environment.
 
     Attributes:
-        policy_batches (Dict[PolicyID, SampleBatch]): Mapping from policy
-            ids to SampleBatches of experiences.
+        policy_batches (Dict[PolicyID, SampleBatch]): Dict mapping policy IDs to
+            SampleBatches of experiences.
         count: The number of env steps in this batch.
     """
 
@@ -1325,8 +1325,7 @@ def __init__(self, policy_batches: Dict[PolicyID, SampleBatch], env_steps: int):
         """Initialize a MultiAgentBatch instance.
 
         Args:
-            policy_batches: Mapping from policy
-                ids to SampleBatches of experiences.
+            policy_batches: Dict mapping policy IDs to SampleBatches of experiences.
             env_steps: The number of environment steps in the environment this batch
                 contains. This will be less than the number of transitions this batch
                 contains across all policies in total.
diff --git a/rllib/utils/actor_manager.py b/rllib/utils/actor_manager.py
index 1ee0d247e907..c933f3783f70 100644
--- a/rllib/utils/actor_manager.py
+++ b/rllib/utils/actor_manager.py
@@ -4,7 +4,7 @@
 import logging
 import sys
 import time
-from typing import Any, Callable, Dict, Iterator, List, Mapping, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
 
 import ray
 from ray.actor import ActorHandle
@@ -261,14 +261,14 @@ def __init__(
         self._next_id = init_id
 
         # Actors are stored in a map and indexed by a unique (int) ID.
-        self._actors: Mapping[int, ActorHandle] = {}
-        self._remote_actor_states: Mapping[int, self._ActorState] = {}
+        self._actors: Dict[int, ActorHandle] = {}
+        self._remote_actor_states: Dict[int, self._ActorState] = {}
         self._restored_actors = set()
         self.add_actors(actors or [])
 
         # Maps outstanding async requests to the IDs of the actor IDs that
         # are executing them.
-        self._in_flight_req_to_actor_id: Mapping[ray.ObjectRef, int] = {}
+        self._in_flight_req_to_actor_id: Dict[ray.ObjectRef, int] = {}
 
         self._max_remote_requests_in_flight_per_actor = (
             max_remote_requests_in_flight_per_actor
diff --git a/rllib/utils/debug/summary.py b/rllib/utils/debug/summary.py
index e0e6737a63c0..57ff0f06e982 100644
--- a/rllib/utils/debug/summary.py
+++ b/rllib/utils/debug/summary.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pprint
-from typing import Any, Mapping
+from typing import Any
 
 from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch
 from ray.rllib.utils.annotations import DeveloperAPI
@@ -26,7 +26,7 @@ def summarize(obj: Any) -> Any:
 
 
 def _summarize(obj):
-    if isinstance(obj, Mapping):
+    if isinstance(obj, dict):
         return {k: _summarize(v) for k, v in obj.items()}
     elif hasattr(obj, "_asdict"):
         return {
diff --git a/rllib/utils/policy.py b/rllib/utils/policy.py
index 4ec72408f8a8..80d998601ee9 100644
--- a/rllib/utils/policy.py
+++ b/rllib/utils/policy.py
@@ -6,7 +6,6 @@
     Callable,
     Dict,
     List,
-    Mapping,
     Optional,
     Tuple,
     Type,
@@ -183,7 +182,7 @@ def local_policy_inference(
     reward: Optional[float] = None,
     terminated: Optional[bool] = None,
     truncated: Optional[bool] = None,
-    info: Optional[Mapping] = None,
+    info: Optional[Dict] = None,
     explore: bool = None,
     timestep: Optional[int] = None,
 ) -> TensorStructType:
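Not part of the patch itself, but an illustrative aside on the runtime effect of the change: the isinstance() checks in summarize() and get_gym_space_from_struct_of_spaces() become stricter, because collections.abc.Mapping matches any mapping-like type while dict matches only real dictionaries. A minimal standalone sketch in plain Python (no RLlib imports; the FrozenConfig class is hypothetical and exists only to show the difference):

from collections import abc


class FrozenConfig(abc.Mapping):
    """Hypothetical read-only mapping used only to illustrate the difference."""

    def __init__(self, **kwargs):
        self._data = dict(kwargs)

    def __getitem__(self, key):
        return self._data[key]

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)


cfg = FrozenConfig(lr=0.001)
assert isinstance(cfg, abc.Mapping)     # Passes: implements the Mapping protocol.
assert not isinstance(cfg, dict)        # Rejected by the stricter check: not a real dict.
assert isinstance({"lr": 0.001}, dict)  # Plain dicts pass both checks.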