Open
Description
What happened + What you expected to happen
MBMPO fails to run, so its reported results cannot be reproduced. Below I show the error that occurs when using the Pendulum task config from https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/mbmpo/pendulum-mbmpo.yaml
Failure # 1 (occurred at 2023-08-10_14-43-28)
The actor died because of an error raised in its creation task, ray::MBMPO.__init__() (pid=37903, ip=172.17.0.4, actor_id=b859af4813e75b517b9ee23901000000, repr=MBMPO)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/utils/deprecation.py", line 106, in patched_init
return obj_init(*args, **kwargs)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py", line 520, in __init__
**kwargs,
File "/home/user/.local/lib/python3.7/site-packages/ray/tune/trainable/trainable.py", line 169, in __init__
self.setup(copy.deepcopy(self.config))
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py", line 646, in setup
logdir=self.logdir,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py", line 161, in __init__
local_worker=local_worker,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py", line 254, in _setup
spaces=spaces,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py", line 935, in _make_worker
dataset_shards=self._ds_shards,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 525, in __init__
self._update_policy_map(policy_dict=self.policy_dict)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1730, in _update_policy_map
policy_states=policy_states,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1847, in _build_policy_map
seed=self.seed,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/utils/policy.py", line 142, in create_policy_for_framework
return policy_class(observation_space, action_space, merged_config)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/mbmpo/mbmpo_torch_policy.py", line 36, in __init__
super().__init__(observation_space, action_space, config)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/maml/maml_torch_policy.py", line 318, in __init__
self._initialize_loss_from_dummy_batch()
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/policy/policy.py", line 1506, in _initialize_loss_from_dummy_batch
self.loss(self.model, self.dist_class, train_batch)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/maml/maml_torch_policy.py", line 394, in loss
meta_opt=self.meta_opt,
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/maml/maml_torch_policy.py", line 175, in __init__
self.obs = self.split_placeholders(obs, split)
File "/home/user/.local/lib/python3.7/site-packages/ray/rllib/algorithms/maml/maml_torch_policy.py", line 271, in split_placeholders
placeholder, torch.sum(split, dim=1).tolist(), dim=0
File "/opt/conda/lib/python3.7/site-packages/torch/functional.py", line 189, in split
return tensor.split(split_size_or_sections, dim)
File "/opt/conda/lib/python3.7/site-packages/torch/_tensor.py", line 611, in split
return super(Tensor, self).split_with_sizes(split_size, dim)
RuntimeError: split_with_sizes expects split_sizes to sum exactly to 32 (input tensor's size at dimension 0), but got split_sizes=[30]
Versions / Dependencies
Ray 2.6.2 (also 2.2.0, 2.0.0, etc.)
pytorch 1.12.1
Python 3.7.13
Ubuntu 18.04 (Docker container)
Reproduction script
""" pip install higher """
import yaml
import ray
from ray import air, tune
if __name__ == "__main__":
ray.init()
config = yaml.load(open("pendulum-mbmpo.yaml"), Loader=yaml.FullLoader)
config = config["pendulum-mbmpo"]
run = config.pop("run")
print("Run:", run)
env = config.pop("env")
stop = config.pop("stop")
checkpoint_config=air.CheckpointConfig(
checkpoint_at_end=True,
checkpoint_frequency=10
)
config = config["config"]
config["env"] = env
tuner = tune.Tuner(
run,
param_space=config,
run_config=air.RunConfig(stop=stop, checkpoint_config=checkpoint_config, verbose=2),
)
analysis = tuner.fit()
best_result = analysis.get_best_result()
print("Best config:", best_result.config.items())
ray.shutdown()
Issue Severity
High: It blocks me from completing my task.