Skip to content

[Bug][tune] HyperOpt nested dict throws TypeError (switch argument was GarbageCollected) #19507

Closed
@krfricke

Description

Search before asking

  • I searched the issues and found no similar issues.

Ray Component

Ray Tune

What happened + What you expected to happen

HyperOpt can run into TypeError: ('switch argument was', <class 'hyperopt.pyll.base.GarbageCollected'>)

Full output:

Traceback (most recent call last):
  File "/Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/hyperopt/pyll/base.py", line 869, in rec_eval
    int(switch_i)
TypeError: int() argument must be a string, a bytes-like object or a number, not 'type'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/kai/coding/sandbox/scratch.py", line 79, in <module>
    main()
  File "/Users/kai/coding/sandbox/scratch.py", line 72, in main
    "custom_model": FullyConnectedNetwork,
  File "/Users/kai/coding/ray/python/ray/tune/tune.py", line 601, in run
    runner.step()
  File "/Users/kai/coding/ray/python/ray/tune/trial_runner.py", line 661, in step
    next_trial = self._get_next_trial()  # blocking
  File "/Users/kai/coding/ray/python/ray/tune/trial_runner.py", line 805, in _get_next_trial
    self._update_trial_queue(blocking=wait_for_trial)
  File "/Users/kai/coding/ray/python/ray/tune/trial_runner.py", line 1276, in _update_trial_queue
    trial = self._search_alg.next_trial()
  File "/Users/kai/coding/ray/python/ray/tune/suggest/search_generator.py", line 90, in next_trial
    self._experiment.dir_name)
  File "/Users/kai/coding/ray/python/ray/tune/suggest/search_generator.py", line 97, in create_trial_if_possible
    suggested_config = self.searcher.suggest(trial_id)
  File "/Users/kai/coding/ray/python/ray/tune/suggest/suggestion.py", line 395, in suggest
    suggestion = self.searcher.suggest(trial_id)
  File "/Users/kai/coding/ray/python/ray/tune/suggest/hyperopt.py", line 300, in suggest
    print_node_on_error=self.domain.rec_eval_print_node_on_error)
  File "/Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/hyperopt/pyll/base.py", line 871, in rec_eval
    raise TypeError("switch argument was", switch_i)
TypeError: ('switch argument was', <class 'hyperopt.pyll.base.GarbageCollected'>)

Versions / Dependencies

Latest master

Reproduction script

import ray
from ray.tune import run

from ray.rllib.agents.ppo import DEFAULT_CONFIG
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork

from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest import ConcurrencyLimiter

from hyperopt import hp
from hyperopt.pyll import scope

from copy import deepcopy


def main():
    """Run PPO on CartPole-v1 driven by a HyperOpt search over a nested space.

    The search space carries a nested ``"model"`` dict with three
    ``hp.choice`` entries, while the single seeded point passed through
    ``points_to_evaluate`` only sets ``model["use_lstm"]``. According to the
    surrounding report, this combination is what triggers the
    ``TypeError: ('switch argument was', GarbageCollected)`` in HyperOpt.

    NOTE(review): ``DEFAULT_CONFIG`` is mutated in place below (keys are
    overwritten and popped); this is kept as-is so the reproduction behaves
    exactly like the originally reported script.
    """
    objective = "episode_reward_mean"
    direction = "max"

    # Candidate layer widths; each hidden-layer option is two equal layers.
    layer_sizes = [4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 5012]
    hidden_layouts = [[size, size] for size in layer_sizes]

    search_space = {
        # Exp Collection
        "train_batch_size": scope.int(
            hp.quniform("train_batch_size", 32, 10000, 1)),
        "sgd_minibatch_size": scope.int(
            hp.quniform("sgd_minibatch_size", 4, 8192, 1)),
        "num_sgd_iter": scope.int(
            hp.quniform("num_sgd_iter", 3, 30, 1)),
        # Policy Updating
        "clip_param": hp.choice("clip_param", [0.1, 0.2, 0.3]),
        "kl_target": hp.quniform("kl_target", 3e-3, 3e-2, 1e-3),
        "kl_coeff": hp.quniform("kl_coeff", 0.3, 1.0, 0.1),
        "gamma": hp.quniform("gamma", 0.8, 0.9997, 1e-4),
        "lambda": hp.quniform("lambda", 0.9, 1.0, 0.1),
        # Loss
        "vf_loss_coeff": hp.quniform("vf_loss_coeff", 0.5, 1.0, 0.1),
        "entropy_coeff": hp.quniform("entropy_coeff", 0, 0.01, 1e-2),
        "lr": hp.quniform("lr", 5e-6, 3e-3, 1e-6),
    }
    # Nested sub-space, added last so key order matches a single literal.
    search_space["model"] = {
        "fcnet_hiddens": hp.choice("fcnet_hiddens", hidden_layouts),
        "use_lstm": hp.choice("use_lstm", [True, False]),
        "lstm_cell_size": hp.choice("lstm_cell_size", layer_sizes),
    }

    # Seed point: the PPO defaults, with a partial "model" override, reduced
    # to only those keys that also appear in the search space.
    seed_cfg = DEFAULT_CONFIG
    seed_cfg["model"] = {"use_lstm": False}
    unsearched_keys = [name for name in seed_cfg if name not in search_space]
    for name in unsearched_keys:
        seed_cfg.pop(name)
    initial_points = [deepcopy(seed_cfg)]

    searcher = ConcurrencyLimiter(
        HyperOptSearch(
            search_space,
            points_to_evaluate=initial_points,
            metric=objective,
            mode=direction,
        ),
        8,
    )
    scheduler = ASHAScheduler(metric=objective, mode=direction)

    ray.init(include_dashboard=False, num_gpus=0)

    run_config = {
        "env": "CartPole-v1",
        "num_workers": 1,
        "model": {
            "custom_model": FullyConnectedNetwork,
        },
    }
    run(
        "PPO",
        search_alg=searcher,
        scheduler=scheduler,
        num_samples=-1,
        config=run_config,
    )


# Script entry point: run the reproduction only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()

Anything else

This seems to come from the nested model dict - we fixed a similar issue in #18113

Are you willing to submit a PR?

  • Yes I am willing to submit a PR!

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Metadata

Assignees

Labels

  • bug: Something that is supposed to be working; but isn't
  • stale: The issue is stale. It will be closed within 7 days unless there are further conversation
  • triage: Needs triage (eg: priority, bug/not-bug, and owning component)
  • tune: Tune-related issues

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions