From 9825da723380f42ab7a24f7784eb5255a154dac8 Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Tue, 7 Aug 2018 04:35:26 +0800 Subject: [PATCH] Change training tasks to xray for Jenkins tests (#2567) --- python/ray/rllib/env/atari_wrappers.py | 10 +- .../rllib/examples/serving/cartpole_server.py | 6 +- python/ray/rllib/utils/actors.py | 6 +- python/ray/rllib/utils/policy_server.py | 6 +- test/jenkins_tests/run_multi_node_tests.sh | 98 +++++++++---------- 5 files changed, 71 insertions(+), 55 deletions(-) diff --git a/python/ray/rllib/env/atari_wrappers.py b/python/ray/rllib/env/atari_wrappers.py index f9bf5b94a3c7f..76f5d4f01402e 100644 --- a/python/ray/rllib/env/atari_wrappers.py +++ b/python/ray/rllib/env/atari_wrappers.py @@ -153,7 +153,10 @@ def __init__(self, env, dim): self.width = dim # in rllib we use 80 self.height = dim self.observation_space = spaces.Box( - low=0, high=255, shape=(self.height, self.width, 1)) + low=0, + high=255, + shape=(self.height, self.width, 1), + dtype=np.float32) def observation(self, frame): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) @@ -170,7 +173,10 @@ def __init__(self, env, k): self.frames = deque([], maxlen=k) shp = env.observation_space.shape self.observation_space = spaces.Box( - low=0, high=255, shape=(shp[0], shp[1], shp[2] * k)) + low=0, + high=255, + shape=(shp[0], shp[1], shp[2] * k), + dtype=np.float32) def reset(self): ob = self.env.reset() diff --git a/python/ray/rllib/examples/serving/cartpole_server.py b/python/ray/rllib/examples/serving/cartpole_server.py index a64ce03e689f4..dbbdf85809ff8 100755 --- a/python/ray/rllib/examples/serving/cartpole_server.py +++ b/python/ray/rllib/examples/serving/cartpole_server.py @@ -10,6 +10,7 @@ import os from gym import spaces +import numpy as np import ray from ray.rllib.agents.dqn import DQNAgent @@ -25,8 +26,9 @@ class CartpoleServing(ServingEnv): def __init__(self): - ServingEnv.__init__(self, spaces.Discrete(2), - spaces.Box(low=-10, high=10, shape=(4, ))) + ServingEnv.__init__( + self, spaces.Discrete(2), + spaces.Box(low=-10, high=10, shape=(4, ), dtype=np.float32)) def run(self): print("Starting policy server at {}:{}".format(SERVER_ADDRESS, diff --git a/python/ray/rllib/utils/actors.py b/python/ray/rllib/utils/actors.py index 68788cbc99da3..e865feb431b4b 100644 --- a/python/ray/rllib/utils/actors.py +++ b/python/ray/rllib/utils/actors.py @@ -36,7 +36,11 @@ def completed_prefetch(self): for worker, obj_id in self.completed(): plasma_id = ray.pyarrow.plasma.ObjectID(obj_id.id()) - ray.worker.global_worker.plasma_client.fetch([plasma_id]) + if not ray.global_state.use_raylet: + ray.worker.global_worker.plasma_client.fetch([plasma_id]) + else: + (ray.worker.global_worker.local_scheduler_client. + reconstruct_objects([obj_id], True)) self._fetching.append((worker, obj_id)) remaining = [] diff --git a/python/ray/rllib/utils/policy_server.py b/python/ray/rllib/utils/policy_server.py index 7a5a05093e36b..13ca376bb82ab 100644 --- a/python/ray/rllib/utils/policy_server.py +++ b/python/ray/rllib/utils/policy_server.py @@ -28,7 +28,11 @@ class PolicyServer(ThreadingMixIn, HTTPServer): def __init__(self): ServingEnv.__init__( self, spaces.Discrete(2), - spaces.Box(low=-10, high=10, shape=(4,))) + spaces.Box( + low=-10, + high=10, + shape=(4,), + dtype=np.float32)) def run(self): server = PolicyServer(self, "localhost", 8900) server.serve_forever() diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index db362bcf64abd..8965d33b1773a 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -11,208 +11,208 @@ ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) DOCKER_SHA=$($ROOT_DIR/../../build-docker.sh --output-sha --no-cache) echo "Using Docker image" $DOCKER_SHA -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env PongDeterministic-v0 \ --run A3C \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 16}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"free_log_std": true}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"simple_optimizer": false, "num_sgd_iter": 2, "model": {"use_lstm": true}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "use_gae": false}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env Pendulum-v0 \ --run ES \ --stop '{"training_iteration": 2}' \ --config '{"stepsize": 0.01, "episodes_per_batch": 20, "timesteps_per_batch": 100}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env Pong-v0 \ --run ES \ --stop '{"training_iteration": 2}' \ --config '{"stepsize": 0.01, "episodes_per_batch": 20, "timesteps_per_batch": 100}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run A3C \ --stop '{"training_iteration": 2}' \ -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 2}' \ --config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run APEX \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "timesteps_per_iteration": 1000, "gpu": false, "min_iter_time_s": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env FrozenLake-v0 \ --run DQN \ --stop '{"training_iteration": 2}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env FrozenLake-v0 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"num_sgd_iter": 10, "sgd_batchsize": 64, "timesteps_per_batch": 1000, "num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env PongDeterministic-v4 \ --run DQN \ --stop '{"training_iteration": 2}' \ --config '{"lr": 1e-4, "schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "sample_batch_size": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env MontezumaRevenge-v0 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env PongDeterministic-v4 \ --run A3C \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 80, "channel_major": true}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run A3C \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "use_pytorch": true}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ --run A3C \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "model": {"use_lstm": true}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 2}' \ --config '{"sample_batch_size": 500, "num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 2}' \ --config '{"sample_batch_size": 500, "num_workers": 1, "model": {"use_lstm": true, "max_seq_len": 100}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 2}' \ --config '{"sample_batch_size": 500, "num_workers": 1, "num_envs_per_worker": 10}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env Pong-v0 \ --run PG \ --stop '{"training_iteration": 2}' \ --config '{"sample_batch_size": 500, "num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env FrozenLake-v0 \ --run PG \ --stop '{"training_iteration": 2}' \ --config '{"sample_batch_size": 500, "num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env Pendulum-v0 \ --run DDPG \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 2}' \ --config '{"gpu": false, "num_workers": 2, "min_iter_time_s": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 2}' \ --config '{"gpu": false, "num_workers": 2, "min_iter_time_s": 1, "model": {"use_lstm": true}}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env MountainCarContinuous-v0 \ --run DDPG \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ rllib train \ --env MountainCarContinuous-v0 \ --run DDPG \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env Pendulum-v0 \ --run APEX_DDPG \ @@ -220,69 +220,69 @@ docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}' -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ sh /ray/test/jenkins_tests/multi_node_tests/test_rllib_eval.sh -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_checkpoint_restore.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_policy_evaluator.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_serving_env.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_lstm.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_multi_agent_env.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/test/test_supported_spaces.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/tune_mnist_ray.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/pbt_example.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/hyperband_example.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/async_hyperband_example.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/hyperopt_example.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/tune_mnist_keras.py \ --smoke-test -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2 -docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA \ +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2 python3 $ROOT_DIR/multi_node_docker_test.py \