2 changes: 1 addition & 1 deletion doc/source/rllib-training.rst
@@ -188,7 +188,7 @@ You can also access just the "master" copy of the agent state through ``agent.lo
REST API
--------

In some cases (i.e., when interacting with an external environment) it makes more sense to interact with RLlib as if it were an independently running service, rather than RLlib hosting the simulations itself. This is possible via RLlib's serving env `interface <rllib-envs.html#serving>`__.
In some cases (i.e., when interacting with an external environment) it makes more sense to interact with RLlib as if it were an independently running service, rather than RLlib hosting the simulations itself. This is possible via RLlib's serving env `interface <rllib-env.html#agent-driven>`__.

.. autoclass:: ray.rllib.utils.policy_client.PolicyClient
:members:
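To make the service-style workflow above concrete, here is a minimal sketch of a client driving one episode against an already-running policy server. The method names (``start_episode``, ``get_action``, ``log_returns``, ``end_episode``), the ``localhost:9900`` address, and the CartPole environment are illustrative assumptions based on the serving example, not part of this change.

.. code-block:: python

    # Sketch only: assumes a policy server is already listening on
    # localhost:9900 and that CartPole-v0 matches the server's configuration.
    import gym

    from ray.rllib.utils.policy_client import PolicyClient

    env = gym.make("CartPole-v0")
    client = PolicyClient("http://localhost:9900")

    episode_id = client.start_episode(training_enabled=True)
    obs = env.reset()
    done = False
    while not done:
        # Query the remote policy for an action, step the local env,
        # and report the observed reward back for training.
        action = client.get_action(episode_id, obs)
        obs, reward, done, _ = env.step(action)
        client.log_returns(episode_id, reward)
    client.end_episode(episode_id, obs)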
36 changes: 24 additions & 12 deletions python/ray/rllib/agents/ppo/ppo.py
@@ -48,7 +48,7 @@
    # Whether to allocate CPUs for workers (if > 0).
    "num_cpus_per_worker": 1,
    # Whether to rollout "complete_episodes" or "truncate_episodes"
    "batch_mode": "complete_episodes",
    "batch_mode": "truncate_episodes",
    # Which observation filter to apply to the observation
    "observation_filter": "MeanStdFilter",
    # Use the sync samples optimizer instead of the multi-gpu one
@@ -80,17 +80,7 @@ def default_resource_request(cls, config):
            extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])

    def _init(self):
        waste_ratio = (
            self.config["sample_batch_size"] * self.config["num_workers"] /
            self.config["train_batch_size"])
        if waste_ratio > 1:
            msg = ("sample_batch_size * num_workers >> train_batch_size. "
                   "This means that many steps will be discarded. Consider "
                   "reducing sample_batch_size, or increase train_batch_size.")
            if waste_ratio > 1.5:
                raise ValueError(msg)
            else:
                print("Warning: " + msg)
        self._validate_config()
        self.local_evaluator = self.make_local_evaluator(
            self.env_creator, self._policy_graph)
        self.remote_evaluators = self.make_remote_evaluators(
@@ -114,6 +104,28 @@ def _init(self):
"standardize_fields": ["advantages"],
})

    def _validate_config(self):
        waste_ratio = (
            self.config["sample_batch_size"] * self.config["num_workers"] /
            self.config["train_batch_size"])
        if waste_ratio > 1:
            msg = ("sample_batch_size * num_workers >> train_batch_size. "
                   "This means that many steps will be discarded. Consider "
                   "reducing sample_batch_size, or increase train_batch_size.")
            if waste_ratio > 1.5:
                raise ValueError(msg)
            else:
                print("Warning: " + msg)
        if self.config["sgd_minibatch_size"] > self.config["train_batch_size"]:
            raise ValueError(
                "Minibatch size {} must be <= train batch size {}.".format(
                    self.config["sgd_minibatch_size"],
                    self.config["train_batch_size"]))
        if (self.config["batch_mode"] == "truncate_episodes"
                and not self.config["use_gae"]):
            raise ValueError(
                "Episode truncation is not supported without a value function")

    def _train(self):
        prev_steps = self.optimizer.num_steps_sampled
        fetches = self.optimizer.step()
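The new default of ``batch_mode: truncate_episodes`` interacts with ``_validate_config`` above: truncation is only allowed together with GAE, and ``sgd_minibatch_size`` must not exceed ``train_batch_size``. As a hedged illustration, assuming the ``PPOAgent`` entry point and ``DEFAULT_CONFIG`` exported by this module, a config that passes validation could look like the following sketch; the values are placeholders, not tuned settings.

.. code-block:: python

    import ray
    from ray.rllib.agents import ppo

    ray.init()

    config = ppo.DEFAULT_CONFIG.copy()
    config.update({
        # New default in this change: rollouts are cut off at
        # sample_batch_size steps, which _validate_config allows only
        # together with GAE.
        "batch_mode": "truncate_episodes",
        "use_gae": True,
        "num_workers": 2,
        "sample_batch_size": 200,
        "train_batch_size": 4000,
        # Must be <= train_batch_size, or _validate_config raises ValueError.
        "sgd_minibatch_size": 128,
    })

    agent = ppo.PPOAgent(config=config, env="CartPole-v0")
    print(agent.train())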
1 change: 1 addition & 0 deletions python/ray/rllib/tuned_examples/hopper-ppo.yaml
@@ -10,3 +10,4 @@ hopper-ppo:
        train_batch_size: 160000
        num_workers: 64
        num_gpus: 4
        batch_mode: complete_episodes
1 change: 1 addition & 0 deletions python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml
@@ -17,3 +17,4 @@ humanoid-ppo-gae:
        free_log_std: true
        num_workers: 64
        num_gpus: 4
        batch_mode: complete_episodes
1 change: 1 addition & 0 deletions python/ray/rllib/tuned_examples/humanoid-ppo.yaml
@@ -15,3 +15,4 @@ humanoid-ppo:
        use_gae: false
        num_workers: 64
        num_gpus: 4
        batch_mode: complete_episodes
2 changes: 1 addition & 1 deletion python/ray/rllib/tuned_examples/pendulum-ppo.yaml
@@ -13,4 +13,4 @@ pendulum-ppo:
        num_sgd_iter: 10
        model:
            fcnet_hiddens: [64, 64]
            squash_to_range: True
        batch_mode: complete_episodes
@@ -6,3 +6,4 @@ cartpole-ppo:
        time_total_s: 300
    config:
        num_workers: 1
        batch_mode: complete_episodes
@@ -15,3 +15,4 @@ pendulum-ppo:
        num_sgd_iter: 10
        model:
            fcnet_hiddens: [64, 64]
        batch_mode: complete_episodes
1 change: 1 addition & 0 deletions python/ray/rllib/tuned_examples/walker2d-ppo.yaml
@@ -9,3 +9,4 @@ walker2d-v1-ppo:
        train_batch_size: 320000
        num_workers: 64
        num_gpus: 4
        batch_mode: complete_episodes
8 changes: 1 addition & 7 deletions test/jenkins_tests/run_multi_node_tests.sh
@@ -58,7 +58,7 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    --env CartPole-v1 \
    --run PPO \
    --stop '{"training_iteration": 2}' \
    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false}'
    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}'

docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/train.py \
@@ -288,12 +288,6 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/tune/examples/genetic_example.py \
    --smoke-test

docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py

docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py

docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \
    python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2
