From ef10189d80dbb2efb3b5391cba4eb3c2ab5c7aae Mon Sep 17 00:00:00 2001 From: Max Weltevrede <31962715+MWeltevrede@users.noreply.github.com> Date: Mon, 4 Jul 2022 15:08:54 +0200 Subject: [PATCH] Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination (#948) * Prohibit simultaneous use of optimize_memory_buffer and handle_timeout_termination * Modify test to avoid unsupported buffer configuration * Change from assertion to raising of ValueError * Update changelog * Update style for consistency * Use handle_timeout_termination when possible Co-authored-by: Anssi Co-authored-by: Antonin Raffin --- docs/misc/changelog.rst | 3 ++- stable_baselines3/common/buffers.py | 7 +++++++ tests/test_save_load.py | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 52bf3e48e..5e893d9a5 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -33,6 +33,7 @@ Bug Fixes: - Added a check for unbounded actions - Fixed issues due to newer version of protobuf (tensorboard) and sphinx - Fix exception causes all over the codebase (@cool-RR) +- Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination due to a bug (@MWeltevrede) Deprecations: ^^^^^^^^^^^^^ @@ -979,4 +980,4 @@ And all the contributors: @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum @eleurent @ac-93 @cove9988 @theDebugger811 @hsuehch @Demetrio92 @thomasgubler @IperGiove @ScheiklP @simoninithomas @armandpl @manuel-delverme @Gautam-J @gianlucadecola @buoyancy99 @caburu @xy9485 -@Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR +@Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR @MWeltevrede diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py index d7728cbeb..d36c5f5ca 100644 --- a/stable_baselines3/common/buffers.py +++ b/stable_baselines3/common/buffers.py @@ -164,6 +164,7 @@ class ReplayBuffer(BaseBuffer): at a cost of more complexity. See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195 and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274 + Cannot be used in combination with handle_timeout_termination. :param handle_timeout_termination: Handle timeout termination (due to timelimit) separately and treat the task as infinite horizon task. https://github.com/DLR-RM/stable-baselines3/issues/284 @@ -188,6 +189,12 @@ def __init__( if psutil is not None: mem_available = psutil.virtual_memory().available + # there is a bug if both optimize_memory_usage and handle_timeout_termination are true + # see https://github.com/DLR-RM/stable-baselines3/issues/934 + if optimize_memory_usage and handle_timeout_termination: + raise ValueError( + "ReplayBuffer does not support optimize_memory_usage = True and handle_timeout_termination = True simultaneously." + ) self.optimize_memory_usage = optimize_memory_usage self.observations = np.zeros((self.buffer_size, self.n_envs) + self.obs_shape, dtype=observation_space.dtype) diff --git a/tests/test_save_load.py b/tests/test_save_load.py index 2fdebbeaa..d7a74c5ee 100644 --- a/tests/test_save_load.py +++ b/tests/test_save_load.py @@ -375,6 +375,9 @@ def test_warn_buffer(recwarn, model_class, optimize_memory_usage): select_env(model_class), buffer_size=100, optimize_memory_usage=optimize_memory_usage, + # we cannot use optimize_memory_usage and handle_timeout_termination + # at the same time + replay_buffer_kwargs={"handle_timeout_termination": not optimize_memory_usage}, policy_kwargs=dict(net_arch=[64]), learning_starts=10, )