From ef10189d80dbb2efb3b5391cba4eb3c2ab5c7aae Mon Sep 17 00:00:00 2001
From: Max Weltevrede <31962715+MWeltevrede@users.noreply.github.com>
Date: Mon, 4 Jul 2022 15:08:54 +0200
Subject: [PATCH] Prohibit simultaneous use of optimize_memory_usage and
 handle_timeout_termination (#948)

* Prohibit simultaneous use of optimize_memory_buffer and handle_timeout_termination

* Modify test to avoid unsupported buffer configuration

* Change from assertion to raising of ValueError

* Update changelog

* Update style for consistency

* Use handle_timeout_termination when possible

Co-authored-by: Anssi <kaneran21@hotmail.com>
Co-authored-by: Antonin Raffin <antonin.raffin@ensta.org>
---
 docs/misc/changelog.rst             | 3 ++-
 stable_baselines3/common/buffers.py | 7 +++++++
 tests/test_save_load.py             | 3 +++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index 52bf3e48e..5e893d9a5 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -33,6 +33,7 @@ Bug Fixes:
 - Added a check for unbounded actions
 - Fixed issues due to newer version of protobuf (tensorboard) and sphinx
 - Fix exception causes all over the codebase (@cool-RR)
+- Prohibit simultaneous use of optimize_memory_usage and handle_timeout_termination due to a bug (@MWeltevrede)
 
 Deprecations:
 ^^^^^^^^^^^^^
@@ -979,4 +980,4 @@ And all the contributors:
 @wkirgsn @AechPro @CUN-bjy @batu @IljaAvadiev @timokau @kachayev @cleversonahum
 @eleurent @ac-93 @cove9988 @theDebugger811 @hsuehch @Demetrio92 @thomasgubler @IperGiove @ScheiklP
 @simoninithomas @armandpl @manuel-delverme @Gautam-J @gianlucadecola @buoyancy99 @caburu @xy9485
-@Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR
+@Gregwar @ycheng517 @quantitative-technologies @bcollazo @git-thor @TibiGG @cool-RR @MWeltevrede
diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py
index d7728cbeb..d36c5f5ca 100644
--- a/stable_baselines3/common/buffers.py
+++ b/stable_baselines3/common/buffers.py
@@ -164,6 +164,7 @@ class ReplayBuffer(BaseBuffer):
         at a cost of more complexity.
         See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
         and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274
+        Cannot be used in combination with handle_timeout_termination.
     :param handle_timeout_termination: Handle timeout termination (due to timelimit)
         separately and treat the task as infinite horizon task.
         https://github.com/DLR-RM/stable-baselines3/issues/284
@@ -188,6 +189,12 @@ def __init__(
         if psutil is not None:
             mem_available = psutil.virtual_memory().available
 
+        # there is a bug if both optimize_memory_usage and handle_timeout_termination are true
+        # see https://github.com/DLR-RM/stable-baselines3/issues/934
+        if optimize_memory_usage and handle_timeout_termination:
+            raise ValueError(
+                "ReplayBuffer does not support optimize_memory_usage = True and handle_timeout_termination = True simultaneously."
+            )
         self.optimize_memory_usage = optimize_memory_usage
 
         self.observations = np.zeros((self.buffer_size, self.n_envs) + self.obs_shape, dtype=observation_space.dtype)
diff --git a/tests/test_save_load.py b/tests/test_save_load.py
index 2fdebbeaa..d7a74c5ee 100644
--- a/tests/test_save_load.py
+++ b/tests/test_save_load.py
@@ -375,6 +375,9 @@ def test_warn_buffer(recwarn, model_class, optimize_memory_usage):
         select_env(model_class),
         buffer_size=100,
         optimize_memory_usage=optimize_memory_usage,
+        # we cannot use optimize_memory_usage and handle_timeout_termination
+        # at the same time
+        replay_buffer_kwargs={"handle_timeout_termination": not optimize_memory_usage},
         policy_kwargs=dict(net_arch=[64]),
         learning_starts=10,
     )