Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(v3.6.7) - Compatibility with Gymnasium 1.0.0 #456

Merged
merged 17 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
"github.copilot-chat",
"github.vscode-pull-request-github",
"grapecity.gc-excelviewer",
"himanoa.python-autopep8",
"james-yu.latex-workshop",
"me-dutour-mathieu.vscode-github-actions",
"mechatroner.rainbow-csv",
Expand Down
1,628 changes: 808 additions & 820 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
package-mode = true
name = "sinergym"

version = "3.6.6"
version = "3.6.7"
description = "The goal of sinergym is to create an environment following OpenAI Gym interface for wrapping simulation engines for building control using deep reinforcement learning."
license = "MIT"

Expand Down Expand Up @@ -56,8 +56,8 @@ include = [

# Compatible Python versions
python = "^3.12"
gymnasium = "^0.29.1"
numpy = "^2.1.1"
gymnasium = "^1.0.0"
numpy = "^1.26.4"
pandas = "^2.2.2"
eppy = "^0.5.63"
tqdm = "^4.66.5"
Expand All @@ -69,7 +69,7 @@ pytest = { version = "^8.3.3", optional = true }
pytest-cov = { version = "^5.0.0", optional = true }
pytest-xdist = { version = "^3.6.1", optional = true }
coverage = { version = "^7.6.1", optional = true }
stable-baselines3 = { version = "^2.3.2", optional = true }
stable-baselines3 = { git = "https://github.com/DLR-RM/stable-baselines3", branch = "master", optional=true }
wandb = { version = "^0.18.1", optional = true }
IPython = { version = "^8.27.0", optional = true }
google-api-python-client = { version = "^2.58.0", optional = true }
Expand Down Expand Up @@ -111,7 +111,7 @@ IPython = "^8.27.0"
IPython = "^8.27.0"

[tool.poetry.group.drl.dependencies]
stable-baselines3 = "^2.3.2"
stable-baselines3 = { git = "https://github.com/DLR-RM/stable-baselines3", branch = "master"}

[tool.poetry.group.platforms.dependencies]
wandb = "^0.18.1"
Expand Down
1 change: 0 additions & 1 deletion scripts/eval/load_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import gymnasium as gym
import numpy as np
import wandb
from gymnasium.wrappers.normalize import NormalizeReward
from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
from stable_baselines3.common.monitor import Monitor

Expand Down
1 change: 0 additions & 1 deletion scripts/train/train_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import gymnasium as gym
import numpy as np
import wandb
from gymnasium.wrappers.normalize import NormalizeReward
from stable_baselines3 import *
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.logger import HumanOutputFormat
Expand Down
6 changes: 6 additions & 0 deletions sinergym/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
import os
import warnings
from typing import Union

import gymnasium as gym
Expand All @@ -10,6 +11,11 @@
from sinergym.utils.constants import *
from sinergym.utils.rewards import *

# Ignore epw module warning (epw module mistake)
warnings.filterwarnings(
"ignore",
module='epw')

# ------------------------- Set __version__ in module ------------------------ #
version_file = os.path.join(os.path.dirname(__file__), 'version.txt')
with open(version_file, 'r') as file_handler:
Expand Down
4 changes: 2 additions & 2 deletions sinergym/utils/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ def _on_event(self) -> None:
np.savetxt(
fname=self.save_path +
'/mean.txt',
X=self.eval_env.mean)
X=self.eval_env.get_wrapper_attr('mean'))
np.savetxt(
fname=self.save_path +
'/var.txt',
X=self.eval_env.var)
X=self.eval_env.get_wrapper_attr('var'))

# We close evaluation env and starts training env again
self.eval_env.close()
Expand Down
5 changes: 2 additions & 3 deletions sinergym/utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
"""Constants used in whole project."""

import os
from importlib import resources
from typing import List, Union

import numpy as np
import pkg_resources

# ---------------------------------------------------------------------------- #
# Generic constants #
# ---------------------------------------------------------------------------- #
# Sinergym Data path
PKG_DATA_PATH = pkg_resources.resource_filename(
'sinergym', 'data/')
PKG_DATA_PATH = str(resources.files('sinergym') / 'data')
# Weekday encoding for simulations
WEEKDAY_ENCODING = {'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3,
'friday': 4, 'saturday': 5, 'sunday': 6}
Expand Down
15 changes: 8 additions & 7 deletions sinergym/utils/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def __init__(self, env: Env) -> None:
'observation_variables')
self.action_variables = env.get_wrapper_attr('action_variables')

self.setpoints_summer = (23.0, 26.0)
self.setpoints_winter = (20.0, 23.5)
self.setpoints_summer = np.array((23.0, 26.0), dtype=np.float32)
self.setpoints_winter = np.array((20.0, 23.5), dtype=np.float32)

def act(self, observation: List[Any]) -> Sequence[Any]:
"""Select action based on indoor temperature.
Expand All @@ -72,7 +72,7 @@ def act(self, observation: List[Any]) -> Sequence[Any]:
else: # pragma: no cover
season_range = self.setpoints_winter

return (season_range[0], season_range[1])
return season_range


class RBCDatacenter(object):
Expand All @@ -91,7 +91,7 @@ def __init__(self, env: Env) -> None:
self.action_variables = env.get_wrapper_attr('action_variables')

# ASHRAE recommended temperature range = [18, 27] Celsius
self.range_datacenter = (18, 27)
self.range_datacenter = np.array((18, 27), dtype=np.float32)

def act(self) -> Sequence[Any]:
"""Select same action always, corresponding with comfort range.
Expand Down Expand Up @@ -147,6 +147,7 @@ def act(self, observation: List[Any]) -> Sequence[Any]:
new_cool_setpoint = current_cool_setpoint - 1
new_heat_setpoint = current_heat_setpoint - 1

return (
new_heat_setpoint,
new_cool_setpoint)
return np.array(
(new_heat_setpoint,
new_cool_setpoint),
dtype=np.float32)
30 changes: 14 additions & 16 deletions sinergym/utils/env_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,18 @@ def _check_obs(obs: Union[tuple,
correspond to the declared one.
"""
if not isinstance(observation_space, spaces.Tuple):
assert not isinstance(
obs, tuple
), f"The observation returned by the `{method_name}()` method should be a single value, not a tuple"
assert not isinstance(obs, tuple), f"The observation returned by the `{
method_name}()` method should be a single value, not a tuple"

if isinstance(observation_space, spaces.Discrete):
assert isinstance(
obs, int), f"The observation returned by `{method_name}()` method must be an int"
assert isinstance(obs, int), f"The observation returned by `{
method_name}()` method must be an int"
elif _is_numpy_array_space(observation_space):
assert isinstance(
obs, np.ndarray), f"The observation returned by `{method_name}()` method must be a numpy array"
assert isinstance(obs, np.ndarray), f"The observation returned by `{
method_name}()` method must be a numpy array"

assert observation_space.contains(
obs
), f"The observation returned by the `{method_name}()` method does not match the given observation space"
assert observation_space.contains(obs), f"The observation returned by the `{
method_name}()` method does not match the given observation space"


def _check_returned_values(
Expand Down Expand Up @@ -141,10 +139,10 @@ def _check_spaces(env: gym.Env) -> None:
# Helper to link to the code, because gym has no proper documentation
gym_spaces = " cf https://github.com/openai/gym/blob/master/gym/spaces/"

assert hasattr(
env, "observation_space"), "You must specify an observation space (cf gym.spaces)" + gym_spaces
assert hasattr(
env, "action_space"), "You must specify an action space (cf gym.spaces)" + gym_spaces
assert env.has_wrapper_attr(
'observation_space'), 'You must specify an observation space (cf gym.spaces)' + gym_spaces
assert env.has_wrapper_attr(
'action_space'), 'You must specify an action space (cf gym.spaces)' + gym_spaces

assert isinstance(env.observation_space,
spaces.Space), "The observation space must inherit from gymnasium.spaces" + gym_spaces
Expand Down Expand Up @@ -239,8 +237,8 @@ def check_env(
spaces.Box) and action_space.dtype != np.dtype(
np.float32):
warnings.warn(
f"Your action space has dtype {action_space.dtype}, we recommend using np.float32 to avoid cast errors."
)
f"Your action space has dtype {
action_space.dtype}, we recommend using np.float32 to avoid cast errors.")

# ============ Check the returned values ===============
_check_returned_values(env, observation_space, action_space)
Expand Down
64 changes: 32 additions & 32 deletions sinergym/utils/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import wandb
from epw.weather import Weather
from gymnasium import Env
from gymnasium.wrappers.normalize import RunningMeanStd
from gymnasium.wrappers.utils import RunningMeanStd

from sinergym.utils.common import is_wrapped
from sinergym.utils.constants import LOG_WRAPPERS_LEVEL, YEAR
Expand Down Expand Up @@ -726,7 +726,7 @@ def apply_ou_variability(self):
noise[i + 1] = noise[i] + dt * (-(noise[i] - mu) / tau) + \
sigma_bis * sqrtdt * np.random.randn()

self.energy_cost_data[variable] += noise
self.energy_cost_data.loc[:, variable] += noise

def set_energy_cost_data(self):
"""Sets the cost of energy data used to construct the state observation.
Expand Down Expand Up @@ -818,12 +818,12 @@ def __init__(self,
new_shape = self.env.get_wrapper_attr(
'observation_space').shape[0] + len(temperature_variables)
self.observation_space = gym.spaces.Box(
low=self.env.observation_space.low[0],
high=self.env.observation_space.high[0],
low=self.env.get_wrapper_attr('observation_space').low[0],
high=self.env.get_wrapper_attr('observation_space').high[0],
shape=(
new_shape,
),
dtype=self.env.observation_space.dtype)
dtype=self.env.get_wrapper_attr('observation_space').dtype)

self.logger.info('Wrapper initialized.')

Expand Down Expand Up @@ -881,7 +881,7 @@ def __init__(
super().__init__(env)

# Params
self.current_values = initial_values
self.current_values = np.array(initial_values, dtype=np.float32)

# Check environment is valid
try:
Expand All @@ -908,8 +908,10 @@ def __init__(
# All posible incremental variations
self.values_definition = {}
# Original action space variables
action_space_low = deepcopy(self.env.action_space.low)
action_space_high = deepcopy(self.env.action_space.high)
action_space_low = deepcopy(
self.env.get_wrapper_attr('action_space').low)
action_space_high = deepcopy(
self.env.get_wrapper_attr('action_space').high)
# Calculating incremental variations and action space for each
# incremental variable
for variable, (delta_temp,
Expand All @@ -936,7 +938,7 @@ def __init__(
self.action_space = gym.spaces.Box(
low=action_space_low,
high=action_space_high,
shape=self.env.action_space.shape,
shape=self.env.get_wrapper_attr('action_space').shape,
dtype=np.float32)

self.logger.info(
Expand All @@ -962,12 +964,13 @@ def action(self, action):
# Update current_values
self.current_values[i] += increment_value
# Clip the value with original action space
self.current_values[i] = max(self.env.action_space.low[index], min(
self.current_values[i], self.env.action_space.high[index]))
self.current_values[i] = max(
self.env.get_wrapper_attr('action_space').low[index], min(
self.current_values[i], self.env.get_wrapper_attr('action_space').high[index]))

action_[index] = self.current_values[i]

return list(action_)
return action_

# ---------------------------------------------------------------------------- #

Expand Down Expand Up @@ -1000,7 +1003,7 @@ def __init__(
super().__init__(env)

# Params
self.current_setpoints = initial_values
self.current_setpoints = np.array(initial_values, dtype=np.float32)

# Check environment is valid
try:
Expand Down Expand Up @@ -1044,26 +1047,25 @@ def __init__(
self.logger.info('Wrapper initialized')

# Define action mapping method
def action_mapping(self, action: int) -> List[float]:
return self.mapping[action]
def action_mapping(self, action: int) -> np.ndarray:
return np.array(self.mapping[action], dtype=np.float32)

def action(self, action):
"""Takes the discrete action and transforms it to setpoints tuple."""
action_ = deepcopy(action)
action_ = self.get_wrapper_attr('action_mapping')(action_)
# Update current setpoints values with incremental action
self.current_setpoints = [
self.current_setpoints = np.array([
sum(i) for i in zip(
self.get_wrapper_attr('current_setpoints'),
action_)]
action_)], dtype=np.float32)
# clip setpoints returned
self.current_setpoints = np.clip(
np.array(self.get_wrapper_attr('current_setpoints')),
self.env.action_space.low,
self.env.action_space.high
)
self.get_wrapper_attr('current_setpoints'),
self.env.get_wrapper_attr('action_space').low,
self.env.get_wrapper_attr('action_space').high)

return list(self.current_setpoints)
return self.current_setpoints

# Updating property
@property # pragma: no cover
Expand Down Expand Up @@ -1116,9 +1118,10 @@ def __init__(self,
'Make sure that the action space is compatible and contained in the original environment.')
self.logger.info('Wrapper initialized')

def action(self, action: Union[int, List[int]]) -> List[int]:
def action(self, action: Union[int, List[int]]) -> np.ndarray:
action_ = deepcopy(action)
action_ = self.get_wrapper_attr('action_mapping')(action_)
action_ = np.array(self.get_wrapper_attr(
'action_mapping')(action_), dtype=np.float32)
return action_

# Updating property
Expand Down Expand Up @@ -1172,14 +1175,14 @@ def __init__(self,
low=np.array(
np.repeat(
lower_norm_value,
env.action_space.shape[0]),
env.get_wrapper_attr('action_space').shape[0]),
dtype=np.float32),
high=np.array(
np.repeat(
upper_norm_value,
env.action_space.shape[0]),
env.get_wrapper_attr('action_space').shape[0]),
dtype=np.float32),
dtype=env.action_space.dtype)
dtype=env.get_wrapper_attr('action_space').dtype)
# Updated action space to normalized space
self.action_space = self.normalized_space

Expand Down Expand Up @@ -1695,9 +1698,6 @@ def __init__(self,
'It is required to be wrapped by a BaseLoggerWrapper child class previously.')
raise err

# Add requirement for wandb core
wandb.require("core")

# Define wandb run name if is not specified
run_name = run_name if run_name is not None else self.env.get_wrapper_attr(
'name') + '_' + wandb.util.generate_id()
Expand All @@ -1724,7 +1724,7 @@ def __init__(self,
'Error initializing WandB run, if project and entity are not specified, it should be a previous active wandb run, but it has not been found.')
raise RuntimeError

# Wandb finish with env.close flag
# Flag to Wandb finish with env close
self.wandb_finish = True

# Define X-Axis for episode summaries
Expand Down Expand Up @@ -1958,7 +1958,7 @@ def __init__(self,
low=-5e6,
high=5e6,
shape=(
self.env.observation_space.shape[0] -
self.env.get_wrapper_attr('observation_space').shape[0] -
len(obs_reduction),
),
dtype=np.float32)
Expand Down
Loading