Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slime env compatible with SB3 #5

Merged
merged 7 commits into from
Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions env-test-gym.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Sanity-check that the single-agent Slime environment complies with the
Gym API expected by Stable Baselines3, using SB3's built-in env checker."""
from stable_baselines3.common.env_checker import check_env
import slime_environments  # noqa: F401 -- import registers "Slime-v0" with gym
import gym
import json

# JSON file holding the single-agent environment parameters
# (world size, pheromone dynamics, rendering options, ...).
PARAMS_FILE = "slime_environments/agents/single-agent-params.json"
with open(PARAMS_FILE) as f:
    params = json.load(f)

env = gym.make("Slime-v0", **params)
check_env(env)  # raises an AssertionError if the env violates the Gym/SB3 interface
print("Environment compatible with Stable Baselines3")
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
gym
importlib-metadata==4.13
pygame
pettingzoo
stable-baselines3
37 changes: 37 additions & 0 deletions single-test-03-2023-02-22 13:54:51.756431.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"population": 50,
"sniff_threshold": 0.9,
"diffuse_area": 3,
"diffuse_mode": "cascade",
"follow_mode": "prob",
"smell_area": 5,
"lay_area": 1,
"lay_amount": 3,
"evaporation": 0.9,
"cluster_threshold": 30,
"cluster_radius": 10,
"rew": 100,
"penalty": -1,
"episode_ticks": 500,
"W": 66,
"H": 38,
"PATCH_SIZE": 20,
"TURTLE_SIZE": 16,
"FPS": 30,
"SHADE_STRENGTH": 10,
"SHOW_CHEM_TEXT": false,
"CLUSTER_FONT_SIZE": 12,
"CHEMICAL_FONT_SIZE": 8
}
----------
TRAIN_EPISODES = 2
TEST_EPISODES = 2
----------
alpha = 0.2
gamma = 0.8
epsilon = 0.9
decay = 0.9998
----------
Episode, Tick, Avg cluster size X tick, move-toward-chemical, random-walk, drop-chemical, Avg reward X episode
1, 500, -1, 182, 145, 173, 5196725.440000001
2, 1000, -1, 163, 176, 161, 2886389.080000002
8 changes: 4 additions & 4 deletions slime_environments/agents/SA_QLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
cluster_dict = {}


def observation_to_int_map(obs: [bool, bool]):
def observation_to_int_map(obs):
if sum(obs) == 0: # DOC [False, False]
mapped = sum(obs) # 0
elif sum(obs) == 2: # DOC [True, True]
Expand All @@ -69,15 +69,15 @@ def observation_to_int_map(obs: [bool, bool]):
# TRAINING
print("Start training...")
for ep in range(1, TRAIN_EPISODES+1):
observation, _ = env.reset()
observation = env.reset()
obs = observation_to_int_map(observation)
for tick in range(1, params['episode_ticks']+1):
if random.uniform(0, 1) < epsilon:
action = env.action_space.sample() # Explore action space
else:
action = np.argmax(q_table[obs]) # Exploit learned values

next_observation, reward, _, _, _ = env.step(action)
next_observation, reward, _, _ = env.step(action)
next_obs = observation_to_int_map(next_observation)

old_value = q_table[obs][action]
Expand Down Expand Up @@ -115,7 +115,7 @@ def observation_to_int_map(obs: [bool, bool]):
obs = observation_to_int_map(observation)
for tick in range(params['episode_ticks']):
action = np.argmax(q_table[obs])
observation, reward, _, _, _ = env.step(action)
observation, reward, _, _, = env.step(action)
obs = observation_to_int_map(observation)
reward_episode += reward
env.render()
Expand Down
23 changes: 12 additions & 11 deletions slime_environments/environments/SlimeEnvSingleAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import pygame
from gym import spaces
from gym.spaces import MultiBinary

BLACK = (0, 0, 0)
BLUE = (0, 0, 255)
Expand Down Expand Up @@ -152,7 +153,7 @@ def __init__(self,
self._find_neighbours(self.cluster_patches, self.cluster_radius)

self.action_space = spaces.Discrete(3) # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone TODO as dict
self.observation_space = BooleanSpace(size=2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch
self.observation_space = MultiBinary(2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch
self._action_to_name = {0: "random-walk", 1: "drop-chemical", 2: "move-toward-chemical"}

self.screen = pygame.display.set_mode((self.W_pixels, self.H_pixels))
Expand Down Expand Up @@ -287,9 +288,9 @@ def step(self, action: int):

cur_reward = self.reward_cluster_and_time_punish_time()

return self._get_obs(), cur_reward, False, False, {} # DOC Gym v26 has additional 'truncated' boolean
return self._get_obs(), cur_reward, False, {} # DOC Gym v26 has additional 'truncated' boolean

def lay_pheromone(self, pos: tuple[int, int], amount: int):
def lay_pheromone(self, pos, amount: int):
"""
Lay 'amount' pheromone in square 'area' centred in 'pos'

Expand Down Expand Up @@ -343,7 +344,7 @@ def _evaporate(self):
if self.patches[patch]['chemical'] > 0:
self.patches[patch]['chemical'] *= self.evaporation

def walk(self, turtle: dict[str: tuple[int, int]], _id: int):
def walk(self, turtle, _id: int):
"""
Action 0: move in random direction (8 surrounding cells)

Expand All @@ -359,7 +360,7 @@ def walk(self, turtle: dict[str: tuple[int, int]], _id: int):
turtle['pos'] = (x2, y2)
self.patches[turtle['pos']]['turtles'].append(_id)

def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[int, int]], _id: int):
def follow_pheromone(self, ph_coords, turtle, _id: int):
"""
Action 2: move turtle towards greatest pheromone found

Expand Down Expand Up @@ -396,7 +397,7 @@ def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[i
turtle['pos'] = (x, y)
self.patches[turtle['pos']]['turtles'].append(_id)

def _find_max_pheromone(self, pos: tuple[int, int]):
def _find_max_pheromone(self, pos):
"""
Find where the maximum pheromone level is within a square controlled by self.smell_area centred in 'pos'.
Following pheromone mode is controlled by param self.follow_mode:
Expand Down Expand Up @@ -482,8 +483,8 @@ def reward_cluster_and_time_punish_time(self):
self.rewards.append(cur_reward)
return cur_reward

def reset(self, seed=None, options=None):
super().reset(seed=seed)
def reset(self):
# super().reset()
# empty stuff
self.rewards = []
self.cluster_ticks = 0
Expand All @@ -501,9 +502,9 @@ def reset(self, seed=None, options=None):
for p in self.patches:
self.patches[p]['chemical'] = 0.0

return self._get_obs(), {}
return self._get_obs()

def render(self, **kwargs):
def render(self, mode="human",**kwargs):
for event in pygame.event.get():
if event.type == pygame.QUIT: # window closed -> program quits
pygame.quit()
Expand Down Expand Up @@ -546,7 +547,7 @@ def close(self):
pygame.quit()

def _get_obs(self):
return self._compute_cluster() >= self.cluster_threshold, self._check_chemical()
return np.array([self._compute_cluster() >= self.cluster_threshold, self._check_chemical()])


if __name__ == "__main__":
Expand Down