Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slime env compatible with SB3 #5

Merged
merged 7 commits into from
Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions env-test-gym.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Sanity-check that the single-agent Slime environment complies with the
Gym API expected by Stable Baselines3, using SB3's built-in env checker."""
from stable_baselines3.common.env_checker import check_env
import slime_environments  # noqa: F401 -- import registers "Slime-v0" with gym
import gym
import json

# JSON file holding the single-agent environment parameters
# (world size, pheromone dynamics, rendering options, ...).
PARAMS_FILE = "slime_environments/agents/single-agent-params.json"
with open(PARAMS_FILE) as f:
    params = json.load(f)

env = gym.make("Slime-v0", **params)
check_env(env)  # raises an AssertionError if the env violates the Gym/SB3 interface
print("Environment compatible with Stable Baselines3")
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
gym
importlib-metadata==4.13
pygame
pettingzoo
stable-baselines3
37 changes: 37 additions & 0 deletions single-test-03-2023-02-22 13:54:51.756431.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"population": 50,
"sniff_threshold": 0.9,
"diffuse_area": 3,
"diffuse_mode": "cascade",
"follow_mode": "prob",
"smell_area": 5,
"lay_area": 1,
"lay_amount": 3,
"evaporation": 0.9,
"cluster_threshold": 30,
"cluster_radius": 10,
"rew": 100,
"penalty": -1,
"episode_ticks": 500,
"W": 66,
"H": 38,
"PATCH_SIZE": 20,
"TURTLE_SIZE": 16,
"FPS": 30,
"SHADE_STRENGTH": 10,
"SHOW_CHEM_TEXT": false,
"CLUSTER_FONT_SIZE": 12,
"CHEMICAL_FONT_SIZE": 8
}
----------
TRAIN_EPISODES = 2
TEST_EPISODES = 2
----------
alpha = 0.2
gamma = 0.8
epsilon = 0.9
decay = 0.9998
----------
Episode, Tick, Avg cluster size X tick, move-toward-chemical, random-walk, drop-chemical, Avg reward X episode
1, 500, -1, 182, 145, 173, 5196725.440000001
2, 1000, -1, 163, 176, 161, 2886389.080000002
8 changes: 4 additions & 4 deletions slime_environments/agents/SA_QLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
cluster_dict = {}


def observation_to_int_map(obs: [bool, bool]):
def observation_to_int_map(obs):
if sum(obs) == 0: # DOC [False, False]
mapped = sum(obs) # 0
elif sum(obs) == 2: # DOC [True, True]
Expand All @@ -69,15 +69,15 @@ def observation_to_int_map(obs: [bool, bool]):
# TRAINING
print("Start training...")
for ep in range(1, TRAIN_EPISODES+1):
observation, _ = env.reset()
observation = env.reset()
obs = observation_to_int_map(observation)
for tick in range(1, params['episode_ticks']+1):
if random.uniform(0, 1) < epsilon:
action = env.action_space.sample() # Explore action space
else:
action = np.argmax(q_table[obs]) # Exploit learned values

next_observation, reward, _, _, _ = env.step(action)
next_observation, reward, _, _ = env.step(action)
next_obs = observation_to_int_map(next_observation)

old_value = q_table[obs][action]
Expand Down Expand Up @@ -115,7 +115,7 @@ def observation_to_int_map(obs: [bool, bool]):
obs = observation_to_int_map(observation)
for tick in range(params['episode_ticks']):
action = np.argmax(q_table[obs])
observation, reward, _, _, _ = env.step(action)
observation, reward, _, _, = env.step(action)
obs = observation_to_int_map(observation)
reward_episode += reward
env.render()
Expand Down
23 changes: 12 additions & 11 deletions slime_environments/environments/SlimeEnvSingleAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import pygame
from gym import spaces
from gym.spaces import MultiBinary

BLACK = (0, 0, 0)
BLUE = (0, 0, 255)
Expand Down Expand Up @@ -152,7 +153,7 @@ def __init__(self,
self._find_neighbours(self.cluster_patches, self.cluster_radius)

self.action_space = spaces.Discrete(3) # DOC 0 = walk, 1 = lay_pheromone, 2 = follow_pheromone TODO as dict
self.observation_space = BooleanSpace(size=2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch
self.observation_space = MultiBinary(2) # DOC [0] = whether the turtle is in a cluster [1] = whether there is chemical in turtle patch
self._action_to_name = {0: "random-walk", 1: "drop-chemical", 2: "move-toward-chemical"}

self.screen = pygame.display.set_mode((self.W_pixels, self.H_pixels))
Expand Down Expand Up @@ -287,9 +288,9 @@ def step(self, action: int):

cur_reward = self.reward_cluster_and_time_punish_time()

return self._get_obs(), cur_reward, False, False, {} # DOC Gym v26 has additional 'truncated' boolean
return self._get_obs(), cur_reward, False, {} # DOC Gym v26 has additional 'truncated' boolean

def lay_pheromone(self, pos: tuple[int, int], amount: int):
def lay_pheromone(self, pos, amount: int):
"""
Lay 'amount' pheromone in square 'area' centred in 'pos'

Expand Down Expand Up @@ -343,7 +344,7 @@ def _evaporate(self):
if self.patches[patch]['chemical'] > 0:
self.patches[patch]['chemical'] *= self.evaporation

def walk(self, turtle: dict[str: tuple[int, int]], _id: int):
def walk(self, turtle, _id: int):
"""
Action 0: move in random direction (8 surrounding cells)

Expand All @@ -359,7 +360,7 @@ def walk(self, turtle: dict[str: tuple[int, int]], _id: int):
turtle['pos'] = (x2, y2)
self.patches[turtle['pos']]['turtles'].append(_id)

def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[int, int]], _id: int):
def follow_pheromone(self, ph_coords, turtle, _id: int):
"""
Action 2: move turtle towards greatest pheromone found

Expand Down Expand Up @@ -396,7 +397,7 @@ def follow_pheromone(self, ph_coords: tuple[int, int], turtle: dict[str: tuple[i
turtle['pos'] = (x, y)
self.patches[turtle['pos']]['turtles'].append(_id)

def _find_max_pheromone(self, pos: tuple[int, int]):
def _find_max_pheromone(self, pos):
"""
Find where the maximum pheromone level is within a square controlled by self.smell_area centred in 'pos'.
Following pheromone mode is controlled by param self.follow_mode:
Expand Down Expand Up @@ -482,8 +483,8 @@ def reward_cluster_and_time_punish_time(self):
self.rewards.append(cur_reward)
return cur_reward

def reset(self, seed=None, options=None):
super().reset(seed=seed)
def reset(self):
# super().reset()
# empty stuff
self.rewards = []
self.cluster_ticks = 0
Expand All @@ -501,9 +502,9 @@ def reset(self, seed=None, options=None):
for p in self.patches:
self.patches[p]['chemical'] = 0.0

return self._get_obs(), {}
return self._get_obs()

def render(self, **kwargs):
def render(self, mode="human",**kwargs):
for event in pygame.event.get():
if event.type == pygame.QUIT: # window closed -> program quits
pygame.quit()
Expand Down Expand Up @@ -546,7 +547,7 @@ def close(self):
pygame.quit()

def _get_obs(self):
return self._compute_cluster() >= self.cluster_threshold, self._check_chemical()
return np.array([self._compute_cluster() >= self.cluster_threshold, self._check_chemical()])


if __name__ == "__main__":
Expand Down