
update code and readme
NielsenErik committed Nov 20, 2023
1 parent 2af335d commit cdc8026
Showing 105 changed files with 5,391 additions and 510 deletions.
22 changes: 20 additions & 2 deletions README.md
@@ -6,8 +6,26 @@ The project is supervised by Giovanni Iacca and Andrea Ferigo from University of
In [references](/references) there is a comprehensive list of the papers studied to complete the project.

## Source code
All the scripts developed during the project are stored in [src](/src). They are based on and continue the work developed in the following papers by Giovanni Iacca, Marco Crespi, Andrea Ferigo, and Leonardo Lucio Custode:
- [A Population-Based Approach for Multi-Agent Interpretable Reinforcement Learning](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4467882)
- [Quality Diversity Evolutionary Learning of Decision Trees](https://arxiv.org/abs/2208.12758)

It is possible to run the aforementioned code by following the instructions in the README.md file in the [src/base](/src/base/) folder.
Otherwise, change the working directory with `cd src` and run the following commands:
`chmod +x script.sh`
`source script.sh`
The following menu will appear in the terminal output:
`Hello! Here you can set environment and run codes`
`Please enter an integer to select an option:`
`[1]. Activate environment`
`[2]. Deactivate environment`
`[3]. Run code dts4marl`
`[4]. Run code marldts`
`[5]. Run code qd_marl`
`[6]. Run code qd_marl in debug mode`
`[7]. Run test environment`
`[8]. Exit`
Press 1 to activate the Python venv.
Then run `./script.sh` again and select one of the available experiments.
If 3 or 4 is selected, the script runs the projects developed by Giovanni Iacca, Marco Crespi, Andrea Ferigo, and Leonardo Lucio Custode.
If 5 or 6 (for debug and serialized mode) is selected, the script runs the project developed in this repository, which applies a Quality Diversity approach to a Multi-Agent Reinforcement Learning task.
Binary file added src/QD_MARL/__pycache__/agents.cpython-310.pyc
Binary file not shown.
Binary file modified src/QD_MARL/__pycache__/agents.cpython-311.pyc
Binary file not shown.
Binary file modified src/QD_MARL/__pycache__/utils.cpython-311.pyc
Binary file not shown.
47 changes: 40 additions & 7 deletions src/QD_MARL/agents.py
@@ -1,3 +1,15 @@
import os
import sys
from utils import print_debugging
sys.path.append(".")
import random
import time
from copy import deepcopy
from math import sqrt
import pettingzoo

import numpy as np

class Agent:
    def __init__(self, name, squad, set_, tree, manual_policy, to_optimize):
        self._name = name
@@ -32,29 +44,50 @@ def get_output(self, observation):
    def set_reward(self, reward):
        self._tree.set_reward(reward)
        self._score[-1] = reward

    def set_action(self, action):
        if self._manual_policy is not None:
            self._manual_policy.set_action(action)

    def get_score_statistics(self, params):
        # Old implementation (removed by this commit):
        score_values = [score_dict[key] for score_dict in self._score for key in score_dict]
        return getattr(np, f"{params['type']}")(a=score_values, **params['params'])#Can't compare dicts with >
        # New implementation (added by this commit): the statistic is now the mean of the per-episode scores.
        scores = np.array(self._score)
        avg = np.mean(scores)
        return avg
        #return getattr(np, f"{params['type']}")(a=self._score, **params['params'])#Can't compare dicts with >

    def new_episode(self):
        self._score.append(0)

    def has_policy(self):
        return self._manual_policy is not None

    def observe(self, observation):
        pass

    def __str__(self):
        return f"Name: {self._name}; Squad: {self._squad}; Set: {self._set}; Optimize: {str(self._to_optimize)}"

class CoachAgent(Agent):
    # Old constructor (removed by this commit):
    def __init__(self, name, squad, set_, tree, manual_policy, to_optimize):
        super().__init__(name, squad, set_, tree, manual_policy, to_optimize)
    # New constructor (added by this commit):
    def __init__(self, name, squad, tree, manual_policy=None):
        super().__init__(name, squad, None, tree, manual_policy, None)
        self._agents_features = {}
        self._max_fit = 0
        self._get_team = False
        self._fitnesses = {}
        self._score = []

    def get_output(self, observation):
        return super().get_output(observation)

    # Old set_reward (removed by this commit):
    def set_reward(self, reward):
        return super().set_reward(reward)
    # New set_reward (added by this commit): binary reward based on a hard-coded threshold of 5.
    def set_reward(self, agent_features):
        if self._agents_features is not None:
            if agent_features > 5:
                self._tree.set_reward(1)
                self._score.append(1)
            else:
                self._tree.set_reward(0)
                self._score.append(0)

    def get_score_statistics(self, params):
        return super().get_score_statistics(params)
@@ -68,5 +101,5 @@ def has_policy(self):
    def __str__(self):
        return super().__str__()

    def select_team(self):
        pass
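The coach's new `set_reward` replaces the inherited behaviour with a hard-coded binary rule: the scalar it receives as `agent_features` earns a reward of 1 if it exceeds 5 and 0 otherwise. A hedged sketch of that rule in isolation, again with a hypothetical tree stub and made-up scalar values (the diff does not show the actual caller):

```python
from agents import CoachAgent  # assumes src/QD_MARL is the working directory


class _StubTree:
    """Hypothetical stand-in for the coach's decision tree."""

    def set_reward(self, reward):
        pass


coach = CoachAgent("coach_0", squad=0, tree=_StubTree())

coach.set_reward(7)  # above the hard-coded threshold of 5 -> records a reward of 1
coach.set_reward(3)  # at or below the threshold           -> records a reward of 0

# get_score_statistics is inherited from Agent and averages the recorded scores: 0.5 here.
print(coach.get_score_statistics({"type": "mean", "params": {}}))
```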