Unity-Technologies · vincentpierre · Mar 4, 2019 · Mar 1, 2019 · Mar 4, 2019 · Mar 4, 2019
diff --git a/Assets/ECSEnvironment.py b/Assets/ECSEnvironment.py
@@ -0,0 +1,183 @@
+import mmap
+import struct
+import numpy as np
+
+from .brain import BrainInfo, BrainParameters
+
+
+class ECSEnvironment(object):
+    VEC_SIZE = 8
+    ACT_SIZE = 3
+
+    def __init__(self):
+        self.comm = UnityCommunication()
+        self.step_count = 0
+        print(" READY TO COMMUNICATE")
+
+    def reset(self, train_mode=True, config=None, lesson=None):
+        u_ready = False
+        while not u_ready:
+            u_ready = self.comm.unity_ready()
+        s = self.comm.read_sensor()
+        return self.make_brain_info(s)
+
+    def step(self, vector_action=None, memory=None, text_action=None, value=None):
+        # print("step")
+        self.comm.write_actuator(vector_action["ECSBrain"])
+        self.comm.set_ready()
+
+        u_ready = False
+        while not u_ready:
+            u_ready = self.comm.unity_ready()
+        s = self.comm.read_sensor()
+        return self.make_brain_info(s)
+
+    def close(self):
+        self.comm.close()
+
+    def make_brain_info(self, sensor):
+        # This assumes the order is consistent
+        self.step_count+=1
+        done = False
+        if self.step_count % 50 == 0:
+            done = True
+        return {"ECSBrain" : BrainInfo([], sensor, [" "] * sensor.shape[0],
+                         reward=sensor[:,0],
+                         agents=list(range(sensor.shape[0])),
+                         local_done=[done] * sensor.shape[0],
+                         max_reached=[done] * sensor.shape[0],
+                         vector_action=sensor,
+                         text_action = [" "] * sensor.shape[0])}
+
+
+    @property
+    def curriculum(self):
+        return None
+
+    @property
+    def logfile_path(self):
+        return None
+
+    @property
+    def brains(self):
+        return {"ECSBrain": BrainParameters("ECSBrain", self.VEC_SIZE, 1,
+                 [], [self.ACT_SIZE],
+                 [" "]*self.ACT_SIZE, 1)} # 1 for continuopus
+
+    @property
+    def global_done(self):
+        return False
+
+    @property
+    def academy_name(self):
+        return "ECSAcademy"
+
+    @property
+    def number_brains(self):
+        return 1
+
+    @property
+    def number_external_brains(self):
+        return 1
+
+    @property
+    def brain_names(self):
+        return ["ECSBrain"]
+
+    @property
+    def external_brain_names(self):
+        return ["ECSBrain"]
+
+
+
+
+
+class UnityCommunication:
+    FILE_CAPACITY = 200000
+    NUMBER_AGENTS_POSITION = 0
+    SENSOR_SIZE_POSITION = 4
+    ACTUATOR_SIZE_POSITION = 8
+    UNITY_READY_POSITION = 12
+    SENSOR_DATA_POSITION = 13
+
+    PYTHON_READY_POSITION = 100000
+    ACTUATOR_DATA_POSITION = 100001
+
+    # FILE_NAME = "../../../ml-agents-ecs/Assets/shared_communication_file.txt"
+    FILE_NAME = "shared_communication_file.txt" # This is relative to where the script is called
+
+    def __init__(self):
+        with open(self.FILE_NAME, "r+b") as f:
+            # memory-map the file, size 0 means whole file
+            self.accessor = mmap.mmap(f.fileno(), 0)
+
+    def get_int(self, position : int) -> int:
+        return struct.unpack("i", self.accessor[position:position + 4])[0]
+
+    def read_sensor(self) -> np.ndarray:
+        sensor_size = self.get_int(self.SENSOR_SIZE_POSITION)
+        number_agents = self.get_int(self.NUMBER_AGENTS_POSITION)
+
+        sensor = np.frombuffer(
+            buffer=self.accessor[self.SENSOR_DATA_POSITION: self.SENSOR_DATA_POSITION + 4*sensor_size*number_agents],
+            dtype=np.float32,
+            count=sensor_size * number_agents,
+            offset=0
+        )
+        return np.reshape(sensor, (number_agents, sensor_size))
+
+    def get_parameters(self) -> (int, int, int):
+        return self.get_int(self.NUMBER_AGENTS_POSITION), \
+               self.get_int(self.SENSOR_SIZE_POSITION), \
+               self.get_int(self.ACTUATOR_SIZE_POSITION)
+
+    def write_actuator(self, actuator: np.ndarray):
+        actuator_size = self.get_int(self.ACTUATOR_SIZE_POSITION)
+        number_agents = self.get_int(self.NUMBER_AGENTS_POSITION)
+
+        # TODO : Support more types ?
+        if actuator.dtype != np.float32:
+            actuator = actuator.astype(np.float32)
+
+        try:
+            assert(actuator.shape == (number_agents, actuator_size))
+        except:
+            print("_________")
+            print(actuator.shape)
+            print((number_agents, actuator_size))
+
+        self.accessor[self.ACTUATOR_DATA_POSITION: self.ACTUATOR_DATA_POSITION + 4*actuator_size*number_agents] = \
+            actuator.tobytes()
+
+    def set_ready(self):
+        self.accessor[self.UNITY_READY_POSITION: self.UNITY_READY_POSITION+1] = bytearray(struct.pack("b", False))
+        self.accessor[self.PYTHON_READY_POSITION: self.PYTHON_READY_POSITION+1] = bytearray(struct.pack("b", True))
+
+    def unity_ready(self) -> bool:
+        return self.accessor[self.UNITY_READY_POSITION]
+
+    def close(self):
+        self.accessor.close()
+
+
+# if __name__ == "__main__":
+#     comm = UnityCommunication()
+#
+#     steps = 0
+#     while True:
+#
+#         u_ready = False
+#         while not u_ready:
+#             u_ready = comm.unity_ready()
+#         steps += 1
+#         s = comm.read_sensor()
+#         nag, nse, nac = comm.get_parameters()
+#         # print(s.shape)
+#         # time.sleep(0.1)
+#         comm.write_actuator(
+#             np.random.normal(size=(nag, nac))
+#         )
+#         comm.set_ready()
+#
+
+
diff --git a/Assets/ECS_MLAgents_v0/Core/AgentSystem.cs b/Assets/ECS_MLAgents_v0/Core/AgentSystem.cs
@@ -1,4 +1,5 @@
-using System.Linq;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
 using Unity.Collections;
 using Unity.Collections.LowLevel.Unsafe;
 using Unity.Entities;
@@ -44,7 +45,7 @@ public abstract class AgentSystem<TS, TA> : JobComponentSystem, IAgentSystem
         private int _sensorMemorySize = INITIAL_MEMORY_SIZE;
         private int _actuatorMemorySize = INITIAL_MEMORY_SIZE;
 
-        public int DecisionInterval { get; set; }
+        public IDecisionRequester DecisionRequester { get; set; }
         private int _phase;
 
         public IAgentDecision Decision { get; set; }
@@ -63,6 +64,11 @@ public abstract class AgentSystem<TS, TA> : JobComponentSystem, IAgentSystem
 
         protected override void OnCreateManager()
         {
+            if (DecisionRequester == null)
+            {
+                DecisionRequester = new FixedCountRequester();
+            }
+
             _logger = new Logger(GetType().Name);
             _logger.Log("OnCreateManager");
             SetNewComponentGroup();
@@ -108,12 +114,13 @@ protected override JobHandle OnUpdate(JobHandle inputDeps)
         {
             _logger.Log("OnUpdate");
 
-            if (_phase > 0)
+            DecisionRequester.Update();
+            if (!DecisionRequester.Ready)
             {
-                _phase--;
                 return inputDeps;
             }
-            _phase = DecisionInterval;
+            DecisionRequester.Reset();
+
 
             var nAgents = _componentGroup.CalculateLength();
 

diff --git a/Assets/ECS_MLAgents_v0/Core/ExternalDecision.cs b/Assets/ECS_MLAgents_v0/Core/ExternalDecision.cs
@@ -0,0 +1,106 @@
+using System;
+using Unity.Collections;
+using Unity.Jobs;
+using System.IO.MemoryMappedFiles;
+using System.IO;
+using Unity.Collections.LowLevel.Unsafe;
+using UnityEngine;
+using UnityEngine.Profiling;
+
+
+namespace ECS_MLAgents_v0.Core{
+    public class ExternalDecision : IAgentDecision
+    {
+
+        // [ Unity Ready (1) , nAgents (4) , sensorSize (4) , actuatorSize (4) , Data
+
+        // TODO : This capacity needs to scale / communicate multiple times per step ?
+        private const int FILE_CAPACITY = 200000;
+        private const int NUMBER_AGENTS_POSITION = 0;
+        private const int SENSOR_SIZE_POSITION = 4;
+        private const int ACTUATOR_SIZE_POSITION = 8;
+        private const int UNITY_READY_POSITION = 12;
+        private const int SENSOR_DATA_POSITION = 13;
+
+        private const int PYTHON_READY_POSITION = 100000;
+        private const int ACTUATOR_DATA_POSITION = 100001;
+
+
+        private float[] actuatorData = new float[0];
+
+        // This is a temporary test file
+        // TODO : Replace with a file creation system
+        // TODO : Implement the communication in a separate class
+        // TODO : Have separate files for sensor and actuators
+        private string filenameWrite = "Assets/shared_communication_file.txt";
+
+        private MemoryMappedViewAccessor accessor;
+
+        public ExternalDecision()
+        {
+            var mmf = MemoryMappedFile.CreateFromFile(filenameWrite, FileMode.Open, "Test");
+            accessor = mmf.CreateViewAccessor(
+                0, FILE_CAPACITY, MemoryMappedFileAccess.ReadWrite);
+//            accessor.WriteArray(0, new bool[FILE_CAPACITY], 0, FILE_CAPACITY);
+            accessor.Write(PYTHON_READY_POSITION, false);
+            accessor.Write(UNITY_READY_POSITION, false);
+            Debug.Log("Is Ready to Communicate");
+        }
+
+        public JobHandle DecideBatch(
+            ref NativeArray<float> sensor, 
+            ref NativeArray<float> actuator, 
+            int sensorSize,
+            int actuatorSize, 
+            int nAgents, 
+            JobHandle handle)
+        {
+            Profiler.BeginSample("Communicating");
+            if (sensor.Length > 4 * 50000)
+            {
+                throw new Exception("TOO much data to send");
+            }
+
+            if (actuator.Length > 4 * 50000)
+            {
+                throw new Exception("TOO much data to send");
+            }
+
+            if (actuatorData.Length < actuator.Length)
+            {
+                actuatorData = new float[actuator.Length];
+            }
+
+
+            accessor.Write(NUMBER_AGENTS_POSITION, nAgents);
+            accessor.Write(SENSOR_SIZE_POSITION, sensorSize);
+            accessor.Write(ACTUATOR_SIZE_POSITION, actuatorSize);
+
+            accessor.WriteArray(SENSOR_DATA_POSITION, sensor.ToArray(), 0, sensor.Length);
+
+            accessor.Write(PYTHON_READY_POSITION, false);
+
+            accessor.Write(UNITY_READY_POSITION, true);
+
+
+            var readyToContinue = false;
+            int loopIter = 0;
+            while (!readyToContinue)
+            {
+                loopIter++;
+                readyToContinue = accessor.ReadBoolean(PYTHON_READY_POSITION);
+                readyToContinue = readyToContinue || loopIter > 20000000;
+                if (loopIter > 20000000)
+                {
+                    Debug.Log("Missed Communication");
+                }
+            }
+
+            accessor.ReadArray(ACTUATOR_DATA_POSITION, actuatorData, 0, actuator.Length);
+            actuator.CopyFrom(actuatorData);
+
+            Profiler.BeginSample("Communicating");
+            return handle;
+        }
+    }
+}
diff --git a/Assets/ECS_MLAgents_v0/Core/ExternalDecision.cs.meta b/Assets/ECS_MLAgents_v0/Core/ExternalDecision.cs.meta
diff --git a/Assets/ECS_MLAgents_v0/Core/IAgentSystem.cs b/Assets/ECS_MLAgents_v0/Core/IAgentSystem.cs
@@ -46,6 +46,6 @@ void SetFilter<T0, T1>(T0 filterA, T1 filterB)
         /// </summary>
         void ResetFilter();
 
-        int DecisionInterval { get; set; }
+        IDecisionRequester DecisionRequester { get; set; }
     }
 }