nickswalker
diff --git a/SixJoint/Arm.cpp b/SixJoint/Arm.cpp
Lines changed: 4 additions & 7 deletions
diff --git a/SixJoint/Gaussian.h b/SixJoint/Gaussian.h
Lines changed: 3 additions & 2 deletions
diff --git a/SixJoint/Learning.cpp b/SixJoint/Learning.cpp
Lines changed: 33 additions & 10 deletions
diff --git a/SixJoint/Learning.h b/SixJoint/Learning.h
Lines changed: 1 addition & 1 deletion
diff --git a/SixJoint/SixJoint.ino b/SixJoint/SixJoint.ino
Lines changed: 13 additions & 23 deletions
@@ -23,8 +23,7 @@ void resetArm() {
     for (uint i = 0; i < NUM_JOINTS; i++) {
       middlePosition.jointAngles[i] = ((uint16_t)jointRangeMin[i] + (uint16_t)jointRangeMax[i]) / 2;
     }
-    // Could have to move quite a bit to reset.
-    delay(700);
+    moveSmoothlyTo(middlePosition);
     currentState = previousState = middlePosition;
 
 }
@@ -48,11 +47,9 @@ void moveSmoothlyTo(ArmState& targetState) {
 }
 
 void resetArmToRandomPosition() {
-    ArmAction randomPosition;
-    chooseRandomAction(randomPosition);
-    apply(randomPosition);
-    // Could have to move quite a bit to reset.
-    delay(700);
+    ArmState randomPosition;
+    chooseRandomState(randomPosition);
+    moveSmoothlyTo(randomPosition);
     previousState = currentState;
 
 }
 
@@ -1,9 +1,10 @@
 #ifndef GAUSSIAN_H
 #define GAUSSIAN_H
+#include "Debug.h"
 
+const uint16_t intMax = ~0;
 float randomf(const float min, const float max) {
-  uint32_t intMax = ~0u;
-  float normalized = random(intMax) / float(intMax);
+  float normalized = float(random(intMax)) / float(intMax);
   return min + normalized * (max - min);
 }
 
 
@@ -10,9 +10,10 @@
 #include "Debug.h"
 #include "Arm.h"
 #include "Gaussian.h"
-
+#include "Output.h"
 
 #define NUM_PERTURBATIONS NUM_POLICY_FEATURES
+#define BASE_SPEED 5.0f
 
 extern ArmState startState;
 extern ArmState currentState;
@@ -25,14 +26,14 @@ extern ArmState targetState;
 #define P_MV(parameters, index) parameters[4 * index + 3u]
 
 float theta[NUM_POLICY_FEATURES] = {0};
+float bestWeights[NUM_POLICY_FEATURES] = {0.05, -0.02, 0.95, 4.21, 0.06, 0.04, 1.13, 4.13, 0.15, 0.15, 1.18, 4.05, -0.06, 0.10, 1.27, 4.03, -0.15, 0.18, 1.23, 4.05, 0.03, -0.05, 1.01, 3.98};
 float actingTheta[NUM_POLICY_FEATURES] = {0};
 float perturbations[NUM_PERTURBATIONS][NUM_POLICY_FEATURES] = {0};
 
 extern uint8_t jointRangeMin[];
 extern uint8_t jointRnageMax[];
 
 float alpha = DEFAULT_ALPHA;
-float rl_gamma = 0.9999999;
 
 
 
@@ -42,24 +43,34 @@ void logPolicyParameters() {
 }
 
 float evaluatePolicy() {
+  chirpN(2, 20);
   moveSmoothlyTo(startState);
+  chirpN(1, 20);
   ArmAction deltaToGoal;
   actionBetweenStates(currentState, targetState, deltaToGoal);
 
   float equations[NUM_JOINTS][4];
    uint32_t maxIterations = 0;
    for (uint16_t j = 0; j < NUM_JOINTS; j++) {
 
-      float a = exp(P_A(actingTheta, j));
-      float b = exp(P_B(actingTheta, j));
-      float c = exp(P_C(actingTheta, j));
+      float a = P_A(actingTheta, j);
+      float b = P_B(actingTheta, j);
+      float c = P_C(actingTheta, j);
+
 
       const float target = deltaToGoal.jointDeltas[j];
-      const float sum = a + b + c;
+      float sum = a + b + c;
+      if (sum == 0) {
+        a = 0;
+        b = 0;
+        c = 1;
+        sum = 1;
+      }
       const float alpha = (a / sum) * target;
       const float beta = (b / sum) * target;
       const float gamma = (c / sum) * target;
 
+
       float maximizingT;
       const float curveMaxVelocity = maximizeQuadratic(3.0 * alpha, 2.0 * beta, gamma, maximizingT);
       const float percentMax =  fabs(curveMaxVelocity) / 10.0;
@@ -71,8 +82,11 @@ float evaluatePolicy() {
       equations[j][1] = beta;
       equations[j][2] = gamma;
 
-      const float velocityFactor = exp(P_MV(actingTheta, j)) + 1;
-      const float iterations = 5.0 * percentMax * velocityFactor;
+      // What percentage of the maximum speed should we go?
+      const float velocityFactor = 1.0 / (exp(-P_MV(actingTheta, j)) + 1.0);
+      // Iterations scale with percentMax: a curve whose peak velocity is a smaller share of the maximum needs fewer iterations.
+      // Dividing by the velocity factor (at most 1) raises the iteration count, i.e. a lower factor means a slower motion.
+      const float iterations = BASE_SPEED * percentMax * (1.0 / velocityFactor);
       equations[j][3] = ceil(iterations);
       maxIterations = max(maxIterations, (uint32_t)(iterations));
 
@@ -87,10 +101,11 @@ float evaluatePolicy() {
     for (uint32_t i = 0; i <= maxIterations; i++) {
       for (uint8_t j = 0; j < NUM_JOINTS; j++) {
         float* e = equations[j];
+        const float t = (float) i/ e[3];
         if ((float)i <= e[3]) {
 
           const float firstSample = currentState.jointAngles[j];
-          const float nextSample = cubic(e[0],e[1],e[2], startState.jointAngles[j], (float) i/ e[3]);
+          const float nextSample = cubic(e[0],e[1],e[2], startState.jointAngles[j], t);
           const int8_t delta = max(min(nextSample - firstSample, 126), -126);
           movement.jointDeltas[j] = delta;
         } else {
@@ -102,6 +117,7 @@ float evaluatePolicy() {
 
       delay(30);
   }
+  //D_LOG_V("END",currentState.jointAngles, 6);
   return -1.0 * getPowerUsage();
 }
 
@@ -182,13 +198,20 @@ void randomlyInitializeWeights() {
 
 void initializeLinearWeights() {
   for (uint8_t j = 0; j < NUM_POLICY_FEATURES; j++) {
-    if (j % 3 == 0) {
+    // Make gamma 1
+    if ((j + 2) % 4 == 0) {
       theta[j] = 1.0;
+    } else if ((j + 1) % 4 == 0) {
+      // Make delta 4, effectively maxing out the velocity factor.
+      theta[j] = 4.0;
     } else {
       theta[j] = 0.0;
     }
   }
 }
 
 void initializeBestWeights() {
+  for (uint8_t j = 0; j < NUM_POLICY_FEATURES; j++) {
+    theta[j] = bestWeights[j];
+  }
 }
@@ -5,7 +5,7 @@
 
 #define NUM_POLICY_FEATURES NUM_JOINTS * 4
 #define PERTURBATION_STEP 0.1
-#define DEFAULT_ALPHA 0.1
+#define DEFAULT_ALPHA 0.05
 
 typedef struct ArmState {
     uint8_t jointAngles[NUM_JOINTS];
 
@@ -7,9 +7,6 @@
 #include "Strings.h"
 #include "Output.h"
 
-#define EVALUATION_MODE 1
-#define EVALUATION_SWITCH_POINT 100
-
 extern float alpha;
 
 extern const char spaceString[];
@@ -51,6 +48,8 @@ void setup() {
     moveSmoothlyTo(startState);
     randomlyInitializeWeights();
     initializeLinearWeights();
+    //initializeBestWeights();
+    logPolicyParameters();
 }
 
 void logEpisodeInformation(float totalReturn) {
@@ -83,6 +82,11 @@ bool checkForResetSignal(){
 
 
 void loop() { 
+    /*while(true) {
+      const float result = evaluatePolicy();
+      Serial.println(result);
+    }*/
+  
     if (checkForResetSignal()) {
         chirpN(3,5);
         resetArmToRandomPosition();
@@ -91,26 +95,12 @@ void loop() {
         markTrialStart();
     }
 
-    if (EVALUATION_MODE && currentEpisode > EVALUATION_SWITCH_POINT) {
-        chirp();
-        alpha = 0.0;
-        float totalReturn = evaluatePolicy();
-        logEpisodeInformation(totalReturn);
-        markEpisodeEnd();
-        currentEpisode += 1;
-         
-    } else {
-      chirpN(2,10);
-      iterate();
-      float totalReturn = evaluatePolicy();
-      logEpisodeInformation(totalReturn);
-      markEpisodeEnd();
-      currentEpisode += 1;
-    }
-
-    
-
-    
+    chirpN(5,20);
+    iterate();
+    float totalReturn = evaluatePolicy();
+    logEpisodeInformation(totalReturn);
+    markEpisodeEnd();
+    currentEpisode += 1;
 
 }
Original file line number	Diff line number	Diff line change
`@@ -23,8 +23,7 @@ void resetArm() {`
`23`	`23`	`for (uint i = 0; i < NUM_JOINTS; i++) {`
`24`	`24`	`middlePosition.jointAngles[i] = ((uint16_t)jointRangeMin[i] + (uint16_t)jointRangeMax[i]) / 2;`
`25`	`25`	`}`
`26`		`- // Could have to move quite a bit to reset.`
`27`		`- delay(700);`
	`26`	`+ moveSmoothlyTo(middlePosition);`
`28`	`27`	`currentState = previousState = middlePosition;`
`29`	`28`
`30`	`29`	`}`
`@@ -48,11 +47,9 @@ void moveSmoothlyTo(ArmState& targetState) {`
`48`	`47`	`}`
`49`	`48`
`50`	`49`	`void resetArmToRandomPosition() {`
`51`		`- ArmAction randomPosition;`
`52`		`- chooseRandomAction(randomPosition);`
`53`		`- apply(randomPosition);`
`54`		`- // Could have to move quite a bit to reset.`
`55`		`- delay(700);`
	`50`	`+ ArmState randomPosition;`
	`51`	`+ chooseRandomState(randomPosition);`
	`52`	`+ moveSmoothlyTo(randomPosition);`
`56`	`53`	`previousState = currentState;`
`57`	`54`
`58`	`55`	`}`