Skip to content

Commit aee7319

Browse files
committed
Adjust plotting code for sixjoint use
1 parent 8e2eea7 commit aee7319

File tree

9 files changed

+275
-213
lines changed

9 files changed

+275
-213
lines changed

SixJoint/Arm.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ void resetArm() {
2323
for (uint i = 0; i < NUM_JOINTS; i++) {
2424
middlePosition.jointAngles[i] = ((uint16_t)jointRangeMin[i] + (uint16_t)jointRangeMax[i]) / 2;
2525
}
26-
// Could have to move quite a bit to reset.
27-
delay(700);
26+
moveSmoothlyTo(middlePosition);
2827
currentState = previousState = middlePosition;
2928

3029
}
@@ -48,11 +47,9 @@ void moveSmoothlyTo(ArmState& targetState) {
4847
}
4948

5049
void resetArmToRandomPosition() {
51-
ArmAction randomPosition;
52-
chooseRandomAction(randomPosition);
53-
apply(randomPosition);
54-
// Could have to move quite a bit to reset.
55-
delay(700);
50+
ArmState randomPosition;
51+
chooseRandomState(randomPosition);
52+
moveSmoothlyTo(randomPosition);
5653
previousState = currentState;
5754

5855
}

SixJoint/Gaussian.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#ifndef GAUSSIAN_H
22
#define GAUSSIAN_H
3+
#include "Debug.h"
34

5+
const uint16_t intMax = ~0;
46
float randomf(const float min, const float max) {
5-
uint32_t intMax = ~0u;
6-
float normalized = random(intMax) / float(intMax);
7+
float normalized = float(random(intMax)) / float(intMax);
78
return min + normalized * (max - min);
89
}
910

SixJoint/Learning.cpp

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
#include "Debug.h"
1111
#include "Arm.h"
1212
#include "Gaussian.h"
13-
13+
#include "Output.h"
1414

1515
#define NUM_PERTURBATIONS NUM_POLICY_FEATURES
16+
#define BASE_SPEED 5.0f
1617

1718
extern ArmState startState;
1819
extern ArmState currentState;
@@ -25,14 +26,14 @@ extern ArmState targetState;
2526
#define P_MV(parameters, index) parameters[4 * index + 3u]
2627

2728
float theta[NUM_POLICY_FEATURES] = {0};
29+
float bestWeights[NUM_POLICY_FEATURES] = {0.05, -0.02, 0.95, 4.21, 0.06, 0.04, 1.13, 4.13, 0.15, 0.15, 1.18, 4.05, -0.06, 0.10, 1.27, 4.03, -0.15, 0.18, 1.23, 4.05, 0.03, -0.05, 1.01, 3.98};
2830
float actingTheta[NUM_POLICY_FEATURES] = {0};
2931
float perturbations[NUM_PERTURBATIONS][NUM_POLICY_FEATURES] = {0};
3032

3133
extern uint8_t jointRangeMin[];
3234
extern uint8_t jointRnageMax[];
3335

3436
float alpha = DEFAULT_ALPHA;
35-
float rl_gamma = 0.9999999;
3637

3738

3839

@@ -42,24 +43,34 @@ void logPolicyParameters() {
4243
}
4344

4445
float evaluatePolicy() {
46+
chirpN(2, 20);
4547
moveSmoothlyTo(startState);
48+
chirpN(1, 20);
4649
ArmAction deltaToGoal;
4750
actionBetweenStates(currentState, targetState, deltaToGoal);
4851

4952
float equations[NUM_JOINTS][4];
5053
uint32_t maxIterations = 0;
5154
for (uint16_t j = 0; j < NUM_JOINTS; j++) {
5255

53-
float a = exp(P_A(actingTheta, j));
54-
float b = exp(P_B(actingTheta, j));
55-
float c = exp(P_C(actingTheta, j));
56+
float a = P_A(actingTheta, j);
57+
float b = P_B(actingTheta, j);
58+
float c = P_C(actingTheta, j);
59+
5660

5761
const float target = deltaToGoal.jointDeltas[j];
58-
const float sum = a + b + c;
62+
float sum = a + b + c;
63+
if (sum == 0) {
64+
a = 0;
65+
b = 0;
66+
c = 1;
67+
sum = 1;
68+
}
5969
const float alpha = (a / sum) * target;
6070
const float beta = (b / sum) * target;
6171
const float gamma = (c / sum) * target;
6272

73+
6374
float maximizingT;
6475
const float curveMaxVelocity = maximizeQuadratic(3.0 * alpha, 2.0 * beta, gamma, maximizingT);
6576
const float percentMax = fabs(curveMaxVelocity) / 10.0;
@@ -71,8 +82,11 @@ float evaluatePolicy() {
7182
equations[j][1] = beta;
7283
equations[j][2] = gamma;
7384

74-
const float velocityFactor = exp(P_MV(actingTheta, j)) + 1;
75-
const float iterations = 5.0 * percentMax * velocityFactor;
85+
// What percentage of the maximum speed should we go?
86+
const float velocityFactor = 1.0 / (exp(-P_MV(actingTheta, j)) + 1.0);
87+
// If the policy has less than max speed, iterations should be lower.
88+
// If the velocity factor is anything less than 1, the number of iterations should be higher.
89+
const float iterations = BASE_SPEED * percentMax * (1.0 / velocityFactor);
7690
equations[j][3] = ceil(iterations);
7791
maxIterations = max(maxIterations, (uint32_t)(iterations));
7892

@@ -87,10 +101,11 @@ float evaluatePolicy() {
87101
for (uint32_t i = 0; i <= maxIterations; i++) {
88102
for (uint8_t j = 0; j < NUM_JOINTS; j++) {
89103
float* e = equations[j];
104+
const float t = (float) i/ e[3];
90105
if ((float)i <= e[3]) {
91106

92107
const float firstSample = currentState.jointAngles[j];
93-
const float nextSample = cubic(e[0],e[1],e[2], startState.jointAngles[j], (float) i/ e[3]);
108+
const float nextSample = cubic(e[0],e[1],e[2], startState.jointAngles[j], t);
94109
const int8_t delta = max(min(nextSample - firstSample, 126), -126);
95110
movement.jointDeltas[j] = delta;
96111
} else {
@@ -102,6 +117,7 @@ float evaluatePolicy() {
102117

103118
delay(30);
104119
}
120+
//D_LOG_V("END",currentState.jointAngles, 6);
105121
return -1.0 * getPowerUsage();
106122
}
107123

@@ -182,13 +198,20 @@ void randomlyInitializeWeights() {
182198

183199
void initializeLinearWeights() {
184200
for (uint8_t j = 0; j < NUM_POLICY_FEATURES; j++) {
185-
if (j % 3 == 0) {
201+
// Make gamma 1
202+
if ((j + 2) % 4 == 0) {
186203
theta[j] = 1.0;
204+
} else if ((j + 1) % 4 == 0) {
205+
// Make delta 4, effectively maxing out the velocity factor.
206+
theta[j] = 4.0;
187207
} else {
188208
theta[j] = 0.0;
189209
}
190210
}
191211
}
192212

193213
void initializeBestWeights() {
214+
for (uint8_t j = 0; j < NUM_POLICY_FEATURES; j++) {
215+
theta[j] = bestWeights[j];
216+
}
194217
}

SixJoint/Learning.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
#define NUM_POLICY_FEATURES NUM_JOINTS * 4
77
#define PERTURBATION_STEP 0.1
8-
#define DEFAULT_ALPHA 0.1
8+
#define DEFAULT_ALPHA 0.05
99

1010
typedef struct ArmState {
1111
uint8_t jointAngles[NUM_JOINTS];

SixJoint/SixJoint.ino

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
#include "Strings.h"
88
#include "Output.h"
99

10-
#define EVALUATION_MODE 1
11-
#define EVALUATION_SWITCH_POINT 100
12-
1310
extern float alpha;
1411

1512
extern const char spaceString[];
@@ -51,6 +48,8 @@ void setup() {
5148
moveSmoothlyTo(startState);
5249
randomlyInitializeWeights();
5350
initializeLinearWeights();
51+
//initializeBestWeights();
52+
logPolicyParameters();
5453
}
5554

5655
void logEpisodeInformation(float totalReturn) {
@@ -83,6 +82,11 @@ bool checkForResetSignal(){
8382

8483

8584
void loop() {
85+
/*while(true) {
86+
const float result = evaluatePolicy();
87+
Serial.println(result);
88+
}*/
89+
8690
if (checkForResetSignal()) {
8791
chirpN(3,5);
8892
resetArmToRandomPosition();
@@ -91,26 +95,12 @@ void loop() {
9195
markTrialStart();
9296
}
9397

94-
if (EVALUATION_MODE && currentEpisode > EVALUATION_SWITCH_POINT) {
95-
chirp();
96-
alpha = 0.0;
97-
float totalReturn = evaluatePolicy();
98-
logEpisodeInformation(totalReturn);
99-
markEpisodeEnd();
100-
currentEpisode += 1;
101-
102-
} else {
103-
chirpN(2,10);
104-
iterate();
105-
float totalReturn = evaluatePolicy();
106-
logEpisodeInformation(totalReturn);
107-
markEpisodeEnd();
108-
currentEpisode += 1;
109-
}
110-
111-
112-
113-
98+
chirpN(5,20);
99+
iterate();
100+
float totalReturn = evaluatePolicy();
101+
logEpisodeInformation(totalReturn);
102+
markEpisodeEnd();
103+
currentEpisode += 1;
114104

115105
}
116106

0 commit comments

Comments
 (0)