10
10
#include " Debug.h"
11
11
#include " Arm.h"
12
12
#include " Gaussian.h"
13
-
13
+ # include " Output.h "
14
14
15
15
#define NUM_PERTURBATIONS NUM_POLICY_FEATURES
16
/* Multiplier applied to the per-joint iteration count in evaluatePolicy()
 * (iterations = BASE_SPEED * percentMax * (1.0 / velocityFactor)). */
#define BASE_SPEED 5.0f
16
17
17
18
extern ArmState startState;
18
19
extern ArmState currentState;
@@ -25,14 +26,14 @@ extern ArmState targetState;
25
26
/* Element at offset 3 of the 4-wide parameter group selected by `index`
 * (evaluatePolicy() feeds this value into its velocity factor).
 * Both arguments and the whole expansion are parenthesized so expression
 * arguments such as `j + 1` select the intended element; the result is still
 * an lvalue. */
#define P_MV(parameters, index) ((parameters)[4 * (index) + 3u])
26
27
27
28
float theta[NUM_POLICY_FEATURES] = {0 };
29
+ float bestWeights[NUM_POLICY_FEATURES] = {0.05 , -0.02 , 0.95 , 4.21 , 0.06 , 0.04 , 1.13 , 4.13 , 0.15 , 0.15 , 1.18 , 4.05 , -0.06 , 0.10 , 1.27 , 4.03 , -0.15 , 0.18 , 1.23 , 4.05 , 0.03 , -0.05 , 1.01 , 3.98 };
28
30
float actingTheta[NUM_POLICY_FEATURES] = {0 };
29
31
float perturbations[NUM_PERTURBATIONS][NUM_POLICY_FEATURES] = {0 };
30
32
31
33
extern uint8_t jointRangeMin[];
32
34
extern uint8_t jointRnageMax[];
33
35
34
36
float alpha = DEFAULT_ALPHA;
35
- float rl_gamma = 0.9999999 ;
36
37
37
38
38
39
@@ -42,24 +43,34 @@ void logPolicyParameters() {
42
43
}
43
44
44
45
float evaluatePolicy () {
46
+ chirpN (2 , 20 );
45
47
moveSmoothlyTo (startState);
48
+ chirpN (1 , 20 );
46
49
ArmAction deltaToGoal;
47
50
actionBetweenStates (currentState, targetState, deltaToGoal);
48
51
49
52
float equations[NUM_JOINTS][4 ];
50
53
uint32_t maxIterations = 0 ;
51
54
for (uint16_t j = 0 ; j < NUM_JOINTS; j++) {
52
55
53
- float a = exp (P_A (actingTheta, j));
54
- float b = exp (P_B (actingTheta, j));
55
- float c = exp (P_C (actingTheta, j));
56
+ float a = P_A (actingTheta, j);
57
+ float b = P_B (actingTheta, j);
58
+ float c = P_C (actingTheta, j);
59
+
56
60
57
61
const float target = deltaToGoal.jointDeltas [j];
58
- const float sum = a + b + c;
62
+ float sum = a + b + c;
63
+ if (sum == 0 ) {
64
+ a = 0 ;
65
+ b = 0 ;
66
+ c = 1 ;
67
+ sum = 1 ;
68
+ }
59
69
const float alpha = (a / sum) * target;
60
70
const float beta = (b / sum) * target;
61
71
const float gamma = (c / sum) * target;
62
72
73
+
63
74
float maximizingT;
64
75
const float curveMaxVelocity = maximizeQuadratic (3.0 * alpha, 2.0 * beta, gamma, maximizingT);
65
76
const float percentMax = fabs (curveMaxVelocity) / 10.0 ;
@@ -71,8 +82,11 @@ float evaluatePolicy() {
71
82
equations[j][1 ] = beta;
72
83
equations[j][2 ] = gamma;
73
84
74
- const float velocityFactor = exp (P_MV (actingTheta, j)) + 1 ;
75
- const float iterations = 5.0 * percentMax * velocityFactor;
85
+ // What percentage of the maximum speed should we go?
86
+ const float velocityFactor = 1.0 / (exp (-P_MV (actingTheta, j)) + 1.0 );
87
+ // If the policy has less than max speed, iterations should be lower.
88
+ // If the velocity factor is anything less than 1, the number of iterations should be higher.
89
+ const float iterations = BASE_SPEED * percentMax * (1.0 / velocityFactor);
76
90
equations[j][3 ] = ceil (iterations);
77
91
maxIterations = max (maxIterations, (uint32_t )(iterations));
78
92
@@ -87,10 +101,11 @@ float evaluatePolicy() {
87
101
for (uint32_t i = 0 ; i <= maxIterations; i++) {
88
102
for (uint8_t j = 0 ; j < NUM_JOINTS; j++) {
89
103
float * e = equations[j];
104
+ const float t = (float ) i/ e[3 ];
90
105
if ((float )i <= e[3 ]) {
91
106
92
107
const float firstSample = currentState.jointAngles [j];
93
- const float nextSample = cubic (e[0 ],e[1 ],e[2 ], startState.jointAngles [j], ( float ) i/ e[ 3 ] );
108
+ const float nextSample = cubic (e[0 ],e[1 ],e[2 ], startState.jointAngles [j], t );
94
109
const int8_t delta = max (min (nextSample - firstSample, 126 ), -126 );
95
110
movement.jointDeltas [j] = delta;
96
111
} else {
@@ -102,6 +117,7 @@ float evaluatePolicy() {
102
117
103
118
delay (30 );
104
119
}
120
+ // D_LOG_V("END",currentState.jointAngles, 6);
105
121
return -1.0 * getPowerUsage ();
106
122
}
107
123
@@ -182,13 +198,20 @@ void randomlyInitializeWeights() {
182
198
183
199
void initializeLinearWeights () {
184
200
for (uint8_t j = 0 ; j < NUM_POLICY_FEATURES; j++) {
185
- if (j % 3 == 0 ) {
201
+ // Make gamma 1
202
+ if ((j + 2 ) % 4 == 0 ) {
186
203
theta[j] = 1.0 ;
204
+ } else if ((j + 1 ) % 4 == 0 ) {
205
+ // Make delta 4, effectively maxing out the velocity factor.
206
+ theta[j] = 4.0 ;
187
207
} else {
188
208
theta[j] = 0.0 ;
189
209
}
190
210
}
191
211
}
192
212
193
213
void initializeBestWeights () {
214
+ for (uint8_t j = 0 ; j < NUM_POLICY_FEATURES; j++) {
215
+ theta[j] = bestWeights[j];
216
+ }
194
217
}
0 commit comments