11
11
#include " Arm.h"
12
12
13
13
14
- #define NUM_PERTURBATIONS 2 * NUM_POLICY_FEATURES
14
+ #define NUM_PERTURBATIONS NUM_POLICY_FEATURES
15
15
16
16
extern ArmState startState;
17
17
extern ArmState currentState;
@@ -23,25 +23,31 @@ extern ArmState targetState;
23
23
#define P_C (parameters, index ) parameters[4 * index + 2u ]
24
24
#define P_MV (parameters, index ) parameters[4 * index + 3u ]
25
25
26
- float theta[NUM_POLICY_FEATURES] = {.5 };
26
+ float theta[NUM_POLICY_FEATURES] = {.5 , .5 , .5 , .5 ,
27
+ .5 , .5 , .5 , .5 ,
28
+ .5 , .5 , .5 , .5 ,
29
+ .5 , .5 , .5 , .5 ,
30
+ .5 , .5 , .5 , .5 ,
31
+ .5 , .5 , .5 , .5 };
27
32
float actingTheta[NUM_POLICY_FEATURES] = {0 };
28
33
float perturbations[NUM_PERTURBATIONS][NUM_POLICY_FEATURES] = {0 };
29
34
30
35
extern uint8_t jointRangeMin[];
31
36
extern uint8_t jointRnageMax[];
32
37
33
- float alpha = 0.001 ;
38
+ float alpha = DEFAULT_ALPHA ;
34
39
float rl_gamma = 0.9999999 ;
35
- float I;
40
+
41
+
36
42
37
43
void logPolicyParameters () {
38
44
logVector (theta, NUM_POLICY_FEATURES);
39
45
40
46
}
41
47
42
48
float evaluatePolicy () {
43
- ArmAction deltaToGoal;
44
49
moveSmoothlyTo (startState);
50
+ ArmAction deltaToGoal;
45
51
actionBetweenStates (currentState, targetState, deltaToGoal);
46
52
float equations[NUM_JOINTS][4 ];
47
53
uint32_t maxIterations = 0 ;
@@ -66,14 +72,14 @@ float evaluatePolicy() {
66
72
equations[j][2 ] = gamma;
67
73
68
74
const float velocityFactor = exp (P_MV (actingTheta, j)) + 1 ;
69
- const float iterations = 10 .0 * percentMax * velocityFactor;
75
+ const float iterations = 5 .0 * percentMax * velocityFactor;
70
76
equations[j][3 ] = ceil (iterations);
71
77
maxIterations = max (maxIterations, (uint32_t )(iterations));
72
78
73
79
// D_LOG_V("EQ", equations[j], 4);
74
80
75
81
}
76
- maxIterations = min (maxIterations, 2000 );
82
+ maxIterations = min (maxIterations, 2000u );
77
83
// D_LOG("iterations", maxIterations);
78
84
resetPowerMeasurement ();
79
85
@@ -106,11 +112,11 @@ void iterate() {
106
112
add (perturbations[i], theta, actingTheta, NUM_POLICY_FEATURES);
107
113
const float evaluation = evaluatePolicy ();
108
114
evaluations[i] = evaluation;
109
- D_LOG (" eval" , evaluation);
110
115
}
111
- float numUp[NUM_POLICY_FEATURES] = {0 };
112
- float numDown[NUM_POLICY_FEATURES] = {0 };
113
- float numNone[NUM_POLICY_FEATURES] = {0 };
116
+ D_LOG_V (" evals" , evaluations, NUM_PERTURBATIONS);
117
+ uint8_t numUp[NUM_POLICY_FEATURES] = {0 };
118
+ uint8_t numDown[NUM_POLICY_FEATURES] = {0 };
119
+ uint8_t numNone[NUM_POLICY_FEATURES] = {0 };
114
120
float averageUp[NUM_POLICY_FEATURES] = {0 };
115
121
float averageDown[NUM_POLICY_FEATURES] = {0 };
116
122
float averageNone[NUM_POLICY_FEATURES] = {0 };
@@ -121,28 +127,34 @@ void iterate() {
121
127
numUp[j] += 1 ;
122
128
averageUp[j] += evaluations[i];
123
129
} else if (direction < 0.0 ) {
124
- numDown[j] += 1 ;
125
- averageDown[j] += evaluations[i];
130
+ numDown[j] += 1 ;
131
+ averageDown[j] += evaluations[i];
126
132
} else {
127
- numNone[j] += 1 ;
128
- averageNone[j] += evaluations[i];
133
+ numNone[j] += 1 ;
134
+ averageNone[j] += evaluations[i];
129
135
}
130
136
}
131
137
}
132
138
133
- for (uint8_t i = 0 ; i < NUM_PERTURBATIONS; i++) {
134
- for (uint8_t j = 0 ; j < NUM_POLICY_FEATURES; j++) {
135
- if (numUp[j] > 0 ) {
136
- averageUp[j] /= numUp[j];
137
- }
138
- if (numDown[j] > 0 ) {
139
- averageDown[j] /= numDown[j];
140
- }
141
- if (numNone[j] > 0 ) {
142
- averageNone[j] /= numNone[j];
143
- }
139
+ for (uint8_t j = 0 ; j < NUM_POLICY_FEATURES; j++) {
140
+ if (numUp[j] > 0 ) {
141
+ averageUp[j] /= (float )numUp[j];
142
+ }
143
+ if (numDown[j] > 0 ) {
144
+ averageDown[j] /= (float )numDown[j];
145
+ }
146
+ if (numNone[j] > 0 ) {
147
+ averageNone[j] /= (float )numNone[j];
144
148
}
145
149
}
150
+
151
+ D_LOG_V (" nup" , numUp, NUM_POLICY_FEATURES);
152
+ D_LOG_V (" ndown" , numDown, NUM_POLICY_FEATURES);
153
+ D_LOG_V (" nnone" , numNone, NUM_POLICY_FEATURES);
154
+
155
+ D_LOG_V (" aup" , averageUp, NUM_POLICY_FEATURES);
156
+ D_LOG_V (" adown" , averageDown, NUM_POLICY_FEATURES);
157
+ D_LOG_V (" anone" , averageNone, NUM_POLICY_FEATURES);
146
158
147
159
float delta[NUM_POLICY_FEATURES] = {0 };
148
160
for (uint8_t j = 0 ; j < NUM_POLICY_FEATURES; j++) {
@@ -152,10 +164,18 @@ void iterate() {
152
164
delta[j] = averageUp[j] - averageDown[j];
153
165
}
154
166
}
167
+ D_LOG_V (" delta" , delta, NUM_POLICY_FEATURES);
155
168
norm (delta, NUM_POLICY_FEATURES);
156
- multiply (0.10 , delta, NUM_POLICY_FEATURES);
169
+ D_LOG_V (" norm" , delta, NUM_POLICY_FEATURES);
170
+ multiply (alpha, delta, NUM_POLICY_FEATURES);
171
+ D_LOG_V (" step" , delta, NUM_POLICY_FEATURES);
172
+
173
+ D_LOG_V (" theta" , theta, NUM_POLICY_FEATURES);
157
174
add (theta, delta, NUM_POLICY_FEATURES);
175
+ D_LOG_V (" new theta" , theta, NUM_POLICY_FEATURES);
176
+
158
177
copy (theta, actingTheta, NUM_POLICY_FEATURES);
178
+ D_LOG_V (" acting theta" , actingTheta, NUM_POLICY_FEATURES);
159
179
160
180
}
161
181
0 commit comments