Code: https://github.com/JohnLins/ConsciousCar/main/ino/ino.ino
Gradient Ascent Process:
// Number of entries in the instruction vector; every loop over the vector runs N times.
#define N 5
// Approximation of pi; gradient_ascent() uses 2*PI as the scale factor when normalising.
#define PI 3.14159
// NOTE(review): not referenced in this part of the file; presumably the number of
// gradient-ascent passes to run. Confirm at the use site.
#define ITERATIONS 10
// Constant factor multiplied into each term by reward() and partial_reward().
#define R 10
// NOTE(review): not referenced in this part of the file; presumably counts the passes
// completed so far. Confirm at the use site.
int iteration;
// Global instruction vector of N values.
double instructions[N];
/*
 * Feeds every instruction to the callback and returns the reward of the
 * instruction vector.
 *
 * The reward is the product of R * instruc[i] over all N entries;
 * partial_reward() is written as the per-entry derivative of this product.
 *
 * instruc: array of at least N instruction values (only read here).
 * f:       callback invoked once per entry, in index order, with the negated
 *          instruction value. Must not be NULL.
 *
 * Returns the product of the N scaled instructions.
 */
double reward(double* instruc, void (*f)(double)){
  /* A running product has to start at the multiplicative identity. Starting
     at 0 made the result 0 for every input. Accumulate in double so the value
     is not narrowed to float before being returned as double. */
  double out = 1.0;
  for (int i = 0; i < N; i++) {
    out *= R*instruc[i];
    (*f)(-instruc[i]);
  }
  return out;
}
/*
 * Partial derivative of the product reward with respect to one instruction.
 *
 * For the product of R * instruc[i] over all N entries, the derivative with
 * respect to entry k is R times the product of R * instruc[i] over every
 * i != k.
 *
 * instruc:               array of at least N instruction values (only read).
 * with_respect_to_index: index k of the entry to differentiate by. If it is
 *                        outside 0..N-1 no entry is treated as the variable
 *                        and the plain product is returned.
 *
 * Returns the value of the partial derivative at instruc.
 */
double partial_reward(double* instruc, int with_respect_to_index){
  /* Start the running product at 1. Starting at 0 forced every partial
     derivative to 0, so gradient ascent could never move the vector. */
  double out = 1.0;
  for (int i = 0; i < N; i++) {
    if (i == with_respect_to_index) {
      /* d/dx (R * x) = R */
      out *= R;
    } else {
      out *= R*instruc[i];
    }
  }
  return out;
}
/*
 * Performs one gradient-ascent step on the instruction vector and rescales
 * the result to Euclidean length 2*PI.
 *
 * instruc: array of at least N instruction values, updated in place.
 *
 * Each entry is moved by learning_rate times its partial derivative from
 * partial_reward(). The vector is then divided by its Euclidean norm and
 * multiplied by 2*PI. If the norm is zero or not a number the rescaling is
 * skipped, so the entries are never overwritten with NaN or infinity.
 */
void gradient_ascent(double* instruc){
  const double learning_rate = 0.005;

  /* Evaluate the whole gradient at the current point before changing any
     entry. Updating in place while still computing partials would evaluate
     later partials on a half-updated vector. */
  double gradient[N];
  for (int i = 0; i < N; i++) {
    gradient[i] = partial_reward(instruc, i);
  }

  double magnitude = 0;
  for (int i = 0; i < N; i++) {
    instruc[i] += learning_rate * gradient[i];
    /* Plain multiply keeps double precision; powf would round to float. */
    magnitude += instruc[i] * instruc[i];
  }
  magnitude = sqrt(magnitude);

  /* Written as a negated comparison so that NaN is rejected as well as 0. */
  if (!(magnitude > 0)) {
    return;
  }

  /* Normalise the same vector the magnitude was measured on. The previous
     code rescaled the global array here regardless of the argument. */
  for (int i = 0; i < N; i++) {
    instruc[i] = (instruc[i] / magnitude) * (2*PI);
  }
}