elite average -> trajectory[0]
thowell committed Feb 9, 2024
1 parent: 1ecabed · commit: 41505e1
Showing 2 changed files with 29 additions and 72 deletions.
mjpc/planners/cross_entropy/planner.cc (26 additions, 71 deletions)

@@ -106,11 +106,6 @@ void CrossEntropyPlanner::Allocate() {
     trajectory[i].Allocate(kMaxTrajectoryHorizon);
     candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon);
   }
-
-  // elite average trajectory
-  elite_avg.Initialize(num_state, model->nu, task->num_residual,
-                       task->num_trace, kMaxTrajectoryHorizon);
-  elite_avg.Allocate(kMaxTrajectoryHorizon);
 }

 // reset memory to zeros

@@ -143,7 +138,6 @@ void CrossEntropyPlanner::Reset(int horizon,
     trajectory[i].Reset(kMaxTrajectoryHorizon);
     candidate_policy[i].Reset(horizon);
   }
-  elite_avg.Reset(kMaxTrajectoryHorizon);

   for (const auto& d : data_) {
     mju_zero(d->ctrl, model->nu);

@@ -161,11 +155,6 @@ void CrossEntropyPlanner::SetState(const State& state) {

 // optimize nominal policy using random sampling
 void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
-  // check horizon
-  if (horizon != elite_avg.horizon) {
-    NominalTrajectory(horizon, pool);
-  }
-
   // if num_trajectory_ has changed, use it in this new iteration.
   // num_trajectory_ might change while this function runs. Keep it constant
   // for the duration of this function.

@@ -202,12 +191,13 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {

   // sort so that the first ncandidates elements are the best candidates, and
   // the rest are in an unspecified order
-  std::partial_sort(
-      trajectory_order.begin(), trajectory_order.begin() + num_trajectory,
-      trajectory_order.begin() + num_trajectory,
-      [&trajectory = trajectory](int a, int b) {
-        return trajectory[a].total_return < trajectory[b].total_return;
-      });
+  std::partial_sort(trajectory_order.begin() + 1,
+                    trajectory_order.begin() + 1 + num_trajectory,
+                    trajectory_order.begin() + 1 + num_trajectory,
+                    [&trajectory = trajectory](int a, int b) {
+                      return trajectory[a].total_return <
+                             trajectory[b].total_return;
+                    });

   // stop timer
   rollouts_compute_time = GetDuration(rollouts_start);
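
Note on the new call: std::partial_sort(first, middle, last) reorders the range so that [first, middle) holds the smallest elements in sorted order, which is all the planner needs to identify its elites. Starting the range at begin() + 1 leaves index 0, the noise-free nominal rollout, pinned in place. A minimal standalone sketch of the pattern, with toy returns in place of MJPC's trajectory data:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // toy total returns (costs; the ascending sort treats lower as better);
  // index 0 is the nominal rollout
  std::vector<double> total_return = {3.0, 1.5, 4.2, 0.7, 2.9};
  std::vector<int> order = {0, 1, 2, 3, 4};
  int n_elite = 2;

  // rank only the sampled rollouts (indices 1..N), keeping order[0] == 0
  std::partial_sort(
      order.begin() + 1, order.begin() + 1 + n_elite, order.end(),
      [&](int a, int b) { return total_return[a] < total_return[b]; });

  for (int i = 1; i <= n_elite; i++)
    printf("elite %d -> rollout %d (return %.1f)\n", i, order[i],
           total_return[order[i]]);
  return 0;
}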
@@ -220,66 +210,29 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   int num_spline_points = resampled_policy.num_spline_points;
   int num_parameters = resampled_policy.num_parameters;

-  // reset parameters scratch to zero
-  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);
-
-  // reset elite average
-  elite_avg.Reset(horizon);
+  // averaged return over elites
+  double avg_return = 0.0;

-  // set elite average trajectory times
-  for (int tt = 0; tt <= horizon; tt++) {
-    elite_avg.times[tt] = time + tt * model->opt.timestep;
-  }
-
-  // best elite
-  int idx = trajectory_order[0];
-
-  // add parameters
-  mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(),
-           num_parameters);
+  // reset parameters scratch
+  std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);

-  // copy first elite trajectory
-  mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(),
-           model->nu * (horizon - 1));
-  mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(),
-           trajectory[idx].dim_trace * horizon);
-  mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(),
-           elite_avg.dim_residual * horizon);
-  mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
-  elite_avg.total_return = trajectory[idx].total_return;
-
-  // loop over remaining elites to compute average
-  for (int i = 1; i < n_elite; i++) {
+  // loop over elites to compute average
+  for (int i = 0; i < n_elite; i++) {
     // ordered trajectory index
     int idx = trajectory_order[i];

     // add parameters
     mju_addTo(parameters_scratch.data(),
               candidate_policy[idx].parameters.data(), num_parameters);

-    // add elite trajectory
-    mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(),
-              model->nu * (horizon - 1));
-    mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(),
-              trajectory[idx].dim_trace * horizon);
-    mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(),
-              elite_avg.dim_residual * horizon);
-    mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
-    elite_avg.total_return += trajectory[idx].total_return;
+    // add total return
+    avg_return += trajectory[idx].total_return;
   }

   // normalize
   mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite,
           num_parameters);
-  mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite,
-          model->nu * (horizon - 1));
-  mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite,
-          elite_avg.dim_trace * horizon);
-  mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite,
-          elite_avg.dim_residual * horizon);
-  mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite,
-          horizon);
-  elite_avg.total_return /= n_elite;
+  avg_return /= n_elite;

   // loop over elites to compute variance
   std::fill(variance.begin(), variance.end(), 0.0);  // reset variance to zero
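
The update now keeps only two per-elite aggregates: the spline-parameter mean accumulated in parameters_scratch and the scalar average return; the variance loop that follows measures the elites' per-parameter spread around that mean. A self-contained sketch of the mean/variance computation, using hypothetical names (params, order) in place of the planner's members:

#include <algorithm>
#include <vector>

// mean and (biased) per-coordinate variance over the n_elite best samples
void EliteStats(const std::vector<std::vector<double>>& params,
                const std::vector<int>& order, int n_elite,
                std::vector<double>* mean, std::vector<double>* variance) {
  const int n = static_cast<int>(mean->size());
  std::fill(mean->begin(), mean->end(), 0.0);
  std::fill(variance->begin(), variance->end(), 0.0);

  // accumulate elite mean
  for (int i = 0; i < n_elite; i++) {
    const std::vector<double>& p = params[order[i]];
    for (int j = 0; j < n; j++) (*mean)[j] += p[j] / n_elite;
  }

  // accumulate elite spread around the mean
  for (int i = 0; i < n_elite; i++) {
    const std::vector<double>& p = params[order[i]];
    for (int j = 0; j < n; j++) {
      double d = p[j] - (*mean)[j];
      (*variance)[j] += d * d / n_elite;
    }
  }
}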
@@ -304,9 +257,8 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   }

   // improvement: compare nominal to elite average
-  improvement = mju_max(
-      elite_avg.total_return - trajectory[trajectory_order[0]].total_return,
-      0.0);
+  improvement =
+      mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0);

   // stop timer
   policy_update_compute_time = GetDuration(policy_update_start);
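
Since the sort above never touches trajectory_order[0], that entry still indexes the nominal rollout, matching the comment "compare nominal to elite average": the statistic is the clamped gap between the elites' average return and the nominal return. As plain C++, with std::max standing in for mju_max:

#include <algorithm>

// clamped gap between elite-average and nominal returns (returns are costs)
double Improvement(double avg_return, double nominal_return) {
  return std::max(avg_return - nominal_return, 0.0);
}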
@@ -321,8 +273,9 @@ void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
   };

   // rollout nominal policy
-  elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(),
-                    time, mocap.data(), userdata.data(), horizon);
+  trajectory[0].Rollout(nominal_policy, task, model, data_[0].get(),
+                        state.data(), time, mocap.data(), userdata.data(),
+                        horizon);
 }

 // set action from policy
@@ -428,7 +381,7 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
           s.resampled_policy.representation;

       // sample noise
-      s.AddNoiseToPolicy(i, std_min);
+      if (i > 0) s.AddNoiseToPolicy(i, std_min);
     }

     // ----- rollout sample policy ----- //
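
The added guard is what keeps candidate 0 noise-free: every other candidate receives zero-mean perturbations of its spline parameters, so the pool always contains one faithful copy of the resampled nominal policy. A hedged sketch of the pattern, with a hypothetical AddNoise helper standing in for AddNoiseToPolicy and a single stddev standing in for the planner's per-parameter deviations (which are floored at std_min):

#include <random>
#include <vector>

// perturb every candidate's parameters except index 0 (the nominal)
void AddNoise(std::vector<std::vector<double>>& candidate_params,
              double stddev, std::mt19937& gen) {
  std::normal_distribution<double> noise(0.0, stddev);
  for (int i = 1; i < static_cast<int>(candidate_params.size()); i++) {
    for (double& p : candidate_params[i]) p += noise(gen);
  }
}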
@@ -450,8 +403,10 @@
     pool.ResetCount();
   }

-// returns the nominal trajectory (this is the purple trace)
-const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; }
+// returns the **nominal** trajectory (this is the purple trace)
+const Trajectory* CrossEntropyPlanner::BestTrajectory() {
+  return &trajectory[0];
+}

 // visualize planner-specific traces
 void CrossEntropyPlanner::Traces(mjvScene* scn) {
mjpc/planners/cross_entropy/planner.h (3 additions, 1 deletion)

@@ -111,7 +111,6 @@ class CrossEntropyPlanner : public Planner {

   // trajectories
   Trajectory trajectory[kMaxTrajectory];
-  Trajectory elite_avg;

   // order of indices of rolled out trajectories, ordered by total return
   std::vector<int> trajectory_order;

@@ -139,6 +138,9 @@

   int num_trajectory_;
   mutable std::shared_mutex mtx_;
+
+  // elite average index
+  const int elite_average_index_ = 0;
 };

 }  // namespace mjpc
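
Taken together, the commit flattens the planner's data flow: trajectory[0] always holds the noise-free nominal rollout, samples 1..N-1 are perturbed copies, the sort ranks only the samples, and the elite statistics reduce to a parameter mean plus a scalar average return. The toy loop below reproduces that shape on a 1-D quadratic cost; it is illustrative only, and its elite indexing and noise floor are simplifications, not MJPC code:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

int main() {
  std::mt19937 gen(0);
  double mean = 5.0, stddev = 2.0;  // the "policy": one spline parameter
  const int num_samples = 16, n_elite = 4;

  for (int iter = 0; iter < 25; iter++) {
    std::vector<double> x(num_samples), cost(num_samples);
    std::normal_distribution<double> noise(0.0, stddev);

    x[0] = mean;  // sample 0: nominal, no noise
    for (int i = 1; i < num_samples; i++) x[i] = mean + noise(gen);
    for (int i = 0; i < num_samples; i++) cost[i] = x[i] * x[i];  // "rollout"

    // rank the perturbed samples; index 0 stays put, like trajectory[0]
    std::vector<int> order(num_samples);
    for (int i = 0; i < num_samples; i++) order[i] = i;
    std::partial_sort(order.begin() + 1, order.begin() + 1 + n_elite,
                      order.end(),
                      [&](int a, int b) { return cost[a] < cost[b]; });

    // elite mean and variance over the best perturbed samples
    double m = 0.0, v = 0.0;
    for (int i = 1; i <= n_elite; i++) m += x[order[i]] / n_elite;
    for (int i = 1; i <= n_elite; i++) {
      double d = x[order[i]] - m;
      v += d * d / n_elite;
    }
    mean = m;
    stddev = std::max(std::sqrt(v), 0.05);  // floor, in the spirit of std_min
  }
  printf("final mean: %.4f (optimum is 0)\n", mean);
  return 0;
}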
