Commit 679cbeb: Merge branch 'main' into python_examples

thowell authored Feb 17, 2024
2 parents 450d723 + 48b8b5f
Showing 26 changed files with 655 additions and 917 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -61,7 +61,7 @@ set(MUJOCO_MPC_MUJOCO_GIT_TAG
)

set(MUJOCO_MPC_MENAGERIE_GIT_TAG
aef3ee5c07ea51506e893a62fd832773ff0162c8
94ea114fa8c60a0fd542c8e1ffeb997204acbea2
CACHE STRING "Git revision for MuJoCo Menagerie."
)

20 changes: 15 additions & 5 deletions mjpc/CMakeLists.txt
@@ -186,23 +186,33 @@ if(APPLE)
target_link_libraries(mjpc "-framework Cocoa")
endif()

add_executable(
testspeed
add_library(
libtestspeed STATIC
testspeed_app.cc
testspeed.h
testspeed.cc
)
target_link_libraries(
testspeed
absl::flags
absl::flags_parse
libtestspeed
absl::random_random
absl::strings
libmjpc
mujoco::mujoco
threadpool
Threads::Threads
)
target_include_directories(libtestspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)

add_executable(
testspeed
testspeed_app.cc
)
target_link_libraries(
testspeed
libtestspeed
absl::flags
absl::flags_parse
)
target_include_directories(testspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
target_compile_options(testspeed PUBLIC ${MJPC_COMPILE_OPTIONS})
target_link_options(testspeed PRIVATE ${MJPC_LINK_OPTIONS})
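
Note: this hunk splits the speed test into a reusable static library (libtestspeed, built from testspeed.h and testspeed.cc) plus a thin testspeed executable that only adds Abseil flag parsing. Below is a minimal sketch of a separate driver reusing the library; the mjpc::TestSpeed signature, task name, and parameter values are assumptions for illustration and are not taken from this diff.

// Hypothetical driver reusing libtestspeed directly (illustrative sketch,
// not part of this commit). The mjpc::TestSpeed signature is assumed;
// consult mjpc/testspeed.h for the actual declaration.
#include <string>

#include "mjpc/testspeed.h"

int main() {
  const std::string task = "Cartpole";     // assumed task id
  const int planner_threads = 4;           // worker threads for planning
  const int steps_per_planning_step = 10;  // physics steps per replan
  const double total_time = 10.0;          // simulated seconds to run
  return mjpc::TestSpeed(task, planner_threads, steps_per_planning_step,
                         total_time);
}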
26 changes: 24 additions & 2 deletions mjpc/grpc/ui_agent_service.cc
@@ -35,12 +35,16 @@ namespace mjpc::agent_grpc {

using ::agent::GetActionRequest;
using ::agent::GetActionResponse;
using ::agent::GetModeRequest;
using ::agent::GetModeResponse;
using ::agent::GetAllModesRequest;
using ::agent::GetAllModesResponse;
using ::agent::GetBestTrajectoryRequest;
using ::agent::GetBestTrajectoryResponse;
using ::agent::GetResidualsRequest;
using ::agent::GetResidualsResponse;
using ::agent::GetCostValuesAndWeightsRequest;
using ::agent::GetCostValuesAndWeightsResponse;
using ::agent::GetModeRequest;
using ::agent::GetModeResponse;
using ::agent::GetStateRequest;
using ::agent::GetStateResponse;
using ::agent::GetTaskParametersRequest;
@@ -222,6 +226,24 @@ grpc::Status UiAgentService::GetMode(grpc::ServerContext* context,
});
}

grpc::Status UiAgentService::GetAllModes(grpc::ServerContext* context,
const GetAllModesRequest* request,
GetAllModesResponse* response) {
return RunBeforeStep(
context, [request, response](mjpc::Agent* agent, const mjModel* model,
mjData* data) {
return grpc_agent_util::GetAllModes(request, agent, response);
});
}

grpc::Status UiAgentService::GetBestTrajectory(
grpc::ServerContext* context, const GetBestTrajectoryRequest* request,
GetBestTrajectoryResponse* response) {
// TODO - Implement.
return {grpc::StatusCode::UNIMPLEMENTED,
"GetBestTrajectory is not implemented."};
}

grpc::Status UiAgentService::SetAnything(grpc::ServerContext* context,
const SetAnythingRequest* request,
SetAnythingResponse* response) {
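
Note: alongside the handlers above, this commit exposes GetAllModes on the agent::Agent gRPC service, while GetBestTrajectory is declared but returns UNIMPLEMENTED in the UI server. A hedged client sketch follows; the server address and the mode_names response field are assumptions for illustration and should be checked against agent.proto.

// Hypothetical client sketch calling the new GetAllModes RPC (illustrative
// only). Assumes an agent server listening on localhost:35841 and that
// GetAllModesResponse exposes a repeated `mode_names` string field.
#include <iostream>
#include <memory>
#include <string>

#include <grpcpp/grpcpp.h>

#include "mjpc/grpc/agent.grpc.pb.h"

int main() {
  auto channel = grpc::CreateChannel("localhost:35841",
                                     grpc::InsecureChannelCredentials());
  std::unique_ptr<agent::Agent::Stub> stub = agent::Agent::NewStub(channel);

  grpc::ClientContext context;
  agent::GetAllModesRequest request;
  agent::GetAllModesResponse response;
  grpc::Status status = stub->GetAllModes(&context, request, &response);
  if (!status.ok()) {
    std::cerr << "GetAllModes failed: " << status.error_message() << "\n";
    return 1;
  }
  for (const std::string& mode : response.mode_names()) {
    std::cout << mode << "\n";
  }
  return 0;
}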
12 changes: 12 additions & 0 deletions mjpc/grpc/ui_agent_service.h
@@ -15,13 +15,16 @@
#ifndef MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_
#define MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_

#include <absl/functional/any_invocable.h>
#include <grpcpp/server_context.h>
#include <grpcpp/support/status.h>
#include <mujoco/mujoco.h>

#include <mjpc/agent.h>
#include <mjpc/grpc/agent.grpc.pb.h>
#include <mjpc/grpc/agent.pb.h>
#include <mjpc/simulate.h> // mjpc fork
#include <mjpc/states/state.h>
#include <mjpc/utilities.h>

namespace mjpc::agent_grpc {
@@ -95,6 +98,15 @@ class UiAgentService final : public agent::Agent::Service {
const agent::GetModeRequest* request,
agent::GetModeResponse* response) override;

grpc::Status GetAllModes(grpc::ServerContext* context,
const agent::GetAllModesRequest* request,
agent::GetAllModesResponse* response) override;

grpc::Status GetBestTrajectory(
grpc::ServerContext* context,
const agent::GetBestTrajectoryRequest* request,
agent::GetBestTrajectoryResponse* response) override;

grpc::Status SetAnything(grpc::ServerContext* context,
const agent::SetAnythingRequest* request,
agent::SetAnythingResponse* response) override;
99 changes: 33 additions & 66 deletions mjpc/planners/cross_entropy/planner.cc
@@ -106,11 +106,9 @@ void CrossEntropyPlanner::Allocate() {
trajectory[i].Allocate(kMaxTrajectoryHorizon);
candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon);
}

// elite average trajectory
elite_avg.Initialize(num_state, model->nu, task->num_residual,
task->num_trace, kMaxTrajectoryHorizon);
elite_avg.Allocate(kMaxTrajectoryHorizon);
nominal_trajectory.Initialize(num_state, model->nu, task->num_residual,
task->num_trace, kMaxTrajectoryHorizon);
nominal_trajectory.Allocate(kMaxTrajectoryHorizon);
}

// reset memory to zeros
@@ -143,7 +141,7 @@ void CrossEntropyPlanner::Reset(int horizon,
trajectory[i].Reset(kMaxTrajectoryHorizon);
candidate_policy[i].Reset(horizon);
}
elite_avg.Reset(kMaxTrajectoryHorizon);
nominal_trajectory.Reset(kMaxTrajectoryHorizon);

for (const auto& d : data_) {
mju_zero(d->ctrl, model->nu);
@@ -161,11 +159,6 @@ void CrossEntropyPlanner::SetState(const State& state) {

// optimize nominal policy using random sampling
void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
// check horizon
if (horizon != elite_avg.horizon) {
NominalTrajectory(horizon, pool);
}

// if num_trajectory_ has changed, use it in this new iteration.
// num_trajectory_ might change while this function runs. Keep it constant
// for the duration of this function.
@@ -220,66 +213,29 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
int num_spline_points = resampled_policy.num_spline_points;
int num_parameters = resampled_policy.num_parameters;

// reset parameters scratch to zero
std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);

// reset elite average
elite_avg.Reset(horizon);

// set elite average trajectory times
for (int tt = 0; tt <= horizon; tt++) {
elite_avg.times[tt] = time + tt * model->opt.timestep;
}

// best elite
int idx = trajectory_order[0];
// averaged return over elites
double avg_return = 0.0;

// add parameters
mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(),
num_parameters);
// reset parameters scratch
std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);

// copy first elite trajectory
mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(),
model->nu * (horizon - 1));
mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(),
trajectory[idx].dim_trace * horizon);
mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(),
elite_avg.dim_residual * horizon);
mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
elite_avg.total_return = trajectory[idx].total_return;

// loop over remaining elites to compute average
for (int i = 1; i < n_elite; i++) {
// loop over elites to compute average
for (int i = 0; i < n_elite; i++) {
// ordered trajectory index
int idx = trajectory_order[i];

// add parameters
mju_addTo(parameters_scratch.data(),
candidate_policy[idx].parameters.data(), num_parameters);

// add elite trajectory
mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(),
model->nu * (horizon - 1));
mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(),
trajectory[idx].dim_trace * horizon);
mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(),
elite_avg.dim_residual * horizon);
mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
elite_avg.total_return += trajectory[idx].total_return;
// add total return
avg_return += trajectory[idx].total_return;
}

// normalize
mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite,
num_parameters);
mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite,
model->nu * (horizon - 1));
mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite,
elite_avg.dim_trace * horizon);
mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite,
elite_avg.dim_residual * horizon);
mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite,
horizon);
elite_avg.total_return /= n_elite;
avg_return /= n_elite;

// loop over elites to compute variance
std::fill(variance.begin(), variance.end(), 0.0); // reset variance to zero
@@ -304,25 +260,28 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
}

// improvement: compare nominal to elite average
improvement = mju_max(
elite_avg.total_return - trajectory[trajectory_order[0]].total_return,
0.0);
improvement =
mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0);

// stop timer
policy_update_compute_time = GetDuration(policy_update_start);
}

// compute trajectory using nominal policy
void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
void CrossEntropyPlanner::NominalTrajectory(int horizon) {
// set policy
auto nominal_policy = [&cp = resampled_policy](
double* action, const double* state, double time) {
cp.Action(action, state, time);
};

// rollout nominal policy
elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(),
time, mocap.data(), userdata.data(), horizon);
nominal_trajectory.Rollout(nominal_policy, task, model,
data_[ThreadPool::WorkerId()].get(), state.data(),
time, mocap.data(), userdata.data(), horizon);
}
void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
NominalTrajectory(horizon);
}

// set action from policy
@@ -363,6 +322,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) {

LinearRange(resampled_policy.times.data(), time_shift,
resampled_policy.times[0], num_spline_points);

resampled_policy.representation = policy.representation;
}

// add random noise to nominal policy
Expand Down Expand Up @@ -446,12 +407,18 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
state.data(), time, mocap.data(), userdata.data(), horizon);
});
}
pool.WaitCount(count_before + num_trajectory);
// nominal
pool.Schedule([&s = *this, horizon]() { s.NominalTrajectory(horizon); });

// wait
pool.WaitCount(count_before + num_trajectory + 1);
pool.ResetCount();
}

// returns the nominal trajectory (this is the purple trace)
const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; }
// returns the **nominal** trajectory (this is the purple trace)
const Trajectory* CrossEntropyPlanner::BestTrajectory() {
return &nominal_trajectory;
}

// visualize planner-specific traces
void CrossEntropyPlanner::Traces(mjvScene* scn) {
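
Note: with this change the planner no longer accumulates a full elite-average trajectory. It averages only the elite returns (avg_return) and elite spline parameters, rolls out the resampled nominal policy as one extra job in Rollouts, and BestTrajectory now returns that nominal trace. A self-contained sketch of the elite mean/variance update is given below; it mirrors the logic above but uses hypothetical names (Rollout, EliteUpdate) rather than the planner's actual interface.

// Standalone sketch of a cross-entropy elite update (illustrative only):
// average the spline parameters and returns of the n_elite best rollouts,
// then compute the per-parameter variance used for the next sampling round.
#include <algorithm>
#include <numeric>
#include <vector>

struct Rollout {
  std::vector<double> parameters;  // flattened spline parameters
  double total_return;             // total cost of the rollout (lower is better)
};

void EliteUpdate(const std::vector<Rollout>& rollouts, int n_elite,
                 std::vector<double>* mean, std::vector<double>* variance,
                 double* avg_return) {
  // order rollout indices by total return (ascending)
  std::vector<int> order(rollouts.size());
  std::iota(order.begin(), order.end(), 0);
  std::partial_sort(order.begin(), order.begin() + n_elite, order.end(),
                    [&](int a, int b) {
                      return rollouts[a].total_return <
                             rollouts[b].total_return;
                    });

  const int num_parameters = static_cast<int>(mean->size());
  std::fill(mean->begin(), mean->end(), 0.0);
  std::fill(variance->begin(), variance->end(), 0.0);
  *avg_return = 0.0;

  // elite mean of parameters and returns
  for (int i = 0; i < n_elite; i++) {
    const Rollout& r = rollouts[order[i]];
    for (int k = 0; k < num_parameters; k++) (*mean)[k] += r.parameters[k];
    *avg_return += r.total_return;
  }
  for (int k = 0; k < num_parameters; k++) (*mean)[k] /= n_elite;
  *avg_return /= n_elite;

  // elite variance about the new mean
  for (int i = 0; i < n_elite; i++) {
    const Rollout& r = rollouts[order[i]];
    for (int k = 0; k < num_parameters; k++) {
      const double d = r.parameters[k] - (*mean)[k];
      (*variance)[k] += d * d / n_elite;
    }
  }
}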
6 changes: 2 additions & 4 deletions mjpc/planners/cross_entropy/planner.h
@@ -57,6 +57,7 @@ class CrossEntropyPlanner : public Planner {

// compute trajectory using nominal policy
void NominalTrajectory(int horizon, ThreadPool& pool) override;
void NominalTrajectory(int horizon);

// set action from policy
void ActionFromPolicy(double* action, const double* state, double time,
@@ -111,7 +112,7 @@

// trajectories
Trajectory trajectory[kMaxTrajectory];
Trajectory elite_avg;
Trajectory nominal_trajectory;

// order of indices of rolled out trajectories, ordered by total return
std::vector<int> trajectory_order;
@@ -129,9 +130,6 @@
// improvement
double improvement;

// flags
int processed_noise_status;

// timing
std::atomic<double> noise_compute_time;
double rollouts_compute_time;