Commit 679cbeb: Merge branch 'main' into python_examples

thowell authored Feb 17, 2024
2 parents 450d723 + 48b8b5f
Showing 26 changed files with 655 additions and 917 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -61,7 +61,7 @@ set(MUJOCO_MPC_MUJOCO_GIT_TAG
)

set(MUJOCO_MPC_MENAGERIE_GIT_TAG
aef3ee5c07ea51506e893a62fd832773ff0162c8
94ea114fa8c60a0fd542c8e1ffeb997204acbea2
CACHE STRING "Git revision for MuJoCo Menagerie."
)

20 changes: 15 additions & 5 deletions mjpc/CMakeLists.txt
@@ -186,23 +186,33 @@ if(APPLE)
target_link_libraries(mjpc "-framework Cocoa")
endif()

add_executable(
testspeed
add_library(
libtestspeed STATIC
testspeed_app.cc
testspeed.h
testspeed.cc
)
target_link_libraries(
testspeed
absl::flags
absl::flags_parse
libtestspeed
absl::random_random
absl::strings
libmjpc
mujoco::mujoco
threadpool
Threads::Threads
)
target_include_directories(libtestspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)

add_executable(
testspeed
testspeed_app.cc
)
target_link_libraries(
testspeed
libtestspeed
absl::flags
absl::flags_parse
)
target_include_directories(testspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
target_compile_options(testspeed PUBLIC ${MJPC_COMPILE_OPTIONS})
target_link_options(testspeed PRIVATE ${MJPC_LINK_OPTIONS})
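
Note: this hunk splits the speed test into a reusable static library (libtestspeed, built from testspeed.h and testspeed.cc) plus a thin testspeed executable that only adds Abseil flag parsing. Below is a minimal sketch of a separate driver reusing the library; the mjpc::TestSpeed signature, task name, and parameter values are assumptions for illustration and are not taken from this diff.

// Hypothetical driver reusing libtestspeed directly (illustrative sketch,
// not part of this commit). The mjpc::TestSpeed signature is assumed;
// consult mjpc/testspeed.h for the actual declaration.
#include <string>

#include "mjpc/testspeed.h"

int main() {
  const std::string task = "Cartpole";     // assumed task id
  const int planner_threads = 4;           // worker threads for planning
  const int steps_per_planning_step = 10;  // physics steps per replan
  const double total_time = 10.0;          // simulated seconds to run
  return mjpc::TestSpeed(task, planner_threads, steps_per_planning_step,
                         total_time);
}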
26 changes: 24 additions & 2 deletions mjpc/grpc/ui_agent_service.cc
@@ -35,12 +35,16 @@ namespace mjpc::agent_grpc {

using ::agent::GetActionRequest;
using ::agent::GetActionResponse;
using ::agent::GetModeRequest;
using ::agent::GetModeResponse;
using ::agent::GetAllModesRequest;
using ::agent::GetAllModesResponse;
using ::agent::GetBestTrajectoryRequest;
using ::agent::GetBestTrajectoryResponse;
using ::agent::GetResidualsRequest;
using ::agent::GetResidualsResponse;
using ::agent::GetCostValuesAndWeightsRequest;
using ::agent::GetCostValuesAndWeightsResponse;
using ::agent::GetModeRequest;
using ::agent::GetModeResponse;
using ::agent::GetStateRequest;
using ::agent::GetStateResponse;
using ::agent::GetTaskParametersRequest;
@@ -222,6 +226,24 @@ grpc::Status UiAgentService::GetMode(grpc::ServerContext* context,
});
}

grpc::Status UiAgentService::GetAllModes(grpc::ServerContext* context,
const GetAllModesRequest* request,
GetAllModesResponse* response) {
return RunBeforeStep(
context, [request, response](mjpc::Agent* agent, const mjModel* model,
mjData* data) {
return grpc_agent_util::GetAllModes(request, agent, response);
});
}

grpc::Status UiAgentService::GetBestTrajectory(
grpc::ServerContext* context, const GetBestTrajectoryRequest* request,
GetBestTrajectoryResponse* response) {
// TODO - Implement.
return {grpc::StatusCode::UNIMPLEMENTED,
"GetBestTrajectory is not implemented."};
}

grpc::Status UiAgentService::SetAnything(grpc::ServerContext* context,
const SetAnythingRequest* request,
SetAnythingResponse* response) {
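
Note: alongside the handlers above, this commit exposes GetAllModes on the agent::Agent gRPC service, while GetBestTrajectory is declared but returns UNIMPLEMENTED in the UI server. A hedged client sketch follows; the server address and the mode_names response field are assumptions for illustration and should be checked against agent.proto.

// Hypothetical client sketch calling the new GetAllModes RPC (illustrative
// only). Assumes an agent server listening on localhost:35841 and that
// GetAllModesResponse exposes a repeated `mode_names` string field.
#include <iostream>
#include <memory>
#include <string>

#include <grpcpp/grpcpp.h>

#include "mjpc/grpc/agent.grpc.pb.h"

int main() {
  auto channel = grpc::CreateChannel("localhost:35841",
                                     grpc::InsecureChannelCredentials());
  std::unique_ptr<agent::Agent::Stub> stub = agent::Agent::NewStub(channel);

  grpc::ClientContext context;
  agent::GetAllModesRequest request;
  agent::GetAllModesResponse response;
  grpc::Status status = stub->GetAllModes(&context, request, &response);
  if (!status.ok()) {
    std::cerr << "GetAllModes failed: " << status.error_message() << "\n";
    return 1;
  }
  for (const std::string& mode : response.mode_names()) {
    std::cout << mode << "\n";
  }
  return 0;
}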
12 changes: 12 additions & 0 deletions mjpc/grpc/ui_agent_service.h
@@ -15,13 +15,16 @@
#ifndef MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_
#define MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_

#include <absl/functional/any_invocable.h>
#include <grpcpp/server_context.h>
#include <grpcpp/support/status.h>
#include <mujoco/mujoco.h>

#include <mjpc/agent.h>
#include <mjpc/grpc/agent.grpc.pb.h>
#include <mjpc/grpc/agent.pb.h>
#include <mjpc/simulate.h> // mjpc fork
#include <mjpc/states/state.h>
#include <mjpc/utilities.h>

namespace mjpc::agent_grpc {
@@ -95,6 +98,15 @@ class UiAgentService final : public agent::Agent::Service {
const agent::GetModeRequest* request,
agent::GetModeResponse* response) override;

grpc::Status GetAllModes(grpc::ServerContext* context,
const agent::GetAllModesRequest* request,
agent::GetAllModesResponse* response) override;

grpc::Status GetBestTrajectory(
grpc::ServerContext* context,
const agent::GetBestTrajectoryRequest* request,
agent::GetBestTrajectoryResponse* response) override;

grpc::Status SetAnything(grpc::ServerContext* context,
const agent::SetAnythingRequest* request,
agent::SetAnythingResponse* response) override;
99 changes: 33 additions & 66 deletions mjpc/planners/cross_entropy/planner.cc
@@ -106,11 +106,9 @@ void CrossEntropyPlanner::Allocate() {
trajectory[i].Allocate(kMaxTrajectoryHorizon);
candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon);
}

// elite average trajectory
elite_avg.Initialize(num_state, model->nu, task->num_residual,
task->num_trace, kMaxTrajectoryHorizon);
elite_avg.Allocate(kMaxTrajectoryHorizon);
nominal_trajectory.Initialize(num_state, model->nu, task->num_residual,
task->num_trace, kMaxTrajectoryHorizon);
nominal_trajectory.Allocate(kMaxTrajectoryHorizon);
}

// reset memory to zeros
@@ -143,7 +141,7 @@ void CrossEntropyPlanner::Reset(int horizon,
trajectory[i].Reset(kMaxTrajectoryHorizon);
candidate_policy[i].Reset(horizon);
}
elite_avg.Reset(kMaxTrajectoryHorizon);
nominal_trajectory.Reset(kMaxTrajectoryHorizon);

for (const auto& d : data_) {
mju_zero(d->ctrl, model->nu);
@@ -161,11 +159,6 @@ void CrossEntropyPlanner::SetState(const State& state) {

// optimize nominal policy using random sampling
void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
// check horizon
if (horizon != elite_avg.horizon) {
NominalTrajectory(horizon, pool);
}

// if num_trajectory_ has changed, use it in this new iteration.
// num_trajectory_ might change while this function runs. Keep it constant
// for the duration of this function.
@@ -220,66 +213,29 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
int num_spline_points = resampled_policy.num_spline_points;
int num_parameters = resampled_policy.num_parameters;

// reset parameters scratch to zero
std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);

// reset elite average
elite_avg.Reset(horizon);

// set elite average trajectory times
for (int tt = 0; tt <= horizon; tt++) {
elite_avg.times[tt] = time + tt * model->opt.timestep;
}

// best elite
int idx = trajectory_order[0];
// averaged return over elites
double avg_return = 0.0;

// add parameters
mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(),
num_parameters);
// reset parameters scratch
std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0);

// copy first elite trajectory
mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(),
model->nu * (horizon - 1));
mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(),
trajectory[idx].dim_trace * horizon);
mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(),
elite_avg.dim_residual * horizon);
mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
elite_avg.total_return = trajectory[idx].total_return;

// loop over remaining elites to compute average
for (int i = 1; i < n_elite; i++) {
// loop over elites to compute average
for (int i = 0; i < n_elite; i++) {
// ordered trajectory index
int idx = trajectory_order[i];

// add parameters
mju_addTo(parameters_scratch.data(),
candidate_policy[idx].parameters.data(), num_parameters);

// add elite trajectory
mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(),
model->nu * (horizon - 1));
mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(),
trajectory[idx].dim_trace * horizon);
mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(),
elite_avg.dim_residual * horizon);
mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon);
elite_avg.total_return += trajectory[idx].total_return;
// add total return
avg_return += trajectory[idx].total_return;
}

// normalize
mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite,
num_parameters);
mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite,
model->nu * (horizon - 1));
mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite,
elite_avg.dim_trace * horizon);
mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite,
elite_avg.dim_residual * horizon);
mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite,
horizon);
elite_avg.total_return /= n_elite;
avg_return /= n_elite;

// loop over elites to compute variance
std::fill(variance.begin(), variance.end(), 0.0); // reset variance to zero
@@ -304,25 +260,28 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
}

// improvement: compare nominal to elite average
improvement = mju_max(
elite_avg.total_return - trajectory[trajectory_order[0]].total_return,
0.0);
improvement =
mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0);

// stop timer
policy_update_compute_time = GetDuration(policy_update_start);
}

// compute trajectory using nominal policy
void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
void CrossEntropyPlanner::NominalTrajectory(int horizon) {
// set policy
auto nominal_policy = [&cp = resampled_policy](
double* action, const double* state, double time) {
cp.Action(action, state, time);
};

// rollout nominal policy
elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(),
time, mocap.data(), userdata.data(), horizon);
nominal_trajectory.Rollout(nominal_policy, task, model,
data_[ThreadPool::WorkerId()].get(), state.data(),
time, mocap.data(), userdata.data(), horizon);
}
void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) {
NominalTrajectory(horizon);
}

// set action from policy
@@ -363,6 +322,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) {

LinearRange(resampled_policy.times.data(), time_shift,
resampled_policy.times[0], num_spline_points);

resampled_policy.representation = policy.representation;
}

// add random noise to nominal policy
Expand Down Expand Up @@ -446,12 +407,18 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
state.data(), time, mocap.data(), userdata.data(), horizon);
});
}
pool.WaitCount(count_before + num_trajectory);
// nominal
pool.Schedule([&s = *this, horizon]() { s.NominalTrajectory(horizon); });

// wait
pool.WaitCount(count_before + num_trajectory + 1);
pool.ResetCount();
}

// returns the nominal trajectory (this is the purple trace)
const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; }
// returns the **nominal** trajectory (this is the purple trace)
const Trajectory* CrossEntropyPlanner::BestTrajectory() {
return &nominal_trajectory;
}

// visualize planner-specific traces
void CrossEntropyPlanner::Traces(mjvScene* scn) {
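
Note: with this change the planner no longer accumulates a full elite-average trajectory. It averages only the elite returns (avg_return) and elite spline parameters, rolls out the resampled nominal policy as one extra job in Rollouts, and BestTrajectory now returns that nominal trace. A self-contained sketch of the elite mean/variance update is given below; it mirrors the logic above but uses hypothetical names (Rollout, EliteUpdate) rather than the planner's actual interface.

// Standalone sketch of a cross-entropy elite update (illustrative only):
// average the spline parameters and returns of the n_elite best rollouts,
// then compute the per-parameter variance used for the next sampling round.
#include <algorithm>
#include <numeric>
#include <vector>

struct Rollout {
  std::vector<double> parameters;  // flattened spline parameters
  double total_return;             // total cost of the rollout (lower is better)
};

void EliteUpdate(const std::vector<Rollout>& rollouts, int n_elite,
                 std::vector<double>* mean, std::vector<double>* variance,
                 double* avg_return) {
  // order rollout indices by total return (ascending)
  std::vector<int> order(rollouts.size());
  std::iota(order.begin(), order.end(), 0);
  std::partial_sort(order.begin(), order.begin() + n_elite, order.end(),
                    [&](int a, int b) {
                      return rollouts[a].total_return <
                             rollouts[b].total_return;
                    });

  const int num_parameters = static_cast<int>(mean->size());
  std::fill(mean->begin(), mean->end(), 0.0);
  std::fill(variance->begin(), variance->end(), 0.0);
  *avg_return = 0.0;

  // elite mean of parameters and returns
  for (int i = 0; i < n_elite; i++) {
    const Rollout& r = rollouts[order[i]];
    for (int k = 0; k < num_parameters; k++) (*mean)[k] += r.parameters[k];
    *avg_return += r.total_return;
  }
  for (int k = 0; k < num_parameters; k++) (*mean)[k] /= n_elite;
  *avg_return /= n_elite;

  // elite variance about the new mean
  for (int i = 0; i < n_elite; i++) {
    const Rollout& r = rollouts[order[i]];
    for (int k = 0; k < num_parameters; k++) {
      const double d = r.parameters[k] - (*mean)[k];
      (*variance)[k] += d * d / n_elite;
    }
  }
}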
6 changes: 2 additions & 4 deletions mjpc/planners/cross_entropy/planner.h
@@ -57,6 +57,7 @@ class CrossEntropyPlanner : public Planner {

// compute trajectory using nominal policy
void NominalTrajectory(int horizon, ThreadPool& pool) override;
void NominalTrajectory(int horizon);

// set action from policy
void ActionFromPolicy(double* action, const double* state, double time,
@@ -111,7 +112,7 @@

// trajectories
Trajectory trajectory[kMaxTrajectory];
Trajectory elite_avg;
Trajectory nominal_trajectory;

// order of indices of rolled out trajectories, ordered by total return
std::vector<int> trajectory_order;
@@ -129,9 +130,6 @@
// improvement
double improvement;

// flags
int processed_noise_status;

// timing
std::atomic<double> noise_compute_time;
double rollouts_compute_time;