diff --git a/CMakeLists.txt b/CMakeLists.txt index 20d732d51..4717c5b8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ set(MUJOCO_MPC_MUJOCO_GIT_TAG ) set(MUJOCO_MPC_MENAGERIE_GIT_TAG - aef3ee5c07ea51506e893a62fd832773ff0162c8 + 94ea114fa8c60a0fd542c8e1ffeb997204acbea2 CACHE STRING "Git revision for MuJoCo Menagerie." ) diff --git a/mjpc/CMakeLists.txt b/mjpc/CMakeLists.txt index 7c086410f..276d625c3 100644 --- a/mjpc/CMakeLists.txt +++ b/mjpc/CMakeLists.txt @@ -186,16 +186,14 @@ if(APPLE) target_link_libraries(mjpc "-framework Cocoa") endif() -add_executable( - testspeed +add_library( + libtestspeed STATIC testspeed_app.cc testspeed.h testspeed.cc ) target_link_libraries( - testspeed - absl::flags - absl::flags_parse + libtestspeed absl::random_random absl::strings libmjpc @@ -203,6 +201,18 @@ target_link_libraries( threadpool Threads::Threads ) +target_include_directories(libtestspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) + +add_executable( + testspeed + testspeed_app.cc +) +target_link_libraries( + testspeed + libtestspeed + absl::flags + absl::flags_parse +) target_include_directories(testspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) target_compile_options(testspeed PUBLIC ${MJPC_COMPILE_OPTIONS}) target_link_options(testspeed PRIVATE ${MJPC_LINK_OPTIONS}) diff --git a/mjpc/grpc/ui_agent_service.cc b/mjpc/grpc/ui_agent_service.cc index 8b49b01b6..48a92a7d2 100644 --- a/mjpc/grpc/ui_agent_service.cc +++ b/mjpc/grpc/ui_agent_service.cc @@ -35,12 +35,16 @@ namespace mjpc::agent_grpc { using ::agent::GetActionRequest; using ::agent::GetActionResponse; -using ::agent::GetModeRequest; -using ::agent::GetModeResponse; +using ::agent::GetAllModesRequest; +using ::agent::GetAllModesResponse; +using ::agent::GetBestTrajectoryRequest; +using ::agent::GetBestTrajectoryResponse; using ::agent::GetResidualsRequest; using ::agent::GetResidualsResponse; using ::agent::GetCostValuesAndWeightsRequest; using ::agent::GetCostValuesAndWeightsResponse; +using ::agent::GetModeRequest; +using ::agent::GetModeResponse; using ::agent::GetStateRequest; using ::agent::GetStateResponse; using ::agent::GetTaskParametersRequest; @@ -222,6 +226,24 @@ grpc::Status UiAgentService::GetMode(grpc::ServerContext* context, }); } +grpc::Status UiAgentService::GetAllModes(grpc::ServerContext* context, + const GetAllModesRequest* request, + GetAllModesResponse* response) { + return RunBeforeStep( + context, [request, response](mjpc::Agent* agent, const mjModel* model, + mjData* data) { + return grpc_agent_util::GetAllModes(request, agent, response); + }); +} + +grpc::Status UiAgentService::GetBestTrajectory( + grpc::ServerContext* context, const GetBestTrajectoryRequest* request, + GetBestTrajectoryResponse* response) { + // TODO - Implement. 
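+  // For now, return a clear UNIMPLEMENTED status so gRPC callers can detect the gap.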
+ return {grpc::StatusCode::UNIMPLEMENTED, + "GetBestTrajectory is not implemented."}; +} + grpc::Status UiAgentService::SetAnything(grpc::ServerContext* context, const SetAnythingRequest* request, SetAnythingResponse* response) { diff --git a/mjpc/grpc/ui_agent_service.h b/mjpc/grpc/ui_agent_service.h index dcff0d9f0..7ad7366bc 100644 --- a/mjpc/grpc/ui_agent_service.h +++ b/mjpc/grpc/ui_agent_service.h @@ -15,13 +15,16 @@ #ifndef MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_ #define MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_ +#include #include #include #include +#include #include #include #include // mjpc fork +#include #include namespace mjpc::agent_grpc { @@ -95,6 +98,15 @@ class UiAgentService final : public agent::Agent::Service { const agent::GetModeRequest* request, agent::GetModeResponse* response) override; + grpc::Status GetAllModes(grpc::ServerContext* context, + const agent::GetAllModesRequest* request, + agent::GetAllModesResponse* response) override; + + grpc::Status GetBestTrajectory( + grpc::ServerContext* context, + const agent::GetBestTrajectoryRequest* request, + agent::GetBestTrajectoryResponse* response) override; + grpc::Status SetAnything(grpc::ServerContext* context, const agent::SetAnythingRequest* request, agent::SetAnythingResponse* response) override; diff --git a/mjpc/planners/cross_entropy/planner.cc b/mjpc/planners/cross_entropy/planner.cc index d24328767..af4cf395f 100644 --- a/mjpc/planners/cross_entropy/planner.cc +++ b/mjpc/planners/cross_entropy/planner.cc @@ -106,11 +106,9 @@ void CrossEntropyPlanner::Allocate() { trajectory[i].Allocate(kMaxTrajectoryHorizon); candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon); } - - // elite average trajectory - elite_avg.Initialize(num_state, model->nu, task->num_residual, - task->num_trace, kMaxTrajectoryHorizon); - elite_avg.Allocate(kMaxTrajectoryHorizon); + nominal_trajectory.Initialize(num_state, model->nu, task->num_residual, + task->num_trace, kMaxTrajectoryHorizon); + nominal_trajectory.Allocate(kMaxTrajectoryHorizon); } // reset memory to zeros @@ -143,7 +141,7 @@ void CrossEntropyPlanner::Reset(int horizon, trajectory[i].Reset(kMaxTrajectoryHorizon); candidate_policy[i].Reset(horizon); } - elite_avg.Reset(kMaxTrajectoryHorizon); + nominal_trajectory.Reset(kMaxTrajectoryHorizon); for (const auto& d : data_) { mju_zero(d->ctrl, model->nu); @@ -161,11 +159,6 @@ void CrossEntropyPlanner::SetState(const State& state) { // optimize nominal policy using random sampling void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { - // check horizon - if (horizon != elite_avg.horizon) { - NominalTrajectory(horizon, pool); - } - // if num_trajectory_ has changed, use it in this new iteration. // num_trajectory_ might change while this function runs. Keep it constant // for the duration of this function. 
@@ -220,36 +213,14 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { int num_spline_points = resampled_policy.num_spline_points; int num_parameters = resampled_policy.num_parameters; - // reset parameters scratch to zero - std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0); - - // reset elite average - elite_avg.Reset(horizon); - - // set elite average trajectory times - for (int tt = 0; tt <= horizon; tt++) { - elite_avg.times[tt] = time + tt * model->opt.timestep; - } - - // best elite - int idx = trajectory_order[0]; + // averaged return over elites + double avg_return = 0.0; - // add parameters - mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(), - num_parameters); + // reset parameters scratch + std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0); - // copy first elite trajectory - mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(), - model->nu * (horizon - 1)); - mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(), - trajectory[idx].dim_trace * horizon); - mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(), - elite_avg.dim_residual * horizon); - mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon); - elite_avg.total_return = trajectory[idx].total_return; - - // loop over remaining elites to compute average - for (int i = 1; i < n_elite; i++) { + // loop over elites to compute average + for (int i = 0; i < n_elite; i++) { // ordered trajectory index int idx = trajectory_order[i]; @@ -257,29 +228,14 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { mju_addTo(parameters_scratch.data(), candidate_policy[idx].parameters.data(), num_parameters); - // add elite trajectory - mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(), - model->nu * (horizon - 1)); - mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(), - trajectory[idx].dim_trace * horizon); - mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(), - elite_avg.dim_residual * horizon); - mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon); - elite_avg.total_return += trajectory[idx].total_return; + // add total return + avg_return += trajectory[idx].total_return; } // normalize mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite, num_parameters); - mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite, - model->nu * (horizon - 1)); - mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite, - elite_avg.dim_trace * horizon); - mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite, - elite_avg.dim_residual * horizon); - mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite, - horizon); - elite_avg.total_return /= n_elite; + avg_return /= n_elite; // loop over elites to compute variance std::fill(variance.begin(), variance.end(), 0.0); // reset variance to zero @@ -304,16 +260,15 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { } // improvement: compare nominal to elite average - improvement = mju_max( - elite_avg.total_return - trajectory[trajectory_order[0]].total_return, - 0.0); + improvement = + mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0); // stop timer policy_update_compute_time = GetDuration(policy_update_start); } // compute trajectory using nominal policy -void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) { +void 
CrossEntropyPlanner::NominalTrajectory(int horizon) { // set policy auto nominal_policy = [&cp = resampled_policy]( double* action, const double* state, double time) { @@ -321,8 +276,12 @@ void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) { }; // rollout nominal policy - elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(), - time, mocap.data(), userdata.data(), horizon); + nominal_trajectory.Rollout(nominal_policy, task, model, + data_[ThreadPool::WorkerId()].get(), state.data(), + time, mocap.data(), userdata.data(), horizon); +} +void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) { + NominalTrajectory(horizon); } // set action from policy @@ -363,6 +322,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) { LinearRange(resampled_policy.times.data(), time_shift, resampled_policy.times[0], num_spline_points); + + resampled_policy.representation = policy.representation; } // add random noise to nominal policy @@ -446,12 +407,18 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon, state.data(), time, mocap.data(), userdata.data(), horizon); }); } - pool.WaitCount(count_before + num_trajectory); + // nominal + pool.Schedule([&s = *this, horizon]() { s.NominalTrajectory(horizon); }); + + // wait + pool.WaitCount(count_before + num_trajectory + 1); pool.ResetCount(); } -// returns the nominal trajectory (this is the purple trace) -const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; } +// returns the **nominal** trajectory (this is the purple trace) +const Trajectory* CrossEntropyPlanner::BestTrajectory() { + return &nominal_trajectory; +} // visualize planner-specific traces void CrossEntropyPlanner::Traces(mjvScene* scn) { diff --git a/mjpc/planners/cross_entropy/planner.h b/mjpc/planners/cross_entropy/planner.h index 05db5e815..ec11f2925 100644 --- a/mjpc/planners/cross_entropy/planner.h +++ b/mjpc/planners/cross_entropy/planner.h @@ -57,6 +57,7 @@ class CrossEntropyPlanner : public Planner { // compute trajectory using nominal policy void NominalTrajectory(int horizon, ThreadPool& pool) override; + void NominalTrajectory(int horizon); // set action from policy void ActionFromPolicy(double* action, const double* state, double time, @@ -111,7 +112,7 @@ class CrossEntropyPlanner : public Planner { // trajectories Trajectory trajectory[kMaxTrajectory]; - Trajectory elite_avg; + Trajectory nominal_trajectory; // order of indices of rolled out trajectories, ordered by total return std::vector trajectory_order; @@ -129,9 +130,6 @@ class CrossEntropyPlanner : public Planner { // improvement double improvement; - // flags - int processed_noise_status; - // timing std::atomic noise_compute_time; double rollouts_compute_time; diff --git a/mjpc/spline/spline.h b/mjpc/spline/spline.h index 737b5c809..2a8a875eb 100644 --- a/mjpc/spline/spline.h +++ b/mjpc/spline/spline.h @@ -102,45 +102,41 @@ class TimeSpline { } // Copyable, Movable. 
- IteratorT( - const IteratorT& other) = default; - IteratorT& operator=( - const IteratorT& other) = default; - IteratorT(IteratorT&& other) = - default; - IteratorT& operator=( - IteratorT&& other) = default; + IteratorT(const IteratorT& other) = default; + IteratorT& operator=(const IteratorT& other) = default; + IteratorT(IteratorT&& other) = default; + IteratorT& operator=(IteratorT&& other) = default; reference operator*() { return node_; } pointer operator->() { return &node_; } pointer operator->() const { return &node_; } - IteratorT& operator++() { + IteratorT& operator++() { ++index_; node_ = index_ == spline_->Size() ? NodeType() : spline_->NodeAt(index_); return *this; } - IteratorT operator++(int) { - IteratorT tmp = *this; + IteratorT operator++(int) { + IteratorT tmp = *this; ++(*this); return tmp; } - IteratorT& operator--() { + IteratorT& operator--() { --index_; node_ = spline_->NodeAt(index_); return *this; } - IteratorT operator--(int) { - IteratorT tmp = *this; + IteratorT operator--(int) { + IteratorT tmp = *this; --(*this); return tmp; } - IteratorT& operator+=(difference_type n) { + IteratorT& operator+=(difference_type n) { if (n != 0) { index_ += n; node_ = @@ -149,29 +145,25 @@ class TimeSpline { return *this; } - IteratorT& operator-=(difference_type n) { - return *this += -n; - } + IteratorT& operator-=(difference_type n) { return *this += -n; } - IteratorT operator+(difference_type n) const { - IteratorT tmp(*this); + IteratorT operator+(difference_type n) const { + IteratorT tmp(*this); tmp += n; return tmp; } - IteratorT operator-(difference_type n) const { - IteratorT tmp(*this); + IteratorT operator-(difference_type n) const { + IteratorT tmp(*this); tmp -= n; return tmp; } - friend IteratorT operator+( - difference_type n, const IteratorT& it) { + friend IteratorT operator+(difference_type n, const IteratorT& it) { return it + n; } - friend difference_type operator-(const IteratorT& x, - const IteratorT& y) { + friend difference_type operator-(const IteratorT& x, const IteratorT& y) { CHECK_EQ(x.spline_, y.spline_) << "Comparing iterators from different splines"; if (x != y) return (x.index_ - y.index_); @@ -180,35 +172,29 @@ class TimeSpline { NodeType operator[](difference_type n) const { return *(*this + n); } - friend bool operator==(const IteratorT& x, - const IteratorT& y) { + friend bool operator==(const IteratorT& x, const IteratorT& y) { return x.spline_ == y.spline_ && x.index_ == y.index_; } - friend bool operator!=(const IteratorT& x, - const IteratorT& y) { + friend bool operator!=(const IteratorT& x, const IteratorT& y) { return !(x == y); } - friend bool operator<(const IteratorT& x, - const IteratorT& y) { + friend bool operator<(const IteratorT& x, const IteratorT& y) { CHECK_EQ(x.spline_, y.spline_) << "Comparing iterators from different splines"; return x.index_ < y.index_; } - friend bool operator>(const IteratorT& x, - const IteratorT& y) { + friend bool operator>(const IteratorT& x, const IteratorT& y) { return y < x; } - friend bool operator<=(const IteratorT& x, - const IteratorT& y) { + friend bool operator<=(const IteratorT& x, const IteratorT& y) { return !(y < x); } - friend bool operator>=(const IteratorT& x, - const IteratorT& y) { + friend bool operator>=(const IteratorT& x, const IteratorT& y) { return !(x < y); } diff --git a/mjpc/tasks/bimanual/task.xml b/mjpc/tasks/bimanual/task.xml index 619718786..1a7df9783 100644 --- a/mjpc/tasks/bimanual/task.xml +++ b/mjpc/tasks/bimanual/task.xml @@ -51,7 +51,6 @@ "0 -0.96 1.16 0 
-0.3 0 0.002 0.002 0 -0.96 1.16 0 -0.3 0 0.002 0.002 0.1 0.2 0.3 1 0 0 0"
-      act="-0.1 0 0 0 0 0 0.01 0.1 0 0 0 0 0 0.01"
       ctrl="-0.1 0 0 0 0 0 0.01 0.1 0 0 0 0 0 0.01"/>
diff --git a/mjpc/tasks/cartpole/task.xml b/mjpc/tasks/cartpole/task.xml
index 917d247fe..46c2002b6 100644
--- a/mjpc/tasks/cartpole/task.xml
+++ b/mjpc/tasks/cartpole/task.xml
@@ -44,4 +44,7 @@
+
+
+
diff --git a/mjpc/tasks/humanoid/stand/stand.cc b/mjpc/tasks/humanoid/stand/stand.cc
index 9bf54888f..5f961a092 100644
--- a/mjpc/tasks/humanoid/stand/stand.cc
+++ b/mjpc/tasks/humanoid/stand/stand.cc
@@ -17,7 +17,6 @@
 #include
 #include
 
-#include "mjpc/task.h"
 #include "mjpc/utilities.h"
 
diff --git a/mjpc/tasks/quadrotor/quadrotor.h b/mjpc/tasks/quadrotor/quadrotor.h
index 024a2f95a..018a90d27 100644
--- a/mjpc/tasks/quadrotor/quadrotor.h
+++ b/mjpc/tasks/quadrotor/quadrotor.h
@@ -15,6 +15,7 @@
 #ifndef MJPC_TASKS_QUADROTOR_QUADROTOR_H_
 #define MJPC_TASKS_QUADROTOR_QUADROTOR_H_
 
+#include
 #include
 #include
 #include "mjpc/task.h"
@@ -51,7 +52,7 @@ class Quadrotor : public Task {
   ResidualFn* InternalResidual() override { return &residual_; }
 
  private:
-  int current_mode_;
+  int current_mode_ = 0;
   ResidualFn residual_;
 };
 }  // namespace mjpc
diff --git a/mjpc/tasks/swimmer/swimmer.cc b/mjpc/tasks/swimmer/swimmer.cc
index 59350dfa7..a12b745b6 100644
--- a/mjpc/tasks/swimmer/swimmer.cc
+++ b/mjpc/tasks/swimmer/swimmer.cc
@@ -18,7 +18,6 @@
 #include
 #include
 
-#include "mjpc/task.h"
 #include "mjpc/utilities.h"
 
 namespace mjpc {
diff --git a/mjpc/test/tasks/CMakeLists.txt b/mjpc/test/tasks/CMakeLists.txt
index 40639adaa..a6cc6277c 100644
--- a/mjpc/test/tasks/CMakeLists.txt
+++ b/mjpc/test/tasks/CMakeLists.txt
@@ -13,4 +13,4 @@
 # limitations under the License.
 
 test(task_test)
-target_link_libraries(task_test load gmock)
+target_link_libraries(task_test load gmock libtestspeed)
diff --git a/mjpc/test/tasks/task_test.cc b/mjpc/test/tasks/task_test.cc
index 13c0fb29f..8e3c4a52a 100644
--- a/mjpc/test/tasks/task_test.cc
+++ b/mjpc/test/tasks/task_test.cc
@@ -13,9 +13,14 @@
 // limitations under the License.
 
 #include "mjpc/task.h"
+#include
+#include
 #include "gtest/gtest.h"
 #include
+#include "mjpc/norm.h"
+#include "mjpc/tasks/tasks.h"
+#include "mjpc/testspeed.h"
 #include "mjpc/test/load.h"
 
 namespace mjpc {
@@ -93,5 +98,15 @@ TEST(TasksTest, Task) {
   mj_deleteModel(model);
 }
 
+TEST(StepAllTasksTest, Task) {
+  auto tasks = GetTasks();
+  for (auto& task : tasks) {
+    double cost = SynchronousPlanningCost(
+        task->Name(), /*planner_thread_count=*/1,
+        /*steps_per_planning_iteration=*/100, /*total_time=*/0.1);
+    EXPECT_GT(cost, 0) << "Task " << task->Name() << " failed.";
+  }
+}
+
 }  // namespace
 }  // namespace mjpc
diff --git a/mjpc/testspeed.cc b/mjpc/testspeed.cc
index d0d9a2fdb..0eebc87df 100644
--- a/mjpc/testspeed.cc
+++ b/mjpc/testspeed.cc
@@ -41,8 +41,9 @@ void residual_callback(const mjModel* model, mjData* data, int stage) {
 }
 }  // namespace
 
 // Run synchronous planning, print timing info, return 0 if nothing failed.
-int TestSpeed(std::string task_name, int planner_thread_count, - int steps_per_planning_iteration, double total_time) { +double SynchronousPlanningCost(std::string task_name, int planner_thread_count, + int steps_per_planning_iteration, + double total_time) { std::cout << "Test MJPC Speed\n"; std::cout << " MuJoCo version " << mj_versionString() << "\n"; if (mjVERSION_HEADER != mj_version()) { @@ -63,7 +64,7 @@ int TestSpeed(std::string task_name, int planner_thread_count, mjModel* model = load_model.model.release(); if (!model) { std::cerr << load_model.error << "\n"; - return 1; + return -1; } mjData* data = mj_makeData(model); mj_forward(model, data); @@ -123,6 +124,7 @@ int TestSpeed(std::string task_name, int planner_thread_count, mj_deleteData(data); mj_deleteModel(model); - return 0; + mjcb_sensor = nullptr; + return total_cost; } } // namespace mjpc diff --git a/mjpc/testspeed.h b/mjpc/testspeed.h index ea728869f..53bd6f9ff 100644 --- a/mjpc/testspeed.h +++ b/mjpc/testspeed.h @@ -18,8 +18,9 @@ #include namespace mjpc { -int TestSpeed(std::string task_name, int planner_thread_count, - int steps_per_planning_iteration, double total_time); +double SynchronousPlanningCost(std::string task_name, int planner_thread_count, + int steps_per_planning_iteration, + double total_time); } // namespace mjpc #endif // MJPC_MJPC_TESTSPEED_H_ diff --git a/mjpc/testspeed_app.cc b/mjpc/testspeed_app.cc index 28a5a27e0..181ba6be0 100644 --- a/mjpc/testspeed_app.cc +++ b/mjpc/testspeed_app.cc @@ -34,6 +34,10 @@ int main(int argc, char** argv) { int steps_per_planning_iteration = absl::GetFlag(FLAGS_steps_per_planning_iteration); double total_time = absl::GetFlag(FLAGS_total_time); - return mjpc::TestSpeed(task_name, planner_thread_count, - steps_per_planning_iteration, total_time); + double cost = + mjpc::SynchronousPlanningCost(task_name, planner_thread_count, + steps_per_planning_iteration, total_time); + if (cost < 0) { + return -1; + } } diff --git a/mjpc/testspeed_test.cc b/mjpc/testspeed_test.cc deleted file mode 100644 index bf479c555..000000000 --- a/mjpc/testspeed_test.cc +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2024 DeepMind Technologies Limited -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "mjpc/testspeed.h" - -#include "gtest/gtest.h" - -namespace { - -TEST(TestSeed, Test) { - EXPECT_EQ( - mjpc::TestSpeed("Cartpole", /*planner_thread_count=*/10, - /*steps_per_planning_iteration=*/10, /*total_time=*/10), - 0); -} -} // namespace diff --git a/python/mujoco_mpc/demos/direct/box_drop_smoother.py b/python/mujoco_mpc/demos/direct/api_examples/box_drop_smoother.py similarity index 100% rename from python/mujoco_mpc/demos/direct/box_drop_smoother.py rename to python/mujoco_mpc/demos/direct/api_examples/box_drop_smoother.py diff --git a/python/mujoco_mpc/demos/direct/cartpole_trajopt.py b/python/mujoco_mpc/demos/direct/api_examples/cartpole_trajopt.py similarity index 100% rename from python/mujoco_mpc/demos/direct/cartpole_trajopt.py rename to python/mujoco_mpc/demos/direct/api_examples/cartpole_trajopt.py diff --git a/python/mujoco_mpc/demos/direct/particle_smoother.py b/python/mujoco_mpc/demos/direct/api_examples/particle_smoother.py similarity index 100% rename from python/mujoco_mpc/demos/direct/particle_smoother.py rename to python/mujoco_mpc/demos/direct/api_examples/particle_smoother.py diff --git a/python/mujoco_mpc/demos/direct/api_examples/particle_trajopt.py b/python/mujoco_mpc/demos/direct/api_examples/particle_trajopt.py new file mode 100644 index 000000000..858d385c4 --- /dev/null +++ b/python/mujoco_mpc/demos/direct/api_examples/particle_trajopt.py @@ -0,0 +1,260 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import matplotlib.pyplot as plt +import mediapy as media +import mujoco +from mujoco_mpc import direct as direct_lib +import numpy as np + + +# %% +# 2D Particle Model +xml = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""" + +model = mujoco.MjModel.from_xml_string(xml) +data = mujoco.MjData(model) +renderer = mujoco.Renderer(model) +# %% +# initialization +T = 100 +q0 = np.array([-0.25, -0.25]) +qM = np.array([-0.25, 0.25]) +qN = np.array([0.25, -0.25]) +qT = np.array([0.25, 0.25]) + +# compute linear interpolation +qinterp = np.zeros((model.nq, T)) +for t in range(T): + # slope + slope = (qT - q0) / T + + # interpolation + qinterp[:, t] = q0 + t * slope + +# time +time = [t * model.opt.timestep for t in range(T)] +# %% +# plot position +fig = plt.figure() + +# arm position +plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") +plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") +plt.plot(qM[0], qM[1], color="magenta", marker="o") +plt.plot(qN[0], qN[1], color="magenta", marker="o") +plt.plot(qT[0], qT[1], color="magenta", marker="o") + +plt.legend() +plt.xlabel("X") +plt.ylabel("Y") +# %% +# optimizer model +model_optimizer = mujoco.MjModel.from_xml_string(xml) + +# direct optimizer +configuration_length = T + 2 +optimizer = direct_lib.Direct( + model=model_optimizer, + configuration_length=configuration_length, +) +# %% +# set data +for t in range(configuration_length): + # unpack + qt = np.zeros(model.nq) + st = np.zeros(model.nsensordata) + mt = np.zeros(model.nsensor) + ft = np.zeros(model.nv) + ct = np.zeros(model.nu) + tt = np.array([t * model.opt.timestep]) + + # set initial state + if t == 0 or t == 1: + qt = q0 + st = q0 + mt = np.array([1, 1]) + + # set goal + elif t >= configuration_length - 2: + qt = qT + st = qT + mt = np.array([1, 1]) + + # set waypoint + elif t == 25: + st = qM + mt = np.array([1, 1]) + + # set waypoint + elif t == 75: + st = qN + mt = np.array([1, 1]) + + # initialize qpos + else: + qt = qinterp[:, t - 1] + mt = np.array([0, 0]) + + # set data + data_ = optimizer.data( + t, + configuration=qt, + sensor_measurement=st, + sensor_mask=mt, + force_measurement=ft, + time=tt, + ) +# %% +# set std +optimizer.noise(process=np.array([1000.0, 1000.0]), sensor=np.array([1.0, 1.0])) + +# set settings +optimizer.settings( + sensor_flag=True, + force_flag=True, + max_smoother_iterations=1000, + max_search_iterations=1000, + regularization_initial=1.0e-12, + gradient_tolerance=1.0e-6, + search_direction_tolerance=1.0e-6, + cost_tolerance=1.0e-6, + first_step_position_sensors=True, + last_step_position_sensors=True, + last_step_velocity_sensors=True, +) + +# optimize +optimizer.optimize() + +# costs +optimizer.print_cost() + +# status +optimizer.print_status() +# %% +# get estimated trajectories +q_est = np.zeros((model_optimizer.nq, configuration_length)) +v_est = np.zeros((model_optimizer.nv, configuration_length)) +s_est = np.zeros((model_optimizer.nsensordata, configuration_length)) +f_est = np.zeros((model_optimizer.nv, configuration_length)) +t_est = np.zeros(configuration_length) +for t in range(configuration_length): + data_ = optimizer.data(t) + q_est[:, t] = data_["configuration"] + v_est[:, t] = data_["velocity"] + s_est[:, t] = data_["sensor_prediction"] + f_est[:, t] = data_["force_prediction"] + t_est[t] = data_["time"] +# %% +# plot position +fig = plt.figure() + +plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") 
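+# overlay the optimized trajectory (q_est) for comparison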
+plt.plot(q_est[0, :], q_est[1, :], label="direct trajopt", color="orange") +plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") +plt.plot(qM[0], qM[1], color="magenta", marker="o") +plt.plot(qN[0], qN[1], color="magenta", marker="o") +plt.plot(qT[0], qT[1], color="magenta", marker="o") + +plt.legend() +plt.xlabel("X") +plt.ylabel("Y") + +# plot velocity +fig = plt.figure() + +# velocity +plt.plot(t_est[1:] - model.opt.timestep, v_est[0, 1:], label="v0", color="cyan") +plt.plot( + t_est[1:] - model.opt.timestep, v_est[1, 1:], label="v1", color="orange" +) + +plt.legend() +plt.xlabel("Time (s)") +plt.ylabel("Velocity") +# %% +# frames optimized +frames_opt = [] + +# simulate +for t in range(configuration_length - 1): + # get solution from optimizer + data_ = optimizer.data(t) + + # set configuration + data.qpos = q_est[:, t] + data.qvel = v_est[:, t] + + mujoco.mj_forward(model, data) + + # render and save frames + renderer.update_scene(data) + pixels = renderer.render() + frames_opt.append(pixels) + +# display video +# media.show_video(frames_opt, fps=1.0 / model.opt.timestep, loop=False) diff --git a/python/mujoco_mpc/demos/direct/direct_optimizer.py b/python/mujoco_mpc/demos/direct/direct_optimizer.py index 21a625380..ec08e49af 100644 --- a/python/mujoco_mpc/demos/direct/direct_optimizer.py +++ b/python/mujoco_mpc/demos/direct/direct_optimizer.py @@ -201,68 +201,18 @@ def inverse_dynamics( force = np.zeros((model.nv, horizon)) # loop over horizon - for t in range(0, horizon): - # first step - if t == 0: - # set data - data.qpos = qpos[:, t] - data.qvel = np.zeros(model.nv) - data.qacc = np.zeros(model.nv) - - # evaluate position sensors - mujoco.mj_fwdPosition(model, data) - mujoco.mj_sensorPos(model, data) - - # zero memory - sensor[:, t] = 0.0 - - # set position sensors - for i in range(model.nsensor): - if model.sensor_needstage[i] == mujoco.mjtStage.mjSTAGE_POS: - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - idx = slice(adr, adr + dim) - sensor[idx, t] = data.sensordata[idx] - - # last step - elif t == horizon - 1: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = np.zeros(model.nv) - - # evaluate position and velocity sensors - mujoco.mj_fwdPosition(model, data) - mujoco.mj_sensorPos(model, data) - mujoco.mj_fwdVelocity(model, data) - mujoco.mj_sensorVel(model, data) - - # zero memory - sensor[:, t] = 0.0 - - # only set position and velocity sensors - for i in range(model.nsensor): - needstage = model.sensor_needstage[i] - if ( - needstage == mujoco.mjtStage.mjSTAGE_POS - or needstage == mujoco.mjtStage.mjSTAGE_VEL - ): - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - idx = slice(adr, adr + dim) - sensor[idx, t] = data.sensordata[idx] - else: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = qacc[:, t] + for t in range(1, horizon - 1): + # set data + data.qpos = qpos[:, t] + data.qvel = qvel[:, t] + data.qacc = qacc[:, t] - # inverse dynamics - mujoco.mj_inverse(model, data) + # inverse dynamics + mujoco.mj_inverse(model, data) - # copy sensor and force - sensor[:, t] = data.sensordata - force[:, t] = data.qfrc_inverse + # copy sensor and force + sensor[:, t] = data.sensordata + force[:, t] = data.qfrc_inverse return sensor, force @@ -319,101 +269,34 @@ def diff_inverse_dynamics( dadf = np.zeros((model.nv, model.nv)) # loop over horizon - for t in range(0, horizon): - # first step - if t == 0: - # set data - data.qpos = qpos[:, t] - data.qvel = np.zeros(model.nv) - data.qacc = 
np.zeros(model.nv) - - # Jacobian - mujoco.mjd_inverseFD( - model, - data, - eps, - flg_actuation, - None, - None, - None, - dqds, - None, - None, - None, - ) - - # transpose - dsdq[t] = np.transpose(dqds) - - # zero velocity and acceleration sensor derivatives - for i in range(model.nsensor): - if model.sensor_needstage[i] != mujoco.mjtStage.mjSTAGE_POS: - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - dsdq[t][adr : (adr + dim), 0 : model.nv] = 0.0 - - # last step - elif t == horizon - 1: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = np.zeros(model.nv) - - # Jacobian - mujoco.mjd_inverseFD( - model, - data, - eps, - flg_actuation, - None, - None, - None, - dqds, - dvds, - None, - None, - ) - - # transpose - dsdq[t] = np.transpose(dqds) - dsdv[t] = np.transpose(dvds) - - # zero acceleration sensor derivatives - for i in range(model.nsensor): - if model.sensor_needstage[i] == mujoco.mjtStage.mjSTAGE_ACC: - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - idx = slice(adr, adr + dim) - dsdq[t][idx, 0 : model.nv] = 0.0 - dsdv[t][idx, 0 : model.nv] = 0.0 - else: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = qacc[:, t] - - # Jacobian - mujoco.mjd_inverseFD( - model, - data, - eps, - flg_actuation, - dqdf, - dvdf, - dadf, - dqds, - dvds, - dads, - None, - ) + for t in range(1, horizon - 1): + # set data + data.qpos = qpos[:, t] + data.qvel = qvel[:, t] + data.qacc = qacc[:, t] + + # Jacobian + mujoco.mjd_inverseFD( + model, + data, + eps, + flg_actuation, + dqdf, + dvdf, + dadf, + dqds, + dvds, + dads, + None, + ) - # transpose - dsdq[t] = np.transpose(dqds) - dsdv[t] = np.transpose(dvds) - dsda[t] = np.transpose(dads) - dfdq[t] = np.transpose(dqdf) - dfdv[t] = np.transpose(dvdf) - dfda[t] = np.transpose(dadf) + # transpose + dsdq[t] = np.transpose(dqds) + dsdv[t] = np.transpose(dvds) + dsda[t] = np.transpose(dads) + dfdq[t] = np.transpose(dqdf) + dfdv[t] = np.transpose(dvdf) + dfda[t] = np.transpose(dadf) return dfdq, dfdv, dfda, dsdq, dsdv, dsda @@ -454,23 +337,12 @@ def diff_sensor( np.zeros((model.nsensordata, 3 * model.nv)) for _ in range(horizon) ] # loop over horizon - for t in range(0, horizon): - # first step - if t == 0: - dsdq012[t][:, 0 : model.nv] = 0.0 - dsdq012[t][:, model.nv : (2 * model.nv)] = dsdq[t] - dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = 0.0 - # last step - elif t == horizon - 1: - dsdq012[t][:, 0 : model.nv] = dsdv[t] @ dvdq0[t] - dsdq012[t][:, model.nv : (2 * model.nv)] = dsdq[t] + dsdv[t] @ dvdq1[t] - dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = 0.0 - else: - dsdq012[t][:, 0 : model.nv] = dsdv[t] @ dvdq0[t] + dsda[t] @ dadq0[t] - dsdq012[t][:, model.nv : (2 * model.nv)] = ( - dsdq[t] + dsdv[t] @ dvdq1[t] + dsda[t] @ dadq1[t] - ) - dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dsda[t] @ dadq2[t] + for t in range(1, horizon - 1): + dsdq012[t][:, 0 : model.nv] = dsdv[t] @ dvdq0[t] + dsda[t] @ dadq0[t] + dsdq012[t][:, model.nv : (2 * model.nv)] = ( + dsdq[t] + dsdv[t] @ dvdq1[t] + dsda[t] @ dadq1[t] + ) + dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dsda[t] @ dadq2[t] return dsdq012 @@ -510,19 +382,12 @@ def diff_force( dfdq012 = [np.zeros((model.nv, 3 * model.nv)) for _ in range(horizon)] # loop over horizon - for t in range(horizon): - # first step - if t == 0: - dfdq012[t][:, :] = 0.0 - # last step - elif t == horizon - 1: - dfdq012[t][:, :] = 0.0 - else: - dfdq012[t][:, 0 : model.nv] = dfdv[t] @ dvdq0[t] + dfda[t] @ dadq0[t] - dfdq012[t][:, model.nv : (2 * model.nv)] = ( - 
dfdq[t] + dfdv[t] @ dvdq1[t] + dfda[t] @ dadq1[t] - ) - dfdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dfda[t] @ dadq2[t] + for t in range(1, horizon - 1): + dfdq012[t][:, 0 : model.nv] = dfdv[t] @ dvdq0[t] + dfda[t] @ dadq0[t] + dfdq012[t][:, model.nv : (2 * model.nv)] = ( + dfdq[t] + dfdv[t] @ dvdq1[t] + dfda[t] @ dadq1[t] + ) + dfdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dfda[t] @ dadq2[t] return dfdq012 @@ -654,18 +519,14 @@ def cost_sensor( cost = 0.0 # loop over horizon - for t in range(horizon): + for t in range(1, horizon - 1): # residual res = sensor[:, t] - target[:, t] # loop over sensors for i in range(model.nsensor): - # skip + # stage needstage = model.sensor_needstage[i] - if t == 0 and needstage != mujoco.mjtStage.mjSTAGE_POS: - continue - if t == horizon - 1 and needstage == mujoco.mjtStage.mjSTAGE_ACC: - continue # sensor i adr = model.sensor_adr[i] @@ -722,18 +583,14 @@ def diff_cost_sensor( hess = np.zeros((ntotal, nband)) # loop over horizon - for t in range(horizon): + for t in range(1, horizon - 1): # residual res = sensor[:, t] - target[:, t] # loop over sensors for i in range(model.nsensor): - # skip + # stage needstage = model.sensor_needstage[i] - if t == 0 and needstage != mujoco.mjtStage.mjSTAGE_POS: - continue - if t == horizon - 1 and needstage == mujoco.mjtStage.mjSTAGE_ACC: - continue # adr adr = model.sensor_adr[i] @@ -756,55 +613,20 @@ def diff_cost_sensor( # quadratic norm Hessian normi_hess = (scale * weights[i, t] * np.eye(dim)).reshape((dim, dim)) - # first step - if t == 0: - # indices - idxt = slice(0, model.nv) - - # subblock - dsidq1 = dsdq012[t][idx, model.nv : (2 * model.nv)].reshape( - (dim, model.nv) - ) - - # gradient - grad[idxt] += (dsidq1.T @ normi_grad).ravel() - - # Hessian - blk = dsidq1.T @ normi_hess @ dsidq1 - hess = add_block_in_band(hess, blk, 1.0, ntotal, nband, model.nv, 0) - # last step - elif t == horizon - 1: - # indices - idxt = slice((t - 1) * model.nv, (t + 1) * model.nv) - - # subblock - dsidq01 = dsdq012[t][idx, 0 : (2 * model.nv)].reshape( - (dim, 2 * model.nv) - ) - - # gradient - grad[idxt] += (dsidq01.T @ normi_grad).ravel() - - # Hessian - blk = dsidq01.T @ normi_hess @ dsidq01 - hess = add_block_in_band( - hess, blk, 1.0, ntotal, nband, 2 * model.nv, (t - 1) * model.nv - ) - else: - # indices - idxt = slice((t - 1) * model.nv, (t + 2) * model.nv) + # indices + idxt = slice((t - 1) * model.nv, (t + 2) * model.nv) - # subblock - dsidq012 = dsdq012[t][idx, :].reshape((dim, 3 * model.nv)) + # subblock + dsidq012 = dsdq012[t][idx, :].reshape((dim, 3 * model.nv)) - # gradient - grad[idxt] += (dsidq012.T @ normi_grad).ravel() + # gradient + grad[idxt] += (dsidq012.T @ normi_grad).ravel() - # Hessian - blk = dsidq012.T @ normi_hess @ dsidq012 - hess = add_block_in_band( - hess, blk, 1.0, ntotal, nband, 3 * model.nv, (t - 1) * model.nv - ) + # Hessian + blk = dsidq012.T @ normi_hess @ dsidq012 + hess = add_block_in_band( + hess, blk, 1.0, ntotal, nband, 3 * model.nv, (t - 1) * model.nv + ) return grad, hess @@ -816,6 +638,7 @@ def configuration_update( update: npt.ArrayLike, step: float, horizon: int, + pinned: npt.ArrayLike, ) -> npt.ArrayLike: """Update configuration. 
@@ -827,6 +650,7 @@
       update (npt.ArrayLike): search direction
       step (float): size of update to configurations
       horizon (int): number of timesteps
+      pinned (npt.ArrayLike): per-timestep flags marking qpos held fixed
 
   Returns:
       npt.ArrayLike: updated configuration trajectory
@@ -834,10 +658,20 @@
   qpos_new = np.zeros((model.nq, horizon))
 
   # loop over configurations
+  tpin = 0
   for t in range(horizon):
+    # current qpos
     q = np.array(qpos[:, t])
-    dq = update[(t * model.nv) : ((t + 1) * model.nv)]
-    mujoco.mj_integratePos(model, q, dq, step)
+
+    # update only unpinned qpos
+    if not pinned[t]:
+      dq = update[(tpin * model.nv) : ((tpin + 1) * model.nv)]
+      mujoco.mj_integratePos(model, q, dq, step)
+
+      # increment decision-variable index
+      tpin += 1
+
+    # set new qpos
     qpos_new[:, t] = q
 
   return qpos_new
@@ -901,6 +735,7 @@ class DirectOptimizer:
     force_target: target qfrc_actuator values (nv x horizon).
     weights_sensor: weights for sensor norms (nsensor x horizon).
     weights_force: weights for force norms (nv x horizon).
+    pinned: per-timestep flags marking qpos held fixed (horizon).
     _dvdq0: Jacobian of velocity wrt previous configuration ((nv x nv) x horizon).
     _dvdq1: Jacobian of velocity wrt current configuration ((nv x nv) x horizon).
     _dadq0: Jacobian of acceleration wrt previous configuration ((nv x nv) x horizon).
@@ -918,7 +753,8 @@
     cost_force: sum of weighted force norms.
     cost_sensor: sum of weighted sensor norms.
     cost_initial: initial total cost.
-    _ntotal: number of decision variables (nv * horizon).
+    _ntotal: number of qpos variables (nv * horizon).
+    _ntotal_pin: number of decision variables (_ntotal - sum(pinned) * nv).
     _nband: cost Hessian band dimension (3 * nv).
     _gradient: gradient of cost wrt decision variables (nv * horizon).
     _hessian: band representation of cost Hessian wrt decision variables (nv * horizon x 3 * nv).
@@ -971,6 +807,9 @@ def __init__(self, model: mujoco.MjModel, horizon: int): self.weights_sensor = np.zeros((model.nsensor, horizon)) self.weights_force = np.zeros((model.nv, horizon)) + # pinned + self.pinned = np.zeros(horizon, dtype=bool) + # finite-difference velocity and acceleration Jacobians (wrt configurations) self._dvdq0 = [np.zeros((model.nv, model.nv)) for t in range(horizon)] self._dvdq1 = [np.zeros((model.nv, model.nv)) for t in range(horizon)] @@ -1006,6 +845,7 @@ def __init__(self, model: mujoco.MjModel, horizon: int): # cost derivatives self._ntotal = model.nv * horizon + self._ntotal_pin = self._ntotal self._nband = 3 * model.nv self._gradient = np.zeros(self._ntotal) self._hessian = np.zeros((self._ntotal, self._nband)) @@ -1174,8 +1014,49 @@ def _cost_derivatives( self.horizon, ) - self._gradient = force_gradient + sensor_gradient - self._hessian = force_hessian + sensor_hessian + # dimension + nv = self.model.nv + + # total gradient, Hessian + tpin = 0 + for t in range(self.horizon): + # slices + idx_t = slice(t * nv, (t + 1) * nv) + idx_tpin = slice(tpin * nv, (tpin + 1) * nv) + + if self.pinned[t]: + # gradient + force_gradient[idx_t] = 0.0 + sensor_gradient[idx_t] = 0.0 + + # Hessian + force_hessian[idx_t, :] = 0.0 + sensor_hessian[idx_t, :] = 0.0 + + if t + 1 < self.horizon: + idx_tt = slice((t + 1) * nv, (t + 2) * nv) + idx_c1 = slice(nv, 2 * nv) + + force_hessian[idx_tt, idx_c1] = 0.0 + sensor_hessian[idx_tt, idx_c1] = 0.0 + + if t + 2 < self.horizon: + idx_ttt = slice((t + 2) * nv, (t + 3) * nv) + idx_c0 = slice(0, nv) + + force_hessian[idx_ttt, idx_c0] = 0.0 + sensor_hessian[idx_ttt, idx_c0] = 0.0 + + continue + + # set data + self._gradient[idx_tpin] = force_gradient[idx_t] + sensor_gradient[idx_t] + self._hessian[idx_tpin, :] = ( + force_hessian[idx_t, :] + sensor_hessian[idx_t, :] + ) + + # increment + tpin += 1 def _eval_search_direction(self) -> bool: """Compute search direction. 
@@ -1185,17 +1066,29 @@ def _eval_search_direction(self) -> bool: """ # factorize cost Hessian self._hessian_factor = np.array(self._hessian.ravel()) - min_diag = mujoco.mju_cholFactorBand( - self._hessian_factor, - self._ntotal, - self._nband, - 0, - self._regularization, - 0.0, - ) + while self._regularization < self.regularization_max: + # attempt factorization + min_diag = mujoco.mju_cholFactorBand( + self._hessian_factor, + self._ntotal_pin, + self._nband, + 0, + self._regularization, + 0.0, + ) + + # check factorization + if min_diag > 0.0: + break + + # increase regularization + self._regularization = np.minimum( + self.regularization_max, + self._regularization * self.regularization_scale, + ) # check rank deficient - if min_diag < 1.0e-16: + if self._regularization >= self.regularization_max: self._status_msg = "rank deficient cost Hessian" return False @@ -1204,11 +1097,14 @@ def _eval_search_direction(self) -> bool: self._search_direction, self._hessian_factor, self._gradient, - self._ntotal, + self._ntotal_pin, self._nband, 0, ) + # update Hessian + self._hessian[:, -1] += self._regularization + # check small direction if ( np.linalg.norm(self._search_direction, np.inf) @@ -1227,12 +1123,12 @@ def _update_regularization(self) -> bool: """ # compute expected = g' d + 0.5 d' H d expected = np.dot(self._gradient, self._search_direction) - tmp = np.zeros(self._ntotal) + tmp = np.zeros(self._ntotal_pin) mujoco.mju_bandMulMatVec( tmp, self._hessian, self._search_direction, - self._ntotal, + self._ntotal_pin, self._nband, 0, 1, @@ -1271,6 +1167,15 @@ def optimize(self): self._iterations_step = 0 self._iterations_search = 0 + # ntotal pinned + self._ntotal_pin = self._ntotal - np.sum(self.pinned) * self.model.nv + + # resize + self._gradient.resize(self._ntotal_pin) + self._hessian.resize((self._ntotal_pin, self._nband)) + self._hessian_factor.resize((self._ntotal_pin, self._nband)) + self._search_direction.resize(self._ntotal_pin) + # initial cost self.cost_initial = self.cost(self.qpos) current_cost = self.cost_initial @@ -1287,7 +1192,7 @@ def optimize(self): self._cost_derivatives(self.qpos) # check gradient tolerance - self._gradient_norm = np.linalg.norm(self._gradient) / self._ntotal + self._gradient_norm = np.linalg.norm(self._gradient) / self._ntotal_pin if self._gradient_norm < self.gradient_tolerance: self._status_msg = "gradient tolerance achieved" return @@ -1316,6 +1221,7 @@ def optimize(self): self._search_direction, -1.0, self.horizon, + self.pinned, ) # candidate cost @@ -1362,7 +1268,7 @@ def status(self): print("\n") print( " gradient norm :", - np.linalg.norm(self._gradient) / self._ntotal, + np.linalg.norm(self._gradient) / self._ntotal_pin, ) print(" regularization :", self._regularization) print("\n") diff --git a/python/mujoco_mpc/demos/direct/particle_trajopt.py b/python/mujoco_mpc/demos/direct/particle_trajopt.py index 858d385c4..ed9d358e8 100644 --- a/python/mujoco_mpc/demos/direct/particle_trajopt.py +++ b/python/mujoco_mpc/demos/direct/particle_trajopt.py @@ -1,3 +1,4 @@ +# %% # Copyright 2023 DeepMind Technologies Limited # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,13 +14,12 @@ # limitations under the License. 
import matplotlib.pyplot as plt -import mediapy as media import mujoco -from mujoco_mpc import direct as direct_lib +import direct_optimizer import numpy as np - - # %% +## Example + # 2D Particle Model xml = """ @@ -40,11 +40,6 @@ - - - - - @@ -77,10 +72,10 @@ model = mujoco.MjModel.from_xml_string(xml) data = mujoco.MjData(model) -renderer = mujoco.Renderer(model) + # %% # initialization -T = 100 +T = 400 q0 = np.array([-0.25, -0.25]) qM = np.array([-0.25, 0.25]) qN = np.array([0.25, -0.25]) @@ -97,6 +92,7 @@ # time time = [t * model.opt.timestep for t in range(T)] + # %% # plot position fig = plt.figure() @@ -111,110 +107,74 @@ plt.legend() plt.xlabel("X") plt.ylabel("Y") + # %% -# optimizer model -model_optimizer = mujoco.MjModel.from_xml_string(xml) - -# direct optimizer -configuration_length = T + 2 -optimizer = direct_lib.Direct( - model=model_optimizer, - configuration_length=configuration_length, -) -# %% -# set data -for t in range(configuration_length): - # unpack - qt = np.zeros(model.nq) - st = np.zeros(model.nsensordata) - mt = np.zeros(model.nsensor) - ft = np.zeros(model.nv) - ct = np.zeros(model.nu) - tt = np.array([t * model.opt.timestep]) +# create optimizer +optimizer = direct_optimizer.DirectOptimizer(model, T) + +# settings +optimizer.max_iterations = 10 +optimizer.max_search_iterations = 10 + +# force weight +fw = 5.0e2 +# set data +for t in range(T): # set initial state if t == 0 or t == 1: - qt = q0 - st = q0 - mt = np.array([1, 1]) + optimizer.pinned[t] = True + optimizer.qpos[:, t] = q0 + if t == 1: + optimizer.weights_force[:, t] = fw # set goal - elif t >= configuration_length - 2: - qt = qT - st = qT - mt = np.array([1, 1]) + elif t >= T - 2: + optimizer.pinned[t] = True + optimizer.qpos[:, t] = qT + if t == T - 2: + optimizer.weights_force[:, t] = fw # set waypoint - elif t == 25: - st = qM - mt = np.array([1, 1]) + elif t == 100: + optimizer.pinned[t] = False + optimizer.qpos[:, t] = qM + optimizer.sensor_target[: model.nq, t] = qM + optimizer.weights_force[:, t] = fw + optimizer.weights_sensor[:, t] = 1.0 # set waypoint - elif t == 75: - st = qN - mt = np.array([1, 1]) + elif t == 300: + optimizer.pinned[t] = False + optimizer.qpos[:, t] = qN + optimizer.sensor_target[: model.nq, t] = qN + optimizer.weights_force[:, t] = fw + optimizer.weights_sensor[:, t] = 1.0 # initialize qpos else: - qt = qinterp[:, t - 1] - mt = np.array([0, 0]) - - # set data - data_ = optimizer.data( - t, - configuration=qt, - sensor_measurement=st, - sensor_mask=mt, - force_measurement=ft, - time=tt, - ) -# %% -# set std -optimizer.noise(process=np.array([1000.0, 1000.0]), sensor=np.array([1.0, 1.0])) - -# set settings -optimizer.settings( - sensor_flag=True, - force_flag=True, - max_smoother_iterations=1000, - max_search_iterations=1000, - regularization_initial=1.0e-12, - gradient_tolerance=1.0e-6, - search_direction_tolerance=1.0e-6, - cost_tolerance=1.0e-6, - first_step_position_sensors=True, - last_step_position_sensors=True, - last_step_velocity_sensors=True, -) + optimizer.pinned[t] = False + optimizer.qpos[:, t] = qinterp[:, t] + optimizer.weights_force[:, t] = fw + optimizer.weights_sensor[:, t] = 0.0 # optimize optimizer.optimize() -# costs -optimizer.print_cost() - # status -optimizer.print_status() -# %% -# get estimated trajectories -q_est = np.zeros((model_optimizer.nq, configuration_length)) -v_est = np.zeros((model_optimizer.nv, configuration_length)) -s_est = np.zeros((model_optimizer.nsensordata, configuration_length)) -f_est = np.zeros((model_optimizer.nv, 
configuration_length)) -t_est = np.zeros(configuration_length) -for t in range(configuration_length): - data_ = optimizer.data(t) - q_est[:, t] = data_["configuration"] - v_est[:, t] = data_["velocity"] - s_est[:, t] = data_["sensor_prediction"] - f_est[:, t] = data_["force_prediction"] - t_est[t] = data_["time"] +optimizer.status() + # %% # plot position fig = plt.figure() plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") -plt.plot(q_est[0, :], q_est[1, :], label="direct trajopt", color="orange") +plt.plot( + optimizer.qpos[0, :], + optimizer.qpos[1, :], + label="direct trajopt", + color="orange", +) plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") plt.plot(qM[0], qM[1], color="magenta", marker="o") plt.plot(qN[0], qN[1], color="magenta", marker="o") @@ -224,37 +184,22 @@ plt.xlabel("X") plt.ylabel("Y") -# plot velocity -fig = plt.figure() - -# velocity -plt.plot(t_est[1:] - model.opt.timestep, v_est[0, 1:], label="v0", color="cyan") -plt.plot( - t_est[1:] - model.opt.timestep, v_est[1, 1:], label="v1", color="orange" +# %% +# recover ctrl +mujoco.mj_forward(model, data) +ctrl = np.vstack( + [ + np.linalg.pinv(data.actuator_moment.T) @ optimizer.force[:, t] + for t in range(T) + ] ) +# plot ctrl +fig = plt.figure() +times = [t * model.opt.timestep for t in range(T)] + +plt.step(times, ctrl[:, 0], label="action 0", color="orange") +plt.step(times, ctrl[:, 1], label="action 1", color="magenta") plt.legend() plt.xlabel("Time (s)") -plt.ylabel("Velocity") -# %% -# frames optimized -frames_opt = [] - -# simulate -for t in range(configuration_length - 1): - # get solution from optimizer - data_ = optimizer.data(t) - - # set configuration - data.qpos = q_est[:, t] - data.qvel = v_est[:, t] - - mujoco.mj_forward(model, data) - - # render and save frames - renderer.update_scene(data) - pixels = renderer.render() - frames_opt.append(pixels) - -# display video -# media.show_video(frames_opt, fps=1.0 / model.opt.timestep, loop=False) +plt.ylabel("ctrl") diff --git a/python/mujoco_mpc/demos/direct/particle_trajopt2.py b/python/mujoco_mpc/demos/direct/particle_trajopt2.py deleted file mode 100644 index 0ceabeaeb..000000000 --- a/python/mujoco_mpc/demos/direct/particle_trajopt2.py +++ /dev/null @@ -1,377 +0,0 @@ -# Copyright 2023 DeepMind Technologies Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import matplotlib.pyplot as plt -import mujoco -from direct import direct_optimizer -import numpy as np -from numpy import typing as npt - -# %% -## Example - -# 2D Particle Model -xml = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -""" - -model = mujoco.MjModel.from_xml_string(xml) -data = mujoco.MjData(model) - -# %% -# initialization -T = 400 -q0 = np.array([-0.25, -0.25]) -qM = np.array([-0.25, 0.25]) -qN = np.array([0.25, -0.25]) -qT = np.array([0.25, 0.25]) - -# compute linear interpolation -qinterp = np.zeros((model.nq, T)) -for t in range(T): - # slope - slope = (qT - q0) / T - - # interpolation - qinterp[:, t] = q0 + t * slope - -# time -time = [t * model.opt.timestep for t in range(T)] - -# %% -# plot position -fig = plt.figure() - -# arm position -plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") -plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") -plt.plot(qM[0], qM[1], color="magenta", marker="o") -plt.plot(qN[0], qN[1], color="magenta", marker="o") -plt.plot(qT[0], qT[1], color="magenta", marker="o") - -plt.legend() -plt.xlabel("X") -plt.ylabel("Y") - -# %% -# create optimizer -optimizer = direct_optimizer.DirectOptimizer(model, T) - -# settings -optimizer.max_iterations = 10 -optimizer.max_search_iterations = 10 - -# force weight -fw = 5.0e2 - -# set data -for t in range(T): - # set initial state - if t == 0 or t == 1: - optimizer.qpos[:, t] = q0 - optimizer.sensor_target[: model.nq, t] = q0 - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # set goal - elif t >= T - 2: - optimizer.qpos[:, t] = qT - optimizer.sensor_target[: model.nq, t] = qT - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # set waypoint - elif t == 100: - optimizer.qpos[:, t] = qM - optimizer.sensor_target[: model.nq, t] = qM - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # set waypoint - elif t == 300: - optimizer.qpos[:, t] = qN - optimizer.sensor_target[: model.nq, t] = qN - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # initialize qpos - else: - optimizer.qpos[:, t] = qinterp[:, t] - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 0.0 - -# optimize -optimizer.optimize() - -# status -optimizer.status() - -# %% -# plot position -fig = plt.figure() - -plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") -plt.plot( - optimizer.qpos[0, :], - optimizer.qpos[1, :], - label="direct trajopt", - color="orange", -) -plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") -plt.plot(qM[0], qM[1], color="magenta", marker="o") -plt.plot(qN[0], qN[1], color="magenta", marker="o") -plt.plot(qT[0], qT[1], color="magenta", marker="o") - -plt.legend() -plt.xlabel("X") -plt.ylabel("Y") - -# %% -# recover ctrl -mujoco.mj_forward(model, data) -ctrl = np.vstack([data.actuator_moment @ optimizer.force[:, t] for t in range(T)]) - -# plot ctrl -fig = plt.figure() -times = [t * model.opt.timestep for t in range(T)] - -plt.step(times, ctrl[:, 0], label="action 0", color="orange") -plt.step(times, ctrl[:, 1], label="action 1", color="magenta") -plt.legend() -plt.xlabel("Time (s)") -plt.ylabel("ctrl") - -# %% -# trajectories -T = 3 -qpos = np.zeros((model.nq, T)) -qvel = np.zeros((model.nv, T)) -qacc = np.zeros((model.nv, T)) -ctrl = np.zeros((model.nu, T)) -qfrc = np.zeros((model.nv, T)) -sensor = np.zeros((model.nsensordata, T)) -noisy_sensor = 
np.zeros((model.nsensordata, T)) -time = np.zeros(T) - -# set initial state -mujoco.mj_resetData(model, data) - -# simulate -for t in range(T): - # forward dynamics - mujoco.mj_forward(model, data) - - # cache - qpos[:, t] = data.qpos - qvel[:, t] = data.qvel - qacc[:, t] = data.qacc - ctrl[:, t] = data.ctrl - qfrc[:, t] = data.qfrc_actuator - sensor[:, t] = data.sensordata - time[t] = data.time - - # noisy sensors - noisy_sensor[:, t] = sensor[:, t] - - # intergrate with Euler - mujoco.mj_Euler(model, data) - -# create optimizer -optimizer = direct_optimizer.DirectOptimizer(model, T) - -# initialize -optimizer.qpos = 0.0 * np.ones((model.nq, T)) - -# set data -optimizer.sensor_target = sensor -optimizer.force_target = qfrc -optimizer.ctrl = ctrl - -# set weights -optimizer.weights_force[:, :] = 1.0 -optimizer.weights_sensor[:, :] = 1.0 - -# settings -optimizer.max_iterations = 10 -optimizer.max_search_iterations = 10 - -# optimize -optimizer.optimize() - -# status -optimizer.status() - -# %% -def test_gradient( - optimizer: direct_optimizer.DirectOptimizer, - qpos: npt.ArrayLike, - eps: float = 1.0e-10, - verbose: bool = False, -): - # evaluate nominal cost - c0 = optimizer.cost(qpos) - - # evaluate optimizer gradient - optimizer._cost_derivatives(qpos) - g0 = np.array(optimizer._gradient) - - # finite difference gradient - g = np.zeros(optimizer._ntotal) - - # horizon - T = optimizer.horizon - - # loop over inputs - for i in range(optimizer._ntotal): - # nudge - nudge = np.zeros(optimizer._ntotal) - nudge[i] += eps - - # qpos nudge - qnudge = direct_optimizer.configuration_update( - optimizer.model, qpos, nudge, 1.0, T) - - # evaluate - c = optimizer.cost(qnudge) - - # derivative - g[i] = (c - c0) / eps - - if verbose: - print("gradient optimizer: \n", g0) - print("gradient finite difference: \n", g) - - # return max difference - return np.linalg.norm(g - g0, np.Inf) - - -def test_hessian( - optimizer: direct_optimizer.DirectOptimizer, - qpos: npt.ArrayLike, - eps: float = 1.0e-5, - verbose: bool = False, -): - # evaluate nominal cost - c0 = optimizer.cost(qpos) - - # evaluate optimizer Hessian - optimizer._cost_derivatives(qpos) - h0 = np.zeros((optimizer._ntotal, optimizer._ntotal)) - mujoco.mju_band2Dense( - h0, optimizer._hessian.ravel(), optimizer._ntotal, optimizer._nband, 0, 1 - ) - - # finite difference gradient - h = np.zeros((optimizer._ntotal, optimizer._ntotal)) - - # horizon - T = optimizer.horizon - - for i in range(optimizer._ntotal): - for j in range(i, optimizer._ntotal): - # workspace - w1 = np.zeros(optimizer._ntotal) - w2 = np.zeros(optimizer._ntotal) - w3 = np.zeros(optimizer._ntotal) - - # workspace 1 - w1[i] += eps - w1[j] += eps - - # qpos nudge 1 - qnudge1 = direct_optimizer.configuration_update( - optimizer.model, qpos, w1, 1.0, T) - - cij = optimizer.cost(qnudge1) - - # workspace 2 - w2[i] += eps - - # qpos nudge 2 - qnudge2 = direct_optimizer.configuration_update( - optimizer.model, qpos, w2, 1.0, T) - - ci = optimizer.cost(qnudge2) - - # workspace 3 - w3[j] += eps - - # qpos nudge 3 - qnudge3 = direct_optimizer.configuration_update( - optimizer.model, qpos, w3, 1.0, T) - - cj = optimizer.cost(qnudge3) - - # Hessian value - h[i, j] = (cij - ci - cj + c0) / (eps * eps) - h[j, i] = (cij - ci - cj + c0) / (eps * eps) - - if verbose: - print("Hessian optimizer: \n", h0) - print("Hessian finite difference: \n", h) - - # return maximum difference - return np.linalg.norm((h - h0).ravel(), np.Inf) - - -# %% -test_gradient(optimizer, np.ones((model.nq, T))) - -# %% 
-test_hessian(optimizer, np.zeros((model.nq, T))) diff --git a/python/mujoco_mpc/ui_agent_test.py b/python/mujoco_mpc/ui_agent_test.py index 7fbbadef0..c5e79094a 100644 --- a/python/mujoco_mpc/ui_agent_test.py +++ b/python/mujoco_mpc/ui_agent_test.py @@ -137,7 +137,8 @@ def test_get_cost_weights(self): agent.set_state(qpos=[0, 0.5], qvel=[1, 1]) terms_dict = agent.get_cost_term_values() terms = list(terms_dict.values()) - self.assertFalse(np.any(np.isclose(terms, 0, rtol=0, atol=1e-6))) + self.assertFalse(np.any(np.isclose(terms, 0, rtol=0, atol=1e-6)), + f"no values should be close to 0. got {terms}") def test_set_state_with_lists(self): model_path = ( @@ -169,6 +170,18 @@ def test_set_get_mode(self): agent.set_mode("default_mode") self.assertEqual(agent.get_mode(), "default_mode") + def test_get_all_modes(self): + model_path = ( + pathlib.Path(__file__).parent.parent.parent + / "mjpc/tasks/quadruped/task_flat.xml" + ) + model = mujoco.MjModel.from_xml_path(str(model_path)) + with self.get_agent(task_id="Quadruped Flat", model=model) as agent: + self.assertEqual( + tuple(agent.get_all_modes()), + ("Quadruped", "Biped", "Walk", "Scramble", "Flip"), + ) + @absltest.skip("asset import issue") def test_get_set_mode(self): model_path = (