diff --git a/CMakeLists.txt b/CMakeLists.txt index 20d732d51..4717c5b8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ set(MUJOCO_MPC_MUJOCO_GIT_TAG ) set(MUJOCO_MPC_MENAGERIE_GIT_TAG - aef3ee5c07ea51506e893a62fd832773ff0162c8 + 94ea114fa8c60a0fd542c8e1ffeb997204acbea2 CACHE STRING "Git revision for MuJoCo Menagerie." ) diff --git a/mjpc/CMakeLists.txt b/mjpc/CMakeLists.txt index 7c086410f..276d625c3 100644 --- a/mjpc/CMakeLists.txt +++ b/mjpc/CMakeLists.txt @@ -186,16 +186,14 @@ if(APPLE) target_link_libraries(mjpc "-framework Cocoa") endif() -add_executable( - testspeed +add_library( + libtestspeed STATIC testspeed_app.cc testspeed.h testspeed.cc ) target_link_libraries( - testspeed - absl::flags - absl::flags_parse + libtestspeed absl::random_random absl::strings libmjpc @@ -203,6 +201,18 @@ target_link_libraries( threadpool Threads::Threads ) +target_include_directories(libtestspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) + +add_executable( + testspeed + testspeed_app.cc +) +target_link_libraries( + testspeed + libtestspeed + absl::flags + absl::flags_parse +) target_include_directories(testspeed PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) target_compile_options(testspeed PUBLIC ${MJPC_COMPILE_OPTIONS}) target_link_options(testspeed PRIVATE ${MJPC_LINK_OPTIONS}) diff --git a/mjpc/grpc/ui_agent_service.cc b/mjpc/grpc/ui_agent_service.cc index 8b49b01b6..48a92a7d2 100644 --- a/mjpc/grpc/ui_agent_service.cc +++ b/mjpc/grpc/ui_agent_service.cc @@ -35,12 +35,16 @@ namespace mjpc::agent_grpc { using ::agent::GetActionRequest; using ::agent::GetActionResponse; -using ::agent::GetModeRequest; -using ::agent::GetModeResponse; +using ::agent::GetAllModesRequest; +using ::agent::GetAllModesResponse; +using ::agent::GetBestTrajectoryRequest; +using ::agent::GetBestTrajectoryResponse; using ::agent::GetResidualsRequest; using ::agent::GetResidualsResponse; using ::agent::GetCostValuesAndWeightsRequest; using ::agent::GetCostValuesAndWeightsResponse; +using ::agent::GetModeRequest; +using ::agent::GetModeResponse; using ::agent::GetStateRequest; using ::agent::GetStateResponse; using ::agent::GetTaskParametersRequest; @@ -222,6 +226,24 @@ grpc::Status UiAgentService::GetMode(grpc::ServerContext* context, }); } +grpc::Status UiAgentService::GetAllModes(grpc::ServerContext* context, + const GetAllModesRequest* request, + GetAllModesResponse* response) { + return RunBeforeStep( + context, [request, response](mjpc::Agent* agent, const mjModel* model, + mjData* data) { + return grpc_agent_util::GetAllModes(request, agent, response); + }); +} + +grpc::Status UiAgentService::GetBestTrajectory( + grpc::ServerContext* context, const GetBestTrajectoryRequest* request, + GetBestTrajectoryResponse* response) { + // TODO - Implement. 
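+  // For now, return a clear UNIMPLEMENTED status so gRPC callers can detect the gap.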
+ return {grpc::StatusCode::UNIMPLEMENTED, + "GetBestTrajectory is not implemented."}; +} + grpc::Status UiAgentService::SetAnything(grpc::ServerContext* context, const SetAnythingRequest* request, SetAnythingResponse* response) { diff --git a/mjpc/grpc/ui_agent_service.h b/mjpc/grpc/ui_agent_service.h index dcff0d9f0..7ad7366bc 100644 --- a/mjpc/grpc/ui_agent_service.h +++ b/mjpc/grpc/ui_agent_service.h @@ -15,13 +15,16 @@ #ifndef MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_ #define MJPC_MJPC_GRPC_UI_AGENT_SERVICE_H_ +#include #include #include #include +#include #include #include #include // mjpc fork +#include #include namespace mjpc::agent_grpc { @@ -95,6 +98,15 @@ class UiAgentService final : public agent::Agent::Service { const agent::GetModeRequest* request, agent::GetModeResponse* response) override; + grpc::Status GetAllModes(grpc::ServerContext* context, + const agent::GetAllModesRequest* request, + agent::GetAllModesResponse* response) override; + + grpc::Status GetBestTrajectory( + grpc::ServerContext* context, + const agent::GetBestTrajectoryRequest* request, + agent::GetBestTrajectoryResponse* response) override; + grpc::Status SetAnything(grpc::ServerContext* context, const agent::SetAnythingRequest* request, agent::SetAnythingResponse* response) override; diff --git a/mjpc/planners/cross_entropy/planner.cc b/mjpc/planners/cross_entropy/planner.cc index d24328767..af4cf395f 100644 --- a/mjpc/planners/cross_entropy/planner.cc +++ b/mjpc/planners/cross_entropy/planner.cc @@ -106,11 +106,9 @@ void CrossEntropyPlanner::Allocate() { trajectory[i].Allocate(kMaxTrajectoryHorizon); candidate_policy[i].Allocate(model, *task, kMaxTrajectoryHorizon); } - - // elite average trajectory - elite_avg.Initialize(num_state, model->nu, task->num_residual, - task->num_trace, kMaxTrajectoryHorizon); - elite_avg.Allocate(kMaxTrajectoryHorizon); + nominal_trajectory.Initialize(num_state, model->nu, task->num_residual, + task->num_trace, kMaxTrajectoryHorizon); + nominal_trajectory.Allocate(kMaxTrajectoryHorizon); } // reset memory to zeros @@ -143,7 +141,7 @@ void CrossEntropyPlanner::Reset(int horizon, trajectory[i].Reset(kMaxTrajectoryHorizon); candidate_policy[i].Reset(horizon); } - elite_avg.Reset(kMaxTrajectoryHorizon); + nominal_trajectory.Reset(kMaxTrajectoryHorizon); for (const auto& d : data_) { mju_zero(d->ctrl, model->nu); @@ -161,11 +159,6 @@ void CrossEntropyPlanner::SetState(const State& state) { // optimize nominal policy using random sampling void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { - // check horizon - if (horizon != elite_avg.horizon) { - NominalTrajectory(horizon, pool); - } - // if num_trajectory_ has changed, use it in this new iteration. // num_trajectory_ might change while this function runs. Keep it constant // for the duration of this function. 
@@ -220,36 +213,14 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { int num_spline_points = resampled_policy.num_spline_points; int num_parameters = resampled_policy.num_parameters; - // reset parameters scratch to zero - std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0); - - // reset elite average - elite_avg.Reset(horizon); - - // set elite average trajectory times - for (int tt = 0; tt <= horizon; tt++) { - elite_avg.times[tt] = time + tt * model->opt.timestep; - } - - // best elite - int idx = trajectory_order[0]; + // averaged return over elites + double avg_return = 0.0; - // add parameters - mju_copy(parameters_scratch.data(), candidate_policy[idx].parameters.data(), - num_parameters); + // reset parameters scratch + std::fill(parameters_scratch.begin(), parameters_scratch.end(), 0.0); - // copy first elite trajectory - mju_copy(elite_avg.actions.data(), trajectory[idx].actions.data(), - model->nu * (horizon - 1)); - mju_copy(elite_avg.trace.data(), trajectory[idx].trace.data(), - trajectory[idx].dim_trace * horizon); - mju_copy(elite_avg.residual.data(), trajectory[idx].residual.data(), - elite_avg.dim_residual * horizon); - mju_copy(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon); - elite_avg.total_return = trajectory[idx].total_return; - - // loop over remaining elites to compute average - for (int i = 1; i < n_elite; i++) { + // loop over elites to compute average + for (int i = 0; i < n_elite; i++) { // ordered trajectory index int idx = trajectory_order[i]; @@ -257,29 +228,14 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { mju_addTo(parameters_scratch.data(), candidate_policy[idx].parameters.data(), num_parameters); - // add elite trajectory - mju_addTo(elite_avg.actions.data(), trajectory[idx].actions.data(), - model->nu * (horizon - 1)); - mju_addTo(elite_avg.trace.data(), trajectory[idx].trace.data(), - trajectory[idx].dim_trace * horizon); - mju_addTo(elite_avg.residual.data(), trajectory[idx].residual.data(), - elite_avg.dim_residual * horizon); - mju_addTo(elite_avg.costs.data(), trajectory[idx].costs.data(), horizon); - elite_avg.total_return += trajectory[idx].total_return; + // add total return + avg_return += trajectory[idx].total_return; } // normalize mju_scl(parameters_scratch.data(), parameters_scratch.data(), 1.0 / n_elite, num_parameters); - mju_scl(elite_avg.actions.data(), elite_avg.actions.data(), 1.0 / n_elite, - model->nu * (horizon - 1)); - mju_scl(elite_avg.trace.data(), elite_avg.trace.data(), 1.0 / n_elite, - elite_avg.dim_trace * horizon); - mju_scl(elite_avg.residual.data(), elite_avg.residual.data(), 1.0 / n_elite, - elite_avg.dim_residual * horizon); - mju_scl(elite_avg.costs.data(), elite_avg.costs.data(), 1.0 / n_elite, - horizon); - elite_avg.total_return /= n_elite; + avg_return /= n_elite; // loop over elites to compute variance std::fill(variance.begin(), variance.end(), 0.0); // reset variance to zero @@ -304,16 +260,15 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { } // improvement: compare nominal to elite average - improvement = mju_max( - elite_avg.total_return - trajectory[trajectory_order[0]].total_return, - 0.0); + improvement = + mju_max(avg_return - trajectory[trajectory_order[0]].total_return, 0.0); // stop timer policy_update_compute_time = GetDuration(policy_update_start); } // compute trajectory using nominal policy -void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) { +void 
CrossEntropyPlanner::NominalTrajectory(int horizon) { // set policy auto nominal_policy = [&cp = resampled_policy]( double* action, const double* state, double time) { @@ -321,8 +276,12 @@ void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) { }; // rollout nominal policy - elite_avg.Rollout(nominal_policy, task, model, data_[0].get(), state.data(), - time, mocap.data(), userdata.data(), horizon); + nominal_trajectory.Rollout(nominal_policy, task, model, + data_[ThreadPool::WorkerId()].get(), state.data(), + time, mocap.data(), userdata.data(), horizon); +} +void CrossEntropyPlanner::NominalTrajectory(int horizon, ThreadPool& pool) { + NominalTrajectory(horizon); } // set action from policy @@ -363,6 +322,8 @@ void CrossEntropyPlanner::ResamplePolicy(int horizon) { LinearRange(resampled_policy.times.data(), time_shift, resampled_policy.times[0], num_spline_points); + + resampled_policy.representation = policy.representation; } // add random noise to nominal policy @@ -446,12 +407,18 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon, state.data(), time, mocap.data(), userdata.data(), horizon); }); } - pool.WaitCount(count_before + num_trajectory); + // nominal + pool.Schedule([&s = *this, horizon]() { s.NominalTrajectory(horizon); }); + + // wait + pool.WaitCount(count_before + num_trajectory + 1); pool.ResetCount(); } -// returns the nominal trajectory (this is the purple trace) -const Trajectory* CrossEntropyPlanner::BestTrajectory() { return &elite_avg; } +// returns the **nominal** trajectory (this is the purple trace) +const Trajectory* CrossEntropyPlanner::BestTrajectory() { + return &nominal_trajectory; +} // visualize planner-specific traces void CrossEntropyPlanner::Traces(mjvScene* scn) { diff --git a/mjpc/planners/cross_entropy/planner.h b/mjpc/planners/cross_entropy/planner.h index 05db5e815..ec11f2925 100644 --- a/mjpc/planners/cross_entropy/planner.h +++ b/mjpc/planners/cross_entropy/planner.h @@ -57,6 +57,7 @@ class CrossEntropyPlanner : public Planner { // compute trajectory using nominal policy void NominalTrajectory(int horizon, ThreadPool& pool) override; + void NominalTrajectory(int horizon); // set action from policy void ActionFromPolicy(double* action, const double* state, double time, @@ -111,7 +112,7 @@ class CrossEntropyPlanner : public Planner { // trajectories Trajectory trajectory[kMaxTrajectory]; - Trajectory elite_avg; + Trajectory nominal_trajectory; // order of indices of rolled out trajectories, ordered by total return std::vector trajectory_order; @@ -129,9 +130,6 @@ class CrossEntropyPlanner : public Planner { // improvement double improvement; - // flags - int processed_noise_status; - // timing std::atomic noise_compute_time; double rollouts_compute_time; diff --git a/mjpc/spline/spline.h b/mjpc/spline/spline.h index 737b5c809..2a8a875eb 100644 --- a/mjpc/spline/spline.h +++ b/mjpc/spline/spline.h @@ -102,45 +102,41 @@ class TimeSpline { } // Copyable, Movable. 
- IteratorT( - const IteratorT& other) = default; - IteratorT& operator=( - const IteratorT& other) = default; - IteratorT(IteratorT&& other) = - default; - IteratorT& operator=( - IteratorT&& other) = default; + IteratorT(const IteratorT& other) = default; + IteratorT& operator=(const IteratorT& other) = default; + IteratorT(IteratorT&& other) = default; + IteratorT& operator=(IteratorT&& other) = default; reference operator*() { return node_; } pointer operator->() { return &node_; } pointer operator->() const { return &node_; } - IteratorT& operator++() { + IteratorT& operator++() { ++index_; node_ = index_ == spline_->Size() ? NodeType() : spline_->NodeAt(index_); return *this; } - IteratorT operator++(int) { - IteratorT tmp = *this; + IteratorT operator++(int) { + IteratorT tmp = *this; ++(*this); return tmp; } - IteratorT& operator--() { + IteratorT& operator--() { --index_; node_ = spline_->NodeAt(index_); return *this; } - IteratorT operator--(int) { - IteratorT tmp = *this; + IteratorT operator--(int) { + IteratorT tmp = *this; --(*this); return tmp; } - IteratorT& operator+=(difference_type n) { + IteratorT& operator+=(difference_type n) { if (n != 0) { index_ += n; node_ = @@ -149,29 +145,25 @@ class TimeSpline { return *this; } - IteratorT& operator-=(difference_type n) { - return *this += -n; - } + IteratorT& operator-=(difference_type n) { return *this += -n; } - IteratorT operator+(difference_type n) const { - IteratorT tmp(*this); + IteratorT operator+(difference_type n) const { + IteratorT tmp(*this); tmp += n; return tmp; } - IteratorT operator-(difference_type n) const { - IteratorT tmp(*this); + IteratorT operator-(difference_type n) const { + IteratorT tmp(*this); tmp -= n; return tmp; } - friend IteratorT operator+( - difference_type n, const IteratorT& it) { + friend IteratorT operator+(difference_type n, const IteratorT& it) { return it + n; } - friend difference_type operator-(const IteratorT& x, - const IteratorT& y) { + friend difference_type operator-(const IteratorT& x, const IteratorT& y) { CHECK_EQ(x.spline_, y.spline_) << "Comparing iterators from different splines"; if (x != y) return (x.index_ - y.index_); @@ -180,35 +172,29 @@ class TimeSpline { NodeType operator[](difference_type n) const { return *(*this + n); } - friend bool operator==(const IteratorT& x, - const IteratorT& y) { + friend bool operator==(const IteratorT& x, const IteratorT& y) { return x.spline_ == y.spline_ && x.index_ == y.index_; } - friend bool operator!=(const IteratorT& x, - const IteratorT& y) { + friend bool operator!=(const IteratorT& x, const IteratorT& y) { return !(x == y); } - friend bool operator<(const IteratorT& x, - const IteratorT& y) { + friend bool operator<(const IteratorT& x, const IteratorT& y) { CHECK_EQ(x.spline_, y.spline_) << "Comparing iterators from different splines"; return x.index_ < y.index_; } - friend bool operator>(const IteratorT& x, - const IteratorT& y) { + friend bool operator>(const IteratorT& x, const IteratorT& y) { return y < x; } - friend bool operator<=(const IteratorT& x, - const IteratorT& y) { + friend bool operator<=(const IteratorT& x, const IteratorT& y) { return !(y < x); } - friend bool operator>=(const IteratorT& x, - const IteratorT& y) { + friend bool operator>=(const IteratorT& x, const IteratorT& y) { return !(x < y); } diff --git a/mjpc/tasks/bimanual/task.xml b/mjpc/tasks/bimanual/task.xml index 619718786..1a7df9783 100644 --- a/mjpc/tasks/bimanual/task.xml +++ b/mjpc/tasks/bimanual/task.xml @@ -51,7 +51,6 @@ "0 -0.96 1.16 0 
-0.3 0 0.002 0.002 0 -0.96 1.16 0 -0.3 0 0.002 0.002 0.1 0.2 0.3 1 0 0 0"
-      act="-0.1 0 0 0 0 0 0.01 0.1 0 0 0 0 0 0.01"
       ctrl="-0.1 0 0 0 0 0 0.01 0.1 0 0 0 0 0 0.01"/>
diff --git a/mjpc/tasks/cartpole/task.xml b/mjpc/tasks/cartpole/task.xml
index 917d247fe..46c2002b6 100644
--- a/mjpc/tasks/cartpole/task.xml
+++ b/mjpc/tasks/cartpole/task.xml
@@ -44,4 +44,7 @@
+
+
+
diff --git a/mjpc/tasks/humanoid/stand/stand.cc b/mjpc/tasks/humanoid/stand/stand.cc
index 9bf54888f..5f961a092 100644
--- a/mjpc/tasks/humanoid/stand/stand.cc
+++ b/mjpc/tasks/humanoid/stand/stand.cc
@@ -17,7 +17,6 @@
 #include
 #include
 
-#include "mjpc/task.h"
 #include "mjpc/utilities.h"
 
diff --git a/mjpc/tasks/quadrotor/quadrotor.h b/mjpc/tasks/quadrotor/quadrotor.h
index 024a2f95a..018a90d27 100644
--- a/mjpc/tasks/quadrotor/quadrotor.h
+++ b/mjpc/tasks/quadrotor/quadrotor.h
@@ -15,6 +15,7 @@
 #ifndef MJPC_TASKS_QUADROTOR_QUADROTOR_H_
 #define MJPC_TASKS_QUADROTOR_QUADROTOR_H_
 
+#include
 #include
 #include
 #include "mjpc/task.h"
@@ -51,7 +52,7 @@ class Quadrotor : public Task {
   ResidualFn* InternalResidual() override { return &residual_; }
 
  private:
-  int current_mode_;
+  int current_mode_ = 0;
   ResidualFn residual_;
 };
 }  // namespace mjpc
diff --git a/mjpc/tasks/swimmer/swimmer.cc b/mjpc/tasks/swimmer/swimmer.cc
index 59350dfa7..a12b745b6 100644
--- a/mjpc/tasks/swimmer/swimmer.cc
+++ b/mjpc/tasks/swimmer/swimmer.cc
@@ -18,7 +18,6 @@
 #include
 #include
 
-#include "mjpc/task.h"
 #include "mjpc/utilities.h"
 
 namespace mjpc {
diff --git a/mjpc/test/tasks/CMakeLists.txt b/mjpc/test/tasks/CMakeLists.txt
index 40639adaa..a6cc6277c 100644
--- a/mjpc/test/tasks/CMakeLists.txt
+++ b/mjpc/test/tasks/CMakeLists.txt
@@ -13,4 +13,4 @@
 # limitations under the License.
 
 test(task_test)
-target_link_libraries(task_test load gmock)
+target_link_libraries(task_test load gmock libtestspeed)
diff --git a/mjpc/test/tasks/task_test.cc b/mjpc/test/tasks/task_test.cc
index 13c0fb29f..8e3c4a52a 100644
--- a/mjpc/test/tasks/task_test.cc
+++ b/mjpc/test/tasks/task_test.cc
@@ -13,9 +13,14 @@
 // limitations under the License.
 
 #include "mjpc/task.h"
+#include
+#include
 #include "gtest/gtest.h"
 #include
+#include "mjpc/norm.h"
+#include "mjpc/tasks/tasks.h"
+#include "mjpc/testspeed.h"
 #include "mjpc/test/load.h"
 
 namespace mjpc {
@@ -93,5 +98,15 @@ TEST(TasksTest, Task) {
   mj_deleteModel(model);
 }
 
+TEST(StepAllTasksTest, Task) {
+  auto tasks = GetTasks();
+  for (auto& task : tasks) {
+    double cost = SynchronousPlanningCost(
+        task->Name(), /*planner_thread_count=*/1,
+        /*steps_per_planning_iteration=*/100, /*total_time=*/0.1);
+    EXPECT_GT(cost, 0) << "Task " << task->Name() << " failed.";
+  }
+}
+
 }  // namespace
 }  // namespace mjpc
diff --git a/mjpc/testspeed.cc b/mjpc/testspeed.cc
index d0d9a2fdb..0eebc87df 100644
--- a/mjpc/testspeed.cc
+++ b/mjpc/testspeed.cc
@@ -41,8 +41,9 @@ void residual_callback(const mjModel* model, mjData* data, int stage) {
 }
 }  // namespace
 
 // Run synchronous planning, print timing info, return 0 if nothing failed.
-int TestSpeed(std::string task_name, int planner_thread_count, - int steps_per_planning_iteration, double total_time) { +double SynchronousPlanningCost(std::string task_name, int planner_thread_count, + int steps_per_planning_iteration, + double total_time) { std::cout << "Test MJPC Speed\n"; std::cout << " MuJoCo version " << mj_versionString() << "\n"; if (mjVERSION_HEADER != mj_version()) { @@ -63,7 +64,7 @@ int TestSpeed(std::string task_name, int planner_thread_count, mjModel* model = load_model.model.release(); if (!model) { std::cerr << load_model.error << "\n"; - return 1; + return -1; } mjData* data = mj_makeData(model); mj_forward(model, data); @@ -123,6 +124,7 @@ int TestSpeed(std::string task_name, int planner_thread_count, mj_deleteData(data); mj_deleteModel(model); - return 0; + mjcb_sensor = nullptr; + return total_cost; } } // namespace mjpc diff --git a/mjpc/testspeed.h b/mjpc/testspeed.h index ea728869f..53bd6f9ff 100644 --- a/mjpc/testspeed.h +++ b/mjpc/testspeed.h @@ -18,8 +18,9 @@ #include namespace mjpc { -int TestSpeed(std::string task_name, int planner_thread_count, - int steps_per_planning_iteration, double total_time); +double SynchronousPlanningCost(std::string task_name, int planner_thread_count, + int steps_per_planning_iteration, + double total_time); } // namespace mjpc #endif // MJPC_MJPC_TESTSPEED_H_ diff --git a/mjpc/testspeed_app.cc b/mjpc/testspeed_app.cc index 28a5a27e0..181ba6be0 100644 --- a/mjpc/testspeed_app.cc +++ b/mjpc/testspeed_app.cc @@ -34,6 +34,10 @@ int main(int argc, char** argv) { int steps_per_planning_iteration = absl::GetFlag(FLAGS_steps_per_planning_iteration); double total_time = absl::GetFlag(FLAGS_total_time); - return mjpc::TestSpeed(task_name, planner_thread_count, - steps_per_planning_iteration, total_time); + double cost = + mjpc::SynchronousPlanningCost(task_name, planner_thread_count, + steps_per_planning_iteration, total_time); + if (cost < 0) { + return -1; + } } diff --git a/mjpc/testspeed_test.cc b/mjpc/testspeed_test.cc deleted file mode 100644 index bf479c555..000000000 --- a/mjpc/testspeed_test.cc +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2024 DeepMind Technologies Limited -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "mjpc/testspeed.h" - -#include "gtest/gtest.h" - -namespace { - -TEST(TestSeed, Test) { - EXPECT_EQ( - mjpc::TestSpeed("Cartpole", /*planner_thread_count=*/10, - /*steps_per_planning_iteration=*/10, /*total_time=*/10), - 0); -} -} // namespace diff --git a/python/mujoco_mpc/demos/direct/box_drop_smoother.py b/python/mujoco_mpc/demos/direct/api_examples/box_drop_smoother.py similarity index 100% rename from python/mujoco_mpc/demos/direct/box_drop_smoother.py rename to python/mujoco_mpc/demos/direct/api_examples/box_drop_smoother.py diff --git a/python/mujoco_mpc/demos/direct/cartpole_trajopt.py b/python/mujoco_mpc/demos/direct/api_examples/cartpole_trajopt.py similarity index 100% rename from python/mujoco_mpc/demos/direct/cartpole_trajopt.py rename to python/mujoco_mpc/demos/direct/api_examples/cartpole_trajopt.py diff --git a/python/mujoco_mpc/demos/direct/particle_smoother.py b/python/mujoco_mpc/demos/direct/api_examples/particle_smoother.py similarity index 100% rename from python/mujoco_mpc/demos/direct/particle_smoother.py rename to python/mujoco_mpc/demos/direct/api_examples/particle_smoother.py diff --git a/python/mujoco_mpc/demos/direct/api_examples/particle_trajopt.py b/python/mujoco_mpc/demos/direct/api_examples/particle_trajopt.py new file mode 100644 index 000000000..858d385c4 --- /dev/null +++ b/python/mujoco_mpc/demos/direct/api_examples/particle_trajopt.py @@ -0,0 +1,260 @@ +# Copyright 2023 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import matplotlib.pyplot as plt +import mediapy as media +import mujoco +from mujoco_mpc import direct as direct_lib +import numpy as np + + +# %% +# 2D Particle Model +xml = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +""" + +model = mujoco.MjModel.from_xml_string(xml) +data = mujoco.MjData(model) +renderer = mujoco.Renderer(model) +# %% +# initialization +T = 100 +q0 = np.array([-0.25, -0.25]) +qM = np.array([-0.25, 0.25]) +qN = np.array([0.25, -0.25]) +qT = np.array([0.25, 0.25]) + +# compute linear interpolation +qinterp = np.zeros((model.nq, T)) +for t in range(T): + # slope + slope = (qT - q0) / T + + # interpolation + qinterp[:, t] = q0 + t * slope + +# time +time = [t * model.opt.timestep for t in range(T)] +# %% +# plot position +fig = plt.figure() + +# arm position +plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") +plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") +plt.plot(qM[0], qM[1], color="magenta", marker="o") +plt.plot(qN[0], qN[1], color="magenta", marker="o") +plt.plot(qT[0], qT[1], color="magenta", marker="o") + +plt.legend() +plt.xlabel("X") +plt.ylabel("Y") +# %% +# optimizer model +model_optimizer = mujoco.MjModel.from_xml_string(xml) + +# direct optimizer +configuration_length = T + 2 +optimizer = direct_lib.Direct( + model=model_optimizer, + configuration_length=configuration_length, +) +# %% +# set data +for t in range(configuration_length): + # unpack + qt = np.zeros(model.nq) + st = np.zeros(model.nsensordata) + mt = np.zeros(model.nsensor) + ft = np.zeros(model.nv) + ct = np.zeros(model.nu) + tt = np.array([t * model.opt.timestep]) + + # set initial state + if t == 0 or t == 1: + qt = q0 + st = q0 + mt = np.array([1, 1]) + + # set goal + elif t >= configuration_length - 2: + qt = qT + st = qT + mt = np.array([1, 1]) + + # set waypoint + elif t == 25: + st = qM + mt = np.array([1, 1]) + + # set waypoint + elif t == 75: + st = qN + mt = np.array([1, 1]) + + # initialize qpos + else: + qt = qinterp[:, t - 1] + mt = np.array([0, 0]) + + # set data + data_ = optimizer.data( + t, + configuration=qt, + sensor_measurement=st, + sensor_mask=mt, + force_measurement=ft, + time=tt, + ) +# %% +# set std +optimizer.noise(process=np.array([1000.0, 1000.0]), sensor=np.array([1.0, 1.0])) + +# set settings +optimizer.settings( + sensor_flag=True, + force_flag=True, + max_smoother_iterations=1000, + max_search_iterations=1000, + regularization_initial=1.0e-12, + gradient_tolerance=1.0e-6, + search_direction_tolerance=1.0e-6, + cost_tolerance=1.0e-6, + first_step_position_sensors=True, + last_step_position_sensors=True, + last_step_velocity_sensors=True, +) + +# optimize +optimizer.optimize() + +# costs +optimizer.print_cost() + +# status +optimizer.print_status() +# %% +# get estimated trajectories +q_est = np.zeros((model_optimizer.nq, configuration_length)) +v_est = np.zeros((model_optimizer.nv, configuration_length)) +s_est = np.zeros((model_optimizer.nsensordata, configuration_length)) +f_est = np.zeros((model_optimizer.nv, configuration_length)) +t_est = np.zeros(configuration_length) +for t in range(configuration_length): + data_ = optimizer.data(t) + q_est[:, t] = data_["configuration"] + v_est[:, t] = data_["velocity"] + s_est[:, t] = data_["sensor_prediction"] + f_est[:, t] = data_["force_prediction"] + t_est[t] = data_["time"] +# %% +# plot position +fig = plt.figure() + +plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") 
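+# overlay the optimized trajectory (q_est) for comparison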
+plt.plot(q_est[0, :], q_est[1, :], label="direct trajopt", color="orange") +plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") +plt.plot(qM[0], qM[1], color="magenta", marker="o") +plt.plot(qN[0], qN[1], color="magenta", marker="o") +plt.plot(qT[0], qT[1], color="magenta", marker="o") + +plt.legend() +plt.xlabel("X") +plt.ylabel("Y") + +# plot velocity +fig = plt.figure() + +# velocity +plt.plot(t_est[1:] - model.opt.timestep, v_est[0, 1:], label="v0", color="cyan") +plt.plot( + t_est[1:] - model.opt.timestep, v_est[1, 1:], label="v1", color="orange" +) + +plt.legend() +plt.xlabel("Time (s)") +plt.ylabel("Velocity") +# %% +# frames optimized +frames_opt = [] + +# simulate +for t in range(configuration_length - 1): + # get solution from optimizer + data_ = optimizer.data(t) + + # set configuration + data.qpos = q_est[:, t] + data.qvel = v_est[:, t] + + mujoco.mj_forward(model, data) + + # render and save frames + renderer.update_scene(data) + pixels = renderer.render() + frames_opt.append(pixels) + +# display video +# media.show_video(frames_opt, fps=1.0 / model.opt.timestep, loop=False) diff --git a/python/mujoco_mpc/demos/direct/direct_optimizer.py b/python/mujoco_mpc/demos/direct/direct_optimizer.py index 21a625380..ec08e49af 100644 --- a/python/mujoco_mpc/demos/direct/direct_optimizer.py +++ b/python/mujoco_mpc/demos/direct/direct_optimizer.py @@ -201,68 +201,18 @@ def inverse_dynamics( force = np.zeros((model.nv, horizon)) # loop over horizon - for t in range(0, horizon): - # first step - if t == 0: - # set data - data.qpos = qpos[:, t] - data.qvel = np.zeros(model.nv) - data.qacc = np.zeros(model.nv) - - # evaluate position sensors - mujoco.mj_fwdPosition(model, data) - mujoco.mj_sensorPos(model, data) - - # zero memory - sensor[:, t] = 0.0 - - # set position sensors - for i in range(model.nsensor): - if model.sensor_needstage[i] == mujoco.mjtStage.mjSTAGE_POS: - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - idx = slice(adr, adr + dim) - sensor[idx, t] = data.sensordata[idx] - - # last step - elif t == horizon - 1: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = np.zeros(model.nv) - - # evaluate position and velocity sensors - mujoco.mj_fwdPosition(model, data) - mujoco.mj_sensorPos(model, data) - mujoco.mj_fwdVelocity(model, data) - mujoco.mj_sensorVel(model, data) - - # zero memory - sensor[:, t] = 0.0 - - # only set position and velocity sensors - for i in range(model.nsensor): - needstage = model.sensor_needstage[i] - if ( - needstage == mujoco.mjtStage.mjSTAGE_POS - or needstage == mujoco.mjtStage.mjSTAGE_VEL - ): - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - idx = slice(adr, adr + dim) - sensor[idx, t] = data.sensordata[idx] - else: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = qacc[:, t] + for t in range(1, horizon - 1): + # set data + data.qpos = qpos[:, t] + data.qvel = qvel[:, t] + data.qacc = qacc[:, t] - # inverse dynamics - mujoco.mj_inverse(model, data) + # inverse dynamics + mujoco.mj_inverse(model, data) - # copy sensor and force - sensor[:, t] = data.sensordata - force[:, t] = data.qfrc_inverse + # copy sensor and force + sensor[:, t] = data.sensordata + force[:, t] = data.qfrc_inverse return sensor, force @@ -319,101 +269,34 @@ def diff_inverse_dynamics( dadf = np.zeros((model.nv, model.nv)) # loop over horizon - for t in range(0, horizon): - # first step - if t == 0: - # set data - data.qpos = qpos[:, t] - data.qvel = np.zeros(model.nv) - data.qacc = 
np.zeros(model.nv) - - # Jacobian - mujoco.mjd_inverseFD( - model, - data, - eps, - flg_actuation, - None, - None, - None, - dqds, - None, - None, - None, - ) - - # transpose - dsdq[t] = np.transpose(dqds) - - # zero velocity and acceleration sensor derivatives - for i in range(model.nsensor): - if model.sensor_needstage[i] != mujoco.mjtStage.mjSTAGE_POS: - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - dsdq[t][adr : (adr + dim), 0 : model.nv] = 0.0 - - # last step - elif t == horizon - 1: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = np.zeros(model.nv) - - # Jacobian - mujoco.mjd_inverseFD( - model, - data, - eps, - flg_actuation, - None, - None, - None, - dqds, - dvds, - None, - None, - ) - - # transpose - dsdq[t] = np.transpose(dqds) - dsdv[t] = np.transpose(dvds) - - # zero acceleration sensor derivatives - for i in range(model.nsensor): - if model.sensor_needstage[i] == mujoco.mjtStage.mjSTAGE_ACC: - adr = model.sensor_adr[i] - dim = model.sensor_dim[i] - idx = slice(adr, adr + dim) - dsdq[t][idx, 0 : model.nv] = 0.0 - dsdv[t][idx, 0 : model.nv] = 0.0 - else: - # set data - data.qpos = qpos[:, t] - data.qvel = qvel[:, t] - data.qacc = qacc[:, t] - - # Jacobian - mujoco.mjd_inverseFD( - model, - data, - eps, - flg_actuation, - dqdf, - dvdf, - dadf, - dqds, - dvds, - dads, - None, - ) + for t in range(1, horizon - 1): + # set data + data.qpos = qpos[:, t] + data.qvel = qvel[:, t] + data.qacc = qacc[:, t] + + # Jacobian + mujoco.mjd_inverseFD( + model, + data, + eps, + flg_actuation, + dqdf, + dvdf, + dadf, + dqds, + dvds, + dads, + None, + ) - # transpose - dsdq[t] = np.transpose(dqds) - dsdv[t] = np.transpose(dvds) - dsda[t] = np.transpose(dads) - dfdq[t] = np.transpose(dqdf) - dfdv[t] = np.transpose(dvdf) - dfda[t] = np.transpose(dadf) + # transpose + dsdq[t] = np.transpose(dqds) + dsdv[t] = np.transpose(dvds) + dsda[t] = np.transpose(dads) + dfdq[t] = np.transpose(dqdf) + dfdv[t] = np.transpose(dvdf) + dfda[t] = np.transpose(dadf) return dfdq, dfdv, dfda, dsdq, dsdv, dsda @@ -454,23 +337,12 @@ def diff_sensor( np.zeros((model.nsensordata, 3 * model.nv)) for _ in range(horizon) ] # loop over horizon - for t in range(0, horizon): - # first step - if t == 0: - dsdq012[t][:, 0 : model.nv] = 0.0 - dsdq012[t][:, model.nv : (2 * model.nv)] = dsdq[t] - dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = 0.0 - # last step - elif t == horizon - 1: - dsdq012[t][:, 0 : model.nv] = dsdv[t] @ dvdq0[t] - dsdq012[t][:, model.nv : (2 * model.nv)] = dsdq[t] + dsdv[t] @ dvdq1[t] - dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = 0.0 - else: - dsdq012[t][:, 0 : model.nv] = dsdv[t] @ dvdq0[t] + dsda[t] @ dadq0[t] - dsdq012[t][:, model.nv : (2 * model.nv)] = ( - dsdq[t] + dsdv[t] @ dvdq1[t] + dsda[t] @ dadq1[t] - ) - dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dsda[t] @ dadq2[t] + for t in range(1, horizon - 1): + dsdq012[t][:, 0 : model.nv] = dsdv[t] @ dvdq0[t] + dsda[t] @ dadq0[t] + dsdq012[t][:, model.nv : (2 * model.nv)] = ( + dsdq[t] + dsdv[t] @ dvdq1[t] + dsda[t] @ dadq1[t] + ) + dsdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dsda[t] @ dadq2[t] return dsdq012 @@ -510,19 +382,12 @@ def diff_force( dfdq012 = [np.zeros((model.nv, 3 * model.nv)) for _ in range(horizon)] # loop over horizon - for t in range(horizon): - # first step - if t == 0: - dfdq012[t][:, :] = 0.0 - # last step - elif t == horizon - 1: - dfdq012[t][:, :] = 0.0 - else: - dfdq012[t][:, 0 : model.nv] = dfdv[t] @ dvdq0[t] + dfda[t] @ dadq0[t] - dfdq012[t][:, model.nv : (2 * model.nv)] = ( - 
dfdq[t] + dfdv[t] @ dvdq1[t] + dfda[t] @ dadq1[t] - ) - dfdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dfda[t] @ dadq2[t] + for t in range(1, horizon - 1): + dfdq012[t][:, 0 : model.nv] = dfdv[t] @ dvdq0[t] + dfda[t] @ dadq0[t] + dfdq012[t][:, model.nv : (2 * model.nv)] = ( + dfdq[t] + dfdv[t] @ dvdq1[t] + dfda[t] @ dadq1[t] + ) + dfdq012[t][:, (2 * model.nv) : (3 * model.nv)] = dfda[t] @ dadq2[t] return dfdq012 @@ -654,18 +519,14 @@ def cost_sensor( cost = 0.0 # loop over horizon - for t in range(horizon): + for t in range(1, horizon - 1): # residual res = sensor[:, t] - target[:, t] # loop over sensors for i in range(model.nsensor): - # skip + # stage needstage = model.sensor_needstage[i] - if t == 0 and needstage != mujoco.mjtStage.mjSTAGE_POS: - continue - if t == horizon - 1 and needstage == mujoco.mjtStage.mjSTAGE_ACC: - continue # sensor i adr = model.sensor_adr[i] @@ -722,18 +583,14 @@ def diff_cost_sensor( hess = np.zeros((ntotal, nband)) # loop over horizon - for t in range(horizon): + for t in range(1, horizon - 1): # residual res = sensor[:, t] - target[:, t] # loop over sensors for i in range(model.nsensor): - # skip + # stage needstage = model.sensor_needstage[i] - if t == 0 and needstage != mujoco.mjtStage.mjSTAGE_POS: - continue - if t == horizon - 1 and needstage == mujoco.mjtStage.mjSTAGE_ACC: - continue # adr adr = model.sensor_adr[i] @@ -756,55 +613,20 @@ def diff_cost_sensor( # quadratic norm Hessian normi_hess = (scale * weights[i, t] * np.eye(dim)).reshape((dim, dim)) - # first step - if t == 0: - # indices - idxt = slice(0, model.nv) - - # subblock - dsidq1 = dsdq012[t][idx, model.nv : (2 * model.nv)].reshape( - (dim, model.nv) - ) - - # gradient - grad[idxt] += (dsidq1.T @ normi_grad).ravel() - - # Hessian - blk = dsidq1.T @ normi_hess @ dsidq1 - hess = add_block_in_band(hess, blk, 1.0, ntotal, nband, model.nv, 0) - # last step - elif t == horizon - 1: - # indices - idxt = slice((t - 1) * model.nv, (t + 1) * model.nv) - - # subblock - dsidq01 = dsdq012[t][idx, 0 : (2 * model.nv)].reshape( - (dim, 2 * model.nv) - ) - - # gradient - grad[idxt] += (dsidq01.T @ normi_grad).ravel() - - # Hessian - blk = dsidq01.T @ normi_hess @ dsidq01 - hess = add_block_in_band( - hess, blk, 1.0, ntotal, nband, 2 * model.nv, (t - 1) * model.nv - ) - else: - # indices - idxt = slice((t - 1) * model.nv, (t + 2) * model.nv) + # indices + idxt = slice((t - 1) * model.nv, (t + 2) * model.nv) - # subblock - dsidq012 = dsdq012[t][idx, :].reshape((dim, 3 * model.nv)) + # subblock + dsidq012 = dsdq012[t][idx, :].reshape((dim, 3 * model.nv)) - # gradient - grad[idxt] += (dsidq012.T @ normi_grad).ravel() + # gradient + grad[idxt] += (dsidq012.T @ normi_grad).ravel() - # Hessian - blk = dsidq012.T @ normi_hess @ dsidq012 - hess = add_block_in_band( - hess, blk, 1.0, ntotal, nband, 3 * model.nv, (t - 1) * model.nv - ) + # Hessian + blk = dsidq012.T @ normi_hess @ dsidq012 + hess = add_block_in_band( + hess, blk, 1.0, ntotal, nband, 3 * model.nv, (t - 1) * model.nv + ) return grad, hess @@ -816,6 +638,7 @@ def configuration_update( update: npt.ArrayLike, step: float, horizon: int, + pinned: npt.ArrayLike, ) -> npt.ArrayLike: """Update configuration. 
@@ -827,6 +650,7 @@
       update (npt.ArrayLike): search direction
       step (float): size of update to configurations
       horizon (int): number of timesteps
+      pinned (npt.ArrayLike): per-timestep flags marking qpos held fixed
 
   Returns:
       npt.ArrayLike: updated configuration trajectory
@@ -834,10 +658,20 @@
   qpos_new = np.zeros((model.nq, horizon))
 
   # loop over configurations
+  tpin = 0
   for t in range(horizon):
+    # current qpos
     q = np.array(qpos[:, t])
-    dq = update[(t * model.nv) : ((t + 1) * model.nv)]
-    mujoco.mj_integratePos(model, q, dq, step)
+
+    # update only unpinned qpos
+    if not pinned[t]:
+      dq = update[(tpin * model.nv) : ((tpin + 1) * model.nv)]
+      mujoco.mj_integratePos(model, q, dq, step)
+
+      # increment decision-variable index
+      tpin += 1
+
+    # set new qpos
     qpos_new[:, t] = q
 
   return qpos_new
@@ -901,6 +735,7 @@ class DirectOptimizer:
     force_target: target qfrc_actuator values (nv x horizon).
     weights_sensor: weights for sensor norms (nsensor x horizon).
     weights_force: weights for force norms (nv x horizon).
+    pinned: per-timestep flags marking qpos held fixed (horizon).
     _dvdq0: Jacobian of velocity wrt previous configuration ((nv x nv) x horizon).
     _dvdq1: Jacobian of velocity wrt current configuration ((nv x nv) x horizon).
     _dadq0: Jacobian of acceleration wrt previous configuration ((nv x nv) x horizon).
@@ -918,7 +753,8 @@
     cost_force: sum of weighted force norms.
     cost_sensor: sum of weighted sensor norms.
     cost_initial: initial total cost.
-    _ntotal: number of decision variables (nv * horizon).
+    _ntotal: number of qpos variables (nv * horizon).
+    _ntotal_pin: number of decision variables (_ntotal - sum(pinned) * nv).
     _nband: cost Hessian band dimension (3 * nv).
     _gradient: gradient of cost wrt decision variables (nv * horizon).
     _hessian: band representation of cost Hessian wrt decision variables (nv * horizon x 3 * nv).
@@ -971,6 +807,9 @@ def __init__(self, model: mujoco.MjModel, horizon: int): self.weights_sensor = np.zeros((model.nsensor, horizon)) self.weights_force = np.zeros((model.nv, horizon)) + # pinned + self.pinned = np.zeros(horizon, dtype=bool) + # finite-difference velocity and acceleration Jacobians (wrt configurations) self._dvdq0 = [np.zeros((model.nv, model.nv)) for t in range(horizon)] self._dvdq1 = [np.zeros((model.nv, model.nv)) for t in range(horizon)] @@ -1006,6 +845,7 @@ def __init__(self, model: mujoco.MjModel, horizon: int): # cost derivatives self._ntotal = model.nv * horizon + self._ntotal_pin = self._ntotal self._nband = 3 * model.nv self._gradient = np.zeros(self._ntotal) self._hessian = np.zeros((self._ntotal, self._nband)) @@ -1174,8 +1014,49 @@ def _cost_derivatives( self.horizon, ) - self._gradient = force_gradient + sensor_gradient - self._hessian = force_hessian + sensor_hessian + # dimension + nv = self.model.nv + + # total gradient, Hessian + tpin = 0 + for t in range(self.horizon): + # slices + idx_t = slice(t * nv, (t + 1) * nv) + idx_tpin = slice(tpin * nv, (tpin + 1) * nv) + + if self.pinned[t]: + # gradient + force_gradient[idx_t] = 0.0 + sensor_gradient[idx_t] = 0.0 + + # Hessian + force_hessian[idx_t, :] = 0.0 + sensor_hessian[idx_t, :] = 0.0 + + if t + 1 < self.horizon: + idx_tt = slice((t + 1) * nv, (t + 2) * nv) + idx_c1 = slice(nv, 2 * nv) + + force_hessian[idx_tt, idx_c1] = 0.0 + sensor_hessian[idx_tt, idx_c1] = 0.0 + + if t + 2 < self.horizon: + idx_ttt = slice((t + 2) * nv, (t + 3) * nv) + idx_c0 = slice(0, nv) + + force_hessian[idx_ttt, idx_c0] = 0.0 + sensor_hessian[idx_ttt, idx_c0] = 0.0 + + continue + + # set data + self._gradient[idx_tpin] = force_gradient[idx_t] + sensor_gradient[idx_t] + self._hessian[idx_tpin, :] = ( + force_hessian[idx_t, :] + sensor_hessian[idx_t, :] + ) + + # increment + tpin += 1 def _eval_search_direction(self) -> bool: """Compute search direction. 
@@ -1185,17 +1066,29 @@ def _eval_search_direction(self) -> bool: """ # factorize cost Hessian self._hessian_factor = np.array(self._hessian.ravel()) - min_diag = mujoco.mju_cholFactorBand( - self._hessian_factor, - self._ntotal, - self._nband, - 0, - self._regularization, - 0.0, - ) + while self._regularization < self.regularization_max: + # attempt factorization + min_diag = mujoco.mju_cholFactorBand( + self._hessian_factor, + self._ntotal_pin, + self._nband, + 0, + self._regularization, + 0.0, + ) + + # check factorization + if min_diag > 0.0: + break + + # increase regularization + self._regularization = np.minimum( + self.regularization_max, + self._regularization * self.regularization_scale, + ) # check rank deficient - if min_diag < 1.0e-16: + if self._regularization >= self.regularization_max: self._status_msg = "rank deficient cost Hessian" return False @@ -1204,11 +1097,14 @@ def _eval_search_direction(self) -> bool: self._search_direction, self._hessian_factor, self._gradient, - self._ntotal, + self._ntotal_pin, self._nband, 0, ) + # update Hessian + self._hessian[:, -1] += self._regularization + # check small direction if ( np.linalg.norm(self._search_direction, np.inf) @@ -1227,12 +1123,12 @@ def _update_regularization(self) -> bool: """ # compute expected = g' d + 0.5 d' H d expected = np.dot(self._gradient, self._search_direction) - tmp = np.zeros(self._ntotal) + tmp = np.zeros(self._ntotal_pin) mujoco.mju_bandMulMatVec( tmp, self._hessian, self._search_direction, - self._ntotal, + self._ntotal_pin, self._nband, 0, 1, @@ -1271,6 +1167,15 @@ def optimize(self): self._iterations_step = 0 self._iterations_search = 0 + # ntotal pinned + self._ntotal_pin = self._ntotal - np.sum(self.pinned) * self.model.nv + + # resize + self._gradient.resize(self._ntotal_pin) + self._hessian.resize((self._ntotal_pin, self._nband)) + self._hessian_factor.resize((self._ntotal_pin, self._nband)) + self._search_direction.resize(self._ntotal_pin) + # initial cost self.cost_initial = self.cost(self.qpos) current_cost = self.cost_initial @@ -1287,7 +1192,7 @@ def optimize(self): self._cost_derivatives(self.qpos) # check gradient tolerance - self._gradient_norm = np.linalg.norm(self._gradient) / self._ntotal + self._gradient_norm = np.linalg.norm(self._gradient) / self._ntotal_pin if self._gradient_norm < self.gradient_tolerance: self._status_msg = "gradient tolerance achieved" return @@ -1316,6 +1221,7 @@ def optimize(self): self._search_direction, -1.0, self.horizon, + self.pinned, ) # candidate cost @@ -1362,7 +1268,7 @@ def status(self): print("\n") print( " gradient norm :", - np.linalg.norm(self._gradient) / self._ntotal, + np.linalg.norm(self._gradient) / self._ntotal_pin, ) print(" regularization :", self._regularization) print("\n") diff --git a/python/mujoco_mpc/demos/direct/particle_trajopt.py b/python/mujoco_mpc/demos/direct/particle_trajopt.py index 858d385c4..ed9d358e8 100644 --- a/python/mujoco_mpc/demos/direct/particle_trajopt.py +++ b/python/mujoco_mpc/demos/direct/particle_trajopt.py @@ -1,3 +1,4 @@ +# %% # Copyright 2023 DeepMind Technologies Limited # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,13 +14,12 @@ # limitations under the License. 
import matplotlib.pyplot as plt -import mediapy as media import mujoco -from mujoco_mpc import direct as direct_lib +import direct_optimizer import numpy as np - - # %% +## Example + # 2D Particle Model xml = """ @@ -40,11 +40,6 @@ - - - - - @@ -77,10 +72,10 @@ model = mujoco.MjModel.from_xml_string(xml) data = mujoco.MjData(model) -renderer = mujoco.Renderer(model) + # %% # initialization -T = 100 +T = 400 q0 = np.array([-0.25, -0.25]) qM = np.array([-0.25, 0.25]) qN = np.array([0.25, -0.25]) @@ -97,6 +92,7 @@ # time time = [t * model.opt.timestep for t in range(T)] + # %% # plot position fig = plt.figure() @@ -111,110 +107,74 @@ plt.legend() plt.xlabel("X") plt.ylabel("Y") + # %% -# optimizer model -model_optimizer = mujoco.MjModel.from_xml_string(xml) - -# direct optimizer -configuration_length = T + 2 -optimizer = direct_lib.Direct( - model=model_optimizer, - configuration_length=configuration_length, -) -# %% -# set data -for t in range(configuration_length): - # unpack - qt = np.zeros(model.nq) - st = np.zeros(model.nsensordata) - mt = np.zeros(model.nsensor) - ft = np.zeros(model.nv) - ct = np.zeros(model.nu) - tt = np.array([t * model.opt.timestep]) +# create optimizer +optimizer = direct_optimizer.DirectOptimizer(model, T) + +# settings +optimizer.max_iterations = 10 +optimizer.max_search_iterations = 10 + +# force weight +fw = 5.0e2 +# set data +for t in range(T): # set initial state if t == 0 or t == 1: - qt = q0 - st = q0 - mt = np.array([1, 1]) + optimizer.pinned[t] = True + optimizer.qpos[:, t] = q0 + if t == 1: + optimizer.weights_force[:, t] = fw # set goal - elif t >= configuration_length - 2: - qt = qT - st = qT - mt = np.array([1, 1]) + elif t >= T - 2: + optimizer.pinned[t] = True + optimizer.qpos[:, t] = qT + if t == T - 2: + optimizer.weights_force[:, t] = fw # set waypoint - elif t == 25: - st = qM - mt = np.array([1, 1]) + elif t == 100: + optimizer.pinned[t] = False + optimizer.qpos[:, t] = qM + optimizer.sensor_target[: model.nq, t] = qM + optimizer.weights_force[:, t] = fw + optimizer.weights_sensor[:, t] = 1.0 # set waypoint - elif t == 75: - st = qN - mt = np.array([1, 1]) + elif t == 300: + optimizer.pinned[t] = False + optimizer.qpos[:, t] = qN + optimizer.sensor_target[: model.nq, t] = qN + optimizer.weights_force[:, t] = fw + optimizer.weights_sensor[:, t] = 1.0 # initialize qpos else: - qt = qinterp[:, t - 1] - mt = np.array([0, 0]) - - # set data - data_ = optimizer.data( - t, - configuration=qt, - sensor_measurement=st, - sensor_mask=mt, - force_measurement=ft, - time=tt, - ) -# %% -# set std -optimizer.noise(process=np.array([1000.0, 1000.0]), sensor=np.array([1.0, 1.0])) - -# set settings -optimizer.settings( - sensor_flag=True, - force_flag=True, - max_smoother_iterations=1000, - max_search_iterations=1000, - regularization_initial=1.0e-12, - gradient_tolerance=1.0e-6, - search_direction_tolerance=1.0e-6, - cost_tolerance=1.0e-6, - first_step_position_sensors=True, - last_step_position_sensors=True, - last_step_velocity_sensors=True, -) + optimizer.pinned[t] = False + optimizer.qpos[:, t] = qinterp[:, t] + optimizer.weights_force[:, t] = fw + optimizer.weights_sensor[:, t] = 0.0 # optimize optimizer.optimize() -# costs -optimizer.print_cost() - # status -optimizer.print_status() -# %% -# get estimated trajectories -q_est = np.zeros((model_optimizer.nq, configuration_length)) -v_est = np.zeros((model_optimizer.nv, configuration_length)) -s_est = np.zeros((model_optimizer.nsensordata, configuration_length)) -f_est = np.zeros((model_optimizer.nv, 
configuration_length)) -t_est = np.zeros(configuration_length) -for t in range(configuration_length): - data_ = optimizer.data(t) - q_est[:, t] = data_["configuration"] - v_est[:, t] = data_["velocity"] - s_est[:, t] = data_["sensor_prediction"] - f_est[:, t] = data_["force_prediction"] - t_est[t] = data_["time"] +optimizer.status() + # %% # plot position fig = plt.figure() plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") -plt.plot(q_est[0, :], q_est[1, :], label="direct trajopt", color="orange") +plt.plot( + optimizer.qpos[0, :], + optimizer.qpos[1, :], + label="direct trajopt", + color="orange", +) plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") plt.plot(qM[0], qM[1], color="magenta", marker="o") plt.plot(qN[0], qN[1], color="magenta", marker="o") @@ -224,37 +184,22 @@ plt.xlabel("X") plt.ylabel("Y") -# plot velocity -fig = plt.figure() - -# velocity -plt.plot(t_est[1:] - model.opt.timestep, v_est[0, 1:], label="v0", color="cyan") -plt.plot( - t_est[1:] - model.opt.timestep, v_est[1, 1:], label="v1", color="orange" +# %% +# recover ctrl +mujoco.mj_forward(model, data) +ctrl = np.vstack( + [ + np.linalg.pinv(data.actuator_moment.T) @ optimizer.force[:, t] + for t in range(T) + ] ) +# plot ctrl +fig = plt.figure() +times = [t * model.opt.timestep for t in range(T)] + +plt.step(times, ctrl[:, 0], label="action 0", color="orange") +plt.step(times, ctrl[:, 1], label="action 1", color="magenta") plt.legend() plt.xlabel("Time (s)") -plt.ylabel("Velocity") -# %% -# frames optimized -frames_opt = [] - -# simulate -for t in range(configuration_length - 1): - # get solution from optimizer - data_ = optimizer.data(t) - - # set configuration - data.qpos = q_est[:, t] - data.qvel = v_est[:, t] - - mujoco.mj_forward(model, data) - - # render and save frames - renderer.update_scene(data) - pixels = renderer.render() - frames_opt.append(pixels) - -# display video -# media.show_video(frames_opt, fps=1.0 / model.opt.timestep, loop=False) +plt.ylabel("ctrl") diff --git a/python/mujoco_mpc/demos/direct/particle_trajopt2.py b/python/mujoco_mpc/demos/direct/particle_trajopt2.py deleted file mode 100644 index 0ceabeaeb..000000000 --- a/python/mujoco_mpc/demos/direct/particle_trajopt2.py +++ /dev/null @@ -1,377 +0,0 @@ -# Copyright 2023 DeepMind Technologies Limited -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import matplotlib.pyplot as plt -import mujoco -from direct import direct_optimizer -import numpy as np -from numpy import typing as npt - -# %% -## Example - -# 2D Particle Model -xml = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -""" - -model = mujoco.MjModel.from_xml_string(xml) -data = mujoco.MjData(model) - -# %% -# initialization -T = 400 -q0 = np.array([-0.25, -0.25]) -qM = np.array([-0.25, 0.25]) -qN = np.array([0.25, -0.25]) -qT = np.array([0.25, 0.25]) - -# compute linear interpolation -qinterp = np.zeros((model.nq, T)) -for t in range(T): - # slope - slope = (qT - q0) / T - - # interpolation - qinterp[:, t] = q0 + t * slope - -# time -time = [t * model.opt.timestep for t in range(T)] - -# %% -# plot position -fig = plt.figure() - -# arm position -plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") -plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") -plt.plot(qM[0], qM[1], color="magenta", marker="o") -plt.plot(qN[0], qN[1], color="magenta", marker="o") -plt.plot(qT[0], qT[1], color="magenta", marker="o") - -plt.legend() -plt.xlabel("X") -plt.ylabel("Y") - -# %% -# create optimizer -optimizer = direct_optimizer.DirectOptimizer(model, T) - -# settings -optimizer.max_iterations = 10 -optimizer.max_search_iterations = 10 - -# force weight -fw = 5.0e2 - -# set data -for t in range(T): - # set initial state - if t == 0 or t == 1: - optimizer.qpos[:, t] = q0 - optimizer.sensor_target[: model.nq, t] = q0 - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # set goal - elif t >= T - 2: - optimizer.qpos[:, t] = qT - optimizer.sensor_target[: model.nq, t] = qT - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # set waypoint - elif t == 100: - optimizer.qpos[:, t] = qM - optimizer.sensor_target[: model.nq, t] = qM - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # set waypoint - elif t == 300: - optimizer.qpos[:, t] = qN - optimizer.sensor_target[: model.nq, t] = qN - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 1.0 - - # initialize qpos - else: - optimizer.qpos[:, t] = qinterp[:, t] - optimizer.weights_force[:, t] = fw - optimizer.weights_sensor[:, t] = 0.0 - -# optimize -optimizer.optimize() - -# status -optimizer.status() - -# %% -# plot position -fig = plt.figure() - -plt.plot(qinterp[0, :], qinterp[1, :], label="interpolation", color="black") -plt.plot( - optimizer.qpos[0, :], - optimizer.qpos[1, :], - label="direct trajopt", - color="orange", -) -plt.plot(q0[0], q0[1], color="magenta", label="waypoint", marker="o") -plt.plot(qM[0], qM[1], color="magenta", marker="o") -plt.plot(qN[0], qN[1], color="magenta", marker="o") -plt.plot(qT[0], qT[1], color="magenta", marker="o") - -plt.legend() -plt.xlabel("X") -plt.ylabel("Y") - -# %% -# recover ctrl -mujoco.mj_forward(model, data) -ctrl = np.vstack([data.actuator_moment @ optimizer.force[:, t] for t in range(T)]) - -# plot ctrl -fig = plt.figure() -times = [t * model.opt.timestep for t in range(T)] - -plt.step(times, ctrl[:, 0], label="action 0", color="orange") -plt.step(times, ctrl[:, 1], label="action 1", color="magenta") -plt.legend() -plt.xlabel("Time (s)") -plt.ylabel("ctrl") - -# %% -# trajectories -T = 3 -qpos = np.zeros((model.nq, T)) -qvel = np.zeros((model.nv, T)) -qacc = np.zeros((model.nv, T)) -ctrl = np.zeros((model.nu, T)) -qfrc = np.zeros((model.nv, T)) -sensor = np.zeros((model.nsensordata, T)) -noisy_sensor = 
np.zeros((model.nsensordata, T)) -time = np.zeros(T) - -# set initial state -mujoco.mj_resetData(model, data) - -# simulate -for t in range(T): - # forward dynamics - mujoco.mj_forward(model, data) - - # cache - qpos[:, t] = data.qpos - qvel[:, t] = data.qvel - qacc[:, t] = data.qacc - ctrl[:, t] = data.ctrl - qfrc[:, t] = data.qfrc_actuator - sensor[:, t] = data.sensordata - time[t] = data.time - - # noisy sensors - noisy_sensor[:, t] = sensor[:, t] - - # intergrate with Euler - mujoco.mj_Euler(model, data) - -# create optimizer -optimizer = direct_optimizer.DirectOptimizer(model, T) - -# initialize -optimizer.qpos = 0.0 * np.ones((model.nq, T)) - -# set data -optimizer.sensor_target = sensor -optimizer.force_target = qfrc -optimizer.ctrl = ctrl - -# set weights -optimizer.weights_force[:, :] = 1.0 -optimizer.weights_sensor[:, :] = 1.0 - -# settings -optimizer.max_iterations = 10 -optimizer.max_search_iterations = 10 - -# optimize -optimizer.optimize() - -# status -optimizer.status() - -# %% -def test_gradient( - optimizer: direct_optimizer.DirectOptimizer, - qpos: npt.ArrayLike, - eps: float = 1.0e-10, - verbose: bool = False, -): - # evaluate nominal cost - c0 = optimizer.cost(qpos) - - # evaluate optimizer gradient - optimizer._cost_derivatives(qpos) - g0 = np.array(optimizer._gradient) - - # finite difference gradient - g = np.zeros(optimizer._ntotal) - - # horizon - T = optimizer.horizon - - # loop over inputs - for i in range(optimizer._ntotal): - # nudge - nudge = np.zeros(optimizer._ntotal) - nudge[i] += eps - - # qpos nudge - qnudge = direct_optimizer.configuration_update( - optimizer.model, qpos, nudge, 1.0, T) - - # evaluate - c = optimizer.cost(qnudge) - - # derivative - g[i] = (c - c0) / eps - - if verbose: - print("gradient optimizer: \n", g0) - print("gradient finite difference: \n", g) - - # return max difference - return np.linalg.norm(g - g0, np.Inf) - - -def test_hessian( - optimizer: direct_optimizer.DirectOptimizer, - qpos: npt.ArrayLike, - eps: float = 1.0e-5, - verbose: bool = False, -): - # evaluate nominal cost - c0 = optimizer.cost(qpos) - - # evaluate optimizer Hessian - optimizer._cost_derivatives(qpos) - h0 = np.zeros((optimizer._ntotal, optimizer._ntotal)) - mujoco.mju_band2Dense( - h0, optimizer._hessian.ravel(), optimizer._ntotal, optimizer._nband, 0, 1 - ) - - # finite difference gradient - h = np.zeros((optimizer._ntotal, optimizer._ntotal)) - - # horizon - T = optimizer.horizon - - for i in range(optimizer._ntotal): - for j in range(i, optimizer._ntotal): - # workspace - w1 = np.zeros(optimizer._ntotal) - w2 = np.zeros(optimizer._ntotal) - w3 = np.zeros(optimizer._ntotal) - - # workspace 1 - w1[i] += eps - w1[j] += eps - - # qpos nudge 1 - qnudge1 = direct_optimizer.configuration_update( - optimizer.model, qpos, w1, 1.0, T) - - cij = optimizer.cost(qnudge1) - - # workspace 2 - w2[i] += eps - - # qpos nudge 2 - qnudge2 = direct_optimizer.configuration_update( - optimizer.model, qpos, w2, 1.0, T) - - ci = optimizer.cost(qnudge2) - - # workspace 3 - w3[j] += eps - - # qpos nudge 3 - qnudge3 = direct_optimizer.configuration_update( - optimizer.model, qpos, w3, 1.0, T) - - cj = optimizer.cost(qnudge3) - - # Hessian value - h[i, j] = (cij - ci - cj + c0) / (eps * eps) - h[j, i] = (cij - ci - cj + c0) / (eps * eps) - - if verbose: - print("Hessian optimizer: \n", h0) - print("Hessian finite difference: \n", h) - - # return maximum difference - return np.linalg.norm((h - h0).ravel(), np.Inf) - - -# %% -test_gradient(optimizer, np.ones((model.nq, T))) - -# %% 
-test_hessian(optimizer, np.zeros((model.nq, T))) diff --git a/python/mujoco_mpc/ui_agent_test.py b/python/mujoco_mpc/ui_agent_test.py index 7fbbadef0..c5e79094a 100644 --- a/python/mujoco_mpc/ui_agent_test.py +++ b/python/mujoco_mpc/ui_agent_test.py @@ -137,7 +137,8 @@ def test_get_cost_weights(self): agent.set_state(qpos=[0, 0.5], qvel=[1, 1]) terms_dict = agent.get_cost_term_values() terms = list(terms_dict.values()) - self.assertFalse(np.any(np.isclose(terms, 0, rtol=0, atol=1e-6))) + self.assertFalse(np.any(np.isclose(terms, 0, rtol=0, atol=1e-6)), + f"no values should be close to 0. got {terms}") def test_set_state_with_lists(self): model_path = ( @@ -169,6 +170,18 @@ def test_set_get_mode(self): agent.set_mode("default_mode") self.assertEqual(agent.get_mode(), "default_mode") + def test_get_all_modes(self): + model_path = ( + pathlib.Path(__file__).parent.parent.parent + / "mjpc/tasks/quadruped/task_flat.xml" + ) + model = mujoco.MjModel.from_xml_path(str(model_path)) + with self.get_agent(task_id="Quadruped Flat", model=model) as agent: + self.assertEqual( + tuple(agent.get_all_modes()), + ("Quadruped", "Biped", "Walk", "Scramble", "Flip"), + ) + @absltest.skip("asset import issue") def test_get_set_mode(self): model_path = (