Skip to content

Commit

Permalink
Merge of 7476409
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 605995514
Change-Id: I563e07fa96973767bf44ef3716b3182489e4a699
  • Loading branch information
copybara-github committed Feb 11, 2024
2 parents 3350d36 + 7476409 commit 4aedf43
Show file tree
Hide file tree
Showing 31 changed files with 499 additions and 93 deletions.
2 changes: 2 additions & 0 deletions mjpc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ add_library(
tasks/tasks.h
tasks/acrobot/acrobot.cc
tasks/acrobot/acrobot.h
tasks/allegro/allegro.cc
tasks/allegro/allegro.h
tasks/bimanual/bimanual.cc
tasks/bimanual/bimanual.h
tasks/cartpole/cartpole.cc
Expand Down
8 changes: 7 additions & 1 deletion mjpc/planners/gradient/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ void GradientPlanner::Reset(int horizon,
expected = 0.0;
improvement = 0.0;
surprise = 0.0;

// derivative skip
derivative_skip_ = GetNumberOrDefault(0, model, "derivative_skip");
}

// set state
Expand Down Expand Up @@ -191,6 +194,7 @@ void GradientPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {

// update policy
double c_best = c_prev;
int skip = derivative_skip_;
for (int i = 0; i < settings.max_rollout; i++) {
// ----- model derivatives ----- //
// start timer
Expand All @@ -200,7 +204,8 @@ void GradientPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
model_derivative.Compute(
model, data_, trajectory[0].states.data(), trajectory[0].actions.data(),
trajectory[0].times.data(), dim_state, dim_state_derivative, dim_action,
dim_sensor, horizon, settings.fd_tolerance, settings.fd_mode, pool);
dim_sensor, horizon, settings.fd_tolerance, settings.fd_mode, pool,
skip);

// stop timer
model_derivative_time += GetDuration(model_derivative_start);
Expand Down Expand Up @@ -468,6 +473,7 @@ void GradientPlanner::GUI(mjUI& ui) {
{mjITEM_SELECT, "Spline", 2, &policy.representation,
"Zero\nLinear\nCubic"},
{mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"},
{mjITEM_SLIDERINT, "Deriv. Skip", 2, &derivative_skip_, "0 16"},
{mjITEM_END}};

// set number of trajectory slider limits
Expand Down
1 change: 1 addition & 0 deletions mjpc/planners/gradient/planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ class GradientPlanner : public Planner {

private:
mutable std::shared_mutex mtx_;
int derivative_skip_ = 0;
};

} // namespace mjpc
Expand Down
6 changes: 5 additions & 1 deletion mjpc/planners/ilqg/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ void iLQGPlanner::Reset(int horizon, const double* initial_repeated_action) {
improvement = 0.0;
expected = 0.0;
surprise = 0.0;

// derivative skip
derivative_skip_ = GetNumberOrDefault(0, model, "derivative_skip");
}

// set state
Expand Down Expand Up @@ -248,6 +251,7 @@ void iLQGPlanner::GUI(mjUI& ui) {
"Zero\nLinear\nCubic"},
{mjITEM_SELECT, "Reg. Type", 2, &settings.regularization_type,
"Control\nFeedback\nValue\nNone"},
{mjITEM_SLIDERINT, "Deriv. Skip", 2, &derivative_skip_, "0 16"},
{mjITEM_CHECKINT, "Terminal Print", 2, &settings.verbose, ""},
{mjITEM_END}};

Expand Down Expand Up @@ -393,7 +397,7 @@ void iLQGPlanner::Iteration(int horizon, ThreadPool& pool) {
candidate_policy[0].trajectory.actions.data(),
candidate_policy[0].trajectory.times.data(), dim_state,
dim_state_derivative, dim_action, dim_sensor, horizon,
settings.fd_tolerance, settings.fd_mode, pool);
settings.fd_tolerance, settings.fd_mode, pool, derivative_skip_);

// stop timer
double model_derivative_time = GetDuration(model_derivative_start);
Expand Down
1 change: 1 addition & 0 deletions mjpc/planners/ilqg/planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ class iLQGPlanner : public Planner {
private:
int num_trajectory_ = 1;
int num_rollouts_gui_ = 1;
int derivative_skip_ = 0;
};

} // namespace mjpc
Expand Down
141 changes: 110 additions & 31 deletions mjpc/planners/model_derivatives.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,40 +48,119 @@ void ModelDerivatives::Compute(const mjModel* m,
const double* h, int dim_state,
int dim_state_derivative, int dim_action,
int dim_sensor, int T, double tol, int mode,
ThreadPool& pool) {
{
int count_before = pool.GetCount();
for (int t = 0; t < T; t++) {
pool.Schedule([&m, &data, &A = A, &B = B, &C = C, &D = D, &x, &u, &h,
dim_state, dim_state_derivative, dim_action, dim_sensor,
tol, mode, t, T]() {
mjData* d = data[ThreadPool::WorkerId()].get();
// set state
SetState(m, d, x + t * dim_state);
d->time = h[t];

// set action
mju_copy(d->ctrl, u + t * dim_action, dim_action);
ThreadPool& pool, int skip) {
// reset indices
evaluate_.clear();
interpolate_.clear();

// Jacobians
if (t == T - 1) {
// Jacobians
mjd_transitionFD(m, d, tol, mode, nullptr, nullptr,
DataAt(C, t * (dim_sensor * dim_state_derivative)),
nullptr);
} else {
// derivatives
mjd_transitionFD(
m, d, tol, mode,
DataAt(A, t * (dim_state_derivative * dim_state_derivative)),
DataAt(B, t * (dim_state_derivative * dim_action)),
DataAt(C, t * (dim_sensor * dim_state_derivative)),
DataAt(D, t * (dim_sensor * dim_action)));
}
});
// evaluate indices
int s = skip + 1;
evaluate_.push_back(0);
for (int t = s; t < T - s; t += s) {
evaluate_.push_back(t);
}
evaluate_.push_back(T - 2);
evaluate_.push_back(T - 1);

// interpolate indices
for (int t = 0, e = 0; t < T; t++) {
if (e == evaluate_.size() || evaluate_[e] > t) {
interpolate_.push_back(t);
} else {
e++;
}
pool.WaitCount(count_before + T);
}

// evaluate derivatives
int count_before = pool.GetCount();
for (int t : evaluate_) {
pool.Schedule([&m, &data, &A = A, &B = B, &C = C, &D = D, &x, &u, &h,
dim_state, dim_state_derivative, dim_action, dim_sensor, tol,
mode, t, T]() {
mjData* d = data[ThreadPool::WorkerId()].get();
// set state
SetState(m, d, x + t * dim_state);
d->time = h[t];

// set action
mju_copy(d->ctrl, u + t * dim_action, dim_action);

// Jacobians
if (t == T - 1) {
// Jacobians
mjd_transitionFD(m, d, tol, mode, nullptr, nullptr,
DataAt(C, t * (dim_sensor * dim_state_derivative)),
nullptr);
} else {
// derivatives
mjd_transitionFD(
m, d, tol, mode,
DataAt(A, t * (dim_state_derivative * dim_state_derivative)),
DataAt(B, t * (dim_state_derivative * dim_action)),
DataAt(C, t * (dim_sensor * dim_state_derivative)),
DataAt(D, t * (dim_sensor * dim_action)));
}
});
}
pool.WaitCount(count_before + evaluate_.size());
pool.ResetCount();

// interpolate derivatives
count_before = pool.GetCount();
for (int t : interpolate_) {
pool.Schedule([&A = A, &B = B, &C = C, &D = D, &evaluate_ = this->evaluate_,
dim_state_derivative, dim_action, dim_sensor, t]() {
// find interval
int bounds[2];
FindInterval(bounds, evaluate_, t, evaluate_.size());
int e0 = evaluate_[bounds[0]];
int e1 = evaluate_[bounds[1]];

// normalized input
double tt = double(t - e0) / double(e1 - e0);
if (bounds[0] == bounds[1]) {
tt = 0.0;
}

// A
int nA = dim_state_derivative * dim_state_derivative;
double* Ai = DataAt(A, t * nA);
const double* AL = DataAt(A, e0 * nA);
const double* AU = DataAt(A, e1 * nA);

mju_scl(Ai, AL, 1.0 - tt, nA);
mju_addToScl(Ai, AU, tt, nA);

// B
int nB = dim_state_derivative * dim_action;
double* Bi = DataAt(B, t * nB);
const double* BL = DataAt(B, e0 * nB);
const double* BU = DataAt(B, e1 * nB);

mju_scl(Bi, BL, 1.0 - tt, nB);
mju_addToScl(Bi, BU, tt, nB);

// C
int nC = dim_sensor * dim_state_derivative;
double* Ci = DataAt(C, t * nC);
const double* CL = DataAt(C, e0 * nC);
const double* CU = DataAt(C, e1 * nC);

mju_scl(Ci, CL, 1.0 - tt, nC);
mju_addToScl(Ci, CU, tt, nC);

// D
int nD = dim_sensor * dim_action;
double* Di = DataAt(D, t * nD);
const double* DL = DataAt(D, e0 * nD);
const double* DU = DataAt(D, e1 * nD);

mju_scl(Di, DL, 1.0 - tt, nD);
mju_addToScl(Di, DU, tt, nD);
});
}

pool.WaitCount(count_before + interpolate_.size());
pool.ResetCount();
}

Expand Down
6 changes: 5 additions & 1 deletion mjpc/planners/model_derivatives.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class ModelDerivatives {
void Compute(const mjModel* m, const std::vector<UniqueMjData>& data,
const double* x, const double* u, const double* h, int dim_state,
int dim_state_derivative, int dim_action, int dim_sensor, int T,
double tol, int mode, ThreadPool& pool);
double tol, int mode, ThreadPool& pool, int skip = 0);

// Jacobians
std::vector<double> A; // model Jacobians wrt state
Expand All @@ -56,6 +56,10 @@ class ModelDerivatives {
// (T * dim_sensor * dim_state_derivative)
std::vector<double> D; // output Jacobians wrt action
// (T * dim_sensor * dim_action)

// indices
std::vector<int> evaluate_;
std::vector<int> interpolate_;
};

} // namespace mjpc
Expand Down
3 changes: 0 additions & 3 deletions mjpc/planners/sample_gradient/planner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ void SampleGradientPlanner::Initialize(mjModel* model, const Task& task) {
// task
this->task = &task;

// rollout parameters
timestep_power = 1.0;

// exploration noise
noise_exploration = GetNumberOrDefault(0.1, model, "sampling_exploration");

Expand Down
3 changes: 0 additions & 3 deletions mjpc/planners/sample_gradient/planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,6 @@ class SampleGradientPlanner : public Planner {
// order of indices of rolled out trajectories, ordered by total return
std::vector<int> trajectory_order;

// rollout parameters
double timestep_power;

// zero-mean Gaussian noise standard deviation
double noise_exploration;
std::vector<double> noise;
Expand Down
9 changes: 9 additions & 0 deletions mjpc/tasks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@

add_custom_target(
copy_menagerie_resources ALL
COMMAND ${CMAKE_COMMAND} -E copy
${menagerie_SOURCE_DIR}/wonik_allegro/right_hand.xml
${CMAKE_CURRENT_BINARY_DIR}/allegro/right_hand.xml
COMMAND ${CMAKE_COMMAND} -E copy_directory
${menagerie_SOURCE_DIR}/wonik_allegro/assets
${CMAKE_CURRENT_BINARY_DIR}/allegro/assets
COMMAND patch -o ${CMAKE_CURRENT_BINARY_DIR}/allegro/right_hand_modified.xml
${CMAKE_CURRENT_BINARY_DIR}/allegro/right_hand.xml
<${CMAKE_CURRENT_SOURCE_DIR}/allegro/right_hand.xml.patch
COMMAND ${CMAKE_COMMAND} -E copy_directory
${menagerie_SOURCE_DIR}/shadow_hand/assets
${CMAKE_CURRENT_BINARY_DIR}/hand/assets
Expand Down
Loading

0 comments on commit 4aedf43

Please sign in to comment.