[SymForce] Optimizer improvements
- No longer allocate and copy the hessian for damping, just the diagonal
- Covariance computation no longer does any damping (we no longer have
  an extra copy of the hessian to damp, and I don't want to make the
  argument non-const - damping here is a bit dubious anyway imo)
- diagonal_damping_min also applies when keep_max_diagonal_damping is
  off
- Linear error is computed without computing the Jacobian, and it's now
  always on.  The expense is linear in the number of optimized variables
  (vector-vector products and sums), based on Section 3.2 of
  http://www2.imm.dtu.dk/pubdb/edoc/imm3215.pdf (see the derivation
  after this list)
- Gain ratio is similarly always computed and logged
- Only suggest turning on debug_checks if it isn't already on
- `update_` stores the update, not the negative update.  I think we
  were also doing an extra allocation here when passing `-update_` to
  `Update`, although I did not confirm
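
For reference, a sketch of the identity used for the linear error,
following Section 3.2 of the reference above.  With g = J^T r (the
stored `rhs`), damping vector d, and update h solving the damped normal
equations (this assumes the stored hessian is the Gauss-Newton
approximation J^T J), the predicted change in the linearized error is

  L(h) - L(0) = g^\top h + \tfrac{1}{2} h^\top J^\top J h
              = g^\top h - \tfrac{1}{2} h^\top (g + d \circ h)
              = \tfrac{1}{2} h^\top (g - d \circ h)

where \circ is the elementwise product and the second line substitutes
J^\top J h = -g - d \circ h.  This is what `LinearDeltaError` below
evaluates, using only vector-vector products.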

Reviewers: ryan-b,xipeng
Topic: sf-opt-1
GitOrigin-RevId: 372bf6863d4813c867e09f5f87896c265fe4af05
aaron-skydio authored and ryan-brott-skydio committed Feb 13, 2024
1 parent 7da9bed commit bd7c210
Showing 7 changed files with 77 additions and 57 deletions.
4 changes: 4 additions & 0 deletions symforce/opt/internal/levenberg_marquardt_state.h
@@ -56,6 +56,10 @@ class LevenbergMarquardtState {
     have_cached_error_ = false;
   }
 
+  Linearization<MatrixType>& GetLinearization() {
+    return linearization_;
+  }
+
   const Linearization<MatrixType>& GetLinearization() const {
     return linearization_;
   }
8 changes: 5 additions & 3 deletions symforce/opt/levenberg_marquardt_solver.h
@@ -202,8 +202,9 @@ class LevenbergMarquardtSolver {
   void ComputeCovariance(const MatrixType& hessian_lower, MatrixX<Scalar>& covariance);
 
  private:
-  MatrixType DampHessian(const MatrixType& hessian_lower, bool& have_max_diagonal,
-                         VectorX<Scalar>& max_diagonal, Scalar lambda) const;
+  void DampHessian(MatrixType& hessian_lower, bool& have_max_diagonal,
+                   VectorX<Scalar>& max_diagonal, Scalar lambda, VectorX<Scalar>& damping_vector,
+                   VectorX<Scalar>& undamped_diagonal) const;
 
   void CheckHessianDiagonal(const MatrixType& hessian_lower_damped, Scalar lambda);
 
@@ -239,7 +240,8 @@ class LevenbergMarquardtSolver {
 
   // Working storage to avoid reallocation
   VectorX<Scalar> update_;
-  MatrixType H_damped_;
+  VectorX<Scalar> damping_vector_;
+  VectorX<Scalar> undamped_diagonal_;
   Eigen::Array<bool, Eigen::Dynamic, 1> zero_diagonal_;
   std::vector<int> zero_diagonal_indices_;
 
90 changes: 46 additions & 44 deletions symforce/opt/levenberg_marquardt_solver.tcc
@@ -20,36 +20,37 @@ namespace sym {
 // ----------------------------------------------------------------------------
 
 template <typename ScalarType, typename LinearSolverType>
-typename LevenbergMarquardtSolver<ScalarType, LinearSolverType>::MatrixType
-LevenbergMarquardtSolver<ScalarType, LinearSolverType>::DampHessian(const MatrixType& hessian_lower,
-                                                                    bool& have_max_diagonal,
-                                                                    VectorX<Scalar>& max_diagonal,
-                                                                    const Scalar lambda) const {
+void LevenbergMarquardtSolver<ScalarType, LinearSolverType>::DampHessian(
+    MatrixType& hessian_lower, bool& have_max_diagonal, VectorX<Scalar>& max_diagonal,
+    const Scalar lambda, VectorX<Scalar>& damping_vector,
+    VectorX<Scalar>& undamped_diagonal) const {
   SYM_TIME_SCOPE("LM<{}>: DampHessian", id_);
-  MatrixType H_damped = hessian_lower;
+
+  undamped_diagonal = hessian_lower.diagonal();
 
   if (p_.use_diagonal_damping) {
     if (p_.keep_max_diagonal_damping) {
       if (!have_max_diagonal) {
-        max_diagonal = H_damped.diagonal();
-        max_diagonal = max_diagonal.cwiseMax(p_.diagonal_damping_min);
+        max_diagonal = undamped_diagonal.cwiseMax(p_.diagonal_damping_min);
       } else {
-        max_diagonal = max_diagonal.cwiseMax(H_damped.diagonal());
+        max_diagonal = max_diagonal.cwiseMax(undamped_diagonal);
       }
 
       have_max_diagonal = true;
 
-      H_damped.diagonal().array() += max_diagonal.array() * lambda;
+      damping_vector = max_diagonal * lambda;
     } else {
-      H_damped.diagonal().array() += H_damped.diagonal().array() * lambda;
+      damping_vector = undamped_diagonal.cwiseMax(p_.diagonal_damping_min) * lambda;
    }
+  } else {
+    damping_vector = VectorX<Scalar>::Zero(hessian_lower.rows());
   }
 
   if (p_.use_unit_damping) {
-    H_damped.diagonal().array() += lambda;
+    damping_vector.array() += lambda;
   }
 
-  return H_damped;
+  hessian_lower.diagonal() += damping_vector;
 }
 
 template <typename ScalarType, typename LinearSolverType>
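
As a reading aid, a minimal standalone sketch of the damping vector
assembled above (a hypothetical helper, not the SymForce API; the
keep_max_diagonal_damping bookkeeping is omitted for brevity):

#include <Eigen/Core>

Eigen::VectorXd MakeDampingVector(const Eigen::VectorXd& undamped_diagonal, const double lambda,
                                  const double diagonal_damping_min,
                                  const bool use_diagonal_damping, const bool use_unit_damping) {
  Eigen::VectorXd damping_vector = Eigen::VectorXd::Zero(undamped_diagonal.size());
  if (use_diagonal_damping) {
    // diagonal_damping_min now applies in this branch too, clamping small or
    // zero diagonal entries before scaling by lambda (Marquardt-style damping)
    damping_vector = undamped_diagonal.cwiseMax(diagonal_damping_min) * lambda;
  }
  if (use_unit_damping) {
    damping_vector.array() += lambda;  // classic Levenberg lambda * I term
  }
  // The caller applies this in place: hessian_lower.diagonal() += damping_vector
  return damping_vector;
}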
@@ -100,27 +101,23 @@ void LevenbergMarquardtSolver<ScalarType, LinearSolverType>::PopulateIterationStats(
   iteration_stats.new_error = new_error;
   iteration_stats.relative_reduction = relative_reduction;
 
-  if (p_.include_jacobians) {
+  {
     SYM_TIME_SCOPE("LM<{}>: IterationStats - LinearErrorFromValues", id_);
-    iteration_stats.new_error_linear = state.Init().GetLinearization().LinearError(update_);
+    iteration_stats.new_error_linear =
+        state.Init().Error() +
+        state.Init().GetLinearization().LinearDeltaError(update_, damping_vector_);
   }
 
   if (p_.verbose) {
     SYM_TIME_SCOPE("LM<{}>: IterationStats - Print", id_);
-    if (p_.include_jacobians) {
-      spdlog::info(
-          "LM<{}> [iter {:4d}] lambda: {:.3e}, error prev/linear/new: {:.3e}/{:.3e}/{:.3e}, "
-          "rel reduction: {:.5e}",
-          id_, iteration_stats.iteration, iteration_stats.current_lambda, state.Init().Error(),
-          iteration_stats.new_error_linear, iteration_stats.new_error,
-          iteration_stats.relative_reduction);
-    } else {
-      spdlog::info(
-          "LM<{}> [iter {:4d}] lambda: {:.3e}, error prev/new: {:.3e}/{:.3e}, "
-          "rel reduction: {:.5e}",
-          id_, iteration_stats.iteration, iteration_stats.current_lambda, state.Init().Error(),
-          iteration_stats.new_error, iteration_stats.relative_reduction);
-    }
+    spdlog::info(
+        "LM<{}> [iter {:4d}] lambda: {:.3e}, error prev/linear/new: {:.3e}/{:.3e}/{:.3e}, "
+        "rel reduction: {:.5e}, gain ratio: {:.5e}",
+        id_, iteration_stats.iteration, iteration_stats.current_lambda, state.Init().Error(),
+        iteration_stats.new_error_linear, iteration_stats.new_error,
+        iteration_stats.relative_reduction,
+        (state.Init().Error() - new_error) /
+            (state.Init().Error() - iteration_stats.new_error_linear));
   }
 
   if (p_.debug_stats) {
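
The gain ratio logged above is the standard Levenberg-Marquardt
quantity: the actual error reduction divided by the reduction predicted
by the linear model,

  \varrho = \frac{F(x) - F(x + h)}{L(0) - L(h)}
          = \frac{\text{Error()} - \text{new\_error}}{\text{Error()} - \text{new\_error\_linear}}

so values near 1 indicate the linearization is trustworthy and values
near or below 0 indicate it is not.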
Expand Down Expand Up @@ -202,7 +199,9 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

if (!std::isfinite(state_.Init().Error())) {
spdlog::warn("LM<{}> Encountered non-finite initial error: {}", id_, state_.Init().Error());
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
if (!p_.debug_checks) {
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
}
return {{optimization_status_t::FAILED, FailureReason::INITIAL_ERROR_NOT_FINITE}};
}
}
Expand All @@ -214,15 +213,14 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(
solver_analyzed_ = true;
}

// TODO(aaron): Get rid of this copy
H_damped_ = DampHessian(state_.Init().GetLinearization().hessian_lower, have_max_diagonal_,
max_diagonal_, current_lambda_);
DampHessian(state_.Init().GetLinearization().hessian_lower, have_max_diagonal_, max_diagonal_,
current_lambda_, damping_vector_, undamped_diagonal_);

CheckHessianDiagonal(H_damped_, current_lambda_);
CheckHessianDiagonal(state_.Init().GetLinearization().hessian_lower, current_lambda_);

{
SYM_TIME_SCOPE("LM<{}>: SparseFactorize", id_);
const bool success = linear_solver_.Factorize(H_damped_);
const bool success = linear_solver_.Factorize(state_.Init().GetLinearization().hessian_lower);
// TODO(brad): Instead try recovering from this (ultimately by increasing lambda).
SYM_ASSERT(success, "Internal Error: Damped hessian factorization failed");

Expand All @@ -236,7 +234,12 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

{
SYM_TIME_SCOPE("LM<{}>: SparseSolve", id_);
update_ = linear_solver_.Solve(state_.Init().GetLinearization().rhs);
update_ = -linear_solver_.Solve(state_.Init().GetLinearization().rhs);
}

{
SYM_TIME_SCOPE("LM<{}>: ResetHessianDiagonal", id_);
state_.Init().GetLinearization().hessian_lower.diagonal() = undamped_diagonal_;
}

if (p_.debug_checks && !update_.array().isFinite().all()) {
Expand All @@ -245,7 +248,7 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

{
SYM_TIME_SCOPE("LM<{}>: Update", id_);
Update(state_.Init().values, index_, -update_, state_.New().values);
Update(state_.Init().values, index_, update_, state_.New().values);
}

state_.New().Relinearize(func);
Expand All @@ -260,7 +263,9 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

if (!std::isfinite(new_error)) {
spdlog::warn("LM<{}> Encountered non-finite error: {}", id_, new_error);
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
if (!p_.debug_checks) {
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
}
}

optional<std::pair<optimization_status_t, FailureReason>> status{};
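
Taken together, this removes the H_damped_ copy from the iteration
entirely; a minimal sketch of the resulting damp / factorize / solve /
restore pattern (dense Eigen stand-ins for the actual linearization and
linear solver types):

#include <Eigen/Dense>

Eigen::VectorXd DampedSolveInPlace(Eigen::MatrixXd& hessian, const Eigen::VectorXd& rhs,
                                   const Eigen::VectorXd& damping_vector) {
  const Eigen::VectorXd undamped_diagonal = hessian.diagonal();  // save
  hessian.diagonal() += damping_vector;                          // DampHessian, in place

  // Leading minus: update_ now stores the update itself, h = -(H + D)^{-1} g,
  // so values are updated with +update_ rather than -update_
  const Eigen::VectorXd update = -hessian.ldlt().solve(rhs);

  hessian.diagonal() = undamped_diagonal;  // ResetHessianDiagonal
  return update;
}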
@@ -320,14 +325,11 @@ void LevenbergMarquardtSolver<ScalarType, LinearSolverType>::ComputeCovariance(
     const MatrixType& hessian_lower, MatrixX<Scalar>& covariance) {
   SYM_TIME_SCOPE("LM<{}>: ComputeCovariance()", id_);
 
-  H_damped_ = hessian_lower;
-  H_damped_.diagonal().array() += epsilon_;
-
   // TODO(hayk, aaron): This solver assumes a dense RHS, should add support for a sparse RHS
-  const bool success = linear_solver_.Factorize(H_damped_);
-  // TODO(brad): Instead try recovering from this by damping by something larger than epsilon_
+  const bool success = linear_solver_.Factorize(hessian_lower);
+  // TODO(brad): Instead try recovering from this by damping?
   SYM_ASSERT(success, "Internal Error: damped hessian factorization failed");
-  covariance = MatrixX<Scalar>::Identity(H_damped_.rows(), H_damped_.rows());
+  covariance = MatrixX<Scalar>::Identity(hessian_lower.rows(), hessian_lower.rows());
   linear_solver_.SolveInPlace(covariance);
 }
 
19 changes: 13 additions & 6 deletions symforce/opt/linearization.h
@@ -47,15 +47,22 @@ struct Linearization {
     initialized_ = initialized;
   }
 
-  inline double Error() const {
+  inline Scalar Error() const {
     SYM_ASSERT(IsInitialized());
-    return 0.5 * residual.squaredNorm();
+    return Scalar{0.5} * residual.squaredNorm();
   }
 
-  inline double LinearError(const Vector& x_update) const {
-    SYM_ASSERT(jacobian.cols() == x_update.size());
-    const auto linear_residual_new = -jacobian * x_update + residual;
-    return 0.5 * linear_residual_new.squaredNorm();
+  /**
+   * Returns the change in error predicted by the Linearization at the given update
+   *
+   * @param x_update The update to the values
+   * @param damping_vector The vector added to the diagonal of the hessian during the linear solve
+   */
+  inline Scalar LinearDeltaError(const Vector& x_update, const Vector& damping_vector) const {
+    SYM_ASSERT(IsInitialized());
+    // See Section 3.2 of "Methods For Non-Linear Least Squares Problems" 2nd Edition.
+    // http://www2.imm.dtu.dk/pubdb/edoc/imm3215.pdf
+    return Scalar{0.5} * x_update.dot(rhs - damping_vector.cwiseProduct(x_update));
   }
 
   // Sparse storage
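
A quick self-contained check of the new formula (assuming only Eigen;
J, r, and d are made up), comparing the O(n) expression against the
Jacobian-based linear error it replaces:

#include <iostream>

#include <Eigen/Dense>

int main() {
  const Eigen::MatrixXd J = Eigen::MatrixXd::Random(10, 4);     // made-up Jacobian
  const Eigen::VectorXd r = Eigen::VectorXd::Random(10);        // made-up residual
  const Eigen::VectorXd d = Eigen::VectorXd::Constant(4, 0.1);  // damping vector

  const Eigen::VectorXd rhs = J.transpose() * r;   // gradient g = J^T r
  Eigen::MatrixXd H = J.transpose() * J;           // Gauss-Newton hessian
  H.diagonal() += d;                               // damped in place
  const Eigen::VectorXd h = -H.ldlt().solve(rhs);  // the (positive) update

  // Change in linearized error, computed both ways
  const double via_jacobian = 0.5 * (r + J * h).squaredNorm() - 0.5 * r.squaredNorm();
  const double via_rhs_only = 0.5 * h.dot(rhs - d.cwiseProduct(h));  // LinearDeltaError

  std::cout << via_jacobian << " vs " << via_rhs_only << "\n";  // equal to roundoff
  return 0;
}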
3 changes: 2 additions & 1 deletion symforce/pybind/cc_linearization.cc
@@ -32,7 +32,8 @@ void AddLinearizationWrapper(pybind11::module_ module) {
       .def("set_initialized", &sym::SparseLinearizationd::SetInitialized,
            py::arg("initialized") = true)
       .def("error", &sym::SparseLinearizationd::Error)
-      .def("linear_error", &sym::SparseLinearizationd::LinearError, py::arg("x_update"))
+      .def("linear_delta_error", &sym::SparseLinearizationd::LinearDeltaError, py::arg("x_update"),
+           py::arg("damping_vector"))
       .def(py::pickle(
           [](const sym::SparseLinearizationd& linearization) {  // __getstate__
             return py::make_tuple(linearization.residual, linearization.hessian_lower,
4 changes: 3 additions & 1 deletion symforce/pybind/cc_sym.pyi
@@ -234,7 +234,9 @@ class Linearization:
         """
         Returns whether the linearization is currently valid for the corresponding values. Accessing any of the members when this is false could result in unexpected behavior.
         """
-    def linear_error(self, x_update: numpy.ndarray) -> float: ...
+    def linear_delta_error(
+        self, x_update: numpy.ndarray, damping_vector: numpy.ndarray
+    ) -> float: ...
     def reset(self) -> None:
         """
         Set to invalid.
6 changes: 4 additions & 2 deletions test/symforce_cc_sym_test.py
@@ -777,8 +777,10 @@ def test_optimizer(self) -> None:
 
         lin.set_initialized()
         self.assertIsInstance(lin.error(), T.Scalar)
-        self.assertIsInstance(lin.linear_error(x_update=np.array([0.01])), T.Scalar)
-        lin.linear_error(np.array([0.01]))
+        self.assertIsInstance(
+            lin.linear_delta_error(x_update=np.array([0.01]), damping_vector=np.array([0.0])),
+            T.Scalar,
+        )
 
         with self.subTest(msg="cc_sym.Linearization is pickleable"):
             linearization = cc_sym.Linearization()
