[SymForce] Optimizer improvements
- No longer allocate and copy the hessian for damping, just the diagonal
- Covariance computation no longer does any damping (we no longer have
  an extra copy of the hessian to damp, and I don't want to make the
  argument non-const - damping here is a bit dubious anyway imo)
- diagonal_damping_min also applies when keep_max_diagonal_damping is
  off
- Linear error is computed without computing the Jacobian, and it's now
  always on.  The expense is linear in the number of optimized variables
  (vector-vector products and sums), based on Section 3.2 of
  http://www2.imm.dtu.dk/pubdb/edoc/imm3215.pdf (see the derivation
  after this list)
- Gain ratio is similarly always computed and logged
- Only suggest turning on debug_checks if it isn't already on
- `update_` stores the update, not the negative update.  I think we
  were also doing an extra allocation here when passing `-update_` to
  `Update`, although I did not confirm
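
For reference, a sketch of the identity used for the linear error,
following Section 3.2 of the reference above.  With g = J^T r (the
stored `rhs`), damping vector d, and update h solving the damped normal
equations (this assumes the stored hessian is the Gauss-Newton
approximation J^T J), the predicted change in the linearized error is

  L(h) - L(0) = g^\top h + \tfrac{1}{2} h^\top J^\top J h
              = g^\top h - \tfrac{1}{2} h^\top (g + d \circ h)
              = \tfrac{1}{2} h^\top (g - d \circ h)

where \circ is the elementwise product and the second line substitutes
J^\top J h = -g - d \circ h.  This is what `LinearDeltaError` below
evaluates, using only vector-vector products.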

Reviewers: ryan-b,xipeng
Topic: sf-opt-1
GitOrigin-RevId: 372bf6863d4813c867e09f5f87896c265fe4af05
aaron-skydio authored and ryan-brott-skydio committed Feb 13, 2024
1 parent 7da9bed commit bd7c210
Showing 7 changed files with 77 additions and 57 deletions.
4 changes: 4 additions & 0 deletions symforce/opt/internal/levenberg_marquardt_state.h
@@ -56,6 +56,10 @@ class LevenbergMarquardtState {
     have_cached_error_ = false;
   }
 
+  Linearization<MatrixType>& GetLinearization() {
+    return linearization_;
+  }
+
   const Linearization<MatrixType>& GetLinearization() const {
     return linearization_;
   }
8 changes: 5 additions & 3 deletions symforce/opt/levenberg_marquardt_solver.h
@@ -202,8 +202,9 @@ class LevenbergMarquardtSolver {
   void ComputeCovariance(const MatrixType& hessian_lower, MatrixX<Scalar>& covariance);
 
  private:
-  MatrixType DampHessian(const MatrixType& hessian_lower, bool& have_max_diagonal,
-                         VectorX<Scalar>& max_diagonal, Scalar lambda) const;
+  void DampHessian(MatrixType& hessian_lower, bool& have_max_diagonal,
+                   VectorX<Scalar>& max_diagonal, Scalar lambda, VectorX<Scalar>& damping_vector,
+                   VectorX<Scalar>& undamped_diagonal) const;
 
   void CheckHessianDiagonal(const MatrixType& hessian_lower_damped, Scalar lambda);
 
@@ -239,7 +240,8 @@ class LevenbergMarquardtSolver {
 
   // Working storage to avoid reallocation
   VectorX<Scalar> update_;
-  MatrixType H_damped_;
+  VectorX<Scalar> damping_vector_;
+  VectorX<Scalar> undamped_diagonal_;
   Eigen::Array<bool, Eigen::Dynamic, 1> zero_diagonal_;
   std::vector<int> zero_diagonal_indices_;
 
90 changes: 46 additions & 44 deletions symforce/opt/levenberg_marquardt_solver.tcc
@@ -20,36 +20,37 @@ namespace sym {
 // ----------------------------------------------------------------------------
 
 template <typename ScalarType, typename LinearSolverType>
-typename LevenbergMarquardtSolver<ScalarType, LinearSolverType>::MatrixType
-LevenbergMarquardtSolver<ScalarType, LinearSolverType>::DampHessian(const MatrixType& hessian_lower,
-                                                                    bool& have_max_diagonal,
-                                                                    VectorX<Scalar>& max_diagonal,
-                                                                    const Scalar lambda) const {
+void LevenbergMarquardtSolver<ScalarType, LinearSolverType>::DampHessian(
+    MatrixType& hessian_lower, bool& have_max_diagonal, VectorX<Scalar>& max_diagonal,
+    const Scalar lambda, VectorX<Scalar>& damping_vector,
+    VectorX<Scalar>& undamped_diagonal) const {
   SYM_TIME_SCOPE("LM<{}>: DampHessian", id_);
-  MatrixType H_damped = hessian_lower;
+
+  undamped_diagonal = hessian_lower.diagonal();
 
   if (p_.use_diagonal_damping) {
     if (p_.keep_max_diagonal_damping) {
       if (!have_max_diagonal) {
-        max_diagonal = H_damped.diagonal();
-        max_diagonal = max_diagonal.cwiseMax(p_.diagonal_damping_min);
+        max_diagonal = undamped_diagonal.cwiseMax(p_.diagonal_damping_min);
       } else {
-        max_diagonal = max_diagonal.cwiseMax(H_damped.diagonal());
+        max_diagonal = max_diagonal.cwiseMax(undamped_diagonal);
       }
 
       have_max_diagonal = true;
 
-      H_damped.diagonal().array() += max_diagonal.array() * lambda;
+      damping_vector = max_diagonal * lambda;
     } else {
-      H_damped.diagonal().array() += H_damped.diagonal().array() * lambda;
+      damping_vector = undamped_diagonal.cwiseMax(p_.diagonal_damping_min) * lambda;
    }
+  } else {
+    damping_vector = VectorX<Scalar>::Zero(hessian_lower.rows());
   }
 
   if (p_.use_unit_damping) {
-    H_damped.diagonal().array() += lambda;
+    damping_vector.array() += lambda;
   }
 
-  return H_damped;
+  hessian_lower.diagonal() += damping_vector;
 }
 
 template <typename ScalarType, typename LinearSolverType>
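
As a reading aid, a minimal standalone sketch of the damping vector
assembled above (a hypothetical helper, not the SymForce API; the
keep_max_diagonal_damping bookkeeping is omitted for brevity):

#include <Eigen/Core>

Eigen::VectorXd MakeDampingVector(const Eigen::VectorXd& undamped_diagonal, const double lambda,
                                  const double diagonal_damping_min,
                                  const bool use_diagonal_damping, const bool use_unit_damping) {
  Eigen::VectorXd damping_vector = Eigen::VectorXd::Zero(undamped_diagonal.size());
  if (use_diagonal_damping) {
    // diagonal_damping_min now applies in this branch too, clamping small or
    // zero diagonal entries before scaling by lambda (Marquardt-style damping)
    damping_vector = undamped_diagonal.cwiseMax(diagonal_damping_min) * lambda;
  }
  if (use_unit_damping) {
    damping_vector.array() += lambda;  // classic Levenberg lambda * I term
  }
  // The caller applies this in place: hessian_lower.diagonal() += damping_vector
  return damping_vector;
}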
@@ -100,27 +101,23 @@ void LevenbergMarquardtSolver<ScalarType, LinearSolverType>::PopulateIterationStats(
   iteration_stats.new_error = new_error;
   iteration_stats.relative_reduction = relative_reduction;
 
-  if (p_.include_jacobians) {
+  {
     SYM_TIME_SCOPE("LM<{}>: IterationStats - LinearErrorFromValues", id_);
-    iteration_stats.new_error_linear = state.Init().GetLinearization().LinearError(update_);
+    iteration_stats.new_error_linear =
+        state.Init().Error() +
+        state.Init().GetLinearization().LinearDeltaError(update_, damping_vector_);
   }
 
   if (p_.verbose) {
     SYM_TIME_SCOPE("LM<{}>: IterationStats - Print", id_);
-    if (p_.include_jacobians) {
-      spdlog::info(
-          "LM<{}> [iter {:4d}] lambda: {:.3e}, error prev/linear/new: {:.3e}/{:.3e}/{:.3e}, "
-          "rel reduction: {:.5e}",
-          id_, iteration_stats.iteration, iteration_stats.current_lambda, state.Init().Error(),
-          iteration_stats.new_error_linear, iteration_stats.new_error,
-          iteration_stats.relative_reduction);
-    } else {
-      spdlog::info(
-          "LM<{}> [iter {:4d}] lambda: {:.3e}, error prev/new: {:.3e}/{:.3e}, "
-          "rel reduction: {:.5e}",
-          id_, iteration_stats.iteration, iteration_stats.current_lambda, state.Init().Error(),
-          iteration_stats.new_error, iteration_stats.relative_reduction);
-    }
+    spdlog::info(
+        "LM<{}> [iter {:4d}] lambda: {:.3e}, error prev/linear/new: {:.3e}/{:.3e}/{:.3e}, "
+        "rel reduction: {:.5e}, gain ratio: {:.5e}",
+        id_, iteration_stats.iteration, iteration_stats.current_lambda, state.Init().Error(),
+        iteration_stats.new_error_linear, iteration_stats.new_error,
+        iteration_stats.relative_reduction,
+        (state.Init().Error() - new_error) /
+            (state.Init().Error() - iteration_stats.new_error_linear));
   }
 
   if (p_.debug_stats) {
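
The gain ratio logged above is the standard Levenberg-Marquardt
quantity: the actual error reduction divided by the reduction predicted
by the linear model,

  \varrho = \frac{F(x) - F(x + h)}{L(0) - L(h)}
          = \frac{\text{Error()} - \text{new\_error}}{\text{Error()} - \text{new\_error\_linear}}

so values near 1 indicate the linearization is trustworthy and values
near or below 0 indicate it is not.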
Expand Down Expand Up @@ -202,7 +199,9 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

if (!std::isfinite(state_.Init().Error())) {
spdlog::warn("LM<{}> Encountered non-finite initial error: {}", id_, state_.Init().Error());
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
if (!p_.debug_checks) {
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
}
return {{optimization_status_t::FAILED, FailureReason::INITIAL_ERROR_NOT_FINITE}};
}
}
Expand All @@ -214,15 +213,14 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(
solver_analyzed_ = true;
}

// TODO(aaron): Get rid of this copy
H_damped_ = DampHessian(state_.Init().GetLinearization().hessian_lower, have_max_diagonal_,
max_diagonal_, current_lambda_);
DampHessian(state_.Init().GetLinearization().hessian_lower, have_max_diagonal_, max_diagonal_,
current_lambda_, damping_vector_, undamped_diagonal_);

CheckHessianDiagonal(H_damped_, current_lambda_);
CheckHessianDiagonal(state_.Init().GetLinearization().hessian_lower, current_lambda_);

{
SYM_TIME_SCOPE("LM<{}>: SparseFactorize", id_);
const bool success = linear_solver_.Factorize(H_damped_);
const bool success = linear_solver_.Factorize(state_.Init().GetLinearization().hessian_lower);
// TODO(brad): Instead try recovering from this (ultimately by increasing lambda).
SYM_ASSERT(success, "Internal Error: Damped hessian factorization failed");

Expand All @@ -236,7 +234,12 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

{
SYM_TIME_SCOPE("LM<{}>: SparseSolve", id_);
update_ = linear_solver_.Solve(state_.Init().GetLinearization().rhs);
update_ = -linear_solver_.Solve(state_.Init().GetLinearization().rhs);
}

{
SYM_TIME_SCOPE("LM<{}>: ResetHessianDiagonal", id_);
state_.Init().GetLinearization().hessian_lower.diagonal() = undamped_diagonal_;
}

if (p_.debug_checks && !update_.array().isFinite().all()) {
Expand All @@ -245,7 +248,7 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

{
SYM_TIME_SCOPE("LM<{}>: Update", id_);
Update(state_.Init().values, index_, -update_, state_.New().values);
Update(state_.Init().values, index_, update_, state_.New().values);
}

state_.New().Relinearize(func);
Expand All @@ -260,7 +263,9 @@ LevenbergMarquardtSolver<ScalarType, LinearSolverType>::Iterate(

if (!std::isfinite(new_error)) {
spdlog::warn("LM<{}> Encountered non-finite error: {}", id_, new_error);
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
if (!p_.debug_checks) {
spdlog::warn("LM<{}> Turn on debug_checks to see which factor is causing this", id_);
}
}

optional<std::pair<optimization_status_t, FailureReason>> status{};
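
Taken together, this removes the H_damped_ copy from the iteration
entirely; a minimal sketch of the resulting damp / factorize / solve /
restore pattern (dense Eigen stand-ins for the actual linearization and
linear solver types):

#include <Eigen/Dense>

Eigen::VectorXd DampedSolveInPlace(Eigen::MatrixXd& hessian, const Eigen::VectorXd& rhs,
                                   const Eigen::VectorXd& damping_vector) {
  const Eigen::VectorXd undamped_diagonal = hessian.diagonal();  // save
  hessian.diagonal() += damping_vector;                          // DampHessian, in place

  // Leading minus: update_ now stores the update itself, h = -(H + D)^{-1} g,
  // so values are updated with +update_ rather than -update_
  const Eigen::VectorXd update = -hessian.ldlt().solve(rhs);

  hessian.diagonal() = undamped_diagonal;  // ResetHessianDiagonal
  return update;
}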
@@ -320,14 +325,11 @@ void LevenbergMarquardtSolver<ScalarType, LinearSolverType>::ComputeCovariance(
     const MatrixType& hessian_lower, MatrixX<Scalar>& covariance) {
   SYM_TIME_SCOPE("LM<{}>: ComputeCovariance()", id_);
 
-  H_damped_ = hessian_lower;
-  H_damped_.diagonal().array() += epsilon_;
-
   // TODO(hayk, aaron): This solver assumes a dense RHS, should add support for a sparse RHS
-  const bool success = linear_solver_.Factorize(H_damped_);
-  // TODO(brad): Instead try recovering from this by damping by something larger than epsilon_
+  const bool success = linear_solver_.Factorize(hessian_lower);
+  // TODO(brad): Instead try recovering from this by damping?
   SYM_ASSERT(success, "Internal Error: damped hessian factorization failed");
-  covariance = MatrixX<Scalar>::Identity(H_damped_.rows(), H_damped_.rows());
+  covariance = MatrixX<Scalar>::Identity(hessian_lower.rows(), hessian_lower.rows());
   linear_solver_.SolveInPlace(covariance);
 }
 
19 changes: 13 additions & 6 deletions symforce/opt/linearization.h
@@ -47,15 +47,22 @@ struct Linearization {
     initialized_ = initialized;
   }
 
-  inline double Error() const {
+  inline Scalar Error() const {
     SYM_ASSERT(IsInitialized());
-    return 0.5 * residual.squaredNorm();
+    return Scalar{0.5} * residual.squaredNorm();
   }
 
-  inline double LinearError(const Vector& x_update) const {
-    SYM_ASSERT(jacobian.cols() == x_update.size());
-    const auto linear_residual_new = -jacobian * x_update + residual;
-    return 0.5 * linear_residual_new.squaredNorm();
+  /**
+   * Returns the change in error predicted by the Linearization at the given update
+   *
+   * @param x_update The update to the values
+   * @param damping_vector The vector added to the diagonal of the hessian during the linear solve
+   */
+  inline Scalar LinearDeltaError(const Vector& x_update, const Vector& damping_vector) const {
+    SYM_ASSERT(IsInitialized());
+    // See Section 3.2 of "Methods For Non-Linear Least Squares Problems" 2nd Edition.
+    // http://www2.imm.dtu.dk/pubdb/edoc/imm3215.pdf
+    return Scalar{0.5} * x_update.dot(rhs - damping_vector.cwiseProduct(x_update));
   }
 
   // Sparse storage
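
A quick self-contained check of the new formula (assuming only Eigen;
J, r, and d are made up), comparing the O(n) expression against the
Jacobian-based linear error it replaces:

#include <iostream>

#include <Eigen/Dense>

int main() {
  const Eigen::MatrixXd J = Eigen::MatrixXd::Random(10, 4);     // made-up Jacobian
  const Eigen::VectorXd r = Eigen::VectorXd::Random(10);        // made-up residual
  const Eigen::VectorXd d = Eigen::VectorXd::Constant(4, 0.1);  // damping vector

  const Eigen::VectorXd rhs = J.transpose() * r;   // gradient g = J^T r
  Eigen::MatrixXd H = J.transpose() * J;           // Gauss-Newton hessian
  H.diagonal() += d;                               // damped in place
  const Eigen::VectorXd h = -H.ldlt().solve(rhs);  // the (positive) update

  // Change in linearized error, computed both ways
  const double via_jacobian = 0.5 * (r + J * h).squaredNorm() - 0.5 * r.squaredNorm();
  const double via_rhs_only = 0.5 * h.dot(rhs - d.cwiseProduct(h));  // LinearDeltaError

  std::cout << via_jacobian << " vs " << via_rhs_only << "\n";  // equal to roundoff
  return 0;
}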
3 changes: 2 additions & 1 deletion symforce/pybind/cc_linearization.cc
@@ -32,7 +32,8 @@ void AddLinearizationWrapper(pybind11::module_ module) {
       .def("set_initialized", &sym::SparseLinearizationd::SetInitialized,
            py::arg("initialized") = true)
       .def("error", &sym::SparseLinearizationd::Error)
-      .def("linear_error", &sym::SparseLinearizationd::LinearError, py::arg("x_update"))
+      .def("linear_delta_error", &sym::SparseLinearizationd::LinearDeltaError, py::arg("x_update"),
+           py::arg("damping_vector"))
       .def(py::pickle(
           [](const sym::SparseLinearizationd& linearization) {  // __getstate__
             return py::make_tuple(linearization.residual, linearization.hessian_lower,
4 changes: 3 additions & 1 deletion symforce/pybind/cc_sym.pyi
@@ -234,7 +234,9 @@ class Linearization:
         """
         Returns whether the linearization is currently valid for the corresponding values. Accessing any of the members when this is false could result in unexpected behavior.
         """
-    def linear_error(self, x_update: numpy.ndarray) -> float: ...
+    def linear_delta_error(
+        self, x_update: numpy.ndarray, damping_vector: numpy.ndarray
+    ) -> float: ...
     def reset(self) -> None:
         """
         Set to invalid.
6 changes: 4 additions & 2 deletions test/symforce_cc_sym_test.py
@@ -777,8 +777,10 @@ def test_optimizer(self) -> None:
 
         lin.set_initialized()
         self.assertIsInstance(lin.error(), T.Scalar)
-        self.assertIsInstance(lin.linear_error(x_update=np.array([0.01])), T.Scalar)
-        lin.linear_error(np.array([0.01]))
+        self.assertIsInstance(
+            lin.linear_delta_error(x_update=np.array([0.01]), damping_vector=np.array([0.0])),
+            T.Scalar,
+        )
 
         with self.subTest(msg="cc_sym.Linearization is pickleable"):
             linearization = cc_sym.Linearization()
