[SymForce] hessian_storage_col_starts is flat

Instead of being a vector of vectors. This noticeably improves performance of UpdateFromLinearizedDenseFactorIntoSparse.

Topic: sf-hessian-cols-flat
Reviewers: bradley,ryan-b,chao,veeresh,dominic,peter
GitOrigin-RevId: 9eccb233f1cdee268f07048f85cd6e4c2f288883
Krishan0507 · Aug 22, 2023 · 7d5f4f7 · 7d5f4f7
1 parent bd0f987
commit 7d5f4f7
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 28 deletions.
diff --git a/lcmtypes/symforce.lcm b/lcmtypes/symforce.lcm
@@ -74,12 +74,9 @@ struct linearization_dense_key_helper_t {
   // For each column of this key's block in the factor jacobian, the sparse storage valuePtr array
   // offsets
   int32_t jacobian_storage_col_starts[];
-  // For each other key (from 0 to this key's index), the sparse storage valuePtr array offsets
-  // NOTE(hayk): Currently num_other_cols is not constant in actual use - this type likely
-  // can't be serialized right now. Perhaps try to store a flatter structure of slices.
-  int32_t num_other_keys;
-  int32_t num_other_cols;
-  int32_t hessian_storage_col_starts[num_other_keys][num_other_cols];
+  // For this key, then each other key (from 0 to this key's index); and each column in that key;
+  // the sparse storage valuePtr array offsets
+  int32_t hessian_storage_col_starts[];
 };
 
 // Index information for a linearized factor into the combined problem

diff --git a/symforce/opt/internal/linearizer_utils.h b/symforce/opt/internal/linearizer_utils.h
@@ -113,34 +113,30 @@ void ComputeKeyHelperSparseColOffsets(
       }
     }
 
-    key_helper.hessian_storage_col_starts.resize(key_i + 1);
-    key_helper.num_other_keys = key_helper.hessian_storage_col_starts.size();
+    // Build the hessian storage col starts.  The insertion order here must match the order these
+    // are accessed in Linearizer::UpdateFromLinearizedDenseFactorIntoSparse
+    auto& col_starts = key_helper.hessian_storage_col_starts;
 
     // Diagonal block
-    std::vector<int32_t>& diag_col_starts = key_helper.hessian_storage_col_starts[key_i];
-    diag_col_starts.resize(key_helper.tangent_dim);
     for (int32_t col = 0; col < key_helper.tangent_dim; ++col) {
-      diag_col_starts[col] = hessian_row_col_to_storage_offset.at(
-          std::make_pair(key_helper.combined_offset + col, key_helper.combined_offset + col));
+      col_starts.push_back(hessian_row_col_to_storage_offset.at(
+          std::make_pair(key_helper.combined_offset + col, key_helper.combined_offset + col)));
     }
 
     // Off diagonal blocks
     for (int key_j = 0; key_j < key_i; key_j++) {
       const linearization_dense_key_helper_t& j_key_helper = factor_helper.key_helpers[key_j];
-      std::vector<int32_t>& col_starts = key_helper.hessian_storage_col_starts[key_j];
 
       // If key_j comes after key_i in the full problem, we need to transpose things
       if (j_key_helper.combined_offset < key_helper.combined_offset) {
-        col_starts.resize(j_key_helper.tangent_dim);
         for (int32_t j_col = 0; j_col < j_key_helper.tangent_dim; ++j_col) {
-          col_starts[j_col] = hessian_row_col_to_storage_offset.at(
-              std::make_pair(key_helper.combined_offset, j_key_helper.combined_offset + j_col));
+          col_starts.push_back(hessian_row_col_to_storage_offset.at(
+              std::make_pair(key_helper.combined_offset, j_key_helper.combined_offset + j_col)));
         }
       } else {
-        col_starts.resize(key_helper.tangent_dim);
         for (int32_t i_col = 0; i_col < key_helper.tangent_dim; ++i_col) {
-          col_starts[i_col] = hessian_row_col_to_storage_offset.at(
-              std::make_pair(j_key_helper.combined_offset, key_helper.combined_offset + i_col));
+          col_starts.push_back(hessian_row_col_to_storage_offset.at(
+              std::make_pair(j_key_helper.combined_offset, key_helper.combined_offset + i_col)));
         }
       }
     }

diff --git a/symforce/opt/linearizer.cc b/symforce/opt/linearizer.cc
@@ -331,11 +331,12 @@ void Linearizer<ScalarType>::UpdateFromLinearizedDenseFactorIntoSparse(
         linearized_factor.rhs.segment(key_helper.factor_offset, key_helper.tangent_dim);
 
     // Add contribution from diagonal hessian block, column by column
+    auto col_start_iter = key_helper.hessian_storage_col_starts.begin();
     for (int col_block = 0; col_block < key_helper.tangent_dim; ++col_block) {
-      const std::vector<int32_t>& diag_col_starts = key_helper.hessian_storage_col_starts[key_i];
-      Eigen::Map<VectorX<Scalar>>(
-          linearization.hessian_lower.valuePtr() + diag_col_starts[col_block],
-          key_helper.tangent_dim - col_block) +=
+      const auto col_start = *col_start_iter;
+      col_start_iter++;
+      Eigen::Map<VectorX<Scalar>>(linearization.hessian_lower.valuePtr() + col_start,
+                                  key_helper.tangent_dim - col_block) +=
           linearized_factor.hessian.block(key_helper.factor_offset + col_block,
                                           key_helper.factor_offset + col_block,
                                           key_helper.tangent_dim - col_block, 1);
@@ -347,19 +348,22 @@ void Linearizer<ScalarType>::UpdateFromLinearizedDenseFactorIntoSparse(
     // of the block is contiguous in sparse storage.
     for (int key_j = 0; key_j < key_i; key_j++) {
       const linearization_dense_key_helper_t& key_helper_j = factor_helper.key_helpers[key_j];
-      const std::vector<int32_t>& col_starts = key_helper.hessian_storage_col_starts[key_j];
 
       if (key_helper_j.combined_offset < key_helper.combined_offset) {
-        for (int32_t col_j = 0; col_j < static_cast<int32_t>(col_starts.size()); ++col_j) {
-          Eigen::Map<VectorX<Scalar>>(linearization.hessian_lower.valuePtr() + col_starts[col_j],
+        for (int32_t col_j = 0; col_j < static_cast<int32_t>(key_helper_j.tangent_dim); ++col_j) {
+          const auto col_start = *col_start_iter;
+          col_start_iter++;
+          Eigen::Map<VectorX<Scalar>>(linearization.hessian_lower.valuePtr() + col_start,
                                       key_helper.tangent_dim) +=
               linearized_factor.hessian.block(key_helper.factor_offset,
                                               key_helper_j.factor_offset + col_j,
                                               key_helper.tangent_dim, 1);
         }
       } else {
-        for (int32_t col_i = 0; col_i < static_cast<int32_t>(col_starts.size()); ++col_i) {
-          Eigen::Map<VectorX<Scalar>>(linearization.hessian_lower.valuePtr() + col_starts[col_i],
+        for (int32_t col_i = 0; col_i < static_cast<int32_t>(key_helper.tangent_dim); ++col_i) {
+          const auto col_start = *col_start_iter;
+          col_start_iter++;
+          Eigen::Map<VectorX<Scalar>>(linearization.hessian_lower.valuePtr() + col_start,
                                       key_helper_j.tangent_dim) +=
               linearized_factor.hessian
                   .block(key_helper.factor_offset + col_i, key_helper_j.factor_offset, 1,