Merged
@@ -343,6 +343,7 @@ public InsertionResult maybeAddVector(ByteBuffer term, int segmentRowId) throws
         compressor = ((ProductQuantization) compressor).refine(new ListRandomAccessVectorValues(trainingVectors, dimension));
         trainingVectors.clear(); // don't need these anymore so let GC reclaim if it wants to

+        long originalBytesUsed = compressedVectors.ramBytesUsed();
         // re-encode the vectors added so far
         int encodedVectorCount = compressedVectors.count();
         compressedVectors = new MutablePQVectors((ProductQuantization) compressor);
@@ -358,6 +359,10 @@ public InsertionResult maybeAddVector(ByteBuffer term, int segmentRowId) throws
             });
         }).join();

+        // Update bytes to account for new encoding. This isn't expected to change, but just
+        // in case it does, we track it here.
+        bytesUsed += (compressedVectors.ramBytesUsed() - originalBytesUsed);
+
         // Keep the existing edges but recompute their scores
         builder = GraphIndexBuilder.rescore(builder, BuildScoreProvider.pqBuildScoreProvider(similarityFunction, (PQVectors) compressedVectors));
     }
@@ -381,12 +386,15 @@ public InsertionResult maybeAddVector(ByteBuffer term, int segmentRowId) throws
         for (int i = 0; i < dimension; i++)
             vectorsByOrdinalBufferedWriter.writeFloat(vector.get(i));

+        // Track the bytes used as a result of this operation
+        long compressedVectorsBytesUsed = compressedVectors.ramBytesUsed();
         // Fill in any holes in the pqVectors (setZero has the side effect of increasing the count)
         while (compressedVectors.count() < ordinal)
             compressedVectors.setZero(compressedVectors.count());
         compressedVectors.encodeAndSet(ordinal, vector);

         bytesUsed += postings.ramBytesUsed();
+        bytesUsed += (compressedVectors.ramBytesUsed() - compressedVectorsBytesUsed);
         return new InsertionResult(bytesUsed, ordinal, vector);
     }

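The accounting pattern this diff introduces (snapshot `ramBytesUsed()` before a mutation, then add the difference to a running total afterwards) can be sketched in isolation. Everything below is a hypothetical stand-in for illustration; `FakeCompressedVectors` is not the real `MutablePQVectors` API:

```java
import java.util.ArrayList;
import java.util.List;

class BytesAccountingSketch {
    // Stand-in for MutablePQVectors: tracks an approximate byte footprint
    // of the compressed codes it holds.
    static class FakeCompressedVectors {
        private final List<byte[]> codes = new ArrayList<>();

        long ramBytesUsed() {
            long total = 0;
            for (byte[] c : codes)
                total += c.length;
            return total;
        }

        void encodeAndSet(byte[] code) { codes.add(code); }

        int count() { return codes.size(); }
    }

    public static void main(String[] args) {
        FakeCompressedVectors vectors = new FakeCompressedVectors();
        long bytesUsed = 0;

        // The pattern from maybeAddVector: snapshot, mutate, add the delta.
        // This stays correct even if the mutation shrinks the footprint,
        // since the delta can be negative.
        long before = vectors.ramBytesUsed();
        vectors.encodeAndSet(new byte[16]);
        vectors.encodeAndSet(new byte[16]);
        bytesUsed += vectors.ramBytesUsed() - before;

        System.out.println(bytesUsed); // prints 32
    }
}
```

The delta form matters because re-encoding replaces `compressedVectors` wholesale, so adding absolute sizes would double-count the old allocation.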
@@ -38,6 +38,7 @@
 import org.apache.cassandra.index.sai.StorageAttachedIndex;
 import org.apache.cassandra.index.sai.disk.v1.SegmentBuilder;
 import org.apache.cassandra.index.sai.disk.v2.V2VectorIndexSearcher;
+import org.apache.cassandra.index.sai.disk.vector.CompactionGraph;
 import org.apache.cassandra.index.sai.disk.vector.NVQUtil;

import static org.junit.Assert.assertEquals;
@@ -221,6 +222,20 @@ public void testCompaction() throws Throwable
             var recall = testRecall(topK, queryVectors, groundTruth);
             assertTrue("Post-compaction recall is " + recall, recall > postCompactionRecall);
         }

+        // Set force PQ training size to ensure we hit the refine code path and apply it to half the vectors.
+        // TODO this test fails as of this commit due to recall issues. Will investigate further.
+        CompactionGraph.PQ_TRAINING_SIZE = baseVectors.size() / 2;
Comment on lines +226 to +228
Member Author:
The problem here seems likely to be with GraphIndexBuilder.rescore in the CompactionGraph. The failure is unrelated to this PR. I am looking into possible fixes. Generally speaking, I think we can entirely remove the rescore logic, which duplicates work unnecessarily. We could instead accumulate N vectors, refine a PQ, encode the vectors, and then insert them into a graph builder properly built using these PQ values. I have a local version of this that passes, but is a bit hacky.
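The buffer-refine-encode-build flow proposed in this comment could look roughly like the sketch below. All types and names here are illustrative stand-ins, not the jvector API; the real `ProductQuantization.refine` and `GraphIndexBuilder` have different signatures:

```java
import java.util.ArrayList;
import java.util.List;

class RefineThenBuildSketch {
    interface Quantizer { int[] encode(float[] v); }

    // Trivial stand-in for PQ refinement: the "codebook" here is just
    // rounding each component. The real refine() retrains PQ centroids
    // on the buffered training set.
    static Quantizer refine(List<float[]> training) {
        return v -> {
            int[] code = new int[v.length];
            for (int i = 0; i < v.length; i++)
                code[i] = Math.round(v[i]);
            return code;
        };
    }

    // Buffer the first trainingSize vectors, refine the quantizer once,
    // encode the buffered vectors, then feed every later vector straight
    // through the refined quantizer -- no build-then-rescore pass needed.
    static List<int[]> buildIndex(List<float[]> vectors, int trainingSize) {
        List<float[]> buffered = new ArrayList<>();
        List<int[]> graphCodes = new ArrayList<>();
        Quantizer pq = null;
        for (float[] v : vectors) {
            if (pq == null) {
                buffered.add(v);
                if (buffered.size() < trainingSize)
                    continue;
                // Enough vectors accumulated: refine once, flush the buffer
                pq = refine(buffered);
                for (float[] b : buffered)
                    graphCodes.add(pq.encode(b));
                buffered.clear();
            } else {
                graphCodes.add(pq.encode(v));
            }
        }
        return graphCodes;
    }

    public static void main(String[] args) {
        List<float[]> vectors = new ArrayList<>();
        for (int i = 0; i < 4; i++)
            vectors.add(new float[]{ i });
        // Train on the first 2 vectors, then encode everything
        System.out.println(buildIndex(vectors, 2).size()); // prints 4
    }
}
```

The design point is that every code inserted into the graph is already produced by the final quantizer, so there is no stale-score state to rescore afterwards.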

Reviewer:
Can you explain how this added test relates to the memory consumption tracking modification in the code under test? I can see we're likely invoking vector compression here, but we're not testing the memory consumption counters. Or am I missing something?

Member Author (@michaeljmarshall, Feb 6, 2026):
By reducing PQ_TRAINING_SIZE, we hit code that was previously uncovered. We then asserted on the recall of the graph built by that code, and the test didn't pass. The failure is unrelated to my change.


+        // Compact again to take the CompactionGraph code path that calls the refine logic
+        compact();
+        for (int topK : List.of(1, 100))
+        {
+            var recall = testRecall(topK, queryVectors, groundTruth);
+            // This assertion will fail until we address the design bug discussed
+            // in https://github.com/riptano/cndb/issues/16637.
+            // assertTrue("Post-compaction recall is " + recall, recall > postCompactionRecall);
+        }
}
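For reference, the recall metric these assertions use is the fraction of ground-truth nearest neighbors that the index actually returned. A minimal sketch of that computation follows; the real `testRecall` in the suite may differ in details such as averaging across queries:

```java
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class RecallSketch {
    // recall@k: |retrieved ∩ groundTruth| / |groundTruth|
    static double recall(List<Integer> retrieved, List<Integer> groundTruth) {
        Set<Integer> truth = new HashSet<>(groundTruth);
        long hits = retrieved.stream().filter(truth::contains).count();
        return (double) hits / groundTruth.size();
    }

    public static void main(String[] args) {
        // Retrieved 3 of the 4 true neighbors -> recall 0.75
        System.out.println(recall(List.of(1, 2, 3, 9), List.of(1, 2, 3, 4)));
    }
}
```

A regression in PQ encoding (such as the refine-path issue discussed above) shows up directly as a drop in this number, which is why the test asserts a floor on it.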

// exercise the path where we use the PQ from the first segment (constructed on-heap)