remove knn gpu splitting
alicialics committed Mar 26, 2023
1 parent e5d1771 commit 410754a
Showing 3 changed files with 34 additions and 153 deletions.
2 changes: 1 addition & 1 deletion tensorboard/plugins/projector/vz_projector/data.ts
@@ -475,7 +475,7 @@ export class DataSet {
} else {
const knnGpuEnabled = (await util.hasWebGLSupport()) && !IS_FIREFOX;
const result = await (knnGpuEnabled
? knn.findKNNGPUCosDistNorm(data, nNeighbors, (d) => d.vector)
? knn.findKNNTFCosDistNorm(data, nNeighbors, (d) => d.vector)
: knn.findKNN(
data,
nNeighbors,
136 changes: 25 additions & 111 deletions tensorboard/plugins/projector/vz_projector/knn.ts
@@ -22,53 +22,33 @@ export type NearestEntry = {
index: number;
dist: number;
};
/**
* Optimal size for the height of the matrix when doing computation on the GPU
* using WebGL. This was found experimentally.
*
* This also guarantees that for computing pair-wise distance for up to 10K
* vectors, no more than 40MB will be allocated in the GPU. Without the
* allocation limit, we can freeze the graphics of the whole OS.
*/
const OPTIMAL_GPU_BLOCK_SIZE = 256;
/** Id of message box used for knn gpu progress bar. */
const KNN_GPU_MSG_ID = 'knn-gpu';

/** Id of message box used for knn. */
const KNN_MSG_ID = 'knn';

/**
* Returns the K nearest neighbors for each vector where the distance
* computation is done on the GPU (WebGL) using cosine distance.
* computation is done with tensorflow.js using cosine distance.
*
* @param dataPoints List of data points, where each data point holds an
* n-dimensional vector. Assumes that the vector is already normalized to unit
* norm.
* @param k Number of nearest neighbors to find.
* @param accessor A method that returns the vector, given the data point.
*/
export function findKNNGPUCosDistNorm<T>(
export function findKNNTFCosDistNorm<T>(
dataPoints: T[],
k: number,
accessor: (dataPoint: T) => Float32Array
): Promise<NearestEntry[][]> {
const N = dataPoints.length;
const dim = accessor(dataPoints[0]).length;
// The goal is to compute a large matrix multiplication A*A.T where A is of
// size NxD and A.T is its transpose. This results in a NxN matrix which
// could be too big to store on the GPU memory. To avoid memory overflow, we
// compute multiple A*partial_A.T where partial_A is of size BxD (B is much
// smaller than N). This results in storing only NxB size matrices on the GPU
// at a given time.
// size NxD and A.T is its transpose. This results in a NxN matrix.
// A*A.T will give us NxN matrix holding the cosine distance between every
// pair of points, which we sort using KMin data structure to obtain the
// K nearest neighbors for each point.
const nearest: NearestEntry[][] = new Array(N);
let numPieces = Math.ceil(N / OPTIMAL_GPU_BLOCK_SIZE);
const actualPieceSize = Math.floor(N / numPieces);
const modulo = N % actualPieceSize;
numPieces += modulo ? 1 : 0;
let offset = 0;
let progress = 0;
let progressDiff = 1 / (2 * numPieces);
let piece = 0;

const typedArray = vector.toTypedArray(dataPoints, accessor);
const bigMatrix = tf.tensor(typedArray, [N, dim]);
@@ -77,80 +57,50 @@ export function findKNNGPUCosDistNorm<T>(
const bigMatrixSquared = tf.matMul(bigMatrix, bigMatrixTransposed);
const cosDistMatrix = tf.sub(1, bigMatrixSquared);

let maybePaddedCosDistMatrix = cosDistMatrix;
if (actualPieceSize * numPieces > N) {
// Expect the input to be rank 2 (though it is not typed that way) so we
// want to pad the first dimension so we split evenly (all split
// tensors have exactly the same dimension).
const padding: Array<[number, number]> = [
[0, actualPieceSize * numPieces - N],
[0, 0],
];
maybePaddedCosDistMatrix = tf.pad(cosDistMatrix, padding);
}
const splits = tf.split(
maybePaddedCosDistMatrix,
new Array(numPieces).fill(actualPieceSize),
0
);

function step(resolve: (result: NearestEntry[][]) => void) {
let progressMsg =
'Finding nearest neighbors: ' + (progress * 100).toFixed() + '%';
util
.runAsyncTask(
progressMsg,
'Finding nearest neighbors...',
async () => {
// `.data()` returns flattened Float32Array of B * N dimension.
// For matrix of
// [ 1 2 ]
// [ 3 4 ],
// `.data()` returns [1, 2, 3, 4].
const partial = await splits[piece].data();
progress += progressDiff;
for (let i = 0; i < actualPieceSize; i++) {
const partial = await cosDistMatrix.data();
for (let i = 0; i < N; i++) {
let kMin = new KMin<NearestEntry>(k);
let iReal = offset + i;
if (iReal >= N) break;
for (let j = 0; j < N; j++) {
// Skip diagonal entries.
if (j === iReal) {
if (j === i) {
continue;
}
// Access i * N's row at `j` column.
// Each row has N entries and j-th index has cosine distance
// between iReal vs. j-th vectors.
// between i-th vs. j-th vectors.
const cosDist = partial[i * N + j];
if (cosDist >= 0) {
kMin.add(cosDist, {index: j, dist: cosDist});
}
}
nearest[iReal] = kMin.getMinKItems();
nearest[i] = kMin.getMinKItems();
}
progress += progressDiff;
offset += actualPieceSize;
piece++;
},
KNN_GPU_MSG_ID
KNN_MSG_ID,
)
.then(
() => {
if (piece < numPieces) {
step(resolve);
} else {
logging.setModalMessage(null!, KNN_GPU_MSG_ID);
// Discard all tensors and free up the memory.
bigMatrix.dispose();
bigMatrixTransposed.dispose();
bigMatrixSquared.dispose();
cosDistMatrix.dispose();
splits.forEach((split) => split.dispose());
resolve(nearest);
}
logging.setModalMessage(null!, KNN_MSG_ID);
// Discard all tensors and free up the memory.
bigMatrix.dispose();
bigMatrixTransposed.dispose();
bigMatrixSquared.dispose();
cosDistMatrix.dispose();
resolve(nearest);
},
(error) => {
// GPU failed. Reverting back to CPU.
logging.setModalMessage(null!, KNN_GPU_MSG_ID);
// TensorFlow.js failed. Reverting back to CPU.
logging.setModalMessage(null!, KNN_MSG_ID);
let distFunc = (a, b, limit) => vector.cosDistNorm(a, b);
findKNN(dataPoints, k, accessor, distFunc).then((nearest) => {
resolve(nearest);
@@ -212,47 +162,12 @@ export function findKNN<T>(
for (let i = 0; i < N; i++) {
nearest[i] = kMin[i].getMinKItems();
}
logging.setModalMessage(null!, KNN_MSG_ID);
return nearest;
}
},
KNN_MSG_ID,
);
}
/** Calculates the minimum distance between a search point and a rectangle. */
function minDist(
point: [number, number],
x1: number,
y1: number,
x2: number,
y2: number
) {
let x = point[0];
let y = point[1];
let dx1 = x - x1;
let dx2 = x - x2;
let dy1 = y - y1;
let dy2 = y - y2;
if (dx1 * dx2 <= 0) {
// x is between x1 and x2
if (dy1 * dy2 <= 0) {
// (x,y) is inside the rectangle
return 0; // return 0 as point is in rect
}
return Math.min(Math.abs(dy1), Math.abs(dy2));
}
if (dy1 * dy2 <= 0) {
// y is between y1 and y2
// We know it is already inside the rectangle
return Math.min(Math.abs(dx1), Math.abs(dx2));
}
let corner: [number, number];
if (x > x2) {
// Upper-right vs lower-right.
corner = y > y2 ? [x2, y2] : [x2, y1];
} else {
// Upper-left vs lower-left.
corner = y > y2 ? [x1, y2] : [x1, y1];
}
return Math.sqrt(vector.dist22D([x, y], corner));
}
/**
* Returns the nearest neighbors of a particular point.
*
@@ -282,4 +197,3 @@ export function findKNNofPoint<T>(
return kMin.getMinKItems();
}

export const TEST_ONLY = {OPTIMAL_GPU_BLOCK_SIZE};
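With the splitting gone, the TF path above reduces to a single matrix multiply followed by a per-row top-k scan. The following is a minimal standalone sketch of that idea, not code from this repository: it assumes the input vectors are already unit-normalized Float32Arrays, uses @tensorflow/tfjs directly, and substitutes a plain sort for the projector's KMin helper (knnCosDistNormSketch is a hypothetical name).

import * as tf from '@tensorflow/tfjs';

// For unit-norm rows, A*A.T is the cosine-similarity matrix, so 1 - A*A.T is
// the full NxN cosine-distance matrix. Each row is then scanned for its k
// smallest off-diagonal entries.
async function knnCosDistNormSketch(
  vectors: Float32Array[],
  k: number
): Promise<Array<Array<{index: number; dist: number}>>> {
  const n = vectors.length;
  const dim = vectors[0].length;

  // Flatten the input into a single [n, dim] tensor.
  const flat = new Float32Array(n * dim);
  vectors.forEach((v, i) => flat.set(v, i * dim));
  const a = tf.tensor2d(flat, [n, dim]);

  // 1 - A*A.T: cosine distance for already-normalized vectors.
  const distMatrix = tf.sub(1, tf.matMul(a, a, false, true));
  const dists = await distMatrix.data();
  a.dispose();
  distMatrix.dispose();

  const nearest: Array<Array<{index: number; dist: number}>> = [];
  for (let i = 0; i < n; i++) {
    const row: Array<{index: number; dist: number}> = [];
    for (let j = 0; j < n; j++) {
      if (j === i) continue; // skip distance to self
      row.push({index: j, dist: dists[i * n + j]});
    }
    row.sort((x, y) => x.dist - y.dist);
    nearest.push(row.slice(0, k));
  }
  return nearest;
}

Note that without the old block splitting, the full NxN distance matrix is materialized at once; the per-block allocation cap described in the removed OPTIMAL_GPU_BLOCK_SIZE comment no longer applies.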
49 changes: 8 additions & 41 deletions tensorboard/plugins/projector/vz_projector/knn_test.ts
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
import {findKNN, findKNNGPUCosDistNorm, NearestEntry, TEST_ONLY} from './knn';
import {findKNN, findKNNTFCosDistNorm, NearestEntry} from './knn';
import {cosDistNorm, unit} from './vector';

describe('projector knn test', () => {
@@ -28,9 +28,9 @@ describe('projector knn test', () => {
return vector;
}

describe('#findKNNGPUCosDistNorm', () => {
describe('#findKNNTFCosDistNorm', () => {
it('finds n-nearest neighbor for each item', async () => {
const values = await findKNNGPUCosDistNorm(
const values = await findKNNTFCosDistNorm(
[
{a: unitVector(new Float32Array([1, 2, 0]))},
{a: unitVector(new Float32Array([1, 1, 3]))},
@@ -54,7 +54,7 @@ describe('projector knn test', () => {
});

it('returns less than N when number of items is lower', async () => {
const values = await findKNNGPUCosDistNorm(
const values = await findKNNTFCosDistNorm(
[
unitVector(new Float32Array([1, 2, 0])),
unitVector(new Float32Array([1, 1, 3])),
@@ -65,29 +65,13 @@

expect(getIndices(values)).toEqual([[1], [0]]);
});

it('splits a large data into one that would fit into GPU memory', async () => {
const size = TEST_ONLY.OPTIMAL_GPU_BLOCK_SIZE + 5;
const data = new Array(size).fill(
unitVector(new Float32Array([1, 1, 1]))
);
const values = await findKNNGPUCosDistNorm(data, 1, (a) => a);

expect(getIndices(values)).toEqual([
// Since distance to the diagonal entries (distance to self is 0) is
// non-sensical, the diagonal entires are ignored. So for the first
// item, the nearest neighbor should be 2nd item (index 1).
[1],
...new Array(size - 1).fill([0]),
]);
});
});

describe('#findKNN', () => {
// Covered by equality tests below (#findKNNGPUCosDistNorm == #findKNN).
// Covered by equality tests below (#findKNNTFCosDistNorm == #findKNN).
});

describe('#findKNNGPUCosDistNorm and #findKNN', () => {
describe('#findKNNTFCosDistNorm and #findKNN', () => {
it('returns same value when dist metrics are cosine', async () => {
const data = [
unitVector(new Float32Array([1, 2, 0])),
@@ -97,7 +81,7 @@ describe('projector knn test', () => {
unitVector(new Float32Array([100, 10, 0])),
unitVector(new Float32Array([95, 23, 100])),
];
const findKnnGpuCosVal = await findKNNGPUCosDistNorm(data, 2, (a) => a);
const findKnnTFCosVal = await findKNNTFCosDistNorm(data, 2, (a) => a);
const findKnnVal = await findKNN(
data,
2,
@@ -106,24 +90,7 @@ describe('projector knn test', () => {
);

// Floating point precision makes it hard to test. Just assert indices.
expect(getIndices(findKnnGpuCosVal)).toEqual(getIndices(findKnnVal));
});

it('splits a large data without the result being wrong', async () => {
const size = TEST_ONLY.OPTIMAL_GPU_BLOCK_SIZE + 5;
const data = Array.from(new Array(size)).map((_, index) => {
return unitVector(new Float32Array([index + 1, index + 1]));
});

const findKnnGpuCosVal = await findKNNGPUCosDistNorm(data, 2, (a) => a);
const findKnnVal = await findKNN(
data,
2,
(a) => a,
(a, b, limit) => cosDistNorm(a, b)
);

expect(getIndices(findKnnGpuCosVal)).toEqual(getIndices(findKnnVal));
expect(getIndices(findKnnTFCosVal)).toEqual(getIndices(findKnnVal));
});
});
});
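The equality test above also doubles as a usage reference for the renamed function. A condensed calling sketch follows; the vectors are hypothetical unit-norm inputs and the snippet is illustrative rather than part of the change.

import {findKNN, findKNNTFCosDistNorm} from './knn';
import {cosDistNorm} from './vector';

async function demo() {
  // Hypothetical, already unit-normalized vectors.
  const data = [
    new Float32Array([0.6, 0.8, 0]),
    new Float32Array([0, 0.6, 0.8]),
    new Float32Array([0.8, 0, 0.6]),
  ];

  // tensorflow.js path: full-matrix cosine distance.
  const tfNeighbors = await findKNNTFCosDistNorm(data, 2, (v) => v);

  // Pure-CPU fallback using the same distance metric.
  const cpuNeighbors = await findKNN(
    data,
    2,
    (v) => v,
    (a, b, limit) => cosDistNorm(a, b)
  );

  // Both paths should report the same neighbor indices for each point.
  console.log(tfNeighbors, cpuNeighbors);
}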
