tensorflow · lina128 · Mar 16, 2022 · Mar 15, 2022 · Nov 9, 2021 · Mar 15, 2022
diff --git a/tfjs-backend-webgl/src/backend_webgl.ts b/tfjs-backend-webgl/src/backend_webgl.ts
@@ -19,8 +19,7 @@
 import './flags_webgl';
 
 import * as tf from '@tensorflow/tfjs-core';
-import {backend_util, BackendValues, buffer, DataId, DataStorage, DataToGPUWebGLOption, DataType, DataValues, engine, env, GPUData, kernel_impls, KernelBackend, MemoryInfo, NumericDataType, Rank, RecursiveArray, scalar, ShapeMap, Tensor, Tensor2D, TensorBuffer, TensorInfo, tidy, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
-
+import {backend_util, BackendValues, buffer, DataId, DataStorage, DataToGPUWebGLOption, DataType, DataValues, engine, env, GPUData, kernel_impls, KernelBackend, MemoryInfo, nextFrame, NumericDataType, Rank, RecursiveArray, scalar, ShapeMap, Tensor, Tensor2D, TensorBuffer, TensorInfo, tidy, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
 import {getWebGLContext} from './canvas_util';
 import {DecodeMatrixProgram} from './decode_matrix_gpu';
 import {DecodeMatrixPackedProgram} from './decode_matrix_packed_gpu';
@@ -30,7 +29,7 @@ import {EncodeMatrixProgram} from './encode_matrix_gpu';
 import {EncodeMatrixPackedProgram} from './encode_matrix_packed_gpu';
 import {GPGPUContext} from './gpgpu_context';
 import * as gpgpu_math from './gpgpu_math';
-import {GPGPUBinary, GPGPUProgram, TensorData} from './gpgpu_math';
+import {getUniformLocations, GPGPUBinary, GPGPUProgram, TensorData} from './gpgpu_math';
 import {simpleAbsImplCPU} from './kernel_utils/shared';
 import {PackProgram} from './pack_gpu';
 import {ReshapePackedProgram} from './reshape_packed_gpu';
@@ -549,15 +548,16 @@ export class MathBackendWebGL extends KernelBackend {
     };
 
     return (async () => {
-      if (env()
-        .getNumber('WEBGL_DISJOINT_QUERY_TIMER_EXTENSION_RELIABLE') > 0) {
+      if (env().getNumber('WEBGL_DISJOINT_QUERY_TIMER_EXTENSION_RELIABLE') >
+          0) {
         const kernelMs = await Promise.all(flattenedActiveTimerQueries);
 
         res['kernelMs'] = util.sum(kernelMs);
         res['getExtraProfileInfo'] = () =>
-          kernelMs.map((d, i) => ({name: flattenedActiveTimerNames[i], ms: d}))
-            .map(d => `${d.name}: ${d.ms}`)
-            .join(', ');
+            kernelMs
+                .map((d, i) => ({name: flattenedActiveTimerNames[i], ms: d}))
+                .map(d => `${d.name}: ${d.ms}`)
+                .join(', ');
       } else {
         res['kernelMs'] = {
           error: 'WebGL query timers are not supported in this environment.'
@@ -949,8 +949,10 @@ export class MathBackendWebGL extends KernelBackend {
       query = this.startTimer();
     }
 
-    gpgpu_math.runProgram(
-        this.gpgpu, binary, inputsData, outputData, customUniformValues);
+    if (!env().get('ENGINE_COMPILE_ONLY')) {
+      gpgpu_math.runProgram(
+          this.gpgpu, binary, inputsData, outputData, customUniformValues);
+    }
 
     dataToDispose.forEach(info => this.disposeIntermediateTensorInfo(info));
 
@@ -1130,16 +1132,21 @@ export class MathBackendWebGL extends KernelBackend {
 
       // Have the original texture assume the identity of the encoded output.
       const outputTexData = this.texData.get(encodedOutputTarget.dataId);
-      texData.texture = outputTexData.texture;
       texData.texShape = outputTexData.texShape;
       texData.isPacked = outputTexData.isPacked;
       texData.usage = outputTexData.usage;
 
+      if (!env().get('ENGINE_COMPILE_ONLY')) {
+        texData.texture = outputTexData.texture;
+        // Once uploaded, don't store the values on cpu.
+        texData.values = null;
+        this.texData.delete(encodedOutputTarget.dataId);
+      } else {
+        this.disposeData(encodedOutputTarget.dataId);
+      }
+
       this.disposeIntermediateTensorInfo(tempDenseInputHandle);
-      this.texData.delete(encodedOutputTarget.dataId);
 
-      // Once uploaded, don't store the values on cpu.
-      texData.values = null;
       if (shouldTimeProgram) {
         this.uploadWaitMs += util.now() - start;
       }
@@ -1180,6 +1187,87 @@ export class MathBackendWebGL extends KernelBackend {
   private computeBytes(shape: [number, number], dtype: DataType) {
     return shape[0] * shape[1] * util.bytesPerElement(dtype);
   }
+
+  /** Synchronously validates every cached program; throws on a failure. */
+  checkCompileCompletion() {
+    Object.values(this.binaryCache)
+        .forEach(binary => this.checkCompletion_(binary));
+  }
+
+  /**
+   * Asynchronous counterpart of `checkCompileCompletion`: validates every
+   * program in the binary cache.
+   *
+   * When the parallel compilation extension is available each program is
+   * polled through `checkCompletionAsync_`. Otherwise `checkCompletion_` runs
+   * for each program immediately, before this method returns its promise.
+   *
+   * @returns A promise that resolves to one `true` per cached binary and
+   *     rejects with the first compile or link error.
+   */
+  async checkCompileCompletionAsync(): Promise<boolean[]> {
+    const binaries = Object.values(this.binaryCache);
+    if (this.gpgpu.parallelCompilationExtension) {
+      return Promise.all(binaries.map(b => this.checkCompletionAsync_(b)));
+    }
+
+    // The async callback converts a synchronous throw from checkCompletion_
+    // into a rejected promise, so one failure does not skip the remaining
+    // checks; Promise.all then surfaces the first rejection.
+    return Promise.all(binaries.map(async b => this.checkCompletion_(b)));
+  }
+
+  /** Awaits frames until COMPLETION_STATUS_KHR is set, then validates. */
+  private async checkCompletionAsync_(binary: GPGPUBinary): Promise<boolean> {
+    const isDone = () => this.gpgpu.gl.getProgramParameter(
+        binary.webGLProgram,
+        this.gpgpu.parallelCompilationExtension.COMPLETION_STATUS_KHR);
+    while (!isDone()) {
+      await nextFrame();
+    }
+    return this.checkCompletion_(binary);
+  }
+
+  /** Returns true unless LINK_STATUS is false; then logs details and throws. */
+  private checkCompletion_(binary: GPGPUBinary): boolean {
+    const gl = this.gpgpu.gl;
+    const {webGLProgram, fragmentShader, source} = binary;
+    if (gl.getProgramParameter(webGLProgram, gl.LINK_STATUS) !== false) {
+      return true;
+    }
+    console.log(gl.getProgramInfoLog(webGLProgram));
+    if (gl.getShaderParameter(fragmentShader, gl.COMPILE_STATUS) === false) {
+      webgl_util.logShaderSourceAndInfoLog(
+          source, gl.getShaderInfoLog(fragmentShader));
+      throw new Error('Failed to compile fragment shader.');
+    }
+    throw new Error('Failed to link vertex and fragment shaders.');
+  }
+
+  /**
+   * Resolves the uniform locations of every program in the binary cache and
+   * writes them back onto the cached binary.
+   *
+   * `compileProgram` leaves all of these fields null when the
+   * ENGINE_COMPILE_ONLY flag is set, so this is the step that fills them in
+   * afterwards. Locations are recomputed for every cached binary, including
+   * ones that already have them.
+   */
+  getUniformLocations() {
+    Object.values(this.binaryCache).forEach(binary => {
+      const locations =
+          getUniformLocations(this.gpgpu, binary.program, binary.webGLProgram);
+      binary.uniformLocations = locations.uniformLocations;
+      binary.customUniformLocations = locations.customUniformLocations;
+      binary.infLoc = locations.infLoc;
+      binary.nanLoc = locations.nanLoc;
+      binary.inShapesLocations = locations.inShapesLocations;
+      binary.inTexShapesLocations = locations.inTexShapesLocations;
+      binary.outShapeLocation = locations.outShapeLocation;
+      binary.outShapeStridesLocation = locations.outShapeStridesLocation;
+      binary.outTexShapeLocation = locations.outTexShapeLocation;
+    });
+  }
 }
 
 function float32ToTypedArray<D extends NumericDataType>(

diff --git a/tfjs-backend-webgl/src/backend_webgl_test.ts b/tfjs-backend-webgl/src/backend_webgl_test.ts
@@ -1033,3 +1033,84 @@ describeWithFlags('custom canvas ', WEBGL_ENVS, () => {
     tf.removeBackend(customBackendName);
   });
 });
+describeWithFlags('Parallel compilation', WEBGL_ENVS, () => {
+  // TODO(lina128): Also test async after parallel compilation flag is
+  // implemented in context object. We have to keep the test sync for now,
+  // because it's a global flag, the async test will affect other tests.
+  it('does not have memory leak.', () => {
+    const savedWebGLCPUForward = tf.env().get('WEBGL_CPU_FORWARD');
+    const savedCompileOnly = tf.env().get('ENGINE_COMPILE_ONLY');
+    tf.env().set('WEBGL_CPU_FORWARD', false);
+    // The flags are global, so restore them even when a step throws.
+    try {
+      const customWebGLBackendName = 'my-webgl';
+      tf.copyRegisteredKernels('webgl', customWebGLBackendName);
+      tf.registerBackend(
+          customWebGLBackendName, () => new MathBackendWebGL());
+      tf.setBackend(customWebGLBackendName);
+
+      const a0 = tf.tensor1d([1, 1, 1]);
+      const b0 = tf.tensor1d([1, 1, 1]);
+      const c0 = tf.add(a0, b0);
+      const data = c0.dataSync();
+      const numOfBinaryCacheNoParallelCompilation = Object.keys(
+          getBinaryCache(tf.ENV.getNumber('WEBGL_VERSION'))).length;
+      expectArraysClose(data, [2, 2, 2]);
+      tf.dispose([a0, b0, c0]);
+      tf.removeBackend(customWebGLBackendName);
+
+      // TODO(lina128): Also test using an existing backend after parallel
+      // compilation flag is implemented in context object. The current approach
+      // assumes there's no binary cache, and it doesn't check existing cache.
+      const customWebGLBackendName1 = 'my-webgl1';
+      tf.copyRegisteredKernels('webgl', customWebGLBackendName1);
+      tf.registerBackend(
+          customWebGLBackendName1, () => new MathBackendWebGL());
+      tf.setBackend(customWebGLBackendName1);
+      const webGLBackend = tf.backend() as MathBackendWebGL;
+
+      const startNumBytes = (tf.memory() as WebGLMemoryInfo).numBytesInGPU;
+      const startTensor = tf.memory().numTensors;
+      const startDataBuckets = webGLBackend.numDataIds();
+
+      const a1 = tf.tensor1d([1, 1, 1]);
+      const b1 = tf.tensor1d([1, 1, 1]);
+
+      // Pre-compile round.
+      tf.env().set('ENGINE_COMPILE_ONLY', true);
+      const c1 = tf.add(a1, b1);
+      webGLBackend.checkCompileCompletion();
+      webGLBackend.getUniformLocations();
+
+      // Warm-up upload and download round.
+      tf.env().set('ENGINE_COMPILE_ONLY', false);
+      const c2 = tf.add(a1, b1);
+      c2.dataSync();
+
+      // Actual inference.
+      const c3 = tf.add(a1, b1);
+      expectArraysEqual(c3.dataSync(), [2, 2, 2]);
+      tf.dispose([a1, b1, c1, c2, c3]);
+      const endNumBytes = (tf.memory() as WebGLMemoryInfo).numBytesInGPU;
+      const endTensor = tf.memory().numTensors;
+      const endDataBuckets = webGLBackend.numDataIds();
+
+      // We only check numBytesInGPU. For parallel compilation,
+      // numBytesInGPUAllocated will be more because of the two pass
+      // uploadToGPU, but they will all be freed, resulting in endNumBytes
+      // equal to startNumBytes.
+      expect(startNumBytes).toEqual(endNumBytes);
+      expect(startTensor).toEqual(endTensor);
+      expect(endDataBuckets).toEqual(startDataBuckets);
+
+      const numOfBinaryCacheWithParallelCompilation = Object.keys(
+          getBinaryCache(tf.ENV.getNumber('WEBGL_VERSION'))).length;
+      expect(numOfBinaryCacheWithParallelCompilation)
+          .toEqual(numOfBinaryCacheNoParallelCompilation);
+      tf.removeBackend(customWebGLBackendName1);
+    } finally {
+      tf.env().set('ENGINE_COMPILE_ONLY', savedCompileOnly);
+      tf.env().set('WEBGL_CPU_FORWARD', savedWebGLCPUForward);
+    }
+  });
+});
diff --git a/tfjs-backend-webgl/src/gpgpu_context.ts b/tfjs-backend-webgl/src/gpgpu_context.ts
@@ -21,7 +21,7 @@ import {getWebGLContext, setWebGLContext} from './canvas_util';
 import * as gpgpu_util from './gpgpu_util';
 import * as tex_util from './tex_util';
 import {Texture, TextureConfig} from './tex_util';
-import {WebGL1DisjointQueryTimerExtension, WebGL2DisjointQueryTimerExtension} from './webgl_types';
+import {WebGL1DisjointQueryTimerExtension, WebGL2DisjointQueryTimerExtension, WebGLParallelCompilationExtension} from './webgl_types';
 import * as webgl_util from './webgl_util';
 
 export interface FenceContext {
@@ -37,6 +37,7 @@ export class GPGPUContext {
   colorBufferHalfFloatExtension: {};
   disjointQueryTimerExtension: WebGL2DisjointQueryTimerExtension|
       WebGL1DisjointQueryTimerExtension;
+  parallelCompilationExtension: WebGLParallelCompilationExtension;
   vertexBuffer: WebGLBuffer;
   indexBuffer: WebGLBuffer;
   framebuffer: WebGLFramebuffer;
@@ -58,6 +59,8 @@ export class GPGPUContext {
     // WebGL 2.0 enables texture floats without an extension.
     let COLOR_BUFFER_FLOAT = 'WEBGL_color_buffer_float';
     const COLOR_BUFFER_HALF_FLOAT = 'EXT_color_buffer_half_float';
+    this.parallelCompilationExtension =
+        this.gl.getExtension('KHR_parallel_shader_compile');
     if (env().getNumber('WEBGL_VERSION') === 1) {
       const TEXTURE_FLOAT = 'OES_texture_float';
       const TEXTURE_HALF_FLOAT = 'OES_texture_half_float';

diff --git a/tfjs-backend-webgl/src/gpgpu_math.ts b/tfjs-backend-webgl/src/gpgpu_math.ts
@@ -64,6 +64,18 @@ export interface GPGPUBinary {
   outTexShapeLocation?: WebGLUniformLocation;
 }
 
+export interface GPGPUBinaryLocations {  // Result of getUniformLocations().
+  uniformLocations: {[name: string]: WebGLUniformLocation};  // by variable
+  customUniformLocations?: WebGLUniformLocation[];  // per customUniforms[i]
+  infLoc: WebGLUniformLocation;  // 'INFINITY' uniform; null unless WebGL 1
+  nanLoc: WebGLUniformLocation;  // 'NAN' uniform
+  inShapesLocations?: {[name: string]: WebGLUniformLocation};
+  inTexShapesLocations?: {[name: string]: WebGLUniformLocation};
+  outShapeLocation?: WebGLUniformLocation;  // 'outShape' uniform
+  outShapeStridesLocation?: WebGLUniformLocation;
+  outTexShapeLocation?: WebGLUniformLocation;  // 'outTexShape' uniform
+}
+
 export interface TensorData {
   shape: number[];
   texData: TextureData;
@@ -101,18 +113,58 @@ export function compileProgram<T extends Tensor, K extends Tensor>(
   const fragmentShader = createFragmentShader(gpgpu.gl, source);
   const webGLProgram = gpgpu.createProgram(fragmentShader);
 
-  // Add special uniforms (NAN, INFINITY)
+  if (!env().get('ENGINE_COMPILE_ONLY')) {
+    return {
+      program,
+      fragmentShader,
+      source,
+      webGLProgram,
+      inShapeInfos,
+      outShapeInfo,
+      ...getUniformLocations(gpgpu, program, webGLProgram)
+    };
+  } else {
+    return {
+      program,
+      fragmentShader,
+      source,
+      webGLProgram,
+      inShapeInfos,
+      outShapeInfo,
+      uniformLocations: null,
+      customUniformLocations: null,
+      infLoc: null,
+      nanLoc: null,
+      inShapesLocations: null,
+      inTexShapesLocations: null,
+      outShapeLocation: null,
+      outShapeStridesLocation: null,
+      outTexShapeLocation: null
+    };
+  }
+}
+
+export function getUniformLocations(
+    gpgpu: GPGPUContext, program: GPGPUProgram,
+    webGLProgram: WebGLProgram): GPGPUBinaryLocations {
+  const uniformLocations: {[name: string]: WebGLUniformLocation} = {};
+  const inShapesLocations: {[name: string]: WebGLUniformLocation} = {};
+  const inTexShapesLocations: {[name: string]: WebGLUniformLocation} = {};
+  const customUniformLocations: WebGLUniformLocation[] = [];
+  let outShapeLocation: WebGLUniformLocation;
+  let outTexShapeLocation: WebGLUniformLocation;
+  let outShapeStridesLocation: WebGLUniformLocation;
   let infLoc: WebGLUniformLocation = null;
-  const nanLoc = gpgpu.getUniformLocation(webGLProgram, 'NAN', false);
+  let nanLoc: WebGLUniformLocation = null;
+
+  // Add special uniforms (NAN, INFINITY)
+  nanLoc = gpgpu.getUniformLocation(webGLProgram, 'NAN', false);
   if (env().getNumber('WEBGL_VERSION') === 1) {
     infLoc = gpgpu.getUniformLocation(webGLProgram, 'INFINITY', false);
   }
 
   // Add user-defined uniforms
   const shouldThrow = false;
-  const uniformLocations: {[name: string]: WebGLUniformLocation} = {};
-  const inShapesLocations: {[name: string]: WebGLUniformLocation} = {};
-  const inTexShapesLocations: {[name: string]: WebGLUniformLocation} = {};
   for (let i = 0; i < program.variableNames.length; i++) {
     const varName = program.variableNames[i];
     uniformLocations[varName] =
@@ -127,9 +179,6 @@ export function compileProgram<T extends Tensor, K extends Tensor>(
     }
   }
 
-  let outShapeLocation: WebGLUniformLocation;
-  let outTexShapeLocation: WebGLUniformLocation;
-  let outShapeStridesLocation: WebGLUniformLocation;
   if (program.enableShapeUniforms) {
     outShapeLocation =
         gpgpu.getUniformLocation(webGLProgram, 'outShape', shouldThrow);
@@ -139,7 +188,6 @@ export function compileProgram<T extends Tensor, K extends Tensor>(
         gpgpu.getUniformLocation(webGLProgram, 'outTexShape', shouldThrow);
   }
 
-  const customUniformLocations: WebGLUniformLocation[] = [];
   if (program.customUniforms) {
     program.customUniforms.forEach((d, i) => {
       customUniformLocations[i] =
@@ -148,14 +196,8 @@ export function compileProgram<T extends Tensor, K extends Tensor>(
   }
 
   return {
-    program,
-    fragmentShader,
-    source,
-    webGLProgram,
     uniformLocations,
     customUniformLocations,
-    inShapeInfos,
-    outShapeInfo,
     infLoc,
     nanLoc,
     inShapesLocations,