sampathweb
diff --git a/.kokoro/github/ubuntu/gpu/build.sh b/.kokoro/github/ubuntu/gpu/build.sh
Lines changed: 2 additions & 0 deletions
diff --git a/keras_cv/models/feature_extractor/clip/clip_encoder.py b/keras_cv/models/feature_extractor/clip/clip_encoder.py
Lines changed: 6 additions & 37 deletions
diff --git a/keras_cv/models/feature_extractor/clip/clip_image_model.py b/keras_cv/models/feature_extractor/clip/clip_image_model.py
Lines changed: 12 additions & 9 deletions
@@ -69,6 +69,7 @@ then
       keras_cv/models/object_detection/yolo_v8 \
       keras_cv/models/object_detection_3d \
       keras_cv/models/segmentation \
+      keras_cv/models/feature_extractor/clip \
       keras_cv/models/stable_diffusion
 else
    pytest --cache-clear --check_gpu --run_large --durations 0 \
@@ -83,5 +84,6 @@ else
       keras_cv/models/object_detection/yolo_v8 \
       keras_cv/models/object_detection_3d \
       keras_cv/models/segmentation \
+      keras_cv/models/feature_extractor/clip \
       keras_cv/models/stable_diffusion
 fi
@@ -11,27 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
-
 from keras_cv.api_export import keras_cv_export
 from keras_cv.backend import keras
 from keras_cv.backend import ops
 
 
-def get_initializer(initializer_range=0.02):
-    """
-    Creates a `keras.initializers.TruncatedNormal` with the given range.
-
-    Args:
-        initializer_range (*float*, defaults to 0.02): Standard deviation of the
-        initializer range.
-
-    Returns:
-        `keras.initializers.TruncatedNormal`: The truncated normal initializer.
-    """
-    return keras.initializers.TruncatedNormal(stddev=initializer_range)
-
-
 @keras_cv_export("keras_cv.models.feature_extractor.QuickGELU")
 class QuickGELU(keras.layers.Layer):
     def __init__(self, **kwargs):
@@ -54,13 +38,6 @@ def __init__(
         self.proj_dim = proj_dim
         self.num_heads = num_heads
         self.num_hidden_layers = num_hidden_layers
-        self.fc_std = np.power(2 * self.proj_dim, -0.5) * 0.02
-
-        self.in_proj_std = (
-            np.power(self.proj_dim, -0.5)
-            * (np.power(2 * self.num_hidden_layers, -0.5))
-            * 0.02
-        )
         self.attn = CLIPAttention(
             self.proj_dim,
             self.num_heads,
@@ -156,9 +133,14 @@ def __init__(self, width, num_layers, heads, **kwargs):
         ]
 
     def build(self, input_shape):
-        super().build(input_shape)
         for block in self.resblocks:
             block.build(input_shape)
+        self.built = True
+
+    def compute_output_shape(self, input_shape):
+        for block in self.resblocks:
+            input_shape = block.compute_output_shape(input_shape)
+        return input_shape
 
     def call(
         self,
@@ -174,9 +156,6 @@ def call(
             )
         return x
 
-    def compute_output_shape(self, inputs_shape):
-        return inputs_shape
-
     def get_config(self):
         config = super().get_config()
         config.update(
@@ -213,30 +192,20 @@ def __init__(
             )
 
         self.scale = self.head_dim**-0.5
-        in_proj_std = (
-            (self.proj_dim**-0.5)
-            * ((2 * self.num_hidden_layers) ** -0.5)
-            * 0.02
-        )
-        out_proj_std = (self.proj_dim**-0.5) * 0.02
         self.q_proj = keras.layers.Dense(
             units=self.proj_dim,
-            kernel_initializer=get_initializer(in_proj_std),
             name="q_proj",
         )
         self.k_proj = keras.layers.Dense(
             units=self.proj_dim,
-            kernel_initializer=get_initializer(in_proj_std),
             name="k_proj",
         )
         self.v_proj = keras.layers.Dense(
             units=self.proj_dim,
-            kernel_initializer=get_initializer(in_proj_std),
             name="v_proj",
         )
         self.out_proj = keras.layers.Dense(
             units=self.proj_dim,
-            kernel_initializer=get_initializer(out_proj_std),
             name="out_proj",
         )
 
 
@@ -16,10 +16,8 @@
 from keras_cv.backend import keras
 from keras_cv.backend import ops
 from keras_cv.models.feature_extractor.clip.clip_encoder import CLIPEncoder
-from keras_cv.models.feature_extractor.clip.clip_encoder import get_initializer
 
 
-@keras_cv_export("keras_cv.models.feature_extractor.CLIPPatchingAndEmbedding")
 class CLIPPatchingAndEmbedding(keras.layers.Layer):
     def __init__(
         self, width, patch_size, input_resolution, output_dim, **kwargs
@@ -33,7 +31,6 @@ def __init__(
             padding="valid",
             use_bias=False,
             data_format="channels_last",
-            kernel_initializer=get_initializer(0.02),
             name="patch_embed.embedding",
         )
         self.width = width
@@ -42,17 +39,13 @@ def __init__(
         self.num_patches = ops.power(
             (self.input_resolution // self.patch_size), 2
         )
-        self.class_embedding_initializer = get_initializer(
-            ops.power(self.width, -0.5) * 0.02
-        )
         self.output_dim = output_dim
 
     def build(self, input_shape):
         super().build(input_shape)
         self.conv1.build(input_shape)
         self.class_embedding = self.add_weight(
             shape=((self.width,)),
-            initializer=self.class_embedding_initializer,
             name="patch_embed.class_embedding",
         )
 
@@ -67,6 +60,13 @@ def build(self, input_shape):
             name="patch_embed.positional_embedding",
         )
 
+    def compute_output_shape(self, input_shape):
+        return [
+            None,
+            (self.input_resolution // self.patch_size) ** 2 + 1,
+            self.width,
+        ]
+
     def call(self, x):
         batch_size = ops.shape(x)[0]
         patch_embeddings = self.conv1(x)  # shape = [*, grid, grid, channel]
@@ -143,12 +143,15 @@ def __init__(
         )
 
     def build(self, input_shape):
-        super().build(input_shape)
         self.embeddings.build(input_shape)
         self.pre_norm.build([None, None, self.width])
         self.encoder.build(None)
         self.post_norm.build([None, self.width])
-        self.image_projector.build([None, None, self.width])
+        self.image_projector.build([None, self.width])
+        self.built = True
+
+    def compute_output_shape(self, input_shape):
+        return [input_shape[0], self.output_dim]
 
     def call(self, image):
         x = self.embeddings(image)