From a4b968eac2d775edf2edddc684a187a570334e0f Mon Sep 17 00:00:00 2001 From: KAZUYUKI TANIMURA Date: Tue, 18 Jun 2024 18:47:49 -0700 Subject: [PATCH] test: Enable Spark 4.0 tests (#537) ## Rationale for this change To be ready for Spark 4.0 ## What changes are included in this PR? This PR enables the spark-4.0 tests with comet enabled except for the ones listed in https://github.com/apache/datafusion-comet/issues/551 ## How are these changes tested? ANSI is enabled for Spark-4.0 --- .github/workflows/spark_sql_test_ansi.yml | 27 +- .../org/apache/comet/parquet/BatchReader.java | 1 + .../java/org/apache/comet/parquet/Native.java | 1 + .../org/apache/comet/parquet/TypeUtil.java | 19 +- .../java/org/apache/comet/parquet/Utils.java | 11 +- .../apache/comet/shims/ShimFileFormat.scala | 18 + .../apache/comet/shims/ShimFileFormat.scala | 6 +- core/benches/parquet_read.rs | 2 +- core/src/errors.rs | 50 +- core/src/execution/operators/mod.rs | 8 + core/src/jvm_bridge/mod.rs | 1 + core/src/parquet/mod.rs | 4 +- core/src/parquet/read/column.rs | 34 +- core/src/parquet/read/values.rs | 16 + core/src/parquet/util/jni.rs | 7 +- dev/diffs/4.0.0-preview1.diff | 2816 +++++++++++++++++ pom.xml | 17 +- .../comet/CometSparkSessionExtensions.scala | 6 +- .../apache/comet/serde/QueryPlanSerde.scala | 9 +- .../apache/comet/shims/CometExprShim.scala | 4 + .../apache/comet/shims/CometExprShim.scala | 6 + .../sql/comet/shims/ShimCometScanExec.scala | 21 +- .../apache/comet/shims/CometExprShim.scala | 6 + .../comet/parquet/ParquetReadSuite.scala | 2 +- 24 files changed, 3021 insertions(+), 71 deletions(-) create mode 100644 dev/diffs/4.0.0-preview1.diff diff --git a/.github/workflows/spark_sql_test_ansi.yml b/.github/workflows/spark_sql_test_ansi.yml index 337e59efe..34a393115 100644 --- a/.github/workflows/spark_sql_test_ansi.yml +++ b/.github/workflows/spark_sql_test_ansi.yml @@ -22,17 +22,15 @@ concurrency: cancel-in-progress: true on: - # enable the following once Ansi support is completed - # push: - # paths-ignore: - # - "doc/**" - # - "**.md" - # pull_request: - # paths-ignore: - # - "doc/**" - # - "**.md" - - # manual trigger ONLY + push: + paths-ignore: + - "docs/**" + - "**.md" + pull_request: + paths-ignore: + - "docs/**" + - "**.md" + # manual trigger # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow workflow_dispatch: @@ -44,8 +42,8 @@ jobs: strategy: matrix: os: [ubuntu-latest] - java-version: [11] - spark-version: [{short: '3.4', full: '3.4.2'}] + java-version: [17] + spark-version: [{short: '4.0', full: '4.0.0-preview1'}] module: - {name: "catalyst", args1: "catalyst/test", args2: ""} - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest} @@ -75,7 +73,8 @@ jobs: - name: Run Spark tests run: | cd apache-spark - ENABLE_COMET=true ENABLE_COMET_ANSI_MODE=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" + rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups + RUST_BACKTRACE=1 ENABLE_COMET=true ENABLE_COMET_ANSI_MODE=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}" env: LC_ALL: "C.UTF-8" diff --git a/common/src/main/java/org/apache/comet/parquet/BatchReader.java b/common/src/main/java/org/apache/comet/parquet/BatchReader.java index bf8e6e550..4b63f84ef 100644 --- a/common/src/main/java/org/apache/comet/parquet/BatchReader.java +++ b/common/src/main/java/org/apache/comet/parquet/BatchReader.java @@ -285,6 
+285,7 @@ public void init() throws URISyntaxException, IOException { missingColumns = new boolean[columns.size()]; List paths = requestedSchema.getPaths(); StructField[] nonPartitionFields = sparkSchema.fields(); + ShimFileFormat.findRowIndexColumnIndexInSchema(sparkSchema); for (int i = 0; i < requestedSchema.getFieldCount(); i++) { Type t = requestedSchema.getFields().get(i); Preconditions.checkState( diff --git a/common/src/main/java/org/apache/comet/parquet/Native.java b/common/src/main/java/org/apache/comet/parquet/Native.java index 0887ae12f..b40e27e73 100644 --- a/common/src/main/java/org/apache/comet/parquet/Native.java +++ b/common/src/main/java/org/apache/comet/parquet/Native.java @@ -75,6 +75,7 @@ public static native long initColumnReader( int precision, int expectedPrecision, int scale, + int expectedScale, int tu, boolean isAdjustedUtc, int batchSize, diff --git a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java index b8b7ff525..bfbb7d0d2 100644 --- a/common/src/main/java/org/apache/comet/parquet/TypeUtil.java +++ b/common/src/main/java/org/apache/comet/parquet/TypeUtil.java @@ -27,6 +27,7 @@ import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Types; +import org.apache.spark.package$; import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException; import org.apache.spark.sql.types.*; @@ -169,6 +170,7 @@ && isUnsignedIntTypeMatched(logicalTypeAnnotation, 64)) { break; case INT96: if (sparkType == TimestampNTZType$.MODULE$) { + if (isSpark40Plus()) return; // Spark 4.0+ supports Timestamp NTZ with INT96 convertErrorForTimestampNTZ(typeName.name()); } else if (sparkType == DataTypes.TimestampType) { return; @@ -218,7 +220,8 @@ private static void validateTimestampType( // Throw an exception if the Parquet type is TimestampLTZ and the Catalyst type is TimestampNTZ. // This is to avoid mistakes in reading the timestamp values. if (((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).isAdjustedToUTC() - && sparkType == TimestampNTZType$.MODULE$) { + && sparkType == TimestampNTZType$.MODULE$ + && !isSpark40Plus()) { convertErrorForTimestampNTZ("int64 time(" + logicalTypeAnnotation + ")"); } } @@ -232,12 +235,14 @@ private static void convertErrorForTimestampNTZ(String parquetType) { } private static boolean canReadAsIntDecimal(ColumnDescriptor descriptor, DataType dt) { - if (!DecimalType.is32BitDecimalType(dt)) return false; + if (!DecimalType.is32BitDecimalType(dt) && !(isSpark40Plus() && dt instanceof DecimalType)) + return false; return isDecimalTypeMatched(descriptor, dt); } private static boolean canReadAsLongDecimal(ColumnDescriptor descriptor, DataType dt) { - if (!DecimalType.is64BitDecimalType(dt)) return false; + if (!DecimalType.is64BitDecimalType(dt) && !(isSpark40Plus() && dt instanceof DecimalType)) + return false; return isDecimalTypeMatched(descriptor, dt); } @@ -261,7 +266,9 @@ private static boolean isDecimalTypeMatched(ColumnDescriptor descriptor, DataTyp DecimalLogicalTypeAnnotation decimalType = (DecimalLogicalTypeAnnotation) typeAnnotation; // It's OK if the required decimal precision is larger than or equal to the physical decimal // precision in the Parquet metadata, as long as the decimal scale is the same. 
- return decimalType.getPrecision() <= d.precision() && decimalType.getScale() == d.scale(); + return decimalType.getPrecision() <= d.precision() + && (decimalType.getScale() == d.scale() + || (isSpark40Plus() && decimalType.getScale() <= d.scale())); } return false; } @@ -278,4 +285,8 @@ private static boolean isUnsignedIntTypeMatched( && !((IntLogicalTypeAnnotation) logicalTypeAnnotation).isSigned() && ((IntLogicalTypeAnnotation) logicalTypeAnnotation).getBitWidth() == bitWidth; } + + private static boolean isSpark40Plus() { + return package$.MODULE$.SPARK_VERSION().compareTo("4.0") >= 0; + } } diff --git a/common/src/main/java/org/apache/comet/parquet/Utils.java b/common/src/main/java/org/apache/comet/parquet/Utils.java index 99f3a4edd..2d4b83a67 100644 --- a/common/src/main/java/org/apache/comet/parquet/Utils.java +++ b/common/src/main/java/org/apache/comet/parquet/Utils.java @@ -115,7 +115,7 @@ public static long initColumnReader( promotionInfo = new TypePromotionInfo(readType); } else { // If type promotion is not enable, we'll just use the Parquet primitive type and precision. - promotionInfo = new TypePromotionInfo(primitiveTypeId, precision); + promotionInfo = new TypePromotionInfo(primitiveTypeId, precision, scale); } return Native.initColumnReader( @@ -131,6 +131,7 @@ public static long initColumnReader( precision, promotionInfo.precision, scale, + promotionInfo.scale, tu, isAdjustedUtc, batchSize, @@ -144,10 +145,13 @@ static class TypePromotionInfo { int physicalTypeId; // Decimal precision from the Spark read schema, or -1 if it's not decimal type. int precision; + // Decimal scale from the Spark read schema, or -1 if it's not decimal type. + int scale; - TypePromotionInfo(int physicalTypeId, int precision) { + TypePromotionInfo(int physicalTypeId, int precision, int scale) { this.physicalTypeId = physicalTypeId; this.precision = precision; + this.scale = scale; } TypePromotionInfo(DataType sparkReadType) { @@ -159,13 +163,16 @@ static class TypePromotionInfo { int physicalTypeId = getPhysicalTypeId(primitiveType.getPrimitiveTypeName()); LogicalTypeAnnotation annotation = primitiveType.getLogicalTypeAnnotation(); int precision = -1; + int scale = -1; if (annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalAnnotation = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) annotation; precision = decimalAnnotation.getPrecision(); + scale = decimalAnnotation.getScale(); } this.physicalTypeId = physicalTypeId; this.precision = precision; + this.scale = scale; } } diff --git a/common/src/main/spark-3.x/org/apache/comet/shims/ShimFileFormat.scala b/common/src/main/spark-3.x/org/apache/comet/shims/ShimFileFormat.scala index 685e8f566..c34c947b5 100644 --- a/common/src/main/spark-3.x/org/apache/comet/shims/ShimFileFormat.scala +++ b/common/src/main/spark-3.x/org/apache/comet/shims/ShimFileFormat.scala @@ -19,6 +19,8 @@ package org.apache.comet.shims +import org.apache.spark.sql.types.{LongType, StructField, StructType} + object ShimFileFormat { // TODO: remove after dropping Spark 3.3 support and directly use FileFormat.ROW_INDEX @@ -29,4 +31,20 @@ object ShimFileFormat { // TODO: remove after dropping Spark 3.3 support and directly use // FileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME val ROW_INDEX_TEMPORARY_COLUMN_NAME: String = s"_tmp_metadata_$ROW_INDEX" + + // TODO: remove after dropping Spark 3.3 support and directly use + // RowIndexUtil.findRowIndexColumnIndexInSchema + def 
findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = { + sparkSchema.fields.zipWithIndex.find { case (field: StructField, _: Int) => + field.name == ShimFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME + } match { + case Some((field: StructField, idx: Int)) => + if (field.dataType != LongType) { + throw new RuntimeException( + s"${ShimFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME} must be of LongType") + } + idx + case _ => -1 + } + } } diff --git a/common/src/main/spark-4.0/org/apache/comet/shims/ShimFileFormat.scala b/common/src/main/spark-4.0/org/apache/comet/shims/ShimFileFormat.scala index 2f386869a..1702db135 100644 --- a/common/src/main/spark-4.0/org/apache/comet/shims/ShimFileFormat.scala +++ b/common/src/main/spark-4.0/org/apache/comet/shims/ShimFileFormat.scala @@ -19,13 +19,15 @@ package org.apache.comet.shims -import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.execution.datasources.parquet.ParquetRowIndexUtil +import org.apache.spark.sql.types.StructType object ShimFileFormat { // A name for a temporary column that holds row indexes computed by the file format reader // until they can be placed in the _metadata struct. val ROW_INDEX_TEMPORARY_COLUMN_NAME = ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME - val OPTION_RETURNING_BATCH = FileFormat.OPTION_RETURNING_BATCH + def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = + ParquetRowIndexUtil.findRowIndexColumnIndexInSchema(sparkSchema) } diff --git a/core/benches/parquet_read.rs b/core/benches/parquet_read.rs index 612d081c7..32463c077 100644 --- a/core/benches/parquet_read.rs +++ b/core/benches/parquet_read.rs @@ -54,7 +54,7 @@ fn bench(c: &mut Criterion) { ); b.iter(|| { let cd = ColumnDescriptor::new(t.clone(), 0, 0, ColumnPath::from(Vec::new())); - let promition_info = TypePromotionInfo::new(PhysicalType::INT32, -1); + let promition_info = TypePromotionInfo::new(PhysicalType::INT32, -1, -1); let mut column_reader = TestColumnReader::new( cd, promition_info, diff --git a/core/src/errors.rs b/core/src/errors.rs index b38c5e90b..b6f4d0889 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -21,6 +21,7 @@ use arrow::error::ArrowError; use datafusion_common::DataFusionError; use jni::errors::{Exception, ToException}; use regex::Regex; + use std::{ any::Any, convert, @@ -37,6 +38,7 @@ use std::{ use jni::sys::{jboolean, jbyte, jchar, jdouble, jfloat, jint, jlong, jobject, jshort}; use crate::execution::operators::ExecutionError; +use jni::objects::{GlobalRef, JThrowable}; use jni::JNIEnv; use lazy_static::lazy_static; use parquet::errors::ParquetError; @@ -160,7 +162,11 @@ pub enum CometError { }, #[error("{class}: {msg}")] - JavaException { class: String, msg: String }, + JavaException { + class: String, + msg: String, + throwable: GlobalRef, + }, } pub fn init() { @@ -208,6 +214,15 @@ impl From for ExecutionError { fn from(value: CometError) -> Self { match value { CometError::Execution { source } => source, + CometError::JavaException { + class, + msg, + throwable, + } => ExecutionError::JavaException { + class, + msg, + throwable, + }, _ => ExecutionError::GeneralError(value.to_string()), } } @@ -379,17 +394,34 @@ pub fn unwrap_or_throw_default( } } -fn throw_exception(env: &mut JNIEnv, error: &E, backtrace: Option) { +fn throw_exception(env: &mut JNIEnv, error: &CometError, backtrace: Option) { // If there isn't already an exception? if env.exception_check().is_ok() { // ... 
then throw new exception - let exception = error.to_exception(); - match backtrace { - Some(backtrace_string) => env.throw_new( - exception.class, - to_stacktrace_string(exception.msg, backtrace_string).unwrap(), - ), - _ => env.throw_new(exception.class, exception.msg), + match error { + CometError::JavaException { + class: _, + msg: _, + throwable, + } => env.throw(<&JThrowable>::from(throwable.as_obj())), + CometError::Execution { + source: + ExecutionError::JavaException { + class: _, + msg: _, + throwable, + }, + } => env.throw(<&JThrowable>::from(throwable.as_obj())), + _ => { + let exception = error.to_exception(); + match backtrace { + Some(backtrace_string) => env.throw_new( + exception.class, + to_stacktrace_string(exception.msg, backtrace_string).unwrap(), + ), + _ => env.throw_new(exception.class, exception.msg), + } + } } .expect("Thrown exception") } diff --git a/core/src/execution/operators/mod.rs b/core/src/execution/operators/mod.rs index 13a0d9627..d0cc7ac68 100644 --- a/core/src/execution/operators/mod.rs +++ b/core/src/execution/operators/mod.rs @@ -25,6 +25,7 @@ use arrow::{ use arrow::compute::{cast_with_options, CastOptions}; use arrow_schema::ArrowError; +use jni::objects::GlobalRef; use std::{fmt::Debug, sync::Arc}; mod scan; @@ -52,6 +53,13 @@ pub enum ExecutionError { /// DataFusion error #[error("Error from DataFusion: {0}.")] DataFusionError(String), + + #[error("{class}: {msg}")] + JavaException { + class: String, + msg: String, + throwable: GlobalRef, + }, } /// Copy an Arrow Array diff --git a/core/src/jvm_bridge/mod.rs b/core/src/jvm_bridge/mod.rs index 41376f03b..3f61c0324 100644 --- a/core/src/jvm_bridge/mod.rs +++ b/core/src/jvm_bridge/mod.rs @@ -385,5 +385,6 @@ pub(crate) fn convert_exception( Ok(CometError::JavaException { class: exception_class_name_str, msg: message_str, + throwable: env.new_global_ref(throwable)?, }) } diff --git a/core/src/parquet/mod.rs b/core/src/parquet/mod.rs index 4f87d15de..e6acaa26b 100644 --- a/core/src/parquet/mod.rs +++ b/core/src/parquet/mod.rs @@ -72,6 +72,7 @@ pub extern "system" fn Java_org_apache_comet_parquet_Native_initColumnReader( precision: jint, read_precision: jint, scale: jint, + read_scale: jint, time_unit: jint, is_adjusted_utc: jboolean, batch_size: jint, @@ -94,7 +95,8 @@ pub extern "system" fn Java_org_apache_comet_parquet_Native_initColumnReader( is_adjusted_utc, jni_path, )?; - let promotion_info = TypePromotionInfo::new_from_jni(read_primitive_type, read_precision); + let promotion_info = + TypePromotionInfo::new_from_jni(read_primitive_type, read_precision, read_scale); let ctx = Context { column_reader: ColumnReader::get( desc, diff --git a/core/src/parquet/read/column.rs b/core/src/parquet/read/column.rs index 6fc73f93f..22bade6b3 100644 --- a/core/src/parquet/read/column.rs +++ b/core/src/parquet/read/column.rs @@ -132,11 +132,17 @@ impl ColumnReader { (32, false) => typed_reader!(UInt32ColumnReader, Int64), _ => unimplemented!("Unsupported INT32 annotation: {:?}", lt), }, - LogicalType::Decimal { scale, precision } => { - if use_decimal_128 { + LogicalType::Decimal { + scale, + precision: _, + } => { + if use_decimal_128 || scale < &promotion_info.scale { typed_reader!( Int32DecimalColumnReader, - ArrowDataType::Decimal128(*precision as u8, *scale as i8) + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) ) } else { typed_reader!(Int32ColumnReader, Int32) @@ -168,11 +174,17 @@ impl ColumnReader { ), _ => panic!("Unsupported INT64 annotation: {:?}", lt), 
}, - LogicalType::Decimal { scale, precision } => { - if use_decimal_128 { + LogicalType::Decimal { + scale, + precision: _, + } => { + if use_decimal_128 || scale < &promotion_info.scale { typed_reader!( Int64DecimalColumnReader, - ArrowDataType::Decimal128(*precision as u8, *scale as i8) + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) ) } else { typed_reader!(Int64ColumnReader, Int64) @@ -248,7 +260,10 @@ impl ColumnReader { PhysicalType::FIXED_LEN_BYTE_ARRAY => { if let Some(logical_type) = desc.logical_type() { match logical_type { - LogicalType::Decimal { precision, scale } => { + LogicalType::Decimal { + precision, + scale: _, + } => { if !use_decimal_128 && precision <= DECIMAL_MAX_INT_DIGITS { typed_reader!(FLBADecimal32ColumnReader, Int32) } else if !use_decimal_128 && precision <= DECIMAL_MAX_LONG_DIGITS { @@ -256,7 +271,10 @@ impl ColumnReader { } else { typed_reader!( FLBADecimalColumnReader, - ArrowDataType::Decimal128(precision as u8, scale as i8) + ArrowDataType::Decimal128( + promotion_info.precision as u8, + promotion_info.scale as i8 + ) ) } } diff --git a/core/src/parquet/read/values.rs b/core/src/parquet/read/values.rs index 7f1195fa9..ebed5f95b 100644 --- a/core/src/parquet/read/values.rs +++ b/core/src/parquet/read/values.rs @@ -28,6 +28,7 @@ use crate::{ parquet::{data_type::*, read::DECIMAL_BYTE_WIDTH, ParquetMutableVector}, unlikely, }; +use arrow::datatypes::DataType as ArrowDataType; pub fn get_decoder( value_data: Buffer, @@ -651,6 +652,12 @@ macro_rules! make_plain_decimal_impl { debug_assert!(byte_width <= DECIMAL_BYTE_WIDTH); + let src_scale = src.desc.type_scale() as u32; + let dst_scale = match dst.arrow_type { + ArrowDataType::Decimal128(_percision, scale) => scale as u32, + _ => unreachable!() + }; + for _ in 0..num { let s = &mut dst_data[dst_offset..]; @@ -674,6 +681,15 @@ macro_rules! 
make_plain_decimal_impl { } } + if dst_scale > src_scale { + let exp = dst_scale - src_scale; + let mul = 10_i128.pow(exp); + let v = s.as_mut_ptr() as *mut i128; + unsafe { + v.write_unaligned(v.read_unaligned() * mul); + } + } + src_offset += byte_width; dst_offset += DECIMAL_BYTE_WIDTH; } diff --git a/core/src/parquet/util/jni.rs b/core/src/parquet/util/jni.rs index 62787213f..cde9fff0f 100644 --- a/core/src/parquet/util/jni.rs +++ b/core/src/parquet/util/jni.rs @@ -96,21 +96,24 @@ pub fn convert_encoding(ordinal: jint) -> Encoding { pub struct TypePromotionInfo { pub(crate) physical_type: PhysicalType, pub(crate) precision: i32, + pub(crate) scale: i32, } impl TypePromotionInfo { - pub fn new_from_jni(physical_type_id: jint, precision: jint) -> Self { + pub fn new_from_jni(physical_type_id: jint, precision: jint, scale: jint) -> Self { let physical_type = convert_physical_type(physical_type_id); Self { physical_type, precision, + scale, } } - pub fn new(physical_type: PhysicalType, precision: i32) -> Self { + pub fn new(physical_type: PhysicalType, precision: i32, scale: i32) -> Self { Self { physical_type, precision, + scale, } } } diff --git a/dev/diffs/4.0.0-preview1.diff b/dev/diffs/4.0.0-preview1.diff new file mode 100644 index 000000000..4031015df --- /dev/null +++ b/dev/diffs/4.0.0-preview1.diff @@ -0,0 +1,2816 @@ +diff --git a/pom.xml b/pom.xml +index a4b1b2c3c9f..a2315d2a95b 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -147,6 +147,8 @@ + 0.10.0 + 2.5.2 + 2.0.8 ++ 4.0 ++ 0.1.0-SNAPSHOT + + + org.apache.datasketches +diff --git a/sql/core/pom.xml b/sql/core/pom.xml +index 19f6303be36..31e1d27700f 100644 +--- a/sql/core/pom.xml ++++ b/sql/core/pom.xml +@@ -77,6 +77,10 @@ + org.apache.spark + spark-tags_${scala.binary.version} + ++ ++ org.apache.comet ++ comet-spark-spark${spark.version.short}_${scala.binary.version} ++ + + + spark-4.0 - 2.13.13 + 2.13.14 2.13 4.0.0-preview1 4.0 1.13.1 + 4.9.5 + 2.0.13 spark-4.0 not-needed-yet @@ -632,7 +643,7 @@ under the License. 
org.scalameta semanticdb-scalac_${scala.version} - 4.8.8 + ${semanticdb.version} diff --git a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala index e939b43a1..c19395684 100644 --- a/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala +++ b/spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.comet.CometConf._ -import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometNativeShuffleMode, isCometOperatorEnabled, isCometScan, isCometScanEnabled, isCometShuffleEnabled, isSchemaSupported, isSpark34Plus, shouldApplyRowToColumnar, withInfo, withInfos} +import org.apache.comet.CometSparkSessionExtensions.{createMessage, getCometShuffleNotEnabledReason, isANSIEnabled, isCometBroadCastForceEnabled, isCometEnabled, isCometExecEnabled, isCometJVMShuffleMode, isCometNativeShuffleMode, isCometOperatorEnabled, isCometScan, isCometScanEnabled, isCometShuffleEnabled, isSchemaSupported, isSpark34Plus, isSpark40Plus, shouldApplyRowToColumnar, withInfo, withInfos} import org.apache.comet.parquet.{CometParquetScan, SupportsComet} import org.apache.comet.serde.OperatorOuterClass.Operator import org.apache.comet.serde.QueryPlanSerde @@ -715,7 +715,9 @@ class CometSparkSessionExtensions // enabled. if (isANSIEnabled(conf)) { if (COMET_ANSI_MODE_ENABLED.get()) { - logWarning("Using Comet's experimental support for ANSI mode.") + if (!isSpark40Plus) { + logWarning("Using Comet's experimental support for ANSI mode.") + } } else { logInfo("Comet extension disabled for ANSI mode") return plan diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 67ecfe52d..13abaa0c4 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -63,7 +63,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim _: DoubleType | _: StringType | _: BinaryType | _: TimestampType | _: DecimalType | _: DateType | _: BooleanType | _: NullType => true - case dt if dt.typeName == "timestamp_ntz" => true + case dt if isTimestampNTZType(dt) => true case dt => emitWarning(s"unsupported Spark data type: $dt") false @@ -87,7 +87,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim case _: BinaryType => 8 case _: TimestampType => 9 case _: DecimalType => 10 - case dt if dt.typeName == "timestamp_ntz" => 11 + case dt if isTimestampNTZType(dt) => 11 case _: DateType => 12 case _: NullType => 13 case _: ArrayType => 14 @@ -1033,6 +1033,8 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim com.google.protobuf.ByteString.copyFrom(value.asInstanceOf[Array[Byte]]) exprBuilder.setBytesVal(byteStr) case _: DateType => exprBuilder.setIntVal(value.asInstanceOf[Int]) + case dt if isTimestampNTZType(dt) => + exprBuilder.setLongVal(value.asInstanceOf[Long]) case dt => logWarning(s"Unexpected date type '$dt' for literal value '$value'") } @@ -2241,8 +2243,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim case _: ByteType | _: ShortType | _: IntegerType | 
_: LongType | _: FloatType | _: DoubleType | _: StringType | _: DateType | _: DecimalType | _: BooleanType => true - // `TimestampNTZType` is private in Spark 3.3. - case dt if dt.typeName == "timestamp_ntz" => true + case dt if isTimestampNTZType(dt) => true case _ => false } diff --git a/spark/src/main/spark-3.3/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.3/org/apache/comet/shims/CometExprShim.scala index 150656c23..aa6db06d8 100644 --- a/spark/src/main/spark-3.3/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.3/org/apache/comet/shims/CometExprShim.scala @@ -20,6 +20,7 @@ package org.apache.comet.shims import org.apache.comet.expressions.CometEvalMode import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types.DataType /** * `CometExprShim` acts as a shim for for parsing expressions from different Spark versions. @@ -32,5 +33,8 @@ trait CometExprShim { (unhex.child, Literal(false)) } + protected def isTimestampNTZType(dt: DataType): Boolean = + dt.typeName == "timestamp_ntz" // `TimestampNTZType` is private + protected def evalMode(c: Cast): CometEvalMode.Value = CometEvalMode.fromBoolean(c.ansiEnabled) } diff --git a/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala index 5f4e3fba2..7709957b4 100644 --- a/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-3.4/org/apache/comet/shims/CometExprShim.scala @@ -20,6 +20,7 @@ package org.apache.comet.shims import org.apache.comet.expressions.CometEvalMode import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types.{DataType, TimestampNTZType} /** * `CometExprShim` acts as a shim for for parsing expressions from different Spark versions. 
@@ -32,6 +33,11 @@ trait CometExprShim { (unhex.child, Literal(unhex.failOnError)) } + protected def isTimestampNTZType(dt: DataType): Boolean = dt match { + case _: TimestampNTZType => true + case _ => false + } + protected def evalMode(c: Cast): CometEvalMode.Value = CometEvalModeUtil.fromSparkEvalMode(c.evalMode) } diff --git a/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala b/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala index 65fb59a38..900b19895 100644 --- a/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala +++ b/spark/src/main/spark-3.x/org/apache/spark/sql/comet/shims/ShimCometScanExec.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions import org.apache.spark.sql.execution.{FileSourceScanExec, PartitionedFileUtil} import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, HadoopFsRelation, PartitionDirectory, PartitionedFile} -import org.apache.spark.sql.types.{LongType, StructField, StructType} +import org.apache.spark.sql.types.StructType trait ShimCometScanExec { def wrapped: FileSourceScanExec @@ -87,24 +87,9 @@ trait ShimCometScanExec { .asInstanceOf[SparkException] } - // Copied from Spark 3.4 RowIndexUtil due to PARQUET-2161 (tracked in SPARK-39634) - // TODO: remove after PARQUET-2161 becomes available in Parquet - private def findRowIndexColumnIndexInSchema(sparkSchema: StructType): Int = { - sparkSchema.fields.zipWithIndex.find { case (field: StructField, _: Int) => - field.name == ShimFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME - } match { - case Some((field: StructField, idx: Int)) => - if (field.dataType != LongType) { - throw new RuntimeException( - s"${ShimFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME} must be of LongType") - } - idx - case _ => -1 - } - } - protected def isNeededForSchema(sparkSchema: StructType): Boolean = { - findRowIndexColumnIndexInSchema(sparkSchema) >= 0 + // TODO: remove after PARQUET-2161 becomes available in Parquet (tracked in SPARK-39634) + ShimFileFormat.findRowIndexColumnIndexInSchema(sparkSchema) >= 0 } protected def getPartitionedFile(f: FileStatus, p: PartitionDirectory): PartitionedFile = diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 5f4e3fba2..7709957b4 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -20,6 +20,7 @@ package org.apache.comet.shims import org.apache.comet.expressions.CometEvalMode import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types.{DataType, TimestampNTZType} /** * `CometExprShim` acts as a shim for for parsing expressions from different Spark versions. 
@@ -32,6 +33,11 @@ trait CometExprShim { (unhex.child, Literal(unhex.failOnError)) } + protected def isTimestampNTZType(dt: DataType): Boolean = dt match { + case _: TimestampNTZType => true + case _ => false + } + protected def evalMode(c: Cast): CometEvalMode.Value = CometEvalModeUtil.fromSparkEvalMode(c.evalMode) } diff --git a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala index bc18d8f10..9e6cbc0a6 100644 --- a/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala +++ b/spark/src/test/scala/org/apache/comet/parquet/ParquetReadSuite.scala @@ -1116,7 +1116,7 @@ abstract class ParquetReadSuite extends CometTestBase { } test("row group skipping doesn't overflow when reading into larger type") { - // Spark 4.0 no longer fails for widening types + // Spark 4.0 no longer fails for widening types SPARK-40876 // https://github.com/apache/spark/commit/3361f25dc0ff6e5233903c26ee105711b79ba967 assume(isSpark34Plus && !isSpark40Plus)
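
Taken together, the decimal scale-promotion changes above (`TypePromotionInfo` now carrying a `scale`, the `Decimal128` readers building their Arrow type from `promotion_info.precision`/`promotion_info.scale`, and the multiply-by-`10^exp` step added in `core/src/parquet/read/values.rs`) reduce to one rescaling rule: when the Spark read schema requests a larger decimal scale than the Parquet file stores, the unscaled value is multiplied by 10 raised to the scale difference. The following is a minimal standalone Rust sketch of that rule for illustration only; it is not part of the patch, and the function name is hypothetical.

```rust
/// Rescale an unscaled decimal value from the file's scale to the
/// (equal or larger) scale requested by the Spark read schema.
/// Mirrors the widening step the patch performs in values.rs.
fn rescale_decimal(unscaled: i128, src_scale: u32, dst_scale: u32) -> i128 {
    if dst_scale > src_scale {
        // Widening the scale: shift the unscaled value left in base 10
        // so the numeric value is preserved at the new scale.
        unscaled * 10_i128.pow(dst_scale - src_scale)
    } else {
        // Equal scales need no adjustment (narrowing is rejected earlier
        // by the TypeUtil checks).
        unscaled
    }
}

fn main() {
    // 12.34 stored as DECIMAL(4, 2) and read back as DECIMAL(6, 4)
    // becomes 12.3400, i.e. unscaled 1234 -> 123400.
    assert_eq!(rescale_decimal(1234, 2, 4), 123_400);
    // Same scale: value passes through unchanged.
    assert_eq!(rescale_decimal(1234, 2, 2), 1234);
    println!("decimal rescaling sketch ok");
}
```

Because only scale widening is permitted (TypeUtil only accepts `decimalType.getScale() <= d.scale()` on Spark 4.0+), the multiplication never loses digits; it simply re-expresses the same numeric value at the wider scale the read schema expects.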