diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 7a6c49f9135d0..b2b6a38916eeb 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -14,28 +14,6 @@ on: required: true jobs: - # This is on the top to give the most visibility in case of failures - hadoop-2: - name: Hadoop 2 build - runs-on: ubuntu-20.04 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v2 - - name: Cache Coursier local repository - uses: actions/cache@v2 - with: - path: ~/.cache/coursier - key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} - restore-keys: | - hadoop-2-coursier- - - name: Install Java 8 - uses: actions/setup-java@v1 - with: - java-version: 1.8 - - name: Build with SBT - run: | - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile - # Build: build Spark and run the tests for specified modules. build: name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" @@ -45,7 +23,7 @@ jobs: fail-fast: false matrix: java: - - 1.8 + - 8 hadoop: - hadoop3.2 hive: @@ -71,26 +49,26 @@ jobs: include: # Hive tests - modules: hive - java: 1.8 + java: 8 hadoop: hadoop3.2 hive: hive2.3 included-tags: org.apache.spark.tags.SlowHiveTest comment: "- slow tests" - modules: hive - java: 1.8 + java: 8 hadoop: hadoop3.2 hive: hive2.3 excluded-tags: org.apache.spark.tags.SlowHiveTest comment: "- other tests" # SQL tests - modules: sql - java: 1.8 + java: 8 hadoop: hadoop3.2 hive: hive2.3 included-tags: org.apache.spark.tags.ExtendedSQLTest comment: "- slow tests" - modules: sql - java: 1.8 + java: 8 hadoop: hadoop3.2 hive: hive2.3 excluded-tags: org.apache.spark.tags.ExtendedSQLTest @@ -123,16 +101,10 @@ jobs: build/zinc-* build/scala-* build/*.jar + ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} restore-keys: | build- - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-maven- - name: Cache Coursier local repository uses: actions/cache@v2 with: @@ -140,7 +112,7 @@ jobs: key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - - name: Install JDK ${{ matrix.java }} + - name: Install Java ${{ matrix.java }} uses: actions/setup-java@v1 with: java-version: ${{ matrix.java }} @@ -163,9 +135,7 @@ jobs: run: | # Hive and SQL tests become flaky when running in parallel as it's too intensive. 
if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi - mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - rm -rf ~/.m2/repository/org/apache/spark - name: Upload test results to report if: always() uses: actions/upload-artifact@v2 @@ -218,16 +188,10 @@ jobs: build/zinc-* build/scala-* build/*.jar + ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} restore-keys: | build- - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: pyspark-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - pyspark-maven- - name: Cache Coursier local repository uses: actions/cache@v2 with: @@ -250,24 +214,22 @@ jobs: # Run the tests. - name: Run tests run: | - mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" - rm -rf ~/.m2/repository/org/apache/spark - name: Upload test results to report if: always() uses: actions/upload-artifact@v2 with: - name: test-results-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 + name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3 path: "**/target/test-reports/*.xml" - name: Upload unit tests log files if: failure() uses: actions/upload-artifact@v2 with: - name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 + name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3 path: "**/target/unit-tests.log" sparkr: - name: Build modules - sparkr + name: "Build modules: sparkr" runs-on: ubuntu-20.04 container: image: dongjoon/apache-spark-github-action-image:20201025 @@ -294,16 +256,10 @@ jobs: build/zinc-* build/scala-* build/*.jar + ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} restore-keys: | build- - - name: Cache Maven local repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: sparkr-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - sparkr-maven- - name: Cache Coursier local repository uses: actions/cache@v2 with: @@ -313,18 +269,16 @@ jobs: sparkr-coursier- - name: Run tests run: | - mkdir -p ~/.m2 # The followings are also used by `r-lib/actions/setup-r` to avoid # R issues at docker environment export TZ=UTC export _R_CHECK_SYSTEM_CLOCK_=FALSE ./dev/run-tests --parallelism 2 --modules sparkr - rm -rf ~/.m2/repository/org/apache/spark - name: Upload test results to report if: always() uses: actions/upload-artifact@v2 with: - name: test-results-sparkr--1.8-hadoop3.2-hive2.3 + name: test-results-sparkr--8-hadoop3.2-hive2.3 path: "**/target/test-reports/*.xml" # Static analysis, and documentation build @@ -334,17 +288,37 @@ jobs: steps: - name: Checkout Spark repository uses: actions/checkout@v2 + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. 
+ - name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/zinc-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + docs-coursier- - name: Cache Maven local repository uses: actions/cache@v2 with: path: ~/.m2/repository - key: docs-maven-repo-${{ hashFiles('**/pom.xml') }} + key: docs-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | docs-maven- - - name: Install JDK 1.8 + - name: Install Java 8 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 8 - name: Install Python 3.6 uses: actions/setup-python@v2 with: @@ -395,8 +369,8 @@ jobs: cd docs jekyll build - java11: - name: Java 11 build + java-11: + name: Java 11 build with Maven runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository @@ -416,12 +390,12 @@ jobs: run: | export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 + # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install rm -rf ~/.m2/repository/org/apache/spark scala-213: - name: Scala 2.13 build + name: Scala 2.13 build with SBT runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository @@ -433,11 +407,32 @@ jobs: key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | scala-213-coursier- - - name: Install Java 11 + - name: Install Java 8 uses: actions/setup-java@v1 with: - java-version: 11 + java-version: 8 - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile + + hadoop-2: + name: Hadoop 2 build with SBT + runs-on: ubuntu-20.04 + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + - name: Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + hadoop-2-coursier- + - name: Install Java 8 + uses: actions/setup-java@v1 + with: + java-version: 8 + - name: Build with SBT + run: | + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 404a6968ea429..b927a6b96b810 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -292,6 +292,7 @@ exportMethods("%<=>%", "floor", "format_number", "format_string", + "from_avro", "from_csv", "from_json", "from_unixtime", @@ -416,6 +417,7 @@ exportMethods("%<=>%", "timestamp_seconds", "toDegrees", "toRadians", + "to_avro", "to_csv", "to_date", "to_json", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index bcd798a8c31e2..039d28a3a37b6 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -361,6 +361,50 @@ NULL #' } NULL +#' Avro processing functions for Column operations +#' +#' Avro processing functions defined for \code{Column}. 
+#' +#' @param x Column to compute on. +#' @param jsonFormatSchema character Avro schema in JSON string format +#' @param ... additional argument(s) passed as parser options. +#' @name column_avro_functions +#' @rdname column_avro_functions +#' @family avro functions +#' @note Avro is built-in but external data source module since Spark 2.4. +#' Please deploy the application as per +#' \href{https://spark.apache.org/docs/latest/sql-data-sources-avro.html#deploying}{ +#' the deployment section +#' } of "Apache Avro Data Source Guide". +#' @examples +#' \dontrun{ +#' df <- createDataFrame(iris) +#' schema <- paste( +#' c( +#' '{"type": "record", "namespace": "example.avro", "name": "Iris", "fields": [', +#' '{"type": ["double", "null"], "name": "Sepal_Length"},', +#' '{"type": ["double", "null"], "name": "Sepal_Width"},', +#' '{"type": ["double", "null"], "name": "Petal_Length"},', +#' '{"type": ["double", "null"], "name": "Petal_Width"},', +#' '{"type": ["string", "null"], "name": "Species"}]}' +#' ), +#' collapse="\\n" +#' ) +#' +#' df_serialized <- select( +#' df, +#' alias(to_avro(alias(struct(column("*")), "fields")), "payload") +#' ) +#' +#' df_deserialized <- select( +#' df_serialized, +#' from_avro(df_serialized$payload, schema) +#' ) +#' +#' head(df_deserialized) +#' } +NULL + #' @details #' \code{lit}: A new Column is created to represent the literal value. #' If the parameter is a Column, it is returned unchanged. @@ -4547,3 +4591,60 @@ setMethod("vector_to_array", ) column(jc) }) + +#' @details +#' \code{from_avro} Converts a binary column of Avro format into its corresponding catalyst value. +#' The specified schema must match the read data, otherwise the behavior is undefined: +#' it may fail or return arbitrary result. +#' To deserialize the data with a compatible and evolved schema, the expected Avro schema can be +#' set via the option avroSchema. +#' +#' @rdname column_avro_functions +#' @aliases from_avro from_avro,Column-method +#' @note from_avro since 3.1.0 +setMethod("from_avro", + signature(x = "characterOrColumn"), + function(x, jsonFormatSchema, ...) { + x <- if (is.character(x)) { + column(x) + } else { + x + } + + options <- varargsToStrEnv(...) + jc <- callJStatic( + "org.apache.spark.sql.avro.functions", "from_avro", + x@jc, + jsonFormatSchema, + options + ) + column(jc) + }) + +#' @details +#' \code{to_avro} Converts a column into binary of Avro format. 
+#' +#' @rdname column_avro_functions +#' @aliases to_avro to_avro,Column-method +#' @note to_avro since 3.1.0 +setMethod("to_avro", + signature(x = "characterOrColumn"), + function(x, jsonFormatSchema = NULL) { + x <- if (is.character(x)) { + column(x) + } else { + x + } + + jc <- if (is.null(jsonFormatSchema)) { + callJStatic("org.apache.spark.sql.avro.functions", "to_avro", x@jc) + } else { + callJStatic( + "org.apache.spark.sql.avro.functions", + "to_avro", + x@jc, + jsonFormatSchema + ) + } + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index e372ae27e315a..1fe6599bf1b97 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -950,7 +950,6 @@ setGeneric("current_date", function(x = "missing") { standardGeneric("current_da #' @name NULL setGeneric("current_timestamp", function(x = "missing") { standardGeneric("current_timestamp") }) - #' @rdname column_datetime_diff_functions #' @name NULL setGeneric("datediff", function(y, x) { standardGeneric("datediff") }) @@ -1015,6 +1014,10 @@ setGeneric("expr", function(x) { standardGeneric("expr") }) #' @name NULL setGeneric("flatten", function(x) { standardGeneric("flatten") }) +#' @rdname column_avro_functions +#' @name NULL +setGeneric("from_avro", function(x, ...) { standardGeneric("from_avro") }) + #' @rdname column_datetime_diff_functions #' @name NULL setGeneric("from_utc_timestamp", function(y, x) { standardGeneric("from_utc_timestamp") }) @@ -1388,6 +1391,10 @@ setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) #' @name timestamp_seconds setGeneric("timestamp_seconds", function(x) { standardGeneric("timestamp_seconds") }) +#' @rdname column_avro_functions +#' @name NULL +setGeneric("to_avro", function(x, ...) { standardGeneric("to_avro") }) + #' @rdname column_collection_functions #' @name NULL setGeneric("transform_keys", function(x, f) { standardGeneric("transform_keys") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 3a0d359e2ae79..45de1ef1bd3d1 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1841,6 +1841,32 @@ test_that("column functions", { ) }) +test_that("avro column functions", { + skip_if_not( + grepl("spark-avro", sparkR.conf("spark.jars", "")), + "spark-avro jar not present" + ) + + schema <- '{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_color", "type": ["string", "null"]} + ] + }' + + c0 <- column("foo") + c1 <- from_avro(c0, schema) + expect_s4_class(c1, "Column") + c2 <- from_avro("foo", schema) + expect_s4_class(c2, "Column") + c3 <- to_avro(c1) + expect_s4_class(c3, "Column") + c4 <- to_avro(c1, schema) + expect_s4_class(c4, "Column") +}) + test_that("column binary mathfunctions", { lines <- c("{\"a\":1, \"b\":5}", "{\"a\":2, \"b\":6}", diff --git a/R/run-tests.sh b/R/run-tests.sh index 51ca7d600caf0..edc2b2b60b60e 100755 --- a/R/run-tests.sh +++ b/R/run-tests.sh @@ -23,7 +23,18 @@ FAILED=0 LOGFILE=$FWDIR/unit-tests.out rm -f $LOGFILE -SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE +SPARK_AVRO_JAR_PATH=$(find $FWDIR/../external/avro/ 
-name "spark-avro*jar" -print | egrep -v "tests.jar|test-sources.jar|sources.jar|javadoc.jar") + +if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then + SPARK_JARS=$SPARK_AVRO_JAR_PATH +fi + +if [ -z "$SPARK_JARS" ]; then + SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE +else + SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE +fi + FAILED=$((PIPESTATUS[0]||$FAILED)) NUM_TEST_WARNING="$(grep -c -e 'Warnings ----------------' $LOGFILE)" diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java index 82810dacdad84..9a71cf593e28c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java @@ -88,12 +88,14 @@ public void processFetchRequest( logger.trace("Received req from {} to fetch block {}", getRemoteAddress(channel), msg.streamChunkId); } - long chunksBeingTransferred = streamManager.chunksBeingTransferred(); - if (chunksBeingTransferred >= maxChunksBeingTransferred) { - logger.warn("The number of chunks being transferred {} is above {}, close the connection.", - chunksBeingTransferred, maxChunksBeingTransferred); - channel.close(); - return; + if (maxChunksBeingTransferred < Long.MAX_VALUE) { + long chunksBeingTransferred = streamManager.chunksBeingTransferred(); + if (chunksBeingTransferred >= maxChunksBeingTransferred) { + logger.warn("The number of chunks being transferred {} is above {}, close the connection.", + chunksBeingTransferred, maxChunksBeingTransferred); + channel.close(); + return; + } } ManagedBuffer buf; try { diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java index f178928006902..4a30f8de07827 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java @@ -124,12 +124,14 @@ private void processStreamRequest(final StreamRequest req) { req.streamId); } - long chunksBeingTransferred = streamManager.chunksBeingTransferred(); - if (chunksBeingTransferred >= maxChunksBeingTransferred) { - logger.warn("The number of chunks being transferred {} is above {}, close the connection.", - chunksBeingTransferred, maxChunksBeingTransferred); - channel.close(); - return; + if (maxChunksBeingTransferred < Long.MAX_VALUE) { + long chunksBeingTransferred = streamManager.chunksBeingTransferred(); + if (chunksBeingTransferred >= maxChunksBeingTransferred) { + logger.warn("The number of 
chunks being transferred {} is above {}, close the connection.", + chunksBeingTransferred, maxChunksBeingTransferred); + channel.close(); + return; + } } ManagedBuffer buf; try { diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index 4d765481eb836..09fa91655fba5 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -21,7 +21,6 @@ import java.util.{Properties, Timer, TimerTask} import scala.collection.JavaConverters._ import scala.concurrent.duration._ -import scala.language.postfixOps import scala.util.{Failure, Success => ScalaSuccess, Try} import org.apache.spark.annotation.{Experimental, Since} diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index c3152d9225107..cdec1982b4487 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.io.CompressionCodec import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} -import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, MapStatus} +import org.apache.spark.scheduler.MapStatus import org.apache.spark.shuffle.MetadataFetchFailedException import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId} import org.apache.spark.util._ diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index d68015454de9d..0440a9de6ab31 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1542,8 +1542,8 @@ class SparkContext(config: SparkConf) extends Logging { val schemeCorrectedURI = uri.getScheme match { case null => new File(path).getCanonicalFile.toURI case "local" => - logWarning("File with 'local' scheme is not supported to add to file server, since " + - "it is already available on every node.") + logWarning(s"File with 'local' scheme $path is not supported to add to file server, " + + s"since it is already available on every node.") return case _ => uri } diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index d7a09b599794e..136da80d48dee 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -24,13 +24,8 @@ import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.json4s.JsonAST._ -import org.json4s.JsonDSL._ -import org.json4s.jackson.JsonMethods.{compact, render} - import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{BUFFER_SIZE, EXECUTOR_CORES} @@ -86,6 +81,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( private val conf = SparkEnv.get.conf protected val bufferSize: Int = conf.get(BUFFER_SIZE) private val reuseWorker = conf.get(PYTHON_WORKER_REUSE) + protected val simplifiedTraceback: Boolean = false // All the Python functions should have the same exec, version 
and envvars. protected val envVars: java.util.Map[String, String] = funcs.head.funcs.head.envVars @@ -133,6 +129,9 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( if (reuseWorker) { envVars.put("SPARK_REUSE_WORKER", "1") } + if (simplifiedTraceback) { + envVars.put("SPARK_SIMPLIFIED_TRACEBACK", "1") + } // SPARK-30299 this could be wrong with standalone mode when executor // cores might not be correct because it defaults to all cores on the box. val execCores = execCoresProp.map(_.toInt).getOrElse(conf.get(EXECUTOR_CORES)) diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 5a6fa507963f0..dc2587a62ae40 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -17,8 +17,6 @@ package org.apache.spark.api.python -import java.nio.ByteOrder -import java.nio.charset.StandardCharsets import java.util.{ArrayList => JArrayList} import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala index 20ab6fc2f348d..41c66024272b9 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala @@ -19,7 +19,6 @@ package org.apache.spark.api.r import java.io._ -import org.apache.spark._ import org.apache.spark.broadcast.Broadcast /** diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 17733d99cd5bc..d76fb7f9a20b3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -22,7 +22,6 @@ import org.json4s.JsonDSL._ import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master._ -import org.apache.spark.deploy.master.RecoveryState.MasterState import org.apache.spark.deploy.worker.ExecutorRunner import org.apache.spark.resource.{ResourceInformation, ResourceRequirement} @@ -208,7 +207,8 @@ private[deploy] object JsonProtocol { * master * `completeddrivers` a list of Json objects of [[DriverInfo]] of the completed drivers * of the master - * `status` status of the master, see [[MasterState]] + * `status` status of the master, + * see [[org.apache.spark.deploy.master.RecoveryState.MasterState]] */ def writeMasterState(obj: MasterStateResponse): JObject = { val aliveWorkers = obj.workers.filter(_.isAlive()) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 9a316e8c5b5a9..4b17661496808 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -391,6 +391,7 @@ private[spark] class SparkSubmit extends Logging { downloadFileList(_, targetDir, sparkConf, hadoopConf, secMgr) }.orNull args.files = renameResourcesToLocalFS(args.files, localFiles) + args.pyFiles = renameResourcesToLocalFS(args.pyFiles, localPyFiles) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala index c659d32d16314..57b05ff245258 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala @@ -19,7 +19,6 @@ package org.apache.spark.deploy.history import scala.collection.mutable -import org.apache.spark.SparkContext import org.apache.spark.deploy.history.EventFilter.FilterStatistics import org.apache.spark.internal.Logging import org.apache.spark.scheduler._ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e1b0fc5e45d6e..e5341aff8ce66 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -21,7 +21,7 @@ import java.io.{File, FileNotFoundException, IOException} import java.lang.{Long => JLong} import java.nio.file.Files import java.util.{Date, NoSuchElementException, ServiceLoader} -import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ExecutorService, TimeUnit} import java.util.zip.ZipOutputStream import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala index 58714f16e8417..1b8c7ff26e9f5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala @@ -17,7 +17,6 @@ package org.apache.spark.deploy.history -import java.io.IOException import java.util.Collection import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicBoolean diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index a582a5d045855..cccd3da323774 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -22,9 +22,7 @@ import java.util.{Date, Locale} import java.util.concurrent.{ScheduledFuture, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} -import scala.collection.mutable import scala.util.Random -import scala.util.control.NonFatal import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState, SparkHadoopUtil} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 035f9d379471c..af94bd6d9e0f2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.master.ui import java.net.{InetAddress, NetworkInterface, SocketException} -import java.util.Locale import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, MasterStateResponse, RequestMasterState} diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index b2bc6b3b68007..6a1fd57873c3a 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ 
b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -17,7 +17,6 @@ package org.apache.spark.executor -import java.io.File import java.net.URL import java.nio.ByteBuffer import java.util.Locale diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 1a0ad566633da..f7246448959e9 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -22,7 +22,7 @@ import java.lang.Thread.UncaughtExceptionHandler import java.lang.management.ManagementFactory import java.net.{URI, URL} import java.nio.ByteBuffer -import java.util.Properties +import java.util.{Locale, Properties} import java.util.concurrent._ import java.util.concurrent.atomic.AtomicBoolean import javax.annotation.concurrent.GuardedBy @@ -110,7 +110,9 @@ private[spark] class Executor( .build() Executors.newCachedThreadPool(threadFactory).asInstanceOf[ThreadPoolExecutor] } - private val executorSource = new ExecutorSource(threadPool, executorId) + private val schemes = conf.get(EXECUTOR_METRICS_FILESYSTEM_SCHEMES) + .toLowerCase(Locale.ROOT).split(",").map(_.trim).filter(_.nonEmpty) + private val executorSource = new ExecutorSource(threadPool, executorId, schemes) // Pool used for threads that supervise task killing / cancellation private val taskReaperPool = ThreadUtils.newDaemonCachedThreadPool("Task reaper") // For tasks which are in the process of being killed, this map holds the most recently created diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala index 50207aeb3ef6b..d2765d061d662 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala @@ -27,7 +27,10 @@ import org.apache.hadoop.fs.FileSystem import org.apache.spark.metrics.source.Source private[spark] -class ExecutorSource(threadPool: ThreadPoolExecutor, executorId: String) extends Source { +class ExecutorSource( + threadPool: ThreadPoolExecutor, + executorId: String, + fileSystemSchemes: Array[String]) extends Source { private def fileStats(scheme: String) : Option[FileSystem.Statistics] = FileSystem.getAllStatistics.asScala.find(s => s.getScheme.equals(scheme)) @@ -70,7 +73,7 @@ class ExecutorSource(threadPool: ThreadPoolExecutor, executorId: String) extends }) // Gauge for file system stats of this executor - for (scheme <- Array("hdfs", "file")) { + for (scheme <- fileSystemSchemes) { registerFileSystemStat(scheme, "read_bytes", _.getBytesRead(), 0L) registerFileSystemStat(scheme, "write_bytes", _.getBytesWritten(), 0L) registerFileSystemStat(scheme, "read_ops", _.getReadOps(), 0) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 2bb1290963f87..4bc49514fc5ad 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -271,6 +271,13 @@ package object config { .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("0") + private[spark] val EXECUTOR_METRICS_FILESYSTEM_SCHEMES = + ConfigBuilder("spark.executor.metrics.fileSystemSchemes") + .doc("The file system schemes to report in executor metrics.") + .version("3.1.0") + .stringConf + .createWithDefaultString("file,hdfs") + 
private[spark] val EXECUTOR_JAVA_OPTIONS = ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS) .withPrepended(SparkLauncher.EXECUTOR_DEFAULT_JAVA_OPTIONS) diff --git a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala index 62fbc166167d3..cafb39ea82ad9 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala @@ -22,7 +22,7 @@ import scala.reflect.ClassTag import org.apache.spark.TaskContext import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.network.client.StreamCallbackWithID -import org.apache.spark.storage.{BlockId, ShuffleBlockId, StorageLevel} +import org.apache.spark.storage.{BlockId, StorageLevel} private[spark] trait BlockDataManager { diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index c7f5a97e35612..635efc3e22628 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -23,7 +23,6 @@ import scala.concurrent.{Future, Promise} import scala.concurrent.duration.Duration import scala.reflect.ClassTag -import org.apache.spark.internal.Logging import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.{BlockFetchingListener, BlockStoreClient, DownloadFileManager} import org.apache.spark.storage.{BlockId, EncryptedManagedBuffer, StorageLevel} diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 806fbf52795bc..828849812bbd1 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -19,9 +19,7 @@ package org.apache.spark.network.netty import java.io.IOException import java.nio.ByteBuffer -import java.util import java.util.{HashMap => JHashMap, Map => JMap} -import java.util.concurrent.CompletableFuture import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} @@ -35,11 +33,11 @@ import org.apache.spark.ExecutorDeadException import org.apache.spark.internal.config import org.apache.spark.network._ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} -import org.apache.spark.network.client.{RpcResponseCallback, TransportClient, TransportClientBootstrap, TransportClientFactory} +import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap} import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap} import org.apache.spark.network.server._ import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, OneForOneBlockFetcher, RetryingBlockFetcher} -import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, GetLocalDirsForExecutors, LocalDirsForExecutors, UploadBlock, UploadBlockStream} +import org.apache.spark.network.shuffle.protocol.{UploadBlock, UploadBlockStream} import org.apache.spark.network.util.JavaUtils import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.serializer.JavaSerializer diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala 
b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index 324cba5b4de42..f0239cdd9136d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -19,7 +19,6 @@ package org.apache.spark.rdd import java.io._ -import scala.Serializable import scala.collection.Map import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index 576a83f6ab4d9..5093a12777ad3 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -20,7 +20,6 @@ package org.apache.spark.rdd import java.io.{FileNotFoundException, IOException} import java.util.concurrent.TimeUnit -import scala.collection.mutable import scala.reflect.ClassTag import scala.util.control.NonFatal diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala index 482d9e94c6dd9..22d10a975ad0f 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala @@ -20,7 +20,6 @@ package org.apache.spark.resource import scala.collection.mutable import org.apache.spark.SparkException -import org.apache.spark.util.collection.OpenHashMap /** * Trait used to help executor/worker allocate resources. @@ -40,7 +39,7 @@ trait ResourceAllocator { * can be a multiple, such that each address can be allocated up to [[slotsPerAddress]] * times. * - * TODO Use [[OpenHashMap]] instead to gain better performance. + * TODO Use [[org.apache.spark.util.collection.OpenHashMap]] instead to gain better performance. 
*/ private lazy val addressAvailabilityMap = { mutable.HashMap(resourceAddresses.map(_ -> slotsPerAddress): _*) diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 5a9435653920f..837b2d80aace6 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -29,8 +29,8 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.resource.ResourceDiscoveryPlugin import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{CPUS_PER_TASK, EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} -import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING, SKIP_VALIDATE_CORES_TESTING} +import org.apache.spark.internal.config.{EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} +import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING} import org.apache.spark.util.Utils /** diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 57e219999b0d0..b939e40f3b60c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -26,9 +26,6 @@ import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, Buffer, HashMap, HashSet} import scala.util.Random -import com.google.common.base.Ticker -import com.google.common.cache.CacheBuilder - import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.ExecutorMetrics diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index eda1cb52d4abc..e084453be0789 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -22,7 +22,6 @@ import java.nio.ByteBuffer import org.apache.spark.TaskState.TaskState import org.apache.spark.resource.{ResourceInformation, ResourceProfile} import org.apache.spark.rpc.RpcEndpointRef -import org.apache.spark.scheduler.ExecutorDecommissionInfo import org.apache.spark.scheduler.ExecutorLossReason import org.apache.spark.util.SerializableBuffer diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 72460180f5908..d9b8eddcf8cd0 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -22,11 +22,9 @@ import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ import org.apache.spark._ -import org.apache.spark.internal.{config, Logging} -import org.apache.spark.scheduler.MapStatus +import org.apache.spark.internal.Logging import org.apache.spark.shuffle._ -import org.apache.spark.shuffle.api.{ShuffleDataIO, ShuffleExecutorComponents} -import org.apache.spark.util.Utils +import org.apache.spark.shuffle.api.ShuffleExecutorComponents import org.apache.spark.util.collection.OpenHashSet /** diff --git 
a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 5c6543fe28a18..affa85b76cf19 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -22,8 +22,7 @@ import java.util.{List => JList} import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap -import org.apache.spark.{JobExecutionStatus, SparkConf, SparkException} -import org.apache.spark.resource.ResourceProfileManager +import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.status.api.v1 import org.apache.spark.ui.scope._ import org.apache.spark.util.Utils diff --git a/core/src/main/scala/org/apache/spark/status/KVUtils.scala b/core/src/main/scala/org/apache/spark/status/KVUtils.scala index 45348be5c98b9..c79f2dcd86533 100644 --- a/core/src/main/scala/org/apache/spark/status/KVUtils.scala +++ b/core/src/main/scala/org/apache/spark/status/KVUtils.scala @@ -21,7 +21,6 @@ import java.io.File import scala.annotation.meta.getter import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import com.fasterxml.jackson.annotation.JsonInclude diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 3909c02c5bb1f..924601f92c5b8 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -55,7 +55,6 @@ import org.apache.spark.rpc.RpcEnv import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 9699515c626bf..7a55039db1b60 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -18,7 +18,6 @@ package org.apache.spark.storage import java.io.IOException -import java.util.concurrent.ExecutorService import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ @@ -28,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config -import org.apache.spark.shuffle.{MigratableResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.ShuffleBlockInfo import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock import org.apache.spark.util.ThreadUtils diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index b8c5cbd121861..a7532a9870fae 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -33,7 +33,7 @@ 
import org.apache.spark.{MapOutputTrackerMaster, SparkConf} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.{config, Logging} import org.apache.spark.network.shuffle.ExternalBlockStoreClient -import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointAddress, RpcEndpointRef, RpcEnv} +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend} import org.apache.spark.storage.BlockManagerMessages._ diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala index c0a135e04bac5..a3a528cddee37 100644 --- a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -27,7 +27,6 @@ import org.apache.hadoop.fs.viewfs.ViewFileSystem import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.spark._ -import org.apache.spark.annotation.Private import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics @@ -45,8 +44,6 @@ private[spark] object HadoopFSUtils extends Logging { * @param paths Input paths to list * @param hadoopConf Hadoop configuration * @param filter Path filter used to exclude leaf files from result - * @param isRootLevel Whether the input paths are at the root level, i.e., they are the root - * paths as opposed to nested paths encountered during recursive calls of this. * @param ignoreMissingFiles Ignore missing files that happen during recursive listing * (e.g., due to race conditions) * @param ignoreLocality Whether to fetch data locality info when listing leaf files. If false, @@ -57,11 +54,22 @@ private[spark] object HadoopFSUtils extends Logging { * @param parallelismMax The maximum parallelism for listing. If the number of input paths is * larger than this value, parallelism will be throttled to this value * to avoid generating too many tasks. - * @param filterFun Optional predicate on the leaf files. Files who failed the check will be - * excluded from the results * @return for each input path, the set of discovered files for the path */ def parallelListLeafFiles( + sc: SparkContext, + paths: Seq[Path], + hadoopConf: Configuration, + filter: PathFilter, + ignoreMissingFiles: Boolean, + ignoreLocality: Boolean, + parallelismThreshold: Int, + parallelismMax: Int): Seq[(Path, Seq[FileStatus])] = { + parallelListLeafFilesInternal(sc, paths, hadoopConf, filter, isRootLevel = true, + ignoreMissingFiles, ignoreLocality, parallelismThreshold, parallelismMax) + } + + private def parallelListLeafFilesInternal( sc: SparkContext, paths: Seq[Path], hadoopConf: Configuration, @@ -70,8 +78,7 @@ private[spark] object HadoopFSUtils extends Logging { ignoreMissingFiles: Boolean, ignoreLocality: Boolean, parallelismThreshold: Int, - parallelismMax: Int, - filterFun: Option[String => Boolean] = None): Seq[(Path, Seq[FileStatus])] = { + parallelismMax: Int): Seq[(Path, Seq[FileStatus])] = { // Short-circuits parallel listing when serial listing is likely to be faster. 
if (paths.size <= parallelismThreshold) { @@ -85,8 +92,7 @@ private[spark] object HadoopFSUtils extends Logging { ignoreLocality = ignoreLocality, isRootPath = isRootLevel, parallelismThreshold = parallelismThreshold, - parallelismMax = parallelismMax, - filterFun = filterFun) + parallelismMax = parallelismMax) (path, leafFiles) } } @@ -126,58 +132,16 @@ private[spark] object HadoopFSUtils extends Logging { ignoreMissingFiles = ignoreMissingFiles, ignoreLocality = ignoreLocality, isRootPath = isRootLevel, - filterFun = filterFun, parallelismThreshold = Int.MaxValue, parallelismMax = 0) (path, leafFiles) }.iterator - }.map { case (path, statuses) => - val serializableStatuses = statuses.map { status => - // Turn FileStatus into SerializableFileStatus so we can send it back to the driver - val blockLocations = status match { - case f: LocatedFileStatus => - f.getBlockLocations.map { loc => - SerializableBlockLocation( - loc.getNames, - loc.getHosts, - loc.getOffset, - loc.getLength) - } - - case _ => - Array.empty[SerializableBlockLocation] - } - - SerializableFileStatus( - status.getPath.toString, - status.getLen, - status.isDirectory, - status.getReplication, - status.getBlockSize, - status.getModificationTime, - status.getAccessTime, - blockLocations) - } - (path.toString, serializableStatuses) }.collect() } finally { sc.setJobDescription(previousJobDescription) } - // turn SerializableFileStatus back to Status - statusMap.map { case (path, serializableStatuses) => - val statuses = serializableStatuses.map { f => - val blockLocations = f.blockLocations.map { loc => - new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) - } - new LocatedFileStatus( - new FileStatus( - f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, - new Path(f.path)), - blockLocations) - } - (new Path(path), statuses) - } + statusMap.toSeq } // scalastyle:off argcount @@ -197,7 +161,6 @@ private[spark] object HadoopFSUtils extends Logging { ignoreMissingFiles: Boolean, ignoreLocality: Boolean, isRootPath: Boolean, - filterFun: Option[String => Boolean], parallelismThreshold: Int, parallelismMax: Int): Seq[FileStatus] = { @@ -245,19 +208,11 @@ private[spark] object HadoopFSUtils extends Logging { Array.empty[FileStatus] } - def doFilter(statuses: Array[FileStatus]) = filterFun match { - case Some(shouldFilterOut) => - statuses.filterNot(status => shouldFilterOut(status.getPath.getName)) - case None => - statuses - } - - val filteredStatuses = doFilter(statuses) val allLeafStatuses = { - val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory) + val (dirs, topLevelFiles) = statuses.partition(_.isDirectory) val nestedFiles: Seq[FileStatus] = contextOpt match { case Some(context) if dirs.size > parallelismThreshold => - parallelListLeafFiles( + parallelListLeafFilesInternal( context, dirs.map(_.getPath), hadoopConf = hadoopConf, @@ -265,7 +220,6 @@ private[spark] object HadoopFSUtils extends Logging { isRootLevel = false, ignoreMissingFiles = ignoreMissingFiles, ignoreLocality = ignoreLocality, - filterFun = filterFun, parallelismThreshold = parallelismThreshold, parallelismMax = parallelismMax ).flatMap(_._2) @@ -279,7 +233,6 @@ private[spark] object HadoopFSUtils extends Logging { ignoreMissingFiles = ignoreMissingFiles, ignoreLocality = ignoreLocality, isRootPath = false, - filterFun = filterFun, parallelismThreshold = parallelismThreshold, parallelismMax = parallelismMax) } @@ -289,8 +242,7 @@ private[spark] object HadoopFSUtils extends Logging { } val missingFiles = 
mutable.ArrayBuffer.empty[String] - val filteredLeafStatuses = doFilter(allLeafStatuses) - val resolvedLeafStatuses = filteredLeafStatuses.flatMap { + val resolvedLeafStatuses = allLeafStatuses.flatMap { case f: LocatedFileStatus => Some(f) @@ -339,22 +291,4 @@ private[spark] object HadoopFSUtils extends Logging { resolvedLeafStatuses } // scalastyle:on argcount - - /** A serializable variant of HDFS's BlockLocation. */ - private case class SerializableBlockLocation( - names: Array[String], - hosts: Array[String], - offset: Long, - length: Long) - - /** A serializable variant of HDFS's FileStatus. */ - private case class SerializableFileStatus( - path: String, - length: Long, - isDir: Boolean, - blockReplication: Short, - blockSize: Long, - modificationTime: Long, - accessTime: Long, - blockLocations: Array[SerializableBlockLocation]) } diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 78206c51c1028..d45dc937910d9 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -23,7 +23,6 @@ import java.util.concurrent.locks.ReentrantLock import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor, Future} import scala.concurrent.duration.{Duration, FiniteDuration} -import scala.language.higherKinds import scala.util.control.NonFatal import com.google.common.util.concurrent.ThreadFactoryBuilder diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 7f1f3a71acab8..b743ab6507117 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -28,7 +28,7 @@ import java.nio.channels.{Channels, FileChannel, WritableByteChannel} import java.nio.charset.StandardCharsets import java.nio.file.Files import java.security.SecureRandom -import java.util.{Arrays, Locale, Properties, Random, UUID} +import java.util.{Locale, Properties, Random, UUID} import java.util.concurrent._ import java.util.concurrent.TimeUnit.NANOSECONDS import java.util.zip.GZIPInputStream diff --git a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala index fae6c4af1240c..e6d3377120e56 100644 --- a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark import scala.concurrent.duration._ -import scala.language.implicitConversions import org.scalatest.concurrent.Eventually._ import org.scalatest.matchers.must.Matchers diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index e1d4eff0a62cb..e47181719a9db 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import scala.concurrent.duration._ import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{mock, verify, when} +import org.mockito.Mockito.{mock, when} import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ diff --git 
a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala index ac39f022d5ca6..7d07af4d7246b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala @@ -23,10 +23,9 @@ import scala.io.{Codec, Source} import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.json4s.jackson.JsonMethods.parse -import org.apache.spark.{SparkConf, SparkFunSuite, Success} +import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.history.EventLogTestHelper.writeEventsToRollingWriter -import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.status.ListenerEventsTestHelper._ diff --git a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala index 35de457ec48ce..be83ec12f92f5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala @@ -21,7 +21,6 @@ import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date -import javax.servlet.http.HttpServletResponse import scala.collection.mutable.HashMap @@ -32,7 +31,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ -import org.apache.spark.internal.config.UI import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 8e58beff74290..31049d104e63d 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.executor -import java.io.{Externalizable, File, ObjectInput, ObjectOutput} +import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.lang.Thread.UncaughtExceptionHandler import java.nio.ByteBuffer import java.util.Properties @@ -41,7 +41,6 @@ import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.broadcast.Broadcast -import org.apache.spark.deploy.{SimpleApplicationTest, SparkSubmitSuite} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.memory.TestMemoryManager @@ -53,7 +52,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, UninterruptibleThread, Utils} +import org.apache.spark.util.{LongAccumulator, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with 
Eventually with PrivateMethodTester { diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index fab7aea6c47aa..f1d7053c34594 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils /** * Tests the correctness of diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index fa1a75d076051..182c3c09e0524 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -24,7 +24,7 @@ import scala.reflect.ClassTag import scala.util.Random import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{mock, times, verify, when} +import org.mockito.Mockito.{mock, when} import org.scalatest.BeforeAndAfterEach import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index 278a72a7192d8..e8e8682e20ed4 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -26,10 +26,8 @@ import org.json4s.{DefaultFormats, Extraction} import org.apache.spark.{LocalSparkContext, SparkConf, SparkException, SparkFunSuite} import org.apache.spark.TestUtils._ import org.apache.spark.internal.config._ -import org.apache.spark.internal.config.Tests._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.util.Utils class ResourceUtilsSuite extends SparkFunSuite diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index e4ec62f8efc5b..b7ac9ecac2387 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -25,7 +25,6 @@ import org.scalatest.concurrent.Eventually import org.scalatest.time.SpanSugar._ import org.apache.spark._ -import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with Eventually { diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 47e37fc55cefe..65d51e57ee308 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -188,7 +188,6 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo } test("extra resources from executor") { - 
import TestUtils._ val execCores = 3 val conf = new SparkConf() diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 99be1faab8b85..58aa246b7358f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -42,7 +42,7 @@ import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.shuffle.{FetchFailedException, MetadataFetchFailedException} import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} -import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, ThreadUtils, Utils} +import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, Utils} class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) extends DAGSchedulerEventProcessLoop(dagScheduler) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index 0c60c42c054cf..b6a59c8bbd944 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.config import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, TaskResourceRequests} import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.util.{Clock, ManualClock, SystemClock} +import org.apache.spark.util.{Clock, ManualClock} class FakeSchedulerBackend extends SchedulerBackend { def start(): Unit = {} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index e01e278f60205..a760dda3897df 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -1768,7 +1768,6 @@ class TaskSetManagerSuite } test("TaskSetManager passes task resource along") { - import TestUtils._ sc = new SparkContext("local", "test") sc.conf.set(TASK_GPU_ID.amountConf, "2") diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 4a92cbcb85847..1c2326db6dc99 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -19,14 +19,12 @@ package org.apache.spark.scheduler import java.util.concurrent.Semaphore -import scala.concurrent.TimeoutException import scala.concurrent.duration._ -import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkFunSuite, - TestUtils} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TestUtils} import org.apache.spark.internal.config import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend -import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} +import org.apache.spark.util.ThreadUtils class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { diff --git 
a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala index 9c0699bc981f8..d2bf385e10796 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.util.Properties -import scala.concurrent.{Await, ExecutionContext, Future} +import scala.concurrent.{ExecutionContext, Future} import scala.language.implicitConversions import scala.reflect.ClassTag diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala index 48e0d218c0e5c..d02d7f862df80 100644 --- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.ui -import java.util.Locale import javax.servlet.http.HttpServletRequest import scala.xml.Node diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 857749e84764d..20624c743bc22 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -18,8 +18,7 @@ package org.apache.spark.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataOutput, DataOutputStream, File, - FileOutputStream, InputStream, PrintStream, SequenceInputStream} -import java.lang.{Double => JDouble, Float => JFloat} + FileOutputStream, PrintStream, SequenceInputStream} import java.lang.reflect.Field import java.net.{BindException, ServerSocket, URI} import java.nio.{ByteBuffer, ByteOrder} diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 0e892a927906a..7da330dfe1fbf 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -42,11 +42,11 @@ jquery.dataTables.1.10.20.min.js jquery.mustache.js jsonFormatter.min.css jsonFormatter.min.js -.*avsc -.*txt -.*json -.*data -.*log +.*\.avsc +.*\.txt +.*\.json +.*\.data +.*\.log pyspark-coverage-site/* cloudpickle/* join.py @@ -98,17 +98,17 @@ local-1430917381535_2 DESCRIPTION NAMESPACE test_support/* -.*Rd +.*\.Rd help/* html/* INDEX .lintr gen-java.* -.*avpr -.*parquet +.*\.avpr +.*\.parquet spark-deps-.* -.*csv -.*tsv +.*\.csv +.*\.tsv .*\.sql .Rbuildignore META-INF/* @@ -125,3 +125,11 @@ application_1578436911597_0052 config.properties app-20200706201101-0003 py.typed +_metadata +_SUCCESS +part-00000 +.*\.res +flights_tiny.txt.1 +over1k +over10k +exported_table/* diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 8c1ab9e3c1cfe..bcf05506855c5 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -7,7 +7,7 @@ activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar -antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar +antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar aopalliance/1.0//aopalliance-1.0.jar apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar @@ -155,26 +155,26 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.11.1//kubernetes-client-4.11.1.jar 
-kubernetes-model-admissionregistration/4.11.1//kubernetes-model-admissionregistration-4.11.1.jar -kubernetes-model-apiextensions/4.11.1//kubernetes-model-apiextensions-4.11.1.jar -kubernetes-model-apps/4.11.1//kubernetes-model-apps-4.11.1.jar -kubernetes-model-autoscaling/4.11.1//kubernetes-model-autoscaling-4.11.1.jar -kubernetes-model-batch/4.11.1//kubernetes-model-batch-4.11.1.jar -kubernetes-model-certificates/4.11.1//kubernetes-model-certificates-4.11.1.jar -kubernetes-model-common/4.11.1//kubernetes-model-common-4.11.1.jar -kubernetes-model-coordination/4.11.1//kubernetes-model-coordination-4.11.1.jar -kubernetes-model-core/4.11.1//kubernetes-model-core-4.11.1.jar -kubernetes-model-discovery/4.11.1//kubernetes-model-discovery-4.11.1.jar -kubernetes-model-events/4.11.1//kubernetes-model-events-4.11.1.jar -kubernetes-model-extensions/4.11.1//kubernetes-model-extensions-4.11.1.jar -kubernetes-model-metrics/4.11.1//kubernetes-model-metrics-4.11.1.jar -kubernetes-model-networking/4.11.1//kubernetes-model-networking-4.11.1.jar -kubernetes-model-policy/4.11.1//kubernetes-model-policy-4.11.1.jar -kubernetes-model-rbac/4.11.1//kubernetes-model-rbac-4.11.1.jar -kubernetes-model-scheduling/4.11.1//kubernetes-model-scheduling-4.11.1.jar -kubernetes-model-settings/4.11.1//kubernetes-model-settings-4.11.1.jar -kubernetes-model-storageclass/4.11.1//kubernetes-model-storageclass-4.11.1.jar +kubernetes-client/4.12.0//kubernetes-client-4.12.0.jar +kubernetes-model-admissionregistration/4.12.0//kubernetes-model-admissionregistration-4.12.0.jar +kubernetes-model-apiextensions/4.12.0//kubernetes-model-apiextensions-4.12.0.jar +kubernetes-model-apps/4.12.0//kubernetes-model-apps-4.12.0.jar +kubernetes-model-autoscaling/4.12.0//kubernetes-model-autoscaling-4.12.0.jar +kubernetes-model-batch/4.12.0//kubernetes-model-batch-4.12.0.jar +kubernetes-model-certificates/4.12.0//kubernetes-model-certificates-4.12.0.jar +kubernetes-model-common/4.12.0//kubernetes-model-common-4.12.0.jar +kubernetes-model-coordination/4.12.0//kubernetes-model-coordination-4.12.0.jar +kubernetes-model-core/4.12.0//kubernetes-model-core-4.12.0.jar +kubernetes-model-discovery/4.12.0//kubernetes-model-discovery-4.12.0.jar +kubernetes-model-events/4.12.0//kubernetes-model-events-4.12.0.jar +kubernetes-model-extensions/4.12.0//kubernetes-model-extensions-4.12.0.jar +kubernetes-model-metrics/4.12.0//kubernetes-model-metrics-4.12.0.jar +kubernetes-model-networking/4.12.0//kubernetes-model-networking-4.12.0.jar +kubernetes-model-policy/4.12.0//kubernetes-model-policy-4.12.0.jar +kubernetes-model-rbac/4.12.0//kubernetes-model-rbac-4.12.0.jar +kubernetes-model-scheduling/4.12.0//kubernetes-model-scheduling-4.12.0.jar +kubernetes-model-settings/4.12.0//kubernetes-model-settings-4.12.0.jar +kubernetes-model-storageclass/4.12.0//kubernetes-model-storageclass-4.12.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar @@ -195,7 +195,6 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -openshift-model/4.11.1//openshift-model-4.11.1.jar orc-core/1.5.12//orc-core-1.5.12.jar orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar orc-shims/1.5.12//orc-shims-1.5.12.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index fcb993033221e..cd274bef7045b 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -7,7 +7,7 @@ 
activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar -antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar +antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/2.0.0//arrow-format-2.0.0.jar @@ -125,26 +125,26 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.11.1//kubernetes-client-4.11.1.jar -kubernetes-model-admissionregistration/4.11.1//kubernetes-model-admissionregistration-4.11.1.jar -kubernetes-model-apiextensions/4.11.1//kubernetes-model-apiextensions-4.11.1.jar -kubernetes-model-apps/4.11.1//kubernetes-model-apps-4.11.1.jar -kubernetes-model-autoscaling/4.11.1//kubernetes-model-autoscaling-4.11.1.jar -kubernetes-model-batch/4.11.1//kubernetes-model-batch-4.11.1.jar -kubernetes-model-certificates/4.11.1//kubernetes-model-certificates-4.11.1.jar -kubernetes-model-common/4.11.1//kubernetes-model-common-4.11.1.jar -kubernetes-model-coordination/4.11.1//kubernetes-model-coordination-4.11.1.jar -kubernetes-model-core/4.11.1//kubernetes-model-core-4.11.1.jar -kubernetes-model-discovery/4.11.1//kubernetes-model-discovery-4.11.1.jar -kubernetes-model-events/4.11.1//kubernetes-model-events-4.11.1.jar -kubernetes-model-extensions/4.11.1//kubernetes-model-extensions-4.11.1.jar -kubernetes-model-metrics/4.11.1//kubernetes-model-metrics-4.11.1.jar -kubernetes-model-networking/4.11.1//kubernetes-model-networking-4.11.1.jar -kubernetes-model-policy/4.11.1//kubernetes-model-policy-4.11.1.jar -kubernetes-model-rbac/4.11.1//kubernetes-model-rbac-4.11.1.jar -kubernetes-model-scheduling/4.11.1//kubernetes-model-scheduling-4.11.1.jar -kubernetes-model-settings/4.11.1//kubernetes-model-settings-4.11.1.jar -kubernetes-model-storageclass/4.11.1//kubernetes-model-storageclass-4.11.1.jar +kubernetes-client/4.12.0//kubernetes-client-4.12.0.jar +kubernetes-model-admissionregistration/4.12.0//kubernetes-model-admissionregistration-4.12.0.jar +kubernetes-model-apiextensions/4.12.0//kubernetes-model-apiextensions-4.12.0.jar +kubernetes-model-apps/4.12.0//kubernetes-model-apps-4.12.0.jar +kubernetes-model-autoscaling/4.12.0//kubernetes-model-autoscaling-4.12.0.jar +kubernetes-model-batch/4.12.0//kubernetes-model-batch-4.12.0.jar +kubernetes-model-certificates/4.12.0//kubernetes-model-certificates-4.12.0.jar +kubernetes-model-common/4.12.0//kubernetes-model-common-4.12.0.jar +kubernetes-model-coordination/4.12.0//kubernetes-model-coordination-4.12.0.jar +kubernetes-model-core/4.12.0//kubernetes-model-core-4.12.0.jar +kubernetes-model-discovery/4.12.0//kubernetes-model-discovery-4.12.0.jar +kubernetes-model-events/4.12.0//kubernetes-model-events-4.12.0.jar +kubernetes-model-extensions/4.12.0//kubernetes-model-extensions-4.12.0.jar +kubernetes-model-metrics/4.12.0//kubernetes-model-metrics-4.12.0.jar +kubernetes-model-networking/4.12.0//kubernetes-model-networking-4.12.0.jar +kubernetes-model-policy/4.12.0//kubernetes-model-policy-4.12.0.jar +kubernetes-model-rbac/4.12.0//kubernetes-model-rbac-4.12.0.jar +kubernetes-model-scheduling/4.12.0//kubernetes-model-scheduling-4.12.0.jar +kubernetes-model-settings/4.12.0//kubernetes-model-settings-4.12.0.jar +kubernetes-model-storageclass/4.12.0//kubernetes-model-storageclass-4.12.0.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar 
libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar @@ -165,7 +165,6 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -openshift-model/4.11.1//openshift-model-4.11.1.jar orc-core/1.5.12//orc-core-1.5.12.jar orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar orc-shims/1.5.12//orc-shims-1.5.12.jar diff --git a/docs/_config.yml b/docs/_config.yml index 3be9807f81082..cd341063a1f92 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -26,3 +26,15 @@ SCALA_VERSION: "2.12.10" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark +# Before a new release, we should apply a new `apiKey` for the new Spark documentation +# on https://docsearch.algolia.com/. Otherwise, after release, the search results are always based +# on the latest documentation(https://spark.apache.org/docs/latest/) even when visiting the +# documentation of previous releases. +DOCSEARCH_SCRIPT: | + docsearch({ + apiKey: 'b18ca3732c502995563043aa17bc6ecb', + indexName: 'apache_spark', + inputSelector: '#docsearch-input', + enhancedSearchInput: true, + debug: false // Set debug to true if you want to inspect the dropdown + }); diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 65af17ed2e4a1..de98f29acf3b7 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -187,13 +187,7 @@
{{ page.title }}
// 2. a JavaScript snippet to be inserted in your website that will bind this Algolia index // to your search input and display its results in a dropdown UI. If you want to find more // details on how works DocSearch, check the docs of DocSearch. - docsearch({ - apiKey: 'b18ca3732c502995563043aa17bc6ecb', - indexName: 'apache_spark', - inputSelector: '#docsearch-input', - enhancedSearchInput: true, - debug: false // Set debug to true if you want to inspect the dropdown - }); + {{site.DOCSEARCH_SCRIPT}} diff --git a/docs/monitoring.md b/docs/monitoring.md index a07a113445981..15a6cbd910210 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1175,6 +1175,8 @@ This is the component with the largest amount of instrumented metrics These metrics are exposed by Spark executors. - namespace=executor (metrics are of type counter or gauge) + - **notes:** + - `spark.executor.metrics.fileSystemSchemes` (default: `file,hdfs`) determines the exposed file system metrics. - bytesRead.count - bytesWritten.count - cpuTime.count diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 578ab90fedfca..80591bd08650a 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -734,6 +734,38 @@ See the [configuration page](configuration.html) for information on Spark config 2.1.0 + + spark.mesos.dispatcher.queue + (none) + + Set the name of the dispatcher queue to which the application is submitted. + The specified queue must be added to the dispatcher with spark.mesos.dispatcher.queue.[QueueName]. + If no queue is specified, then the application is submitted to the "default" queue with 0.0 priority. + + 3.1.0 + + + spark.mesos.dispatcher.queue.[QueueName] + 0.0 + + Add a new queue for submitted drivers with the specified priority. + Higher numbers indicate higher priority. + The user can specify multiple queues to define a workload management policy for queued drivers in the dispatcher. + A driver can then be submitted to a specific queue with spark.mesos.dispatcher.queue. + By default, the dispatcher has a single queue with 0.0 priority (cannot be overridden). + It is possible to implement a consistent and overall workload management policy throughout the lifecycle of drivers + by mapping priority queues to weighted Mesos roles, and by specifying a + spark.mesos.role along with a spark.mesos.dispatcher.queue when submitting an application. + For example, with the URGENT Mesos role: +
+    spark.mesos.dispatcher.queue.URGENT=1.0
+
+    spark.mesos.dispatcher.queue=URGENT
+    spark.mesos.role=URGENT
+    
+ + 3.1.0 + spark.mesos.gpus.max 0 diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index 69b165ed28bae..9ecc6eb91da5a 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -88,8 +88,6 @@ Kafka key-value record will be augmented with some metadata, such as the ingesti * If the "value" field that contains your data is in Avro, you could use `from_avro()` to extract your data, enrich it, clean it, and then push it downstream to Kafka again or write it out to a file. * `to_avro()` can be used to turn structs into Avro records. This method is particularly useful when you would like to re-encode multiple columns into a single one when writing data out to Kafka. -Both functions are currently only available in Scala, Java, and Python. -
{% highlight scala %} @@ -183,6 +181,38 @@ query = output\ .option("topic", "topic2")\ .start() +{% endhighlight %} +
+
+{% highlight r %} + +# `from_avro` requires Avro schema in JSON string format. +jsonFormatSchema <- paste0(readLines("examples/src/main/resources/user.avsc"), collapse=" ") + +df <- read.stream( + "kafka", + kafka.bootstrap.servers = "host1:port1,host2:port2", + subscribe = "topic1" +) + +# 1. Decode the Avro data into a struct; +# 2. Filter by column `favorite_color`; +# 3. Encode the column `name` in Avro format. + +output <- select( + filter( + select(df, alias(from_avro("value", jsonFormatSchema), "user")), + column("user.favorite_color") == "red" + ), + alias(to_avro("user.name"), "value") +) + +write.stream( + output, + "kafka", + kafka.bootstrap.servers = "host1:port1,host2:port2", + topic = "topic2" +) {% endhighlight %}
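As an illustrative aside, the same decode/filter/re-encode round trip can be sketched in Scala with the `from_avro`/`to_avro` helpers from `org.apache.spark.sql.avro.functions`. This is a minimal sketch rather than part of the patch: it reuses the `user.avsc` schema and the placeholder Kafka endpoints from the examples above, and the application name and checkpoint path are made-up placeholders.

{% highlight scala %}
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.avro.functions.{from_avro, to_avro}

val spark = SparkSession.builder().appName("AvroKafkaRoundTrip").getOrCreate()
import spark.implicits._

// `from_avro` requires the Avro schema in JSON string format.
val jsonFormatSchema = new String(
  Files.readAllBytes(Paths.get("examples/src/main/resources/user.avsc")),
  StandardCharsets.UTF_8)

val df = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
  .option("subscribe", "topic1")
  .load()

// 1. Decode the binary `value` column into a struct;
// 2. Filter by column `favorite_color`;
// 3. Re-encode the column `name` in Avro format.
val output = df
  .select(from_avro($"value", jsonFormatSchema).as("user"))
  .where($"user.favorite_color" === "red")
  .select(to_avro($"user.name").as("value"))

val query = output.writeStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
  .option("topic", "topic2")
  .option("checkpointLocation", "/tmp/avro-kafka-checkpoint") // placeholder path
  .start()
{% endhighlight %}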
diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c2b36033e318e..fd7208615a09f 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -61,6 +61,27 @@ Spark SQL has three kinds of type conversions: explicit casting, type coercion, When `spark.sql.ansi.enabled` is set to `true`, explicit casting by `CAST` syntax throws a runtime exception for illegal cast patterns defined in the standard, e.g. casts from a string to an integer. On the other hand, `INSERT INTO` syntax throws an analysis exception when the ANSI mode enabled via `spark.sql.storeAssignmentPolicy=ANSI`. +The type conversion of Spark ANSI mode follows the syntax rules of section 6.13 "cast specification" in [ISO/IEC 9075-2:2011 Information technology — Database languages - SQL — Part 2: Foundation (SQL/Foundation)"](https://www.iso.org/standard/53682.html), except it specially allows the following + straightforward type conversions which are disallowed as per the ANSI standard: +* NumericType <=> BooleanType +* StringType <=> BinaryType + + The valid combinations of target data type and source data type in a `CAST` expression are given by the following table. +“Y” indicates that the combination is syntactically valid without restriction and “N” indicates that the combination is not valid. + +| From\To | NumericType | StringType | DateType | TimestampType | IntervalType | BooleanType | BinaryType | ArrayType | MapType | StructType | +|-----------|---------|--------|------|-----------|----------|---------|--------|-------|-----|--------| +| NumericType | Y | Y | N | N | N | Y | N | N | N | N | +| StringType | Y | Y | Y | Y | Y | Y | Y | N | N | N | +| DateType | N | Y | Y | Y | N | N | N | N | N | N | +| TimestampType | N | Y | Y | Y | N | N | N | N | N | N | +| IntervalType | N | Y | N | N | Y | N | N | N | N | N | +| BooleanType | Y | Y | N | N | N | Y | N | N | N | N | +| BinaryType | Y | N | N | N | N | N | Y | N | N | N | +| ArrayType | N | N | N | N | N | N | N | Y | N | N | +| MapType | N | N | N | N | N | N | N | N | Y | N | +| StructType | N | N | N | N | N | N | N | N | N | Y | + Currently, the ANSI mode affects explicit casting and assignment casting only. In future releases, the behaviour of type coercion might change along with the other two type conversion rules. @@ -112,12 +133,14 @@ SELECT * FROM t; The behavior of some SQL functions can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `size`: This function returns null for null input. - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. ### SQL Operators The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. 
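As a quick, informal illustration of the ANSI-mode behaviours listed above (not taken from the patch itself), the following spark-shell snippet contrasts the default setting with `spark.sql.ansi.enabled=true`; the literal values are arbitrary.

{% highlight scala %}
// Default (non-ANSI) mode: an ill-formed string-to-int cast yields NULL.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT CAST('abc' AS INT)").show()   // prints a single NULL

// ANSI mode: the same cast fails at runtime, and a missing map key throws.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT CAST('abc' AS INT)").show()   // runtime exception on the illegal cast
spark.sql("SELECT map(1, 'a')[5]").show()       // NoSuchElementException: key 5 is absent
{% endhighlight %}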
### SQL Keywords diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 86d00cac9485f..487cb27b93fe8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -41,7 +41,6 @@ object DeveloperApiExample { .builder .appName("DeveloperApiExample") .getOrCreate() - import spark.implicits._ // Prepare training data. val training = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala index 2845028dd0814..7a7501ee84526 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala @@ -30,7 +30,6 @@ object RankingMetricsExample { .builder .appName("RankingMetricsExample") .getOrCreate() - import spark.implicits._ // $example on$ // Read in the ratings data val ratings = spark.read.textFile("data/mllib/sample_movielens_data.txt").rdd.map { line => diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala index fde281087c267..b17b86c08314b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala @@ -42,8 +42,6 @@ object SparkSQLExample { .config("spark.some.config.option", "some-value") .getOrCreate() - // For implicit conversions like converting RDDs to DataFrames - import spark.implicits._ // $example off:init_session$ runBasicDataFrameExample(spark) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index c685c89f0dfc8..09c849960c1b5 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -27,7 +27,7 @@ import org.apache.avro.Schema.Type._ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.Decimal.{maxPrecisionForBytes, minBytesForPrecision} +import org.apache.spark.sql.types.Decimal.minBytesForPrecision /** * This object contains method that are used to convert sparkSQL schemas to avro schemas and vice diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index c9c6bcecac14e..d3bfb716f515c 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition, PartitionedFile} +import 
org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala index 9ad083f1cfde5..a1b0f7d22216b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.kafka010 -import org.apache.kafka.common.TopicPartition - import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 6599e7e0fe707..c25b8b4e510a0 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -19,12 +19,9 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import org.apache.kafka.clients.consumer.ConsumerConfig - import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT -import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset, ReadAllAvailable, ReadLimit, ReadMaxRows, SupportsAdmissionControl} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index 6d30bd2a6d2cd..adcc20c25cb5f 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, KafkaConsumer, OffsetAndTimestamp} +import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkEnv @@ -33,10 +33,12 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} /** - * This class uses Kafka's own [[KafkaConsumer]] API to read data offsets from Kafka. + * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to + * read data offsets from Kafka. * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read * by this source. These strategies directly correspond to the different consumption options - * in. This class is designed to return a configured [[KafkaConsumer]] that is used by the + * in. 
This class is designed to return a configured + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the * [[KafkaSource]] to query for the offsets. See the docs on * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] * for more details. @@ -50,7 +52,8 @@ private[kafka010] class KafkaOffsetReader( driverGroupIdPrefix: String) extends Logging { /** - * [[UninterruptibleThreadRunner]] ensures that all [[KafkaConsumer]] communication called in an + * [[UninterruptibleThreadRunner]] ensures that all + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an * [[UninterruptibleThread]], however for batch mode this is not the case. */ diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index 413a0c4de8bea..69a66e2209773 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.kafka010 -import org.apache.kafka.common.TopicPartition - import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types.StructType diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 57879c7ca31cf..71ccb5f952f0a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -18,11 +18,7 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import java.io._ -import java.nio.charset.StandardCharsets -import org.apache.commons.io.IOUtils -import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkContext @@ -35,7 +31,6 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming import org.apache.spark.sql.connector.read.streaming.{ReadAllAvailable, ReadLimit, ReadMaxRows, SupportsAdmissionControl} import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.kafka010.KafkaSource._ import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.types._ diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 748d623a0a32a..3ace0874674b6 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -30,7 +30,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.kafka010.KafkaConfigUpdater import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import 
org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.read.{Batch, Scan, ScanBuilder} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, SupportsTruncate, WriteBuilder} diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index fe783ffe53a3b..08f673455d729 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1178,7 +1178,6 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { } testWithUninterruptibleThread("minPartitions is supported") { - import testImplicits._ val topic = newTopic() val tp = new TopicPartition(topic, 0) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index c5f3086b38c99..43ed4a8378a8c 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -26,7 +26,6 @@ import javax.security.auth.login.Configuration import scala.collection.JavaConverters._ import scala.io.Source -import scala.util.Random import scala.util.control.NonFatal import com.google.common.io.Files @@ -38,13 +37,12 @@ import org.apache.hadoop.minikdc.MiniKdc import org.apache.hadoop.security.UserGroupInformation import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.admin._ -import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer._ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.config.SaslConfigs import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.security.auth.SecurityProtocol.{PLAINTEXT, SASL_PLAINTEXT} -import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} +import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.SystemTime import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.zookeeper.server.auth.SASLAuthenticationProvider diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index 307a69f9b84c5..bc790418decd3 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -36,7 +36,7 @@ import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, S import org.apache.kafka.common.security.scram.ScramLoginModule import org.apache.kafka.common.security.token.delegation.DelegationToken -import org.apache.spark.{SparkConf, SparkEnv} +import 
org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.Logging diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala index ac81f92f86109..c0724909bc350 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala @@ -19,8 +19,6 @@ package org.apache.spark.streaming.kafka010.mocks import java.util.concurrent.{ScheduledFuture, TimeUnit} -import scala.collection.mutable.PriorityQueue - import kafka.utils.Scheduler import org.apache.kafka.common.utils.Time import org.jmock.lib.concurrent.DeterministicScheduler diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala index 11e949536f2b6..770eb2d89d522 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala @@ -21,7 +21,6 @@ import java.util.concurrent._ import scala.util.control.NonFatal import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShutdownReason import org.apache.spark.internal.Logging import org.apache.spark.streaming.Duration diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala index 8c3931a1c87fd..e778d083b3f70 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala @@ -50,8 +50,6 @@ private[kinesis] class KinesisInputDStream[T: ClassTag]( val metricsEnabledDimensions: Set[String] ) extends ReceiverInputDStream[T](_ssc) { - import KinesisReadConfigurations._ - private[streaming] override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala index d26acf924c0a3..7bc86c4871cfb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} - /** * ==ML attributes== * diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 95f37671e1399..9191b3ec4bc2b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -67,6 +67,10 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR * This binary classifier optimizes the Hinge Loss using the OWLQN optimizer. * Only supports L2 regularization currently. 
* + * Since 3.1.0, it supports stacking instances into blocks and using GEMV for + * better performance. + * The block size will be 1.0 MB, if param maxBlockSizeInMB is set 0.0 by default. + * */ @Since("2.2.0") class LinearSVC @Since("2.2.0") ( @@ -154,7 +158,7 @@ class LinearSVC @Since("2.2.0") ( /** * Sets the value of param [[maxBlockSizeInMB]]. - * Default is 0.0. + * Default is 0.0, then 1.0 MB will be chosen. * * @group expertSetParam */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index a43ad466a7c80..057196dd67a52 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -50,7 +50,7 @@ import org.apache.spark.util.VersionUtils private[classification] trait LogisticRegressionParams extends ProbabilisticClassifierParams with HasRegParam with HasElasticNetParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol with HasThreshold with HasAggregationDepth - with HasBlockSize { + with HasMaxBlockSizeInMB { import org.apache.spark.ml.classification.LogisticRegression.supportedFamilyNames @@ -245,7 +245,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas setDefault(regParam -> 0.0, elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, fitIntercept -> true, family -> "auto", standardization -> true, threshold -> 0.5, - aggregationDepth -> 2, blockSize -> 1) + aggregationDepth -> 2, maxBlockSizeInMB -> 0.0) protected def usingBoundConstrainedOptimization: Boolean = { isSet(lowerBoundsOnCoefficients) || isSet(upperBoundsOnCoefficients) || @@ -276,6 +276,10 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas * * This class supports fitting traditional logistic regression model by LBFGS/OWLQN and * bound (box) constrained logistic regression model by LBFGSB. + * + * Since 3.1.0, it supports stacking instances into blocks and using GEMV/GEMM for + * better performance. + * The block size will be 1.0 MB, if param maxBlockSizeInMB is set 0.0 by default. */ @Since("1.2.0") class LogisticRegression @Since("1.2.0") ( @@ -426,22 +430,13 @@ class LogisticRegression @Since("1.2.0") ( def setUpperBoundsOnIntercepts(value: Vector): this.type = set(upperBoundsOnIntercepts, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[maxBlockSizeInMB]]. + * Default is 0.0, then 1.0 MB will be chosen. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) + def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value) private def assertBoundConstrainedOptimizationParamsValid( numCoefficientSets: Int, @@ -495,31 +490,24 @@ class LogisticRegression @Since("1.2.0") ( this } - override protected[spark] def train(dataset: Dataset[_]): LogisticRegressionModel = { - val handlePersistence = dataset.storageLevel == StorageLevel.NONE - train(dataset, handlePersistence) - } - protected[spark] def train( - dataset: Dataset[_], - handlePersistence: Boolean): LogisticRegressionModel = instrumented { instr => + dataset: Dataset[_]): LogisticRegressionModel = instrumented { instr => instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, probabilityCol, regParam, elasticNetParam, standardization, threshold, thresholds, maxIter, - tol, fitIntercept, blockSize) + tol, fitIntercept, maxBlockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } val instances = extractInstances(dataset) .setName("training instances") - if (handlePersistence && $(blockSize) == 1) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } - - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) + .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) val numFeatures = summarizer.mean.size val histogram = labelSummarizer.histogram @@ -547,14 +535,13 @@ class LogisticRegression @Since("1.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - if ($(blockSize) > 1) { - val scale = 1.0 / summarizer.count / numFeatures - val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + + var actualBlockSizeInMB = $(maxBlockSizeInMB) + if (actualBlockSizeInMB == 0) { + // TODO: for Multinomial logistic regression, take numClasses into account + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } val isMultinomial = checkMultinomial(numClasses) @@ -584,7 +571,6 @@ class LogisticRegression @Since("1.2.0") ( } else { Vectors.dense(if (numClasses == 2) Double.PositiveInfinity else Double.NegativeInfinity) } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() return createModel(dataset, numClasses, coefMatrix, interceptVec, Array(0.0)) } @@ -636,14 +622,9 @@ class LogisticRegression @Since("1.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. 
*/ - val (allCoefficients, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, featuresStd, numClasses, initialCoefWithInterceptMatrix, - regularization, optimizer) - } else { - trainOnBlocks(instances, featuresStd, numClasses, initialCoefWithInterceptMatrix, - regularization, optimizer) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (allCoefficients, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, featuresStd, numClasses, + initialCoefWithInterceptMatrix, regularization, optimizer) if (allCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." @@ -949,40 +930,9 @@ class LogisticRegression @Since("1.2.0") ( initialCoefWithInterceptMatrix } - private def trainOnRows( - instances: RDD[Instance], - featuresStd: Array[Double], - numClasses: Int, - initialCoefWithInterceptMatrix: Matrix, - regularization: Option[L2Regularization], - optimizer: FirstOrderMinimizer[BDV[Double], DiffFunction[BDV[Double]]]) = { - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val getAggregatorFunc = new LogisticAggregator(bcFeaturesStd, numClasses, $(fitIntercept), - checkMultinomial(numClasses))(_) - - val costFun = new RDDLossFunction(instances, getAggregatorFunc, - regularization, $(aggregationDepth)) - val states = optimizer.iterations(new CachedDiffFunction(costFun), - new BDV[Double](initialCoefWithInterceptMatrix.toArray)) - - /* - Note that in Logistic Regression, the objective history (loss + regularization) - is log-likelihood which is invariant under feature standardization. As a result, - the objective history from optimizer is the same as the one in the original space. - */ - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - bcFeaturesStd.destroy() - - (if (state == null) null else state.x.toArray, arrayBuilder.result) - } - - private def trainOnBlocks( + private def trainImpl( instances: RDD[Instance], + actualBlockSizeInMB: Double, featuresStd: Array[Double], numClasses: Int, initialCoefWithInterceptMatrix: Matrix, @@ -996,9 +946,11 @@ class LogisticRegression @Since("1.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - val blocks = InstanceBlock.blokify(standardized, $(blockSize)) + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val getAggregatorFunc = new BlockLogisticAggregator(numFeatures, numClasses, $(fitIntercept), checkMultinomial(numClasses))(_) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala index cd245dd723348..2c7186015d400 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala @@ -17,13 +17,10 @@ package org.apache.spark.ml.feature -import scala.collection.mutable.ArrayBuilder - import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since import org.apache.spark.ml._ -import 
org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -31,7 +28,7 @@ import org.apache.spark.ml.stat.Summarizer import org.apache.spark.ml.util._ import org.apache.spark.sql._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.StructType /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala index 6ff970cc72dfd..ac63024768d77 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml -import org.apache.spark.ml.feature.{HashingTF, IDF, IDFModel, VectorAssembler} - /** * == Feature transformers == * diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/AFTAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/AFTAggregator.scala index 8a5d7fe34e7a0..fd59b4b71c41b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/AFTAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/AFTAggregator.scala @@ -18,8 +18,8 @@ package org.apache.spark.ml.optim.aggregator import org.apache.spark.broadcast.Broadcast +import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg._ -import org.apache.spark.ml.regression.AFTPoint /** * AFTAggregator computes the gradient and loss for a AFT loss function, @@ -108,7 +108,7 @@ import org.apache.spark.ml.regression.AFTPoint private[ml] class AFTAggregator( bcFeaturesStd: Broadcast[Array[Double]], fitIntercept: Boolean)(bcCoefficients: Broadcast[Vector]) - extends DifferentiableLossAggregator[AFTPoint, AFTAggregator] { + extends DifferentiableLossAggregator[Instance, AFTAggregator] { protected override val dim: Int = bcCoefficients.value.size @@ -116,10 +116,10 @@ private[ml] class AFTAggregator( * Add a new training data to this AFTAggregator, and update the loss and gradient * of the objective function. * - * @param data The AFTPoint representation for one data point to be added into this aggregator. + * @param data The Instance representation for one data point to be added into this aggregator. * @return This AFTAggregator object. */ - def add(data: AFTPoint): this.type = { + def add(data: Instance): this.type = { val coefficients = bcCoefficients.value.toArray val intercept = coefficients(dim - 2) // sigma is the scale parameter of the AFT model @@ -127,7 +127,7 @@ private[ml] class AFTAggregator( val xi = data.features val ti = data.label - val delta = data.censor + val delta = data.weight require(ti > 0.0, "The lifetime or label should be greater than 0.") @@ -176,7 +176,7 @@ private[ml] class AFTAggregator( */ private[ml] class BlockAFTAggregator( fitIntercept: Boolean)(bcCoefficients: Broadcast[Vector]) - extends DifferentiableLossAggregator[(Matrix, Array[Double], Array[Double]), + extends DifferentiableLossAggregator[InstanceBlock, BlockAFTAggregator] { protected override val dim: Int = bcCoefficients.value.size @@ -196,16 +196,13 @@ private[ml] class BlockAFTAggregator( * * @return This BlockAFTAggregator object. 
*/ - def add(block: (Matrix, Array[Double], Array[Double])): this.type = { - val (matrix, labels, censors) = block - require(matrix.isTransposed) - require(numFeatures == matrix.numCols, s"Dimensions mismatch when adding new " + - s"instance. Expecting $numFeatures but got ${matrix.numCols}.") - require(labels.forall(_ > 0.0), "The lifetime or label should be greater than 0.") - - val size = matrix.numRows - require(labels.length == size && censors.length == size) + def add(block: InstanceBlock): this.type = { + require(block.matrix.isTransposed) + require(numFeatures == block.numFeatures, s"Dimensions mismatch when adding new " + + s"instance. Expecting $numFeatures but got ${block.numFeatures}.") + require(block.labels.forall(_ > 0.0), "The lifetime or label should be greater than 0.") + val size = block.size val intercept = coefficientsArray(dim - 2) // sigma is the scale parameter of the AFT model val sigma = math.exp(coefficientsArray(dim - 1)) @@ -216,26 +213,30 @@ private[ml] class BlockAFTAggregator( } else { Vectors.zeros(size).toDense } - BLAS.gemv(1.0, matrix, linear, 1.0, vec) + BLAS.gemv(1.0, block.matrix, linear, 1.0, vec) // in-place convert margins to gradient scales // then, vec represents gradient scales + var localLossSum = 0.0 var i = 0 var sigmaGradSum = 0.0 while (i < size) { - val ti = labels(i) - val delta = censors(i) + val ti = block.getLabel(i) + // here use Instance.weight to store censor for convenience + val delta = block.getWeight(i) val margin = vec(i) val epsilon = (math.log(ti) - margin) / sigma val expEpsilon = math.exp(epsilon) - lossSum += delta * math.log(sigma) - delta * epsilon + expEpsilon + localLossSum += delta * math.log(sigma) - delta * epsilon + expEpsilon val multiplier = (delta - expEpsilon) / sigma vec.values(i) = multiplier sigmaGradSum += delta + multiplier * sigma * epsilon i += 1 } + lossSum += localLossSum + weightSum += size - matrix match { + block.matrix match { case dm: DenseMatrix => BLAS.nativeBLAS.dgemv("N", dm.numCols, dm.numRows, 1.0, dm.values, dm.numCols, vec.values, 1, 1.0, gradientSumArray, 1) @@ -249,7 +250,6 @@ private[ml] class BlockAFTAggregator( if (fitIntercept) gradientSumArray(dim - 2) += vec.values.sum gradientSumArray(dim - 1) += sigmaGradSum - weightSum += size this } diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala index b1990f7c60f64..3d72512563154 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala @@ -162,24 +162,26 @@ private[ml] class BlockHingeAggregator( // in-place convert dotProducts to gradient scales // then, vec represents gradient scales + var localLossSum = 0.0 var i = 0 while (i < size) { val weight = block.getWeight(i) if (weight > 0) { - weightSum += weight // Our loss function with {0, 1} labels is max(0, 1 - (2y - 1) (f_w(x))) // Therefore the gradient is -(2y - 1)*x val label = block.getLabel(i) val labelScaled = label + label - 1.0 val loss = (1.0 - labelScaled * vec(i)) * weight if (loss > 0) { - lossSum += loss + localLossSum += loss val gradScale = -labelScaled * weight vec.values(i) = gradScale } else { vec.values(i) = 0.0 } } else { vec.values(i) = 0.0 } i += 1 } + lossSum += localLossSum + weightSum += block.weightIter.sum // predictions are all correct, no gradient signal if (vec.values.forall(_ == 0)) return this diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HuberAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HuberAggregator.scala index 59ecc038e5569..35582dbc990e6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HuberAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HuberAggregator.scala @@ -167,7 +167,6 @@ private[ml] class BlockHuberAggregator( protected override val dim: Int = bcParameters.value.size private val numFeatures = if (fitIntercept) dim - 2 else dim - 1 - private val sigma = bcParameters.value(dim - 1) private val intercept = if (fitIntercept) bcParameters.value(dim - 2) else 0.0 // make transient so we do not serialize between aggregation stages @transient private lazy val linear = Vectors.dense(bcParameters.value.toArray.take(numFeatures)) @@ -187,7 +186,9 @@ private[ml] class BlockHuberAggregator( s"instance weights ${block.weightIter.mkString("[", ",", "]")} has to be >= 0.0") if (block.weightIter.forall(_ == 0)) return this + val size = block.size + val sigma = bcParameters.value(dim - 1) // vec here represents margins or dotProducts val vec = if (fitIntercept) { @@ -200,23 +201,23 @@ private[ml] class BlockHuberAggregator( // in-place convert margins to multipliers // then, vec represents multipliers var sigmaGradSum = 0.0 + var localLossSum = 0.0 var i = 0 while (i < size) { val weight = block.getWeight(i) if (weight > 0) { - weightSum += weight val label = block.getLabel(i) val margin = vec(i) val linearLoss = label - margin if (math.abs(linearLoss) <= sigma * epsilon) { - lossSum += 0.5 * weight * (sigma + math.pow(linearLoss, 2.0) / sigma) + localLossSum += 0.5 * weight * (sigma + math.pow(linearLoss, 2.0) / sigma) val linearLossDivSigma = linearLoss / sigma val multiplier = -1.0 * weight * linearLossDivSigma vec.values(i) = multiplier sigmaGradSum += 0.5 * weight * (1.0 - math.pow(linearLossDivSigma, 2.0)) } else { - lossSum += 0.5 * weight * + localLossSum += 0.5 * weight * (sigma + 2.0 * epsilon * math.abs(linearLoss) - sigma * epsilon * epsilon) val sign = if (linearLoss >= 0) -1.0 else 1.0 val multiplier = weight * sign * epsilon @@ -226,6 +227,8 @@ private[ml] class BlockHuberAggregator( } else { vec.values(i) = 0.0 } i += 1 } + lossSum += localLossSum + weightSum += block.weightIter.sum block.matrix match { case dm: DenseMatrix => diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LeastSquaresAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LeastSquaresAggregator.scala index fa3bda00d802d..d5e1ea980840b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LeastSquaresAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LeastSquaresAggregator.scala @@ -267,9 +267,6 @@ private[ml] class BlockLeastSquaresAggregator( val offset = if (fitIntercept) labelMean / labelStd - sum else 0.0 (Vectors.dense(coefficientsArray), offset) } - // do not use tuple assignment above because it will circumvent the @transient tag - @transient private lazy val effectiveCoefficientsVec = effectiveCoefAndOffset._1 - @transient private lazy val offset = effectiveCoefAndOffset._2 /** * Add a new training instance block to this BlockLeastSquaresAggregator, and update the loss @@ -286,7 +283,9 @@ private[ml] class BlockLeastSquaresAggregator( s"instance weights ${block.weightIter.mkString("[", ",", "]")} has to be >= 0.0") if (block.weightIter.forall(_ == 0)) return this + val size = block.size + val 
(effectiveCoefficientsVec, offset) = effectiveCoefAndOffset // vec here represents diffs val vec = new DenseVector(Array.tabulate(size)(i => offset - block.getLabel(i) / labelStd)) @@ -294,16 +293,18 @@ private[ml] class BlockLeastSquaresAggregator( // in-place convert diffs to multipliers // then, vec represents multipliers + var localLossSum = 0.0 var i = 0 while (i < size) { val weight = block.getWeight(i) val diff = vec(i) - lossSum += weight * diff * diff / 2 - weightSum += weight + localLossSum += weight * diff * diff / 2 val multiplier = weight * diff vec.values(i) = multiplier i += 1 } + lossSum += localLossSum + weightSum += block.weightIter.sum val gradSumVec = new DenseVector(gradientSumArray) BLAS.gemv(1.0, block.matrix.transpose, vec, 1.0, gradSumVec) diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala index a331122776b5c..2496c789f8da6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala @@ -466,24 +466,26 @@ private[ml] class BlockLogisticAggregator( // in-place convert margins to multiplier // then, vec represents multiplier + var localLossSum = 0.0 var i = 0 while (i < size) { val weight = block.getWeight(i) if (weight > 0) { - weightSum += weight val label = block.getLabel(i) val margin = vec(i) if (label > 0) { // The following is equivalent to log(1 + exp(margin)) but more numerically stable. - lossSum += weight * Utils.log1pExp(margin) + localLossSum += weight * Utils.log1pExp(margin) } else { - lossSum += weight * (Utils.log1pExp(margin) - margin) + localLossSum += weight * (Utils.log1pExp(margin) - margin) } val multiplier = weight * (1.0 / (1.0 + math.exp(margin)) - label) vec.values(i) = multiplier } else { vec.values(i) = 0.0 } i += 1 } + lossSum += localLossSum + weightSum += block.weightIter.sum // predictions are all correct, no gradient signal if (vec.values.forall(_ == 0)) return @@ -514,10 +516,11 @@ private[ml] class BlockLogisticAggregator( // mat here represents margins, shape: S X C val mat = DenseMatrix.zeros(size, numClasses) if (fitIntercept) { + val localCoefficientsArray = coefficientsArray val offset = numClasses * numFeatures var j = 0 while (j < numClasses) { - val intercept = coefficientsArray(offset + j) + val intercept = localCoefficientsArray(offset + j) var i = 0 while (i < size) { mat.update(i, j, intercept); i += 1 } j += 1 @@ -527,13 +530,13 @@ private[ml] class BlockLogisticAggregator( // in-place convert margins to multipliers // then, mat represents multipliers + var localLossSum = 0.0 var i = 0 val tmp = Array.ofDim[Double](numClasses) val interceptGradSumArr = if (fitIntercept) Array.ofDim[Double](numClasses) else null while (i < size) { val weight = block.getWeight(i) if (weight > 0) { - weightSum += weight val label = block.getLabel(i) var maxMargin = Double.NegativeInfinity @@ -566,15 +569,17 @@ private[ml] class BlockLogisticAggregator( } if (maxMargin > 0) { - lossSum += weight * (math.log(sum) - marginOfLabel + maxMargin) + localLossSum += weight * (math.log(sum) - marginOfLabel + maxMargin) } else { - lossSum += weight * (math.log(sum) - marginOfLabel) + localLossSum += weight * (math.log(sum) - marginOfLabel) } } else { var j = 0; while (j < numClasses) { mat.update(i, j, 0.0); j += 1 } } i += 1 } + lossSum += localLossSum + weightSum += block.weightIter.sum // mat 
(multipliers): S X C, dense N // mat.transpose (multipliers): C X S, dense T diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 0640fe355fdd6..2f6b9c1e11aac 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -111,8 +111,8 @@ private[shared] object SharedParamsCodeGen { isValid = "ParamValidators.gt(0)", isExpertParam = true), ParamDesc[Double]("maxBlockSizeInMB", "Maximum memory in MB for stacking input data " + "into blocks. Data is stacked within partitions. If more than remaining data size in a " + - "partition then it is adjusted to the data size. If 0, try to infer an appropriate " + - "value. Must be >= 0.", + "partition then it is adjusted to the data size. Default 0.0 represents choosing " + + "optimal value, depends on specific algorithm. Must be >= 0.", Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) ) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 2fbda45a9e97a..425bf91fd00ba 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -570,10 +570,10 @@ trait HasBlockSize extends Params { trait HasMaxBlockSizeInMB extends Params { /** - * Param for Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.. + * Param for Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0.. * @group expertParam */ - final val maxBlockSizeInMB: DoubleParam = new DoubleParam(this, "maxBlockSizeInMB", "Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", ParamValidators.gtEq(0.0)) + final val maxBlockSizeInMB: DoubleParam = new DoubleParam(this, "maxBlockSizeInMB", "Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. 
Must be >= 0.", ParamValidators.gtEq(0.0)) setDefault(maxBlockSizeInMB, 0.0) diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index a0e5924a7ee3a..088f6a682be82 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ -import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext, SparkException} +import org.apache.spark.{Partitioner, SparkException} import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala index 517179c0eb9ae..ed41169070c59 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.recommendation -import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{Encoder, Encoders} diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 3870a71a91a20..4d214dc74ed8b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkException import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.PredictorParams -import org.apache.spark.ml.feature.StandardScalerModel +import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg._ import org.apache.spark.ml.optim.aggregator._ import org.apache.spark.ml.optim.loss.RDDLossFunction @@ -47,8 +47,8 @@ import org.apache.spark.storage.StorageLevel * Params for accelerated failure time (AFT) regression. */ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams - with HasMaxIter with HasTol with HasFitIntercept with HasAggregationDepth with HasBlockSize - with Logging { + with HasMaxIter with HasTol with HasFitIntercept with HasAggregationDepth + with HasMaxBlockSizeInMB with Logging { /** * Param for censor column name. @@ -92,7 +92,8 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams setDefault(censorCol -> "censor", quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99), - fitIntercept -> true, maxIter -> 100, tol -> 1E-6, aggregationDepth -> 2, blockSize -> 1) + fitIntercept -> true, maxIter -> 100, tol -> 1E-6, aggregationDepth -> 2, + maxBlockSizeInMB -> 0.0) /** Checks whether the input has quantiles column name. */ private[regression] def hasQuantilesCol: Boolean = { @@ -127,6 +128,10 @@ private[regression] trait AFTSurvivalRegressionParams extends PredictorParams * (see * Accelerated failure time model (Wikipedia)) * based on the Weibull distribution of the survival time. + * + * Since 3.1.0, it supports stacking instances into blocks and using GEMV for + * better performance. 
+ * The block size will be 1.0 MB, if param maxBlockSizeInMB is set 0.0 by default. */ @Since("1.6.0") class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: String) @@ -184,55 +189,39 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[maxBlockSizeInMB]]. + * Default is 0.0, then 1.0 MB will be chosen. * * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) - - /** - * Extract [[featuresCol]], [[labelCol]] and [[censorCol]] from input dataset, - * and put it in an RDD with strong types. - */ - protected[ml] def extractAFTPoints(dataset: Dataset[_]): RDD[AFTPoint] = { - dataset.select(col($(featuresCol)), col($(labelCol)).cast(DoubleType), - col($(censorCol)).cast(DoubleType)).rdd.map { - case Row(features: Vector, label: Double, censor: Double) => - AFTPoint(features, label, censor) - } - } + def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value) override protected def train( dataset: Dataset[_]): AFTSurvivalRegressionModel = instrumented { instr => instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, featuresCol, censorCol, predictionCol, quantilesCol, - fitIntercept, maxIter, tol, aggregationDepth, blockSize) + fitIntercept, maxIter, tol, aggregationDepth, maxBlockSizeInMB) instr.logNamedValue("quantileProbabilities.size", $(quantileProbabilities).length) - val instances = extractAFTPoints(dataset) - .setName("training instances") - - if ($(blockSize) == 1 && dataset.storageLevel == StorageLevel.NONE) { - instances.persist(StorageLevel.MEMORY_AND_DISK) + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. 
Be careful of double caching!") } - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" + val instances = dataset.select(col($(featuresCol)), col($(labelCol)).cast(DoubleType), + col($(censorCol)).cast(DoubleType)) + .rdd.map { case Row(features: Vector, label: Double, censor: Double) => + require(censor == 1.0 || censor == 0.0, "censor must be 1.0 or 0.0") + // AFT does not support instance weighting, + // here use Instance.weight to store censor for convenience + Instance(label, censor, features) + }.setName("training instances") + val summarizer = instances.treeAggregate( - Summarizer.createSummarizerBuffer(requestedMetrics: _*))( - seqOp = (c: SummarizerBuffer, v: AFTPoint) => c.add(v.features), + Summarizer.createSummarizerBuffer("mean", "std", "count"))( + seqOp = (c: SummarizerBuffer, i: Instance) => c.add(i.features), combOp = (c1: SummarizerBuffer, c2: SummarizerBuffer) => c1.merge(c2), depth = $(aggregationDepth) ) @@ -241,14 +230,12 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S val numFeatures = featuresStd.length instr.logNumFeatures(numFeatures) instr.logNumExamples(summarizer.count) - if ($(blockSize) > 1) { - val scale = 1.0 / summarizer.count / numFeatures - val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + + var actualBlockSizeInMB = $(maxBlockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } if (!$(fitIntercept) && (0 until numFeatures).exists { i => @@ -268,12 +255,8 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S */ val initialParameters = Vectors.zeros(numFeatures + 2) - val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, featuresStd, optimizer, initialParameters) - } else { - trainOnBlocks(instances, featuresStd, optimizer, initialParameters) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (rawCoefficients, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, featuresStd, optimizer, initialParameters) if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
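[Reviewer note, not part of the patch] The MB-valued param is turned into a byte budget via `(actualBlockSizeInMB * 1024L * 1024L).ceil.toLong` and handed to `InstanceBlock.blokifyWithMaxMemUsage`, whose body is not shown in this diff. Below is a rough, self-contained sketch of the intended behavior, assuming greedy per-partition packing and a simplified Instance with Array[Double] features (the real class works on ml.linalg vectors and estimates sparse and dense sizes differently):

```scala
// Self-contained stand-in; the real Instance lives in org.apache.spark.ml.feature.
final case class Instance(label: Double, weight: Double, features: Array[Double])

object BlockifySketch {
  // Rough per-row footprint in bytes: label + weight + dense feature values.
  private def approxSizeInBytes(inst: Instance): Long =
    8L + 8L + 8L * inst.features.length

  // Greedily pack consecutive rows of one partition into groups whose estimated
  // footprint stays under maxMemUsage; every group keeps at least one row, so a
  // single oversized row still forms its own block.
  def blokify(iter: Iterator[Instance], maxMemUsage: Long): Iterator[Seq[Instance]] =
    new Iterator[Seq[Instance]] {
      override def hasNext: Boolean = iter.hasNext
      override def next(): Seq[Instance] = {
        val buf = scala.collection.mutable.ArrayBuffer.empty[Instance]
        var used = 0L
        while (iter.hasNext && (buf.isEmpty || used < maxMemUsage)) {
          val inst = iter.next()
          buf += inst
          used += approxSizeInBytes(inst)
        }
        buf.toSeq
      }
    }

  def main(args: Array[String]): Unit = {
    val actualBlockSizeInMB = 1.0 // the value inferred when maxBlockSizeInMB is 0.0
    val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong
    val rows = Iterator.tabulate(100000)(i =>
      Instance(label = i.toDouble, weight = 1.0, features = Array.fill(10)(1.0)))
    println(s"packed into ${blokify(rows, maxMemUsage).size} blocks under $maxMemUsage bytes")
  }
}
```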
@@ -290,47 +273,24 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S new AFTSurvivalRegressionModel(uid, coefficients, intercept, scale) } - private def trainOnRows( - instances: RDD[AFTPoint], + private def trainImpl( + instances: RDD[Instance], + actualBlockSizeInMB: Double, featuresStd: Array[Double], optimizer: BreezeLBFGS[BDV[Double]], initialParameters: Vector): (Array[Double], Array[Double]) = { val bcFeaturesStd = instances.context.broadcast(featuresStd) - val getAggregatorFunc = new AFTAggregator(bcFeaturesStd, $(fitIntercept))(_) - val costFun = new RDDLossFunction(instances, getAggregatorFunc, None, $(aggregationDepth)) - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialParameters.asBreeze.toDenseVector) - - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - bcFeaturesStd.destroy() - - (if (state != null) state.x.toArray else null, arrayBuilder.result) - } - private def trainOnBlocks( - instances: RDD[AFTPoint], - featuresStd: Array[Double], - optimizer: BreezeLBFGS[BDV[Double]], - initialParameters: Vector): (Array[Double], Array[Double]) = { - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val blocks = instances.mapPartitions { iter => + val standardized = instances.mapPartitions { iter => val inverseStd = bcFeaturesStd.value.map { std => if (std != 0) 1.0 / std else 0.0 } val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) - iter.grouped($(blockSize)).map { seq => - val matrix = Matrices.fromVectors(seq.map(point => func(point.features))) - val labels = seq.map(_.label).toArray - val censors = seq.map(_.censor).toArray - (matrix, labels, censors) - } + iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - blocks.persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) + .persist(StorageLevel.MEMORY_AND_DISK) + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val getAggregatorFunc = new BlockAFTAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, None, $(aggregationDepth)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 235a7f9b6ebd5..11a1984b0ab4c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -56,7 +56,7 @@ import org.apache.spark.util.VersionUtils.majorMinorVersion private[regression] trait LinearRegressionParams extends PredictorParams with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver - with HasAggregationDepth with HasLoss with HasBlockSize { + with HasAggregationDepth with HasLoss with HasMaxBlockSizeInMB { import LinearRegression._ @@ -107,7 +107,7 @@ private[regression] trait LinearRegressionParams extends PredictorParams setDefault(regParam -> 0.0, fitIntercept -> true, standardization -> true, elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, solver -> Auto, - aggregationDepth -> 2, loss -> SquaredError, 
epsilon -> 1.35, blockSize -> 1) + aggregationDepth -> 2, loss -> SquaredError, epsilon -> 1.35, maxBlockSizeInMB -> 0.0) override protected def validateAndTransformSchema( schema: StructType, @@ -175,6 +175,10 @@ private[regression] trait LinearRegressionParams extends PredictorParams * $$ * * + * Since 3.1.0, it supports stacking instances into blocks and using GEMV for + * better performance. + * The block size will be 1.0 MB, if param maxBlockSizeInMB is set 0.0 by default. + * * Note: Fitting with huber loss only supports none and L2 regularization. */ @Since("1.3.0") @@ -312,29 +316,26 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String def setEpsilon(value: Double): this.type = set(epsilon, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[maxBlockSizeInMB]]. + * Default is 0.0, then 1.0 MB will be chosen. * * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) + def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value) - override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => + override protected def train( + dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, featuresCol, weightCol, predictionCol, solver, tol, elasticNetParam, fitIntercept, maxIter, regParam, standardization, aggregationDepth, loss, - epsilon, blockSize) + epsilon, maxBlockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } // Extract the number of features before deciding optimization solver. 
val numFeatures = MetadataUtils.getNumFeatures(dataset, $(featuresCol)) @@ -348,35 +349,26 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String val instances = extractInstances(dataset) .setName("training instances") - if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } + val (summarizer, labelSummarizer) = Summarizer + .getRegressionSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" - val (featuresSummarizer, ySummarizer) = Summarizer - .getRegressionSummarizers(instances, $(aggregationDepth), requestedMetrics) + val yMean = labelSummarizer.mean(0) + val rawYStd = labelSummarizer.std(0) - val yMean = ySummarizer.mean(0) - val rawYStd = ySummarizer.std(0) - - instr.logNumExamples(ySummarizer.count) + instr.logNumExamples(labelSummarizer.count) instr.logNamedValue(Instrumentation.loggerTags.meanOfLabels, yMean) instr.logNamedValue(Instrumentation.loggerTags.varianceOfLabels, rawYStd) - instr.logSumOfWeights(featuresSummarizer.weightSum) - if ($(blockSize) > 1) { - val scale = 1.0 / featuresSummarizer.count / numFeatures - val sparsity = 1 - featuresSummarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + instr.logSumOfWeights(summarizer.weightSum) + + var actualBlockSizeInMB = $(maxBlockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } if (rawYStd == 0.0) { if ($(fitIntercept) || yMean == 0.0) { - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() return trainWithConstantLabel(dataset, instr, numFeatures, yMean) } else { require($(regParam) == 0.0, "The standard deviation of the label is zero. " + @@ -389,8 +381,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String // if y is constant (rawYStd is zero), then y cannot be scaled. In this case // setting yStd=abs(yMean) ensures that y is not scaled anymore in l-bfgs algorithm. 
val yStd = if (rawYStd > 0) rawYStd else math.abs(yMean) - val featuresMean = featuresSummarizer.mean.toArray - val featuresStd = featuresSummarizer.std.toArray + val featuresMean = summarizer.mean.toArray + val featuresStd = summarizer.std.toArray if (!$(fitIntercept) && (0 until numFeatures).exists { i => featuresStd(i) == 0.0 && featuresMean(i) != 0.0 }) { @@ -426,14 +418,9 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String Vectors.dense(Array.fill(dim)(1.0)) } - val (parameters, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, yMean, yStd, featuresMean, featuresStd, - initialValues, regularization, optimizer) - } else { - trainOnBlocks(instances, yMean, yStd, featuresMean, featuresStd, - initialValues, regularization, optimizer) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (parameters, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, yMean, yStd, + featuresMean, featuresStd, initialValues, regularization, optimizer) if (parameters == null) { val msg = s"${optimizer.getClass.getName} failed." @@ -541,56 +528,9 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String } } - private def trainOnRows( - instances: RDD[Instance], - yMean: Double, - yStd: Double, - featuresMean: Array[Double], - featuresStd: Array[Double], - initialValues: Vector, - regularization: Option[L2Regularization], - optimizer: FirstOrderMinimizer[BDV[Double], DiffFunction[BDV[Double]]]) = { - val bcFeaturesMean = instances.context.broadcast(featuresMean) - val bcFeaturesStd = instances.context.broadcast(featuresStd) - - val costFun = $(loss) match { - case SquaredError => - val getAggregatorFunc = new LeastSquaresAggregator(yStd, yMean, $(fitIntercept), - bcFeaturesStd, bcFeaturesMean)(_) - new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) - case Huber => - val getAggregatorFunc = new HuberAggregator($(fitIntercept), $(epsilon), bcFeaturesStd)(_) - new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) - } - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialValues.asBreeze.toDenseVector) - - /* - Note that in Linear Regression, the objective history (loss + regularization) returned - from optimizer is computed in the scaled space given by the following formula. -
- $$ - L &= 1/2n||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2 - + regTerms \\ - $$ -
- */ - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - - bcFeaturesMean.destroy() - bcFeaturesStd.destroy() - - (if (state == null) null else state.x.toArray, arrayBuilder.result) - } - - private def trainOnBlocks( + private def trainImpl( instances: RDD[Instance], + actualBlockSizeInMB: Double, yMean: Double, yStd: Double, featuresMean: Array[Double], @@ -606,9 +546,11 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - val blocks = InstanceBlock.blokify(standardized, $(blockSize)) + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val costFun = $(loss) match { case SquaredError => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 21eb17dfaacb3..75262ac4fe06b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -30,7 +30,6 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{DataValidators, Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession -import org.apache.spark.storage.StorageLevel /** * Classification model trained using Multinomial/Binary Logistic Regression. 
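[Reviewer note, not part of the patch] Each trainImpl standardizes features on the fly before blockification: every feature is multiplied by 1/std with no mean shift (so sparsity is preserved), and zero-variance features are mapped to 0.0. A small sketch of that transform on plain arrays, as a stand-in for the StandardScalerModel.getTransformFunc call the patch actually uses on ml.linalg vectors:

```scala
object StandardizeSketch {
  // Build the same kind of transform the trainImpl methods apply per partition:
  // scale by the inverse standard deviation, and send zero-std features to 0.0.
  def makeTransform(featuresStd: Array[Double]): Array[Double] => Array[Double] = {
    val inverseStd = featuresStd.map(std => if (std != 0) 1.0 / std else 0.0)
    features => {
      require(features.length == inverseStd.length, "dimension mismatch")
      Array.tabulate(features.length)(i => features(i) * inverseStd(i))
    }
  }

  def main(args: Array[String]): Unit = {
    val func = makeTransform(Array(2.0, 0.0, 0.5))
    // A zero-std feature is zeroed out rather than producing Infinity.
    println(func(Array(4.0, 7.0, 3.0)).mkString("[", ", ", "]")) // [2.0, 0.0, 6.0]
  }
}
```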
@@ -339,10 +338,8 @@ class LogisticRegressionWithLBFGS // Convert our input into a DataFrame val spark = SparkSession.builder().sparkContext(input.context).getOrCreate() val df = spark.createDataFrame(input.map(_.asML)) - // Determine if we should cache the DF - val handlePersistence = input.getStorageLevel == StorageLevel.NONE // Train our model - val mlLogisticRegressionModel = lr.train(df, handlePersistence) + val mlLogisticRegressionModel = lr.train(df) // convert the model val weights = Vectors.dense(mlLogisticRegressionModel.coefficients.toArray) createModel(weights, mlLogisticRegressionModel.intercept) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 51a6ae3c7e49b..d0b282db1ece8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -593,8 +593,8 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { .setMaxIter(5) .setFamily("multinomial") val model = mlor.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = mlor.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = mlor.setMaxBlockSizeInMB(s).fit(dataset) assert(model.interceptVector ~== model2.interceptVector relTol 1e-6) assert(model.coefficientMatrix ~== model2.coefficientMatrix relTol 1e-6) } @@ -606,8 +606,8 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { .setMaxIter(5) .setFamily("binomial") val model = blor.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = blor.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = blor.setMaxBlockSizeInMB(s).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-6) assert(model.coefficients ~== model2.coefficients relTol 1e-6) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala index 2252151af306b..cc8982f338702 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.fpm import org.apache.spark.ml.util.MLTest -import org.apache.spark.sql.DataFrame class PrefixSpanSuite extends MLTest { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala index 63ccfa3834624..e745e7f67df98 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala @@ -436,8 +436,8 @@ class AFTSurvivalRegressionSuite extends MLTest with DefaultReadWriteTest { .setQuantileProbabilities(quantileProbabilities) .setQuantilesCol("quantiles") val model = aft.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = aft.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = aft.setMaxBlockSizeInMB(s).fit(dataset) assert(model.coefficients ~== model2.coefficients relTol 1e-9) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.scale ~== model2.scale relTol 1e-9) diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index a30c47293c543..a0e17a4b40fd2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -507,8 +507,6 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val residualDeviancesR = Array(3.809296, 3.70055) - import GeneralizedLinearRegression._ - var idx = 0 val link = "log" val dataset = datasetPoissonLogWithZero @@ -790,8 +788,6 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val expected = Seq(0.5108256, 0.1201443, 1.600000, 1.886792, 0.625, 0.530, -0.4700036, -0.6348783, 1.325782, 1.463641) - import GeneralizedLinearRegression._ - var idx = 0 for (family <- GeneralizedLinearRegression.supportedFamilyNames.sortWith(_ < _)) { for (useWeight <- Seq(false, true)) { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index fb70883bffc5f..b3098be0a36fb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -672,8 +672,8 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe .setLoss(loss) .setMaxIter(3) val model = lir.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = lir.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = lir.setMaxBlockSizeInMB(s).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) assert(model.scale ~== model2.scale relTol 1e-9) diff --git a/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala index 68ba57c0d5fc8..e438a4135908e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala @@ -29,7 +29,6 @@ class SummarizerSuite extends SparkFunSuite with MLlibTestSparkContext { import testImplicits._ import Summarizer._ - import SummaryBuilderImpl._ private case class ExpectedMetrics( mean: Vector, diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala index 2a83d0aaf9699..3ca6816ce7c0d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.tree.impl import scala.annotation.tailrec import scala.collection.mutable -import scala.language.implicitConversions import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.DecisionTreeClassificationModel diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala index dd0139b94f098..c5bf202a2d337 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala @@ 
-19,7 +19,6 @@ package org.apache.spark.ml.util import java.io.{File, IOException} -import org.json4s.JNothing import org.scalatest.Suite import org.apache.spark.{SparkException, SparkFunSuite} diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala index d2c4832b12bac..19e9fe4bdb30e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala @@ -23,10 +23,7 @@ import org.dmg.pmml.PMML import org.scalatest.Suite import org.apache.spark.SparkContext -import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ -import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.Dataset trait PMMLReadWriteTest extends TempDirectory { self: Suite => /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 56d41403f74cc..8f311bbf9f840 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.mllib.clustering import java.util.{ArrayList => JArrayList} import breeze.linalg.{argmax, argtopk, max, DenseMatrix => BDM} -import org.scalatest.Assertions import org.apache.spark.SparkFunSuite import org.apache.spark.graphx.Edge diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index 9d7177e0a149e..0e789821aa5f3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -22,7 +22,7 @@ import java.{util => ju} import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV} import org.apache.spark.{SparkException, SparkFunSuite} -import org.apache.spark.mllib.linalg.{DenseMatrix, DenseVector, Matrices, Matrix, SparseMatrix, SparseVector, Vectors} +import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ diff --git a/pom.xml b/pom.xml index 25c6da7100056..3ae2e7420e154 100644 --- a/pom.xml +++ b/pom.xml @@ -164,6 +164,7 @@ 3.2.2 2.12.10 2.12 + -Ywarn-unused-import 2.0.0 --test @@ -189,7 +190,7 @@ 3.5.2 3.0.0 0.12.0 - 4.7.1 + 4.8-1 1.1 3.141.59 2.40.0 @@ -2537,6 +2538,7 @@ -deprecation -feature -explaintypes + ${scalac.arg.unused-imports} -target:jvm-1.8 @@ -3266,6 +3268,7 @@ 2.13.3 2.13 + -Wconf:cat=unused-imports:e diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 4039698d39958..9c9ff7fa7844b 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,7 +48,7 @@ If you want to install extra dependencies for a specific componenet, you can ins pip install pyspark[sql] -For PySpark with a different Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: +For PySpark with/without a specific Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: .. 
code-block:: bash @@ -68,8 +68,13 @@ It is recommended to use ``-v`` option in ``pip`` to track the installation and HADOOP_VERSION=2.7 pip install pyspark -v -Supported versions of Hadoop are ``HADOOP_VERSION=2.7`` and ``HADOOP_VERSION=3.2`` (default). -Note that this installation of PySpark with a different version of Hadoop is experimental. It can change or be removed between minor releases. +Supported values in ``HADOOP_VERSION`` are: + +- ``without``: Spark pre-built with user-provided Apache Hadoop +- ``2.7``: Spark pre-built for Apache Hadoop 2.7 +- ``3.2``: Spark pre-built for Apache Hadoop 3.2 and later (default) + +Note that this installation way of PySpark with/without a specific Hadoop version is experimental. It can change or be removed between minor releases. Using Conda diff --git a/python/docs/source/user_guide/arrow_pandas.rst b/python/docs/source/user_guide/arrow_pandas.rst index fe04315f87ad5..91d8155523391 100644 --- a/python/docs/source/user_guide/arrow_pandas.rst +++ b/python/docs/source/user_guide/arrow_pandas.rst @@ -341,8 +341,9 @@ Supported SQL Types .. currentmodule:: pyspark.sql.types -Currently, all Spark SQL data types are supported by Arrow-based conversion except :class:`MapType`, +Currently, all Spark SQL data types are supported by Arrow-based conversion except :class:`ArrayType` of :class:`TimestampType`, and nested :class:`StructType`. +:class: `MapType` is only supported when using PyArrow 2.0.0 and above. Setting Arrow Batch Size ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 8f13f3275cb5b..50882fc895d6c 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -783,7 +783,7 @@ class LinearSVCTrainingSummary(LinearSVCSummary, _TrainingSummary): class _LogisticRegressionParams(_ProbabilisticClassifierParams, HasRegParam, HasElasticNetParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, - HasThreshold, HasBlockSize): + HasThreshold, HasMaxBlockSizeInMB): """ Params for :py:class:`LogisticRegression` and :py:class:`LogisticRegressionModel`. @@ -836,7 +836,7 @@ class _LogisticRegressionParams(_ProbabilisticClassifierParams, HasRegParam, def __init__(self, *args): super(_LogisticRegressionParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto", - blockSize=1) + maxBlockSizeInMB=0.0) @since("1.4.0") def setThreshold(self, value): @@ -980,8 +980,8 @@ class LogisticRegression(_JavaProbabilisticClassifier, _LogisticRegressionParams LogisticRegressionModel... >>> blorModel.getProbabilityCol() 'newProbability' - >>> blorModel.getBlockSize() - 1 + >>> blorModel.getMaxBlockSizeInMB() + 0.0 >>> blorModel.setThreshold(0.1) LogisticRegressionModel... 
>>> blorModel.getThreshold() @@ -1047,7 +1047,7 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p aggregationDepth=2, family="auto", lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None, - blockSize=1): + maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1057,7 +1057,7 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p aggregationDepth=2, family="auto", \ lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, \ lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None, \ - blockSize=1): + maxBlockSizeInMB=0.0): If the threshold and thresholds Params are both set, they must be equivalent. """ super(LogisticRegression, self).__init__() @@ -1076,7 +1076,7 @@ def setParams(self, *, featuresCol="features", labelCol="label", predictionCol=" aggregationDepth=2, family="auto", lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None, - blockSize=1): + maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ @@ -1085,7 +1085,7 @@ def setParams(self, *, featuresCol="features", labelCol="label", predictionCol=" aggregationDepth=2, family="auto", \ lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, \ lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None, \ - blockSize=1): + maxBlockSizeInMB=0.0): Sets params for logistic regression. If the threshold and thresholds Params are both set, they must be equivalent. """ @@ -1181,11 +1181,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSize(self, value): + def setMaxBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`maxBlockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(maxBlockSizeInMB=value) class LogisticRegressionModel(_JavaProbabilisticClassificationModel, _LogisticRegressionParams, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 9f72d24f63117..4bde851bb1e0d 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -257,7 +257,7 @@ class _LogisticRegressionParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize, + HasMaxBlockSizeInMB, ): threshold: Param[float] family: Param[str] @@ -305,7 +305,7 @@ class LogisticRegression( upperBoundsOnCoefficients: Optional[Matrix] = ..., lowerBoundsOnIntercepts: Optional[Vector] = ..., upperBoundsOnIntercepts: Optional[Vector] = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -330,7 +330,7 @@ class LogisticRegression( upperBoundsOnCoefficients: Optional[Matrix] = ..., lowerBoundsOnIntercepts: Optional[Vector] = ..., upperBoundsOnIntercepts: Optional[Vector] = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> LogisticRegression: ... def setFamily(self, value: str) -> LogisticRegression: ... def setLowerBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... @@ -345,7 +345,7 @@ class LogisticRegression( def setStandardization(self, value: bool) -> LogisticRegression: ... def setWeightCol(self, value: str) -> LogisticRegression: ... 
def setAggregationDepth(self, value: int) -> LogisticRegression: ... - def setBlockSize(self, value: int) -> LogisticRegression: ... + def setMaxBlockSizeInMB(self, value: float) -> LogisticRegression: ... class LogisticRegressionModel( _JavaProbabilisticClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 53d26972c4b4a..bcab51f76bd49 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -168,8 +168,8 @@ def get$Name(self): "adjusted to the size of this data.", None, "TypeConverters.toInt"), ("maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is " + "stacked within partitions. If more than remaining data size in a partition then it " + - "is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", - "0.0", "TypeConverters.toFloat")] + "is adjusted to the data size. Default 0.0 represents choosing optimal value, depends " + + "on specific algorithm. Must be >= 0.", "0.0", "TypeConverters.toFloat")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index cbef7386e2214..9311e4481e2b4 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -601,10 +601,10 @@ def getBlockSize(self): class HasMaxBlockSizeInMB(Params): """ - Mixin for param maxBlockSizeInMB: maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0. + Mixin for param maxBlockSizeInMB: maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. """ - maxBlockSizeInMB = Param(Params._dummy(), "maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", typeConverter=TypeConverters.toFloat) + maxBlockSizeInMB = Param(Params._dummy(), "maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. 
Must be >= 0.", typeConverter=TypeConverters.toFloat) def __init__(self): super(HasMaxBlockSizeInMB, self).__init__() diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index d1a5852fd65bd..5ce484d964a5a 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -24,7 +24,7 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol, \ Param, Params, TypeConverters, HasMaxIter, HasTol, HasFitIntercept, HasAggregationDepth, \ - HasBlockSize, HasRegParam, HasSolver, HasStepSize, HasSeed, HasElasticNetParam, \ + HasMaxBlockSizeInMB, HasRegParam, HasSolver, HasStepSize, HasSeed, HasElasticNetParam, \ HasStandardization, HasLoss, HasVarianceCol from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, _TreeRegressorParams @@ -87,7 +87,7 @@ class _JavaRegressionModel(RegressionModel, JavaPredictionModel, metaclass=ABCMe class _LinearRegressionParams(_PredictorParams, HasRegParam, HasElasticNetParam, HasMaxIter, HasTol, HasFitIntercept, HasStandardization, HasWeightCol, HasSolver, - HasAggregationDepth, HasLoss, HasBlockSize): + HasAggregationDepth, HasLoss, HasMaxBlockSizeInMB): """ Params for :py:class:`LinearRegression` and :py:class:`LinearRegressionModel`. @@ -107,7 +107,7 @@ class _LinearRegressionParams(_PredictorParams, HasRegParam, HasElasticNetParam, def __init__(self, *args): super(_LinearRegressionParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35, - blockSize=1) + maxBlockSizeInMB=0.0) @since("2.3.0") def getEpsilon(self): @@ -166,8 +166,8 @@ class LinearRegression(_JavaRegressor, _LinearRegressionParams, JavaMLWritable, LinearRegressionModel... 
>>> model.getMaxIter() 5 - >>> model.getBlockSize() - 1 + >>> model.getMaxBlockSizeInMB() + 0.0 >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) >>> abs(model.predict(test0.head().features) - (-1.0)) < 0.001 True @@ -207,12 +207,12 @@ class LinearRegression(_JavaRegressor, _LinearRegressionParams, JavaMLWritable, def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, solver="auto", weightCol=None, aggregationDepth=2, - loss="squaredError", epsilon=1.35, blockSize=1): + loss="squaredError", epsilon=1.35, maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \ - loss="squaredError", epsilon=1.35, blockSize=1) + loss="squaredError", epsilon=1.35, maxBlockSizeInMB=0.0) """ super(LinearRegression, self).__init__() self._java_obj = self._new_java_obj( @@ -225,12 +225,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, solver="auto", weightCol=None, aggregationDepth=2, - loss="squaredError", epsilon=1.35, blockSize=1): + loss="squaredError", epsilon=1.35, maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \ - loss="squaredError", epsilon=1.35, blockSize=1) + loss="squaredError", epsilon=1.35, maxBlockSizeInMB=0.0) Sets params for linear regression. """ kwargs = self._input_kwargs @@ -307,11 +307,11 @@ def setLoss(self, value): return self._set(lossType=value) @since("3.1.0") - def setBlockSize(self, value): + def setMaxBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`maxBlockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(maxBlockSizeInMB=value) class LinearRegressionModel(_JavaRegressionModel, _LinearRegressionParams, GeneralJavaMLWritable, @@ -1683,7 +1683,7 @@ def evaluateEachIteration(self, dataset, loss): class _AFTSurvivalRegressionParams(_PredictorParams, HasMaxIter, HasTol, HasFitIntercept, - HasAggregationDepth, HasBlockSize): + HasAggregationDepth, HasMaxBlockSizeInMB): """ Params for :py:class:`AFTSurvivalRegression` and :py:class:`AFTSurvivalRegressionModel`. @@ -1710,7 +1710,7 @@ def __init__(self, *args): super(_AFTSurvivalRegressionParams, self).__init__(*args) self._setDefault(censorCol="censor", quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], - maxIter=100, tol=1E-6, blockSize=1) + maxIter=100, tol=1E-6, maxBlockSizeInMB=0.0) @since("1.6.0") def getCensorCol(self): @@ -1762,8 +1762,8 @@ class AFTSurvivalRegression(_JavaRegressor, _AFTSurvivalRegressionParams, 10 >>> aftsr.clear(aftsr.maxIter) >>> model = aftsr.fit(df) - >>> model.getBlockSize() - 1 + >>> model.getMaxBlockSizeInMB() + 0.0 >>> model.setFeaturesCol("features") AFTSurvivalRegressionModel... 
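The hunks above replace the row-count based `blockSize` parameter with `maxBlockSizeInMB` (a float, default 0.0 meaning the algorithm infers a block size). A minimal usage sketch against the patched Python API; the local SparkSession and the toy data are illustrative assumptions, not taken from the patch:

    from pyspark.sql import SparkSession
    from pyspark.ml.linalg import Vectors
    from pyspark.ml.regression import LinearRegression

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame(
        [(1.0, Vectors.dense(0.0)), (0.0, Vectors.dense(1.0))],
        ["label", "features"])

    lr = LinearRegression(maxIter=5)
    print(lr.getMaxBlockSizeInMB())    # 0.0: let the algorithm choose a block size
    lr.setMaxBlockSizeInMB(0.25)       # cap stacked input blocks at roughly 0.25 MB
    model = lr.fit(df)

Unlike the old `blockSize` (a row count), the new parameter bounds the in-memory size of each stacked block per partition, which is why the setter now takes a float in megabytes.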
>>> model.predict(Vectors.dense(6.3)) @@ -1802,12 +1802,12 @@ class AFTSurvivalRegression(_JavaRegressor, _AFTSurvivalRegressionParams, def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), - quantilesCol=None, aggregationDepth=2, blockSize=1): + quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \ quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \ - quantilesCol=None, aggregationDepth=2, blockSize=1) + quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0) """ super(AFTSurvivalRegression, self).__init__() self._java_obj = self._new_java_obj( @@ -1820,12 +1820,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), - quantilesCol=None, aggregationDepth=2, blockSize=1): + quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \ quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \ - quantilesCol=None, aggregationDepth=2, blockSize=1): + quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ kwargs = self._input_kwargs return self._set(**kwargs) @@ -1883,11 +1883,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSize(self, value): + def setMaxBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`maxBlockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(maxBlockSizeInMB=value) class AFTSurvivalRegressionModel(_JavaRegressionModel, _AFTSurvivalRegressionParams, diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi index 991eb4f12ac85..5cb0e7a5092f7 100644 --- a/python/pyspark/ml/regression.pyi +++ b/python/pyspark/ml/regression.pyi @@ -24,7 +24,7 @@ from pyspark.ml import PredictionModel, Predictor from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, - HasBlockSize, + HasMaxBlockSizeInMB, HasElasticNetParam, HasFeaturesCol, HasFitIntercept, @@ -86,7 +86,7 @@ class _LinearRegressionParams( HasSolver, HasAggregationDepth, HasLoss, - HasBlockSize, + HasMaxBlockSizeInMB, ): solver: Param[str] loss: Param[str] @@ -116,7 +116,7 @@ class LinearRegression( weightCol: Optional[str] = ..., aggregationDepth: int = ..., epsilon: float = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -134,7 +134,7 @@ class LinearRegression( weightCol: Optional[str] = ..., aggregationDepth: int = ..., epsilon: float = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> LinearRegression: ... def setEpsilon(self, value: float) -> LinearRegression: ... def setMaxIter(self, value: int) -> LinearRegression: ... @@ -147,7 +147,7 @@ class LinearRegression( def setSolver(self, value: str) -> LinearRegression: ... 
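`AFTSurvivalRegression` gets the same rename as `LinearRegression` above. A short sketch with the renamed parameter; the five training rows are the usual survival-regression toy data and, like the chosen value 2.0, are only an assumption for illustration:

    from pyspark.sql import SparkSession
    from pyspark.ml.linalg import Vectors
    from pyspark.ml.regression import AFTSurvivalRegression

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    training = spark.createDataFrame([
        (1.218, 1.0, Vectors.dense(1.560, -0.605)),
        (2.949, 0.0, Vectors.dense(0.346, 2.158)),
        (3.627, 0.0, Vectors.dense(1.380, 0.231)),
        (0.273, 1.0, Vectors.dense(0.520, 1.151)),
        (4.199, 0.0, Vectors.dense(0.795, -0.226)),
    ], ["label", "censor", "features"])

    aft = AFTSurvivalRegression(maxBlockSizeInMB=2.0)   # pre-patch this was an integer blockSize
    model = aft.fit(training)
    print(model.getMaxBlockSizeInMB())                  # 2.0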
def setAggregationDepth(self, value: int) -> LinearRegression: ... def setLoss(self, value: str) -> LinearRegression: ... - def setBlockSize(self, value: int) -> LinearRegression: ... + def setMaxBlockSizeInMB(self, value: float) -> LinearRegression: ... class LinearRegressionModel( _JavaRegressionModel[Vector], @@ -522,7 +522,7 @@ class _AFTSurvivalRegressionParams( HasTol, HasFitIntercept, HasAggregationDepth, - HasBlockSize, + HasMaxBlockSizeInMB, ): censorCol: Param[str] quantileProbabilities: Param[List[float]] @@ -551,7 +551,7 @@ class AFTSurvivalRegression( quantileProbabilities: List[float] = ..., quantilesCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -566,7 +566,7 @@ class AFTSurvivalRegression( quantileProbabilities: List[float] = ..., quantilesCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> AFTSurvivalRegression: ... def setCensorCol(self, value: str) -> AFTSurvivalRegression: ... def setQuantileProbabilities(self, value: List[float]) -> AFTSurvivalRegression: ... @@ -575,7 +575,7 @@ class AFTSurvivalRegression( def setTol(self, value: float) -> AFTSurvivalRegression: ... def setFitIntercept(self, value: bool) -> AFTSurvivalRegression: ... def setAggregationDepth(self, value: int) -> AFTSurvivalRegression: ... - def setBlockSize(self, value: int) -> AFTSurvivalRegression: ... + def setMaxBlockSizeInMB(self, value: float) -> AFTSurvivalRegression: ... class AFTSurvivalRegressionModel( _JavaRegressionModel[Vector], diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 86a88a5bf341e..4af5d1f484ee4 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -3527,7 +3527,7 @@ def schema_of_json(json, options={}): Parameters ---------- json : :class:`Column` or str - a JSON string or a string literal containing a JSON string. + a JSON string or a foldable string column containing a JSON string. options : dict, optional options to control parsing. accepts the same options as the JSON datasource @@ -3564,7 +3564,7 @@ def schema_of_csv(csv, options={}): Parameters ---------- csv : :class:`Column` or str - a CSV string or a string literal containing a CSV string. + a CSV string or a foldable string column containing a CSV string. options : dict, optional options to control parsing. accepts the same options as the CSV datasource diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index 3456c12e59c09..d8a241417532e 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -22,7 +22,7 @@ from pyspark.sql.pandas.serializers import ArrowCollectSerializer from pyspark.sql.types import IntegralType from pyspark.sql.types import ByteType, ShortType, IntegerType, LongType, FloatType, \ - DoubleType, BooleanType, TimestampType, StructType, DataType + DoubleType, BooleanType, MapType, TimestampType, StructType, DataType from pyspark.traceback_utils import SCCallSiteSync @@ -100,7 +100,8 @@ def toPandas(self): # of PyArrow is found, if 'spark.sql.execution.arrow.pyspark.enabled' is enabled. if use_arrow: try: - from pyspark.sql.pandas.types import _check_series_localize_timestamps + from pyspark.sql.pandas.types import _check_series_localize_timestamps, \ + _convert_map_items_to_dict import pyarrow # Rename columns to avoid duplicated column names. 
tmp_column_names = ['col_{}'.format(i) for i in range(len(self.columns))] @@ -117,6 +118,9 @@ def toPandas(self): if isinstance(field.dataType, TimestampType): pdf[field.name] = \ _check_series_localize_timestamps(pdf[field.name], timezone) + elif isinstance(field.dataType, MapType): + pdf[field.name] = \ + _convert_map_items_to_dict(pdf[field.name]) return pdf else: return pd.DataFrame.from_records([], columns=self.columns) diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 16462e8702a0b..750aa4b0e6c56 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -284,7 +284,6 @@ def calculate(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: should be checked for accuracy by users. Currently, - :class:`pyspark.sql.types.MapType`, :class:`pyspark.sql.types.ArrayType` of :class:`pyspark.sql.types.TimestampType` and nested :class:`pyspark.sql.types.StructType` are currently not supported as output types. diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 73d36ee555fb5..2dcfdc1046049 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -117,7 +117,8 @@ def __init__(self, timezone, safecheck, assign_cols_by_name): self._assign_cols_by_name = assign_cols_by_name def arrow_to_pandas(self, arrow_column): - from pyspark.sql.pandas.types import _check_series_localize_timestamps + from pyspark.sql.pandas.types import _check_series_localize_timestamps, \ + _convert_map_items_to_dict import pyarrow # If the given column is a date type column, creates a series of datetime.date directly @@ -127,6 +128,8 @@ def arrow_to_pandas(self, arrow_column): if pyarrow.types.is_timestamp(arrow_column.type): return _check_series_localize_timestamps(s, self._timezone) + elif pyarrow.types.is_map(arrow_column.type): + return _convert_map_items_to_dict(s) else: return s @@ -147,7 +150,8 @@ def _create_batch(self, series): """ import pandas as pd import pyarrow as pa - from pyspark.sql.pandas.types import _check_series_convert_timestamps_internal + from pyspark.sql.pandas.types import _check_series_convert_timestamps_internal, \ + _convert_dict_to_map_items from pandas.api.types import is_categorical_dtype # Make input conform to [(series1, type1), (series2, type2), ...] if not isinstance(series, (list, tuple)) or \ @@ -160,6 +164,8 @@ def create_array(s, t): # Ensure timestamp series are in expected form for Spark internal representation if t is not None and pa.types.is_timestamp(t): s = _check_series_convert_timestamps_internal(s, self._timezone) + elif t is not None and pa.types.is_map(t): + s = _convert_dict_to_map_items(s) elif is_categorical_dtype(s.dtype): # Note: This can be removed once minimum pyarrow version is >= 0.16.1 s = s.astype(s.dtypes.categories.dtype) diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py index 67557120715ac..7e4d61b0d21b8 100644 --- a/python/pyspark/sql/pandas/types.py +++ b/python/pyspark/sql/pandas/types.py @@ -20,14 +20,15 @@ pandas instances during the type conversion. 
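Taken together, the conversion and serializer hunks above let the Arrow path carry `MapType` columns in both directions (`createDataFrame` from pandas and `toPandas`). A rough end-to-end sketch; it assumes pyarrow 2.0.0 or newer is installed, since the patch raises a TypeError for older versions, and the session and data are illustrative:

    import pandas as pd
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

    pdf = pd.DataFrame({"id": [0, 1], "m": [{"a": 1}, {"b": 2, "c": 3}]})
    df = spark.createDataFrame(pdf, schema="id long, m map<string,long>")  # dicts become map items
    roundtrip = df.toPandas()            # map items come back as Python dicts
    print(roundtrip["m"].tolist())       # [{'a': 1}, {'b': 2, 'c': 3}]

Per the checks added in to_arrow_type and from_arrow_type, map keys and values still may not be timestamps or structs.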
""" -from pyspark.sql.types import ByteType, ShortType, IntegerType, LongType, FloatType, \ - DoubleType, DecimalType, StringType, BinaryType, DateType, TimestampType, ArrayType, \ - StructType, StructField, BooleanType +from pyspark.sql.types import BooleanType, ByteType, ShortType, IntegerType, LongType, \ + FloatType, DoubleType, DecimalType, StringType, BinaryType, DateType, TimestampType, \ + ArrayType, MapType, StructType, StructField def to_arrow_type(dt): """ Convert Spark data type to pyarrow type """ + from distutils.version import LooseVersion import pyarrow as pa if type(dt) == BooleanType: arrow_type = pa.bool_() @@ -58,6 +59,13 @@ def to_arrow_type(dt): if type(dt.elementType) in [StructType, TimestampType]: raise TypeError("Unsupported type in conversion to Arrow: " + str(dt)) arrow_type = pa.list_(to_arrow_type(dt.elementType)) + elif type(dt) == MapType: + if LooseVersion(pa.__version__) < LooseVersion("2.0.0"): + raise TypeError("MapType is only supported with pyarrow 2.0.0 and above") + if type(dt.keyType) in [StructType, TimestampType] or \ + type(dt.valueType) in [StructType, TimestampType]: + raise TypeError("Unsupported type in conversion to Arrow: " + str(dt)) + arrow_type = pa.map_(to_arrow_type(dt.keyType), to_arrow_type(dt.valueType)) elif type(dt) == StructType: if any(type(field.dataType) == StructType for field in dt): raise TypeError("Nested StructType not supported in conversion to Arrow") @@ -81,6 +89,8 @@ def to_arrow_schema(schema): def from_arrow_type(at): """ Convert pyarrow type to Spark data type. """ + from distutils.version import LooseVersion + import pyarrow as pa import pyarrow.types as types if types.is_boolean(at): spark_type = BooleanType() @@ -110,6 +120,12 @@ def from_arrow_type(at): if types.is_timestamp(at.value_type): raise TypeError("Unsupported type in conversion from Arrow: " + str(at)) spark_type = ArrayType(from_arrow_type(at.value_type)) + elif types.is_map(at): + if LooseVersion(pa.__version__) < LooseVersion("2.0.0"): + raise TypeError("MapType is only supported with pyarrow 2.0.0 and above") + if types.is_timestamp(at.key_type) or types.is_timestamp(at.item_type): + raise TypeError("Unsupported type in conversion from Arrow: " + str(at)) + spark_type = MapType(from_arrow_type(at.key_type), from_arrow_type(at.item_type)) elif types.is_struct(at): if any(types.is_struct(field.type) for field in at): raise TypeError("Nested StructType not supported in conversion from Arrow: " + str(at)) @@ -306,3 +322,23 @@ def _check_series_convert_timestamps_tz_local(s, timezone): `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ return _check_series_convert_timestamps_localize(s, timezone, None) + + +def _convert_map_items_to_dict(s): + """ + Convert a series with items as list of (key, value), as made from an Arrow column of map type, + to dict for compatibility with non-arrow MapType columns. + :param s: pandas.Series of lists of (key, value) pairs + :return: pandas.Series of dictionaries + """ + return s.apply(lambda m: None if m is None else {k: v for k, v in m}) + + +def _convert_dict_to_map_items(s): + """ + Convert a series of dictionaries to list of (key, value) pairs to match expected data + for Arrow column of map type. 
+ :param s: pandas.Series of dictionaries + :return: pandas.Series of lists of (key, value) pairs + """ + return s.apply(lambda d: list(d.items()) if d is not None else None) diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 55d5e9017b345..e764c42d88a31 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -21,13 +21,13 @@ import time import unittest import warnings +from distutils.version import LooseVersion from pyspark import SparkContext, SparkConf from pyspark.sql import Row, SparkSession from pyspark.sql.functions import udf from pyspark.sql.types import StructType, StringType, IntegerType, LongType, \ - FloatType, DoubleType, DecimalType, DateType, TimestampType, BinaryType, StructField, MapType, \ - ArrayType + FloatType, DoubleType, DecimalType, DateType, TimestampType, BinaryType, StructField, ArrayType from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message from pyspark.testing.utils import QuietTest @@ -114,9 +114,10 @@ def create_pandas_data_frame(self): return pd.DataFrame(data=data_dict) def test_toPandas_fallback_enabled(self): + ts = datetime.datetime(2015, 11, 1, 0, 30) with self.sql_conf({"spark.sql.execution.arrow.pyspark.fallback.enabled": True}): - schema = StructType([StructField("map", MapType(StringType(), IntegerType()), True)]) - df = self.spark.createDataFrame([({u'a': 1},)], schema=schema) + schema = StructType([StructField("a", ArrayType(TimestampType()), True)]) + df = self.spark.createDataFrame([([ts],)], schema=schema) with QuietTest(self.sc): with self.warnings_lock: with warnings.catch_warnings(record=True) as warns: @@ -129,10 +130,10 @@ def test_toPandas_fallback_enabled(self): self.assertTrue(len(user_warns) > 0) self.assertTrue( "Attempting non-optimization" in str(user_warns[-1])) - assert_frame_equal(pdf, pd.DataFrame({u'map': [{u'a': 1}]})) + assert_frame_equal(pdf, pd.DataFrame({"a": [[ts]]})) def test_toPandas_fallback_disabled(self): - schema = StructType([StructField("map", MapType(StringType(), IntegerType()), True)]) + schema = StructType([StructField("a", ArrayType(TimestampType()), True)]) df = self.spark.createDataFrame([(None,)], schema=schema) with QuietTest(self.sc): with self.warnings_lock: @@ -336,6 +337,62 @@ def test_toPandas_with_array_type(self): self.assertTrue(expected[r][e] == result_arrow[r][e] and result[r][e] == result_arrow[r][e]) + def test_createDataFrame_with_map_type(self): + map_data = [{"a": 1}, {"b": 2, "c": 3}, {}, None, {"d": None}] + + pdf = pd.DataFrame({"id": [0, 1, 2, 3, 4], "m": map_data}) + schema = "id long, m map" + + with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): + df = self.spark.createDataFrame(pdf, schema=schema) + + if LooseVersion(pa.__version__) < LooseVersion("2.0.0"): + with QuietTest(self.sc): + with self.assertRaisesRegex(Exception, "MapType.*only.*pyarrow 2.0.0"): + self.spark.createDataFrame(pdf, schema=schema) + else: + df_arrow = self.spark.createDataFrame(pdf, schema=schema) + + result = df.collect() + result_arrow = df_arrow.collect() + + self.assertEqual(len(result), len(result_arrow)) + for row, row_arrow in zip(result, result_arrow): + i, m = row + _, m_arrow = row_arrow + self.assertEqual(m, map_data[i]) + self.assertEqual(m_arrow, map_data[i]) + + def test_toPandas_with_map_type(self): + pdf = pd.DataFrame({"id": [0, 1, 2, 3], + "m": [{}, {"a": 1}, {"a": 1, "b": 2}, {"a": 1, "b": 2, 
"c": 3}]}) + + with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): + df = self.spark.createDataFrame(pdf, schema="id long, m map") + + if LooseVersion(pa.__version__) < LooseVersion("2.0.0"): + with QuietTest(self.sc): + with self.assertRaisesRegex(Exception, "MapType.*only.*pyarrow 2.0.0"): + df.toPandas() + else: + pdf_non, pdf_arrow = self._toPandas_arrow_toggle(df) + assert_frame_equal(pdf_arrow, pdf_non) + + def test_toPandas_with_map_type_nulls(self): + pdf = pd.DataFrame({"id": [0, 1, 2, 3, 4], + "m": [{"a": 1}, {"b": 2, "c": 3}, {}, None, {"d": None}]}) + + with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): + df = self.spark.createDataFrame(pdf, schema="id long, m map") + + if LooseVersion(pa.__version__) < LooseVersion("2.0.0"): + with QuietTest(self.sc): + with self.assertRaisesRegex(Exception, "MapType.*only.*pyarrow 2.0.0"): + df.toPandas() + else: + pdf_non, pdf_arrow = self._toPandas_arrow_toggle(df) + assert_frame_equal(pdf_arrow, pdf_non) + def test_createDataFrame_with_int_col_names(self): import numpy as np pdf = pd.DataFrame(np.random.rand(4, 2)) @@ -345,26 +402,28 @@ def test_createDataFrame_with_int_col_names(self): self.assertEqual(pdf_col_names, df_arrow.columns) def test_createDataFrame_fallback_enabled(self): + ts = datetime.datetime(2015, 11, 1, 0, 30) with QuietTest(self.sc): with self.sql_conf({"spark.sql.execution.arrow.pyspark.fallback.enabled": True}): with warnings.catch_warnings(record=True) as warns: # we want the warnings to appear even if this test is run from a subclass warnings.simplefilter("always") df = self.spark.createDataFrame( - pd.DataFrame([[{u'a': 1}]]), "a: map") + pd.DataFrame({"a": [[ts]]}), "a: array") # Catch and check the last UserWarning. user_warns = [ warn.message for warn in warns if isinstance(warn.message, UserWarning)] self.assertTrue(len(user_warns) > 0) self.assertTrue( "Attempting non-optimization" in str(user_warns[-1])) - self.assertEqual(df.collect(), [Row(a={u'a': 1})]) + self.assertEqual(df.collect(), [Row(a=[ts])]) def test_createDataFrame_fallback_disabled(self): with QuietTest(self.sc): with self.assertRaisesRegexp(TypeError, 'Unsupported type'): self.spark.createDataFrame( - pd.DataFrame([[{u'a': 1}]]), "a: map") + pd.DataFrame({"a": [[datetime.datetime(2015, 11, 1, 0, 30)]]}), + "a: array") # Regression test for SPARK-23314 def test_timestamp_dst(self): diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index f9a7dd69b61fb..4afc1dfcc1c6e 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -176,9 +176,9 @@ def test_wrong_return_type(self): with QuietTest(self.sc): with self.assertRaisesRegexp( NotImplementedError, - 'Invalid return type.*MapType'): + 'Invalid return type.*ArrayType.*TimestampType'): left.groupby('id').cogroup(right.groupby('id')).applyInPandas( - lambda l, r: l, 'id long, v map') + lambda l, r: l, 'id long, v array') def test_wrong_args(self): left = self.data1 diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 93e37125eaa33..ee68b95fc478d 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -26,7 +26,7 @@ window from pyspark.sql.types import IntegerType, DoubleType, ArrayType, BinaryType, ByteType, \ LongType, DecimalType, ShortType, FloatType, StringType, 
BooleanType, StructType, \ - StructField, NullType, MapType, TimestampType + StructField, NullType, TimestampType from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message from pyspark.testing.utils import QuietTest @@ -246,10 +246,10 @@ def test_wrong_return_type(self): with QuietTest(self.sc): with self.assertRaisesRegexp( NotImplementedError, - 'Invalid return type.*grouped map Pandas UDF.*MapType'): + 'Invalid return type.*grouped map Pandas UDF.*ArrayType.*TimestampType'): pandas_udf( lambda pdf: pdf, - 'id long, v map', + 'id long, v array', PandasUDFType.GROUPED_MAP) def test_wrong_args(self): @@ -276,7 +276,6 @@ def test_wrong_args(self): def test_unsupported_types(self): common_err_msg = 'Invalid return type.*grouped map Pandas UDF.*' unsupported_types = [ - StructField('map', MapType(StringType(), IntegerType())), StructField('arr_ts', ArrayType(TimestampType())), StructField('null', NullType()), StructField('struct', StructType([StructField('l', LongType())])), diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py index 451308927629b..2cbcf31f6e7b3 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py @@ -21,7 +21,7 @@ from pyspark.sql import Row from pyspark.sql.functions import array, explode, col, lit, mean, sum, \ udf, pandas_udf, PandasUDFType -from pyspark.sql.types import ArrayType, TimestampType, DoubleType, MapType +from pyspark.sql.types import ArrayType, TimestampType from pyspark.sql.utils import AnalysisException from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -159,7 +159,7 @@ def mean_and_std_udf(v): with QuietTest(self.sc): with self.assertRaisesRegexp(NotImplementedError, 'not supported'): - @pandas_udf(MapType(DoubleType(), DoubleType()), PandasUDFType.GROUPED_AGG) + @pandas_udf(ArrayType(TimestampType()), PandasUDFType.GROUPED_AGG) def mean_and_std_udf(v): return {v.mean(): v.std()} diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 6d325c9085ce1..5da5d043ceca4 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -22,6 +22,7 @@ import unittest from datetime import date, datetime from decimal import Decimal +from distutils.version import LooseVersion from pyspark import TaskContext from pyspark.rdd import PythonEvalType @@ -379,6 +380,20 @@ def test_vectorized_udf_nested_struct(self): 'Invalid return type with scalar Pandas UDFs'): pandas_udf(lambda x: x, returnType=nested_type, functionType=udf_type) + def test_vectorized_udf_map_type(self): + data = [({},), ({"a": 1},), ({"a": 1, "b": 2},), ({"a": 1, "b": 2, "c": 3},)] + schema = StructType([StructField("map", MapType(StringType(), LongType()))]) + df = self.spark.createDataFrame(data, schema=schema) + for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: + if LooseVersion(pa.__version__) < LooseVersion("2.0.0"): + with QuietTest(self.sc): + with self.assertRaisesRegex(Exception, "MapType.*not supported"): + pandas_udf(lambda x: x, MapType(StringType(), LongType()), udf_type) + else: + map_f = pandas_udf(lambda x: x, MapType(StringType(), LongType()), udf_type) + result = df.select(map_f(col('map'))) + self.assertEquals(df.collect(), 
result.collect()) + def test_vectorized_udf_complex(self): df = self.spark.range(10).select( col('id').cast('int').alias('a'), @@ -504,8 +519,8 @@ def test_vectorized_udf_wrong_return_type(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: with self.assertRaisesRegexp( NotImplementedError, - 'Invalid return type.*scalar Pandas UDF.*MapType'): - pandas_udf(lambda x: x, MapType(LongType(), LongType()), udf_type) + 'Invalid return type.*scalar Pandas UDF.*ArrayType.*TimestampType'): + pandas_udf(lambda x: x, ArrayType(TimestampType()), udf_type) def test_vectorized_udf_return_scalar(self): df = self.spark.range(10) @@ -577,8 +592,8 @@ def test_vectorized_udf_unsupported_types(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: with self.assertRaisesRegexp( NotImplementedError, - 'Invalid return type.*scalar Pandas UDF.*MapType'): - pandas_udf(lambda x: x, MapType(StringType(), IntegerType()), udf_type) + 'Invalid return type.*scalar Pandas UDF.*ArrayType.*TimestampType'): + pandas_udf(lambda x: x, ArrayType(TimestampType()), udf_type) with self.assertRaisesRegexp( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.StructType'): diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 275a72b37be97..09c5963927456 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -16,10 +16,14 @@ # limitations under the License. # -import threading +import itertools +import os +import platform import re import sys +import threading import traceback +import types from py4j.clientserver import ClientServer @@ -76,6 +80,144 @@ def wrapper(*args, **kwargs): return wrapper +def walk_tb(tb): + while tb is not None: + yield tb + tb = tb.tb_next + + +def try_simplify_traceback(tb): + """ + Simplify the traceback. It removes the tracebacks in the current package, and only + shows the traceback that is related to the thirdparty and user-specified codes. + + Returns + ------- + TracebackType or None + Simplified traceback instance. It returns None if it fails to simplify. + + Notes + ----- + This keeps the tracebacks once it sees they are from a different file even + though the following tracebacks are from the current package. + + Examples + -------- + >>> import importlib + >>> import sys + >>> import traceback + >>> import tempfile + >>> with tempfile.TemporaryDirectory() as tmp_dir: + ... with open("%s/dummy_module.py" % tmp_dir, "w") as f: + ... _ = f.write( + ... 'def raise_stop_iteration():\\n' + ... ' raise StopIteration()\\n\\n' + ... 'def simple_wrapper(f):\\n' + ... ' def wrapper(*a, **k):\\n' + ... ' return f(*a, **k)\\n' + ... ' return wrapper\\n') + ... f.flush() + ... spec = importlib.util.spec_from_file_location( + ... "dummy_module", "%s/dummy_module.py" % tmp_dir) + ... dummy_module = importlib.util.module_from_spec(spec) + ... spec.loader.exec_module(dummy_module) + >>> def skip_doctest_traceback(tb): + ... import pyspark + ... root = os.path.dirname(pyspark.__file__) + ... pairs = zip(walk_tb(tb), traceback.extract_tb(tb)) + ... for cur_tb, cur_frame in pairs: + ... if cur_frame.filename.startswith(root): + ... return cur_tb + + Regular exceptions should show the file name of the current package as below. + + >>> exc_info = None + >>> try: + ... fail_on_stopiteration(dummy_module.raise_stop_iteration)() + ... except Exception as e: + ... tb = sys.exc_info()[-1] + ... e.__cause__ = None + ... exc_info = "".join( + ... 
traceback.format_exception(type(e), e, tb)) + >>> print(exc_info) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + Traceback (most recent call last): + File ... + ... + File "/.../pyspark/util.py", line ... + ... + RuntimeError: ... + >>> "pyspark/util.py" in exc_info + True + + If the traceback is simplified with this method, it hides the current package file name: + + >>> exc_info = None + >>> try: + ... fail_on_stopiteration(dummy_module.raise_stop_iteration)() + ... except Exception as e: + ... tb = try_simplify_traceback(sys.exc_info()[-1]) + ... e.__cause__ = None + ... exc_info = "".join( + ... traceback.format_exception( + ... type(e), e, try_simplify_traceback(skip_doctest_traceback(tb)))) + >>> print(exc_info) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + RuntimeError: ... + >>> "pyspark/util.py" in exc_info + False + + In the case below, the traceback contains the current package in the middle. + In this case, it just hides the top occurrence only. + + >>> exc_info = None + >>> try: + ... fail_on_stopiteration(dummy_module.simple_wrapper( + ... fail_on_stopiteration(dummy_module.raise_stop_iteration)))() + ... except Exception as e: + ... tb = sys.exc_info()[-1] + ... e.__cause__ = None + ... exc_info_a = "".join( + ... traceback.format_exception(type(e), e, tb)) + ... exc_info_b = "".join( + ... traceback.format_exception( + ... type(e), e, try_simplify_traceback(skip_doctest_traceback(tb)))) + >>> exc_info_a.count("pyspark/util.py") + 2 + >>> exc_info_b.count("pyspark/util.py") + 1 + """ + if "pypy" in platform.python_implementation().lower(): + # Traceback modification is not supported with PyPy in PySpark. + return None + if sys.version_info[:2] < (3, 7): + # Traceback creation is not supported Python < 3.7. + # See https://bugs.python.org/issue30579. + return None + + import pyspark + + root = os.path.dirname(pyspark.__file__) + tb_next = None + new_tb = None + pairs = zip(walk_tb(tb), traceback.extract_tb(tb)) + last_seen = [] + + for cur_tb, cur_frame in pairs: + if not cur_frame.filename.startswith(root): + # Filter the stacktrace from the PySpark source itself. + last_seen = [(cur_tb, cur_frame)] + break + + for cur_tb, cur_frame in reversed(list(itertools.chain(last_seen, pairs))): + # Once we have seen the file names outside, don't skip. 
+ new_tb = types.TracebackType( + tb_next=tb_next, + tb_frame=cur_tb.tb_frame, + tb_lasti=cur_tb.tb_frame.f_lasti, + tb_lineno=cur_tb.tb_frame.f_lineno) + tb_next = new_tb + return new_tb + + def _print_missing_jar(lib_name, pkg_name, jar_name, spark_version): print(""" ________________________________________________________________________________________________ @@ -183,6 +325,8 @@ def __del__(self): if __name__ == "__main__": import doctest - (failure_count, test_count) = doctest.testmod() - if failure_count: - sys.exit(-1) + + if "pypy" not in platform.python_implementation().lower() and sys.version_info[:2] >= (3, 7): + (failure_count, test_count) = doctest.testmod() + if failure_count: + sys.exit(-1) diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 704e96ba0666b..1b09d327a5dfe 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -44,7 +44,7 @@ from pyspark.sql.pandas.serializers import ArrowStreamPandasUDFSerializer, CogroupUDFSerializer from pyspark.sql.pandas.types import to_arrow_type from pyspark.sql.types import StructType -from pyspark.util import fail_on_stopiteration +from pyspark.util import fail_on_stopiteration, try_simplify_traceback from pyspark import shuffle pickleSer = PickleSerializer() @@ -607,17 +607,19 @@ def process(): # reuse. TaskContext._setTaskContext(None) BarrierTaskContext._setTaskContext(None) - except BaseException: + except BaseException as e: try: - exc_info = traceback.format_exc() - if isinstance(exc_info, bytes): - # exc_info may contains other encoding bytes, replace the invalid bytes and convert - # it back to utf-8 again - exc_info = exc_info.decode("utf-8", "replace").encode("utf-8") - else: - exc_info = exc_info.encode("utf-8") + exc_info = None + if os.environ.get("SPARK_SIMPLIFIED_TRACEBACK", False): + tb = try_simplify_traceback(sys.exc_info()[-1]) + if tb is not None: + e.__cause__ = None + exc_info = "".join(traceback.format_exception(type(e), e, tb)) + if exc_info is None: + exc_info = traceback.format_exc() + write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, outfile) - write_with_length(exc_info, outfile) + write_with_length(exc_info.encode("utf-8"), outfile) except IOError: # JVM close the socket pass diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala index 4ffa8beaf4740..90af9ec299efc 100644 --- a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala @@ -18,17 +18,12 @@ package org.apache.spark.repl import java.io._ -import java.nio.file.Files import scala.tools.nsc.interpreter.SimpleReader -import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkContext, SparkFunSuite} -import org.apache.spark.internal.Logging -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { test("propagation of local properties") { diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 5428fa4ee9df7..f696e93e9cef2 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -28,7 +28,6 @@ import java.util.Collections import 
javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider} import scala.io.Source -import scala.language.implicitConversions import com.google.common.io.Files import org.mockito.ArgumentMatchers.{any, anyString} diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 95d908cec5de0..6566d29d16e91 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -23,7 +23,7 @@ import java.nio.file.Files import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll -import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 9ae48f4da8b05..edeb95fdba684 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -30,7 +30,7 @@ kubernetes - 4.11.1 + 4.12.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala index e8bf8f9c9b505..7e5edd905781a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala @@ -261,12 +261,19 @@ private[spark] object KubernetesUtils extends Logging { isLocalDependency(Utils.resolveURI(resource)) } - def renameMainAppResource(resource: String, conf: SparkConf): String = { + def renameMainAppResource( + resource: String, + conf: Option[SparkConf] = None, + shouldUploadLocal: Boolean): String = { if (isLocalAndResolvable(resource)) { - SparkLauncher.NO_RESOURCE + if (shouldUploadLocal) { + uploadFileUri(resource, conf) + } else { + SparkLauncher.NO_RESOURCE + } } else { resource - } + } } def uploadFileUri(uri: String, conf: Option[SparkConf] = None): String = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 6503bc823ec0d..f5ba261c8f405 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -159,7 +159,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system - Seq(JARS, FILES).foreach { key => + Seq(JARS, FILES, SUBMIT_PYTHON_FILES).foreach { key => val value = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) val resolved = KubernetesUtils.uploadAndTransformFileUris(value, Some(conf.sparkConf)) if (resolved.nonEmpty) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala index ebe44855f1d0d..d49381ba897d4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala @@ -62,7 +62,11 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) } private def configureForJava(pod: SparkPod, res: String): SparkPod = { - val driverContainer = baseDriverContainer(pod, res).build() + // re-write primary resource, app jar is also added to spark.jars by default in SparkSubmit + // no uploading takes place here + val newResName = KubernetesUtils + .renameMainAppResource(resource = res, shouldUploadLocal = false) + val driverContainer = baseDriverContainer(pod, newResName).build() SparkPod(pod.pod, driverContainer) } @@ -73,7 +77,10 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) .withValue(conf.get(PYSPARK_MAJOR_PYTHON_VERSION)) .build()) - val pythonContainer = baseDriverContainer(pod, res) + // re-write primary resource to be the remote one and upload the related file + val newResName = KubernetesUtils + .renameMainAppResource(res, Option(conf.sparkConf), true) + val pythonContainer = baseDriverContainer(pod, newResName) .addAllToEnv(pythonEnvs.asJava) .build() @@ -88,7 +95,7 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) private def baseDriverContainer(pod: SparkPod, resource: String): ContainerBuilder = { // re-write primary resource, app jar is also added to spark.jars by default in SparkSubmit val resolvedResource = if (conf.mainAppResource.isInstanceOf[JavaMainAppResource]) { - KubernetesUtils.renameMainAppResource(resource, conf.sparkConf) + KubernetesUtils.renameMainAppResource(resource, Option(conf.sparkConf), false) } else { resource } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala index d78f04dcc40e6..222e19c5e20f1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala index f4e1a3a326729..9de7686c8a9c0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} 
+import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, VolumeBuilder, VolumeMountBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index c029b248f7ea4..863cb28bc827c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -35,7 +35,6 @@ import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference import org.apache.spark.internal.Logging import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import org.apache.spark.resource.ResourceProfile -import org.apache.spark.scheduler.cluster.SchedulerBackendUtils import org.apache.spark.util.{Clock, Utils} private[spark] class ExecutorPodsAllocator( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index cc5c2f4b6325d..151e98ba17e3b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -25,7 +25,6 @@ import io.fabric8.kubernetes.client.Config import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} import org.apache.spark.util.{SystemClock, ThreadUtils} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/Fabric8Aliases.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/Fabric8Aliases.scala index 23055813a9786..5b36bd144d0f9 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/Fabric8Aliases.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/Fabric8Aliases.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.k8s import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapList, DoneableConfigMap, DoneablePod, HasMetadata, Pod, PodList} -import io.fabric8.kubernetes.client.{Watch, Watcher} +import io.fabric8.kubernetes.client.Watch import io.fabric8.kubernetes.client.dsl.{FilterWatchListDeletable, MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource, Resource} object Fabric8Aliases { @@ -25,9 +25,9 @@ object Fabric8Aliases { type CONFIG_MAPS = MixedOperation[ ConfigMap, ConfigMapList, DoneableConfigMap, Resource[ConfigMap, DoneableConfigMap]] type LABELED_PODS = FilterWatchListDeletable[ - Pod, PodList, java.lang.Boolean, Watch, Watcher[Pod]] + Pod, PodList, java.lang.Boolean, Watch] type LABELED_CONFIG_MAPS = FilterWatchListDeletable[ - ConfigMap, ConfigMapList, java.lang.Boolean, Watch, Watcher[ConfigMap]] + ConfigMap, ConfigMapList, 
java.lang.Boolean, Watch] type SINGLE_POD = PodResource[Pod, DoneablePod] type RESOURCE_LIST = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ HasMetadata, Boolean] diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala index 83d9481e6f2b0..0567f32c23134 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala @@ -21,7 +21,6 @@ import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit.{JavaMainAppResource, MainAppResource} /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala index 7c231586af935..ef57a4b861508 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.k8s import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} import org.apache.spark.SparkFunSuite diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index 26bd317de8ec6..4d4c4baeb12c0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -26,7 +26,6 @@ import org.mockito.Mockito.{mock, never, verify, when} import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.k8s._ import org.apache.spark.internal.config.ConfigEntry abstract class PodBuilderSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index 6a7366e9c6b7a..a44d465e35087 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -20,11 +20,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ -import org.apache.spark.internal.config._ -import org.apache.spark.util.Utils class DriverCommandFeatureStepSuite extends SparkFunSuite { diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala index 18afd10395566..413371d056b26 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.ManualClock diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala index e1c01dbdc7358..c078e69b8a14b 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala @@ -27,7 +27,6 @@ import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.util.{SparkConfWithEnv, Utils} class HadoopConfDriverFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala index 41ca3a94ce7a7..094fcb39782f4 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala @@ -26,14 +26,13 @@ import com.google.common.io.Files import io.fabric8.kubernetes.api.model.{ConfigMap, Secret} import org.apache.commons.codec.binary.Base64 import org.apache.hadoop.io.Text -import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.internal.config._ import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index bbb89fd0a1c24..95ee37e3daa41 100644 --- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.k8s._ class MountVolumesFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 528b755c41605..8401f7102ad8e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -29,7 +29,7 @@ import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSpec, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSpec} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 5274c0579eb05..258d3dfc3df9d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -28,7 +28,7 @@ 1.3.0 - 4.11.1 + 4.12.0 kubernetes-integration-tests diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties b/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties new file mode 100644 index 0000000000000..d3e13d8542ba1 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This log4j config file is for integration test SparkConfPropagateSuite. 
+log4j.rootCategory=DEBUG, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c: %m%n diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index e712b95cdbcea..8f6e9cd8af740 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -30,6 +30,7 @@ import org.scalatest.time.{Minutes, Span} import org.apache.spark.SparkException import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, FILE_CONTENTS, HOST_PATH} import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, MinikubeTag, TIMEOUT} +import org.apache.spark.deploy.k8s.integrationtest.Utils.getExamplesJarName import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => @@ -120,16 +121,18 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => .endSpec() .build() - kubernetesTestComponents + // try until the service from a previous test is deleted + Eventually.eventually(TIMEOUT, INTERVAL) (kubernetesTestComponents .kubernetesClient .services() - .create(minioService) + .create(minioService)) - kubernetesTestComponents + // try until the stateful set of a previous test is deleted + Eventually.eventually(TIMEOUT, INTERVAL) (kubernetesTestComponents .kubernetesClient .apps() .statefulSets() - .create(minioStatefulSet) + .create(minioStatefulSet)) } private def deleteMinioStorage(): Unit = { @@ -138,47 +141,52 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => .apps() .statefulSets() .withName(cName) + .withGracePeriod(0) .delete() kubernetesTestComponents .kubernetesClient .services() .withName(svcName) + .withGracePeriod(0) .delete() } test("Launcher client dependencies", k8sTestTag, MinikubeTag) { - val packages = if (Utils.isHadoop3) { - "org.apache.hadoop:hadoop-aws:3.2.0" - } else { - "com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.6" - } - val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH) - try { - setupMinioStorage() - val minioUrlStr = getServiceUrl(svcName) - val minioUrl = new URL(minioUrlStr) - val minioHost = minioUrl.getHost - val minioPort = minioUrl.getPort - val examplesJar = Utils.getExamplesJarAbsolutePath(sparkHomeDir) - sparkAppConf - .set("spark.hadoop.fs.s3a.access.key", ACCESS_KEY) - .set("spark.hadoop.fs.s3a.secret.key", SECRET_KEY) - .set("spark.hadoop.fs.s3a.connection.ssl.enabled", "false") - .set("spark.hadoop.fs.s3a.endpoint", s"$minioHost:$minioPort") - .set("spark.kubernetes.file.upload.path", s"s3a://$BUCKET") - .set("spark.files", s"$HOST_PATH/$fileName") - .set("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") - .set("spark.jars.packages", packages) - .set("spark.driver.extraJavaOptions", "-Divy.cache.dir=/tmp -Divy.home=/tmp") - createS3Bucket(ACCESS_KEY, SECRET_KEY, minioUrlStr) + tryDepsTest({ + val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH) + 
sparkAppConf.set("spark.files", s"$HOST_PATH/$fileName") + val examplesJar = Utils.getTestFileAbsolutePath(getExamplesJarName(), sparkHomeDir) runSparkRemoteCheckAndVerifyCompletion(appResource = examplesJar, appArgs = Array(fileName), timeout = Option(DEPS_TIMEOUT)) - } finally { - // make sure this always runs - deleteMinioStorage() - } + }) + } + + test("Launcher python client dependencies using a zip file", k8sTestTag, MinikubeTag) { + val inDepsFile = Utils.getTestFileAbsolutePath("py_container_checks.py", sparkHomeDir) + val outDepsFile = s"${inDepsFile.substring(0, inDepsFile.lastIndexOf("."))}.zip" + Utils.createZipFile(inDepsFile, outDepsFile) + testPythonDeps(outDepsFile) + } + + private def testPythonDeps(depsFile: String): Unit = { + tryDepsTest({ + val pySparkFiles = Utils.getTestFileAbsolutePath("pyfiles.py", sparkHomeDir) + setPythonSparkConfProperties(sparkAppConf) + runSparkApplicationAndVerifyCompletion( + appResource = pySparkFiles, + mainClass = "", + expectedLogOnCompletion = Seq( + "Python runtime version check is: True", + "Python environment version check is: True", + "Python runtime version check for executor is: True"), + appArgs = Array("python3"), + driverPodChecker = doBasicDriverPyPodCheck, + executorPodChecker = doBasicExecutorPyPodCheck, + appLocator = appLocator, + isJVM = false, + pyFiles = Option(depsFile)) }) } private def extractS3Key(data: String, key: String): String = { @@ -222,6 +230,48 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => url } } + + private def getServiceHostAndPort(minioUrlStr : String) : (String, Int) = { + val minioUrl = new URL(minioUrlStr) + (minioUrl.getHost, minioUrl.getPort) + } + + private def setCommonSparkConfPropertiesForS3Access( + conf: SparkAppConf, + minioUrlStr: String): Unit = { + val (minioHost, minioPort) = getServiceHostAndPort(minioUrlStr) + val packages = if (Utils.isHadoop3) { + "org.apache.hadoop:hadoop-aws:3.2.0" + } else { + "com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.6" + } + conf.set("spark.hadoop.fs.s3a.access.key", ACCESS_KEY) + .set("spark.hadoop.fs.s3a.secret.key", SECRET_KEY) + .set("spark.hadoop.fs.s3a.connection.ssl.enabled", "false") + .set("spark.hadoop.fs.s3a.endpoint", s"$minioHost:$minioPort") + .set("spark.kubernetes.file.upload.path", s"s3a://$BUCKET") + .set("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") + .set("spark.jars.packages", packages) + .set("spark.driver.extraJavaOptions", "-Divy.cache.dir=/tmp -Divy.home=/tmp") + } + + private def setPythonSparkConfProperties(conf: SparkAppConf): Unit = { + sparkAppConf.set("spark.kubernetes.container.image", pyImage) + .set("spark.kubernetes.pyspark.pythonVersion", "3") + } + + private def tryDepsTest(runTest: => Unit): Unit = { + try { + setupMinioStorage() + val minioUrlStr = getServiceUrl(svcName) + createS3Bucket(ACCESS_KEY, SECRET_KEY, minioUrlStr) + setCommonSparkConfPropertiesForS3Access(sparkAppConf, minioUrlStr) + runTest + } finally { + // make sure this always runs + deleteMinioStorage() + } + } } private[spark] object DepsTestsSuite { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index f1d8217e31b71..cc226b341916d 100644 --- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -41,10 +41,10 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ class KubernetesSuite extends SparkFunSuite - with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite - with PythonTestsSuite with ClientModeTestsSuite with PodTemplateSuite with PVTestsSuite - with DepsTestsSuite with DecommissionSuite with RTestsSuite with Logging with Eventually - with Matchers { + with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SparkConfPropagateSuite + with SecretsTestsSuite with PythonTestsSuite with ClientModeTestsSuite with PodTemplateSuite + with PVTestsSuite with DepsTestsSuite with DecommissionSuite with RTestsSuite with Logging + with Eventually with Matchers { import KubernetesSuite._ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index af980f0494369..0bf01e6b66427 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -21,7 +21,6 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer import io.fabric8.kubernetes.client.DefaultKubernetesClient import org.scalatest.concurrent.Eventually diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala new file mode 100644 index 0000000000000..6d15201d19796 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest + +import java.io.{BufferedWriter, File, FileWriter} +import java.net.URL + +import scala.io.{BufferedSource, Source} + +import io.fabric8.kubernetes.api.model._ + +import org.apache.spark.internal.config + +private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => + import KubernetesSuite.{k8sTestTag, SPARK_PI_MAIN_CLASS} + + test("Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j.properties", + k8sTestTag) { + val loggingConfigFileName = "log-config-test-log4j.properties" + val loggingConfURL: URL = this.getClass.getClassLoader.getResource(loggingConfigFileName) + assert(loggingConfURL != null, "Logging configuration file not available.") + + val content = Source.createBufferedSource(loggingConfURL.openStream()).getLines().mkString("\n") + val logConfFilePath = s"${sparkHomeDir.toFile}/conf/log4j.properties" + + try { + val writer = new BufferedWriter(new FileWriter(logConfFilePath)) + writer.write(content) + writer.close() + + sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug") + + runSparkApplicationAndVerifyCompletion( + appResource = containerLocalSparkDistroExamplesJar, + mainClass = SPARK_PI_MAIN_CLASS, + expectedLogOnCompletion = (Seq("DEBUG", + s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties", + "Pi is roughly 3")), + appArgs = Array.empty[String], + driverPodChecker = doBasicDriverPodCheck, + executorPodChecker = doBasicExecutorPodCheck, + appLocator = appLocator, + isJVM = true) + } finally { + new File(logConfFilePath).delete() + } + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index 9bcd6e9503532..ee44cb5f85835 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -16,21 +16,21 @@ */ package org.apache.spark.deploy.k8s.integrationtest -import java.io.{Closeable, File, PrintWriter} +import java.io.{Closeable, File, FileInputStream, FileOutputStream, PrintWriter} import java.nio.file.{Files, Path} import java.util.concurrent.CountDownLatch +import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ -import scala.util.Try import io.fabric8.kubernetes.client.dsl.ExecListener import okhttp3.Response +import org.apache.commons.compress.utils.IOUtils import org.apache.commons.io.output.ByteArrayOutputStream import org.apache.hadoop.util.VersionInfo import org.apache.spark.{SPARK_VERSION, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.util.{Utils => SparkUtils} object Utils extends Logging { @@ -114,23 +114,22 @@ object Utils extends Logging { filename } - def getExamplesJarAbsolutePath(sparkHomeDir: Path): String = { - val jarName = getExamplesJarName() - val jarPathsFound = Files + def getTestFileAbsolutePath(fileName: String, sparkHomeDir: Path): String = { + val filePathsFound = Files .walk(sparkHomeDir) .filter(Files.isRegularFile(_)) - .filter((f: Path) => {f.toFile.getName == jarName}) + .filter((f: Path) => {f.toFile.getName == fileName}) // we should not have more than one here under current test build dir // we only need one though - val jarPath = 
jarPathsFound + val filePath = filePathsFound .iterator() .asScala .map(_.toAbsolutePath.toString) .toArray .headOption - jarPath match { - case Some(jar) => jar - case _ => throw new SparkException(s"No valid $jarName file was found " + + filePath match { + case Some(file) => file + case _ => throw new SparkException(s"No valid $fileName file was found " + s"under spark home test dir ${sparkHomeDir.toAbsolutePath}!") } } @@ -138,4 +137,16 @@ object Utils extends Logging { def isHadoop3(): Boolean = { VersionInfo.getVersion.startsWith("3") } + + def createZipFile(inFile: String, outFile: String): Unit = { + val fileToZip = new File(inFile) + val fis = new FileInputStream(fileToZip) + val fos = new FileOutputStream(outFile) + val zipOut = new ZipOutputStream(fos) + val zipEntry = new ZipEntry(fileToZip.getName) + zipOut.putNextEntry(zipEntry) + IOUtils.copy(fis, zipOut) + IOUtils.closeQuietly(fis) + IOUtils.closeQuietly(zipOut) + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala index be1834c0b5dea..0fbed4a220e68 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.cloud -import java.nio.file.Paths - import io.fabric8.kubernetes.client.{Config, DefaultKubernetesClient} import io.fabric8.kubernetes.client.utils.Utils import org.apache.commons.lang3.StringUtils diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala index e1c0d18b73a2b..bd42f6f05655f 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala @@ -113,6 +113,16 @@ package object config { .stringConf .createOptional + private[spark] val DISPATCHER_QUEUE = + ConfigBuilder("spark.mesos.dispatcher.queue") + .doc("Set the name of the dispatcher queue to which the application is submitted. " + + "The specified queue must be added to the dispatcher " + + "with \"spark.mesos.dispatcher.queue.[QueueName]\". If no queue is specified, then " + + "the application is submitted to the \"default\" queue with 0.0 priority.") + .version("3.1.0") + .stringConf + .createWithDefaultString("default") + private[spark] val DRIVER_LABELS = ConfigBuilder("spark.mesos.driver.labels") .doc("Mesos labels to add to the driver. Labels are free-form key-value pairs. 
Key-value " + diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala index b8c64a28c72cd..97ef153177674 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala @@ -22,7 +22,7 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.apache.spark.deploy.Command -import org.apache.spark.deploy.mesos.MesosDriverDescription +import org.apache.spark.deploy.mesos.{config, MesosDriverDescription} import org.apache.spark.scheduler.cluster.mesos.{MesosClusterRetryState, MesosClusterSubmissionState} import org.apache.spark.ui.{UIUtils, WebUIPage} @@ -153,6 +153,13 @@ private[ui] class DriverPage(parent: MesosClusterUI) extends WebUIPage("driver") Memory{driver.mem} + + Queue + + {driver.conf.get( + "spark.mesos.dispatcher.queue", config.DISPATCHER_QUEUE.defaultValueString)} + + Submitted{UIUtils.formatDate(driver.submissionDate)} diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala index 772906397546c..5c62ddb37684d 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala @@ -23,7 +23,7 @@ import scala.xml.Node import org.apache.mesos.Protos.TaskStatus -import org.apache.spark.deploy.mesos.MesosDriverDescription +import org.apache.spark.deploy.mesos.{config, MesosDriverDescription} import org.apache.spark.deploy.mesos.config._ import org.apache.spark.scheduler.cluster.mesos.MesosClusterSubmissionState import org.apache.spark.ui.{UIUtils, WebUIPage} @@ -36,7 +36,7 @@ private[mesos] class MesosClusterPage(parent: MesosClusterUI) extends WebUIPage( val driverHeader = Seq("Driver ID") val historyHeader = historyServerURL.map(url => Seq("History")).getOrElse(Nil) - val submissionHeader = Seq("Submit Date", "Main Class", "Driver Resources") + val submissionHeader = Seq("Queue", "Submit Date", "Main Class", "Driver Resources") val sandboxHeader = Seq("Sandbox") val queuedHeaders = driverHeader ++ submissionHeader @@ -69,6 +69,10 @@ private[mesos] class MesosClusterPage(parent: MesosClusterUI) extends WebUIPage( val id = submission.submissionId {id} + + {submission.conf.get( + "spark.mesos.dispatcher.queue", config.DISPATCHER_QUEUE.defaultValueString)} + {UIUtils.formatDate(submission.submissionDate)} {submission.command.mainClass} cpus: {submission.cores}, mem: {submission.mem} @@ -99,6 +103,10 @@ private[mesos] class MesosClusterPage(parent: MesosClusterUI) extends WebUIPage( {id} {historyCol} + + {state.driverDescription.conf.get( + "spark.mesos.dispatcher.queue", config.DISPATCHER_QUEUE.defaultValueString)} + {UIUtils.formatDate(state.driverDescription.submissionDate)} {state.driverDescription.command.mainClass} cpus: {state.driverDescription.cores}, mem: {state.driverDescription.mem} diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala index c0cdcda14291f..e260fb8e25f4c 100644 --- 
a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala @@ -20,7 +20,6 @@ package org.apache.spark.deploy.mesos.ui import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.scheduler.cluster.mesos.MesosClusterScheduler import org.apache.spark.ui.{SparkUI, WebUI} -import org.apache.spark.ui.JettyUtils._ /** * UI that displays driver results from the [[org.apache.spark.deploy.mesos.MesosClusterDispatcher]] diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 39168a5e3c7a5..b18737cf6126d 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -21,6 +21,7 @@ import java.io.File import java.util.{Collections, Date, List => JList} import scala.collection.JavaConverters._ +import scala.collection.immutable import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -131,6 +132,8 @@ private[spark] class MesosClusterScheduler( private val queuedCapacity = conf.get(config.MAX_DRIVERS) private val retainedDrivers = conf.get(config.RETAINED_DRIVERS) private val maxRetryWaitTime = conf.get(config.CLUSTER_RETRY_WAIT_MAX_SECONDS) + private val queues: immutable.Map[String, Float] = + conf.getAllWithPrefix("spark.mesos.dispatcher.queue.").map(t => (t._1, t._2.toFloat)).toMap private val schedulerState = engineFactory.createEngine("scheduler") private val stateLock = new Object() // Keyed by submission id @@ -144,7 +147,19 @@ private[spark] class MesosClusterScheduler( // state of the tasks from Mesos. Keyed by task Id. 
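As context for the ordering change just below: the weights read via getAllWithPrefix("spark.mesos.dispatcher.queue.") pair with the per-submission "spark.mesos.dispatcher.queue" setting documented earlier in this patch. A minimal sketch of how the two sides line up (queue names and weights here are illustrative only, they are not mandated by the patch):

  // Illustrative sketch, not part of this patch.
  import org.apache.spark.SparkConf

  // Dispatcher side: declare weighted queues.
  val dispatcherConf = new SparkConf()
    .set("spark.mesos.dispatcher.queue.ROUTINE", "1.0")
    .set("spark.mesos.dispatcher.queue.URGENT", "2.0")

  // Submission side: route a driver to a declared queue. An unknown queue name fails with
  // NoSuchElementException; omitting the setting uses the "default" queue at priority 0.0,
  // so URGENT drivers are dequeued before ROUTINE ones, which in turn beat default-queue drivers.
  val submissionConf = Map("spark.mesos.dispatcher.queue" -> "URGENT")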
private val pendingRecover = new mutable.HashMap[String, AgentID]() // Stores all the submitted drivers that hasn't been launched, keyed by submission id - private val queuedDrivers = new ArrayBuffer[MesosDriverDescription]() + // and sorted by priority, then by submission date + private val driverOrdering = new Ordering[MesosDriverDescription] { + override def compare(x: MesosDriverDescription, y: MesosDriverDescription): Int = { + val xp = getDriverPriority(x) + val yp = getDriverPriority(y) + if (xp != yp) { + xp compare yp + } else { + y.submissionDate.compareTo(x.submissionDate) + } + } + } + private val queuedDrivers = new mutable.TreeSet[MesosDriverDescription]()(driverOrdering.reverse) // All supervised drivers that are waiting to retry after termination, keyed by submission id private val pendingRetryDrivers = new ArrayBuffer[MesosDriverDescription]() private val queuedDriversState = engineFactory.createEngine("driverQueue") @@ -374,6 +389,16 @@ private[spark] class MesosClusterScheduler( s"${frameworkId}-${desc.submissionId}${retries}" } + private[mesos] def getDriverPriority(desc: MesosDriverDescription): Float = { + val defaultQueueName = config.DISPATCHER_QUEUE.defaultValueString + val queueName = desc.conf.get("spark.mesos.dispatcher.queue", defaultQueueName) + if (queueName != defaultQueueName) { + queues.getOrElse(queueName, throw new NoSuchElementException(queueName)) + } else { + 0.0f + } + } + private def getDriverTaskId(desc: MesosDriverDescription): String = { val sId = desc.submissionId desc.retryState.map(state => sId + s"${RETRY_SEP}${state.retries.toString}").getOrElse(sId) @@ -710,7 +735,7 @@ private[spark] class MesosClusterScheduler( } private def copyBuffer( - buffer: ArrayBuffer[MesosDriverDescription]): ArrayBuffer[MesosDriverDescription] = { + buffer: TraversableOnce[MesosDriverDescription]): ArrayBuffer[MesosDriverDescription] = { val newBuffer = new ArrayBuffer[MesosDriverDescription](buffer.size) buffer.copyToBuffer(newBuffer) newBuffer @@ -827,13 +852,13 @@ private[spark] class MesosClusterScheduler( status: Int): Unit = {} private def removeFromQueuedDrivers(subId: String): Boolean = { - val index = queuedDrivers.indexWhere(_.submissionId == subId) - if (index != -1) { - queuedDrivers.remove(index) + val matchOption = queuedDrivers.find(_.submissionId == subId) + if (matchOption.isEmpty) { + false + } else { + queuedDrivers.remove(matchOption.get) queuedDriversState.expunge(subId) true - } else { - false } } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index bbe1ff495d8a6..efcef09132f5b 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -39,7 +39,7 @@ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalBlockStoreClient import org.apache.spark.resource.ResourceProfile -import org.apache.spark.rpc.{RpcEndpointAddress, RpcEndpointRef} +import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.{ExecutorProcessLost, TaskSchedulerImpl} import 
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala index 981b8e9df1747..a5a2611be3765 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala @@ -17,7 +17,7 @@ package org.apache.spark.scheduler.cluster.mesos -import org.apache.mesos.Protos.{ContainerInfo, Environment, Image, NetworkInfo, Parameter, Secret, +import org.apache.mesos.Protos.{ContainerInfo, Image, NetworkInfo, Parameter, Secret, TaskState => MesosTaskState, Volume} import org.apache.mesos.Protos.ContainerInfo.{DockerInfo, MesosInfo} import org.apache.mesos.Protos.Environment.Variable diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 2be8835f77e36..b5a360167679e 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -29,10 +29,10 @@ import scala.util.control.NonFatal import com.google.common.base.Splitter import com.google.common.io.Files import org.apache.mesos.{MesosSchedulerDriver, Protos, Scheduler, SchedulerDriver} -import org.apache.mesos.Protos.{SlaveID => AgentID, TaskState => MesosTaskState, _} +import org.apache.mesos.Protos.{TaskState => MesosTaskState, _} import org.apache.mesos.Protos.FrameworkInfo.Capability import org.apache.mesos.Protos.Resource.ReservationInfo -import org.apache.mesos.protobuf.{ByteString, GeneratedMessageV3} +import org.apache.mesos.protobuf.GeneratedMessageV3 import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.TaskState diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala index 5ff7f99aadb2f..146a135afd795 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala @@ -603,6 +603,136 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi assert(scheduler.getDriverCommandValue(driverDesc) == expectedCmd) } + test("SPARK-23499: Test dispatcher priority queue with non float value") { + val conf = new SparkConf() + conf.set("spark.mesos.dispatcher.queue.ROUTINE", "1.0") + conf.set("spark.mesos.dispatcher.queue.URGENT", "abc") + conf.set("spark.mesos.dispatcher.queue.EXCEPTIONAL", "3.0") + assertThrows[NumberFormatException] { + setScheduler(conf.getAll.toMap) + } + } + + test("SPARK-23499: Get driver priority") { + val conf = new SparkConf() + conf.set("spark.mesos.dispatcher.queue.ROUTINE", "1.0") + conf.set("spark.mesos.dispatcher.queue.URGENT", "2.0") + conf.set("spark.mesos.dispatcher.queue.EXCEPTIONAL", "3.0") + 
setScheduler(conf.getAll.toMap) + + val mem = 1000 + val cpu = 1 + + // Test queue not declared in scheduler + var desc = new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.dispatcher.queue" -> "dummy"), + "s1", + new Date()) + + assertThrows[NoSuchElementException] { + scheduler.getDriverPriority(desc) + } + + // Test with no specified queue + desc = new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map[String, String](), + "s2", + new Date()) + + assert(scheduler.getDriverPriority(desc) == 0.0f) + + // Test with "default" queue specified + desc = new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.dispatcher.queue" -> "default"), + "s3", + new Date()) + + assert(scheduler.getDriverPriority(desc) == 0.0f) + + // Test queue declared in scheduler + desc = new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.dispatcher.queue" -> "ROUTINE"), + "s4", + new Date()) + + assert(scheduler.getDriverPriority(desc) == 1.0f) + + // Test other queue declared in scheduler + desc = new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.dispatcher.queue" -> "URGENT"), + "s5", + new Date()) + + assert(scheduler.getDriverPriority(desc) == 2.0f) + } + + test("SPARK-23499: Can queue drivers with priority") { + val conf = new SparkConf() + conf.set("spark.mesos.dispatcher.queue.ROUTINE", "1.0") + conf.set("spark.mesos.dispatcher.queue.URGENT", "2.0") + conf.set("spark.mesos.dispatcher.queue.EXCEPTIONAL", "3.0") + setScheduler(conf.getAll.toMap) + + val mem = 1000 + val cpu = 1 + + val response0 = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", 100, 1, true, command, + Map("spark.mesos.dispatcher.queue" -> "ROUTINE"), "s0", new Date())) + assert(response0.success) + + val response1 = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", 100, 1, true, command, + Map[String, String](), "s1", new Date())) + assert(response1.success) + + val response2 = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", 100, 1, true, command, + Map("spark.mesos.dispatcher.queue" -> "EXCEPTIONAL"), "s2", new Date())) + assert(response2.success) + + val response3 = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", 100, 1, true, command, + Map("spark.mesos.dispatcher.queue" -> "URGENT"), "s3", new Date())) + assert(response3.success) + + val state = scheduler.getSchedulerState() + val queuedDrivers = state.queuedDrivers.toList + assert(queuedDrivers(0).submissionId == response2.submissionId) + assert(queuedDrivers(1).submissionId == response3.submissionId) + assert(queuedDrivers(2).submissionId == response0.submissionId) + assert(queuedDrivers(3).submissionId == response1.submissionId) + } + + test("SPARK-23499: Can queue drivers with negative priority") { + val conf = new SparkConf() + conf.set("spark.mesos.dispatcher.queue.LOWER", "-1.0") + setScheduler(conf.getAll.toMap) + + val mem = 1000 + val cpu = 1 + + val response0 = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", 100, 1, true, command, + Map("spark.mesos.dispatcher.queue" -> "LOWER"), "s0", new Date())) + assert(response0.success) + + val response1 = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", 100, 1, true, command, + Map[String, String](), "s1", new Date())) + assert(response1.success) + + val state = scheduler.getSchedulerState() + val queuedDrivers = state.queuedDrivers.toList + assert(queuedDrivers(0).submissionId == 
response1.submissionId) + assert(queuedDrivers(1).submissionId == response0.submissionId) + } + private def launchDriverTask(addlSparkConfVars: Map[String, String]): List[TaskInfo] = { setScheduler() val mem = 1000 diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index d9262bbac6586..ede39063cf1bd 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -19,12 +19,11 @@ package org.apache.spark.deploy.yarn import java.io.File import java.nio.ByteBuffer -import java.util.{Collections, Locale} +import java.util.Collections import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap, ListBuffer} -import org.apache.hadoop.HadoopIllegalArgumentException import org.apache.hadoop.fs.Path import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.security.UserGroupInformation @@ -40,7 +39,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils -import org.apache.spark.resource.ResourceProfile import org.apache.spark.util.Utils private[yarn] class ExecutorRunnable( diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala index 5640f7ede33df..7ac5beac76e20 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap, Set} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.yarn.api.records.{ContainerId, Resource} +import org.apache.hadoop.yarn.api.records.ContainerId import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest import org.apache.spark.SparkConf diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 0273de10993eb..09766bf97d8f3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -26,7 +26,6 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, P import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.{SecurityManager, SparkConf} -import org.apache.spark.internal.config._ import org.apache.spark.launcher.YarnCommandBuilderUtils import org.apache.spark.resource.ExecutorResourceRequest import org.apache.spark.util.Utils diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala index 0c3d080cca254..d000287cb7a96 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala +++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.launcher -import scala.collection.JavaConverters._ -import scala.collection.mutable.ListBuffer import scala.util.Properties /** diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala index 7f8dd590545c6..5b762f606112c 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.matchers.should.Matchers._ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.{ResetSystemProperties, Utils} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java new file mode 100644 index 0000000000000..cdfa082ced317 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.connector.catalog; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.DataType; + +/** + * Interface for a metadata column. + *
<p>
+ * A metadata column can expose additional metadata about a row. For example, rows from Kafka can + * use metadata columns to expose a message's topic, partition number, and offset. + *
<p>
+ * A metadata column could also be the result of a transform applied to a value in the row. For + * example, a partition value produced by bucket(id, 16) could be exposed by a metadata column. In + * this case, {@link #transform()} should return a non-null {@link Transform} that produced the + * metadata column's values. + */ +@Evolving +public interface MetadataColumn { + /** + * The name of this metadata column. + * + * @return a String name + */ + String name(); + + /** + * The data type of values in this metadata column. + * + * @return a {@link DataType} + */ + DataType dataType(); + + /** + * @return whether values produced by this metadata column may be null + */ + default boolean isNullable() { + return true; + } + + /** + * Documentation for this metadata column, or null. + * + * @return a documentation String + */ + default String comment() { + return null; + } + + /** + * The {@link Transform} used to produce this metadata column from data rows, or null. + * + * @return a {@link Transform} used to produce the column's values, or null if there isn't one + */ + default Transform transform() { + return null; + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java new file mode 100644 index 0000000000000..208abfc302582 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.connector.catalog; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +/** + * An interface for exposing data columns for a table that are not in the table schema. For example, + * a file source could expose a "file" column that contains the path of the file that contained each + * row. + *
<p>
+ * The columns returned by {@link #metadataColumns()} may be passed as {@link StructField} in + * requested projections. Sources that implement this interface and column projection using + * {@link SupportsPushDownRequiredColumns} must accept metadata fields passed to + * {@link SupportsPushDownRequiredColumns#pruneColumns(StructType)}. + *
<p>
+ * If a table column and a metadata column have the same name, the metadata column will never be + * requested. It is recommended that Table implementations reject data column names that conflict + * with metadata column names. + */ +@Evolving +public interface SupportsMetadataColumns extends Table { + /** + * Metadata columns that are supported by this {@link Table}. *
<p>
+ * The columns returned by this method may be passed as {@link StructField} in requested + * projections using {@link SupportsPushDownRequiredColumns#pruneColumns(StructType)}. + *
<p>
+ * If a table column and a metadata column have the same name, the metadata column will never be + * requested and is ignored. It is recommended that Table implementations reject data column names + * that conflict with metadata column names. + * + * @return an array of {@link MetadataColumn} + */ + MetadataColumn[] metadataColumns(); +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SQLConfHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SQLConfHelper.scala new file mode 100644 index 0000000000000..cee35cdb8d840 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SQLConfHelper.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst + +import org.apache.spark.sql.internal.SQLConf + +/** + * Trait for getting the active SQLConf. + */ +trait SQLConfHelper { + + /** + * The active config object within the current scope. + * See [[SQLConf.get]] for more information. + */ + def conf: SQLConf = SQLConf.get +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index c65e181181e83..53c7f17ee6b2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} -import org.apache.spark.util.Utils /** @@ -894,10 +893,6 @@ trait ScalaReflection extends Logging { import universe._ - // The Predef.Map is scala.collection.immutable.Map. - // Since the map values can be mutable, we explicitly import scala.collection.Map at here. - import scala.collection.Map - /** * Any codes calling `scala.reflect.api.Types.TypeApi.<:<` should be wrapped by this method to * clean up the Scala reflection garbage automatically. 
Otherwise, it will leak some objects to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 690d66bec890d..8d95d8cf49d45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -58,15 +58,14 @@ import org.apache.spark.util.Utils */ object SimpleAnalyzer extends Analyzer( new CatalogManager( - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true), FakeV2SessionCatalog, new SessionCatalog( new InMemoryCatalog, - EmptyFunctionRegistry, - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) { + EmptyFunctionRegistry) { override def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean): Unit = {} - }), - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) + })) { + override def resolver: Resolver = caseSensitiveResolution +} object FakeV2SessionCatalog extends TableCatalog { private def fail() = throw new UnsupportedOperationException @@ -130,10 +129,8 @@ object AnalysisContext { * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and * [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]]. */ -class Analyzer( - override val catalogManager: CatalogManager, - conf: SQLConf) - extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog { +class Analyzer(override val catalogManager: CatalogManager) + extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog with SQLConfHelper { private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog @@ -144,10 +141,8 @@ class Analyzer( override def isView(nameParts: Seq[String]): Boolean = v1SessionCatalog.isView(nameParts) // Only for tests. - def this(catalog: SessionCatalog, conf: SQLConf) = { - this( - new CatalogManager(conf, FakeV2SessionCatalog, catalog), - conf) + def this(catalog: SessionCatalog) = { + this(new CatalogManager(FakeV2SessionCatalog, catalog)) } def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { @@ -226,6 +221,7 @@ class Analyzer( ResolveRelations :: ResolveTables :: ResolvePartitionSpec :: + AddMetadataColumns :: ResolveReferences :: ResolveCreateNamedStruct :: ResolveDeserializer :: @@ -921,6 +917,29 @@ class Analyzer( } } + /** + * Adds metadata columns to output for child relations when nodes are missing resolved attributes. + * + * References to metadata columns are resolved using columns from [[LogicalPlan.metadataOutput]], + * but the relation's output does not include the metadata columns until the relation is replaced + * using [[DataSourceV2Relation.withMetadataColumns()]]. Unless this rule adds metadata to the + * relation's output, the analyzer will detect that nothing produces the columns. + * + * This rule only adds metadata columns when a node is resolved but is missing input from its + * children. This ensures that metadata columns are not added to the plan unless they are used. By + * checking only resolved nodes, this ensures that * expansion is already done so that metadata + * columns are not accidentally selected by *. 
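To make the rule described above concrete, a hedged sketch of a v2 source exposing a metadata column through the MetadataColumn and SupportsMetadataColumns interfaces added earlier in this diff (the table and column names are hypothetical; the remaining Table methods are stubbed):

  // Illustrative sketch, not part of this patch; names are hypothetical.
  import java.util
  import org.apache.spark.sql.connector.catalog.{MetadataColumn, SupportsMetadataColumns, TableCapability}
  import org.apache.spark.sql.types.{DataType, IntegerType, StructType}

  class ExampleTable extends SupportsMetadataColumns {
    override def name(): String = "example"
    override def schema(): StructType = new StructType().add("id", IntegerType)
    override def capabilities(): util.Set[TableCapability] = util.Collections.emptySet()
    override def metadataColumns(): Array[MetadataColumn] = Array(
      new MetadataColumn {
        override def name(): String = "_partition"        // hypothetical metadata column
        override def dataType(): DataType = IntegerType
        override def comment(): String = "partition the row came from"
      })
  }

  // With such a table resolved as t, a query like SELECT id, _partition FROM t resolves
  // _partition from metadataOutput, and this rule swaps in
  // DataSourceV2Relation.withMetadataColumns() so the scan actually produces the column,
  // while SELECT * FROM t keeps returning only the declared schema.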
+ */ + object AddMetadataColumns extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { + case node if node.resolved && node.children.nonEmpty && node.missingInput.nonEmpty => + node resolveOperatorsUp { + case rel: DataSourceV2Relation => + rel.withMetadataColumns() + } + } + } + /** * Resolve table relations with concrete relations from v2 catalog. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 6eed152e6dd77..47a45b0e529c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index d3bb72badeb13..deeb8215d22c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, TableCatalog, TableChange} /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 814ea8c9768ae..7dcc6a81b48cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -304,7 +304,7 @@ object UnsupportedOperationChecker extends Logging { case LeftAnti => if (right.isStreaming) { - throwError("Left anti joins with a streaming DataFrame/Dataset " + + throwError(s"$LeftAnti joins with a streaming DataFrame/Dataset " + "on the right are not supported") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 51eb3d033ddc4..2fa6bf0acea67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions._ import 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala index d8062744a4264..9234b58eb9f6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala @@ -16,10 +16,10 @@ */ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ListQuery, TimeZoneAwareExpression} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType /** @@ -47,10 +47,7 @@ object ResolveTimeZone extends Rule[LogicalPlan] { * Mix-in trait for constructing valid [[Cast]] expressions. */ trait CastSupport { - /** - * Configuration used to create a valid cast expression. - */ - def conf: SQLConf + self: SQLConfHelper => /** * Create a Cast expression with the session local time zone. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 83acfb8d4a71c..98bd84fb94bd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -18,11 +18,10 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, SupportsNamespaces, Table, TableCatalog} +import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCatalog} /** * Holds the name of a namespace that has yet to be looked up in a catalog. It will be resolved to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala index 65601640fa044..06de023098a1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.Alias import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf /** * This file defines view types and analysis rules related to views. @@ -54,8 +53,6 @@ import org.apache.spark.sql.internal.SQLConf * completely resolved during the batch of Resolution. 
*/ object EliminateView extends Rule[LogicalPlan] with CastSupport { - override def conf: SQLConf = SQLConf.get - override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // The child has the different output attributes with the View operator. Adds a Project over // the child of the view. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index db930cf7890e6..5643bf8b3a9b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.catalog -import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, NoSuchTableException} +import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index c00d51dc3df1f..17ab6664df75c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -61,10 +61,11 @@ class SessionCatalog( externalCatalogBuilder: () => ExternalCatalog, globalTempViewManagerBuilder: () => GlobalTempViewManager, functionRegistry: FunctionRegistry, - conf: SQLConf, hadoopConf: Configuration, parser: ParserInterface, - functionResourceLoader: FunctionResourceLoader) extends Logging { + functionResourceLoader: FunctionResourceLoader, + cacheSize: Int = SQLConf.get.tableRelationCacheSize, + cacheTTL: Long = SQLConf.get.metadataCacheTTL) extends SQLConfHelper with Logging { import SessionCatalog._ import CatalogTypes.TablePartitionSpec @@ -77,18 +78,21 @@ class SessionCatalog( () => externalCatalog, () => new GlobalTempViewManager(conf.getConf(GLOBAL_TEMP_DATABASE)), functionRegistry, - conf, new Configuration(), new CatalystSqlParser(), - DummyFunctionResourceLoader) + DummyFunctionResourceLoader, + conf.tableRelationCacheSize, + conf.metadataCacheTTL) + } + + // For testing only. + def this(externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry) = { + this(externalCatalog, functionRegistry, SQLConf.get) } // For testing only. 
def this(externalCatalog: ExternalCatalog) = { - this( - externalCatalog, - new SimpleFunctionRegistry, - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) + this(externalCatalog, new SimpleFunctionRegistry) } lazy val externalCatalog = externalCatalogBuilder() @@ -136,9 +140,6 @@ class SessionCatalog( } private val tableRelationCache: Cache[QualifiedTableName, LogicalPlan] = { - val cacheSize = conf.tableRelationCacheSize - val cacheTTL = conf.metadataCacheTTL - var builder = CacheBuilder.newBuilder() .maximumSize(cacheSize) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 9c93691ca3b41..ee7216e93ebb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -28,7 +28,7 @@ import org.apache.commons.lang3.StringUtils import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -177,8 +177,7 @@ case class CatalogTablePartition( case class BucketSpec( numBuckets: Int, bucketColumnNames: Seq[String], - sortColumnNames: Seq[String]) { - def conf: SQLConf = SQLConf.get + sortColumnNames: Seq[String]) extends SQLConfHelper { if (numBuckets <= 0 || numBuckets > conf.bucketingMaxBuckets) { throw new AnalysisException( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index b61c4b8d065f2..4cd649b07a5c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * A collection of implicit conversions that create a DSL for constructing catalyst data structures. 
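BucketSpec now obtains `conf` from the same SQLConfHelper mixin, but its validation is unchanged: the bucket count must be positive and no larger than `conf.bucketingMaxBuckets`. A hedged usage sketch with made-up column names:

    import org.apache.spark.sql.catalyst.catalog.BucketSpec

    // Accepted: a positive bucket count within the configured maximum.
    val spec = BucketSpec(numBuckets = 8, bucketColumnNames = Seq("id"), sortColumnNames = Seq("ts"))

    // Rejected at construction time with an AnalysisException, per the check shown above:
    // BucketSpec(numBuckets = 0, bucketColumnNames = Seq("id"), sortColumnNames = Nil)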
@@ -102,6 +103,10 @@ package object dsl { def like(other: Expression, escapeChar: Char = '\\'): Expression = Like(expr, other, escapeChar) def rlike(other: Expression): Expression = RLike(expr, other) + def likeAll(others: Expression*): Expression = + LikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def notLikeAll(others: Expression*): Expression = + NotLikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def contains(other: Expression): Expression = Contains(expr, other) def startsWith(other: Expression): Expression = StartsWith(expr, other) def endsWith(other: Expression): Expression = EndsWith(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 3d5c1855f6975..9ab38044e6a88 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.encoders -import java.io.ObjectInputStream - import scala.reflect.ClassTag import scala.reflect.runtime.universe.{typeTag, TypeTag} @@ -33,7 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, Initial import org.apache.spark.sql.catalyst.optimizer.{ReassignLambdaVariableID, SimplifyCasts} import org.apache.spark.sql.catalyst.plans.logical.{CatalystSerde, DeserializeToObject, LeafNode, LocalRelation} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DataType, ObjectType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{ObjectType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 4af12d61e86d9..5afc308e52ead 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import java.math.{BigDecimal => JavaBigDecimal} import java.time.ZoneId import java.util.Locale import java.util.concurrent.TimeUnit._ @@ -25,6 +24,7 @@ import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.expressions.Cast.{forceNullable, resolvableNullability} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util._ @@ -258,13 +258,18 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit def dataType: DataType + /** + * Returns true iff we can cast `from` type to `to` type. 
+ */ + def canCast(from: DataType, to: DataType): Boolean + override def toString: String = { val ansi = if (ansiEnabled) "ansi_" else "" s"${ansi}cast($child as ${dataType.simpleString})" } override def checkInputDataTypes(): TypeCheckResult = { - if (Cast.canCast(child.dataType, dataType)) { + if (canCast(child.dataType, dataType)) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( @@ -1753,6 +1758,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String copy(timeZoneId = Option(timeZoneId)) override protected val ansiEnabled: Boolean = SQLConf.get.ansiEnabled + + override def canCast(from: DataType, to: DataType): Boolean = if (ansiEnabled) { + AnsiCast.canCast(from, to) + } else { + Cast.canCast(from, to) + } } /** @@ -1770,6 +1781,110 @@ case class AnsiCast(child: Expression, dataType: DataType, timeZoneId: Option[St copy(timeZoneId = Option(timeZoneId)) override protected val ansiEnabled: Boolean = true + + override def canCast(from: DataType, to: DataType): Boolean = AnsiCast.canCast(from, to) +} + +object AnsiCast { + /** + * As per section 6.13 "cast specification" in "Information technology — Database languages " + + * "- SQL — Part 2: Foundation (SQL/Foundation)": + * If the is a , then the valid combinations of TD and SD + * in a are given by the following table. “Y” indicates that the + * combination is syntactically valid without restriction; “M” indicates that the combination + * is valid subject to other Syntax Rules in this Sub- clause being satisfied; and “N” indicates + * that the combination is not valid: + * SD TD + * EN AN C D T TS YM DT BO UDT B RT CT RW + * EN Y Y Y N N N M M N M N M N N + * AN Y Y Y N N N N N N M N M N N + * C Y Y Y Y Y Y Y Y Y M N M N N + * D N N Y Y N Y N N N M N M N N + * T N N Y N Y Y N N N M N M N N + * TS N N Y Y Y Y N N N M N M N N + * YM M N Y N N N Y N N M N M N N + * DT M N Y N N N N Y N M N M N N + * BO N N Y N N N N N Y M N M N N + * UDT M M M M M M M M M M M M M N + * B N N N N N N N N N M Y M N N + * RT M M M M M M M M M M M M N N + * CT N N N N N N N N N M N N M N + * RW N N N N N N N N N N N N N M + * + * Where: + * EN = Exact Numeric + * AN = Approximate Numeric + * C = Character (Fixed- or Variable-Length, or Character Large Object) + * D = Date + * T = Time + * TS = Timestamp + * YM = Year-Month Interval + * DT = Day-Time Interval + * BO = Boolean + * UDT = User-Defined Type + * B = Binary (Fixed- or Variable-Length or Binary Large Object) + * RT = Reference type + * CT = Collection type + * RW = Row type + * + * Spark's ANSI mode follows the syntax rules, except it specially allow the following + * straightforward type conversions which are disallowed as per the SQL standard: + * - Numeric <=> Boolean + * - String <=> Binary + */ + def canCast(from: DataType, to: DataType): Boolean = (from, to) match { + case (fromType, toType) if fromType == toType => true + + case (NullType, _) => true + + case (StringType, _: BinaryType) => true + + case (StringType, BooleanType) => true + case (_: NumericType, BooleanType) => true + + case (StringType, TimestampType) => true + case (DateType, TimestampType) => true + + case (StringType, _: CalendarIntervalType) => true + + case (StringType, DateType) => true + case (TimestampType, DateType) => true + + case (_: NumericType, _: NumericType) => true + case (StringType, _: NumericType) => true + case (BooleanType, _: NumericType) => true + + case (_: NumericType, StringType) => true + case (_: DateType, StringType) => true + case (_: 
TimestampType, StringType) => true + case (_: CalendarIntervalType, StringType) => true + case (BooleanType, StringType) => true + case (BinaryType, StringType) => true + + case (ArrayType(fromType, fn), ArrayType(toType, tn)) => + canCast(fromType, toType) && + resolvableNullability(fn || forceNullable(fromType, toType), tn) + + case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => + canCast(fromKey, toKey) && + (!forceNullable(fromKey, toKey)) && + canCast(fromValue, toValue) && + resolvableNullability(fn || forceNullable(fromValue, toValue), tn) + + case (StructType(fromFields), StructType(toFields)) => + fromFields.length == toFields.length && + fromFields.zip(toFields).forall { + case (fromField, toField) => + canCast(fromField.dataType, toField.dataType) && + resolvableNullability( + fromField.nullable || forceNullable(fromField.dataType, toField.dataType), + toField.nullable) + } + + case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if udt2.acceptsType(udt1) => true + + case _ => false + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 35b192cc5544a..1d23953484046 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -24,9 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -63,7 +61,8 @@ import org.apache.spark.sql.types._ * functions. * - [[NamedExpression]]: An [[Expression]] that is named. * - [[TimeZoneAwareExpression]]: A common base trait for time zone aware expressions. - * - [[SubqueryExpression]]: A base interface for expressions that contain a [[LogicalPlan]]. + * - [[SubqueryExpression]]: A base interface for expressions that contain a + * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. * * - [[LeafExpression]]: an expression that has no child. * - [[UnaryExpression]]: an expression that has one child. 
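A hedged illustration of what the stricter table buys: type combinations that legacy casting accepts but the ANSI rules reject are now refused at analysis time through `canCast`. The expected results below follow directly from the pattern match above:

    import org.apache.spark.sql.catalyst.expressions.{AnsiCast, Cast}
    import org.apache.spark.sql.types._

    Cast.canCast(TimestampType, LongType)      // true  - legacy casting permits it
    AnsiCast.canCast(TimestampType, LongType)  // false - not a valid combination in the ANSI table
    AnsiCast.canCast(StringType, IntegerType)  // true  - allowed, though it may still fail at runtime
    AnsiCast.canCast(StringType, BinaryType)   // true  - one of the documented extensions to the standard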
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala index a2daec0b1ade1..91c9457af7de3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp +import org.apache.spark.sql.internal.SQLConf /** @@ -33,6 +34,15 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable def this(expressions: Seq[Expression], inputSchema: Seq[Attribute]) = this(bindReferences(expressions, inputSchema)) + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions + } + private[this] val buffer = new Array[Any](expressions.size) override def initialize(partitionIndex: Int): Unit = { @@ -76,11 +86,15 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable }.toArray override def apply(input: InternalRow): InternalRow = { + if (subExprEliminationEnabled) { + runtime.setInput(input) + } + var i = 0 while (i < validExprs.length) { - val (expr, ordinal) = validExprs(i) + val (_, ordinal) = validExprs(i) // Store the result into buffer first, to make the projection atomic (needed by aggregation) - buffer(ordinal) = expr.eval(input) + buffer(ordinal) = exprs(ordinal).eval(input) i += 1 } i = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala index 70789dac1d87a..0e71892db666b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -30,6 +31,15 @@ import org.apache.spark.sql.types._ */ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection { + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions + } + private[this] val mutableRow = new SpecificInternalRow(expressions.map(_.dataType)) private[this] val exprsWithWriters = expressions.zipWithIndex.filter { @@ -49,7 +59,7 @@ class 
InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection } } } - (e, f) + (exprs(i), f) } private def generateSafeValueConverter(dt: DataType): Any => Any = dt match { @@ -97,6 +107,10 @@ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection } override def apply(row: InternalRow): InternalRow = { + if (subExprEliminationEnabled) { + runtime.setInput(row) + } + var i = 0 while (i < exprsWithWriters.length) { val (expr, writer) = exprsWithWriters(i) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala index 39a16e917c4a5..f3ca4f06cd372 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala @@ -20,6 +20,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.{UnsafeArrayWriter, UnsafeRowWriter, UnsafeWriter} import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{UserDefinedType, _} import org.apache.spark.unsafe.Platform @@ -33,6 +34,15 @@ import org.apache.spark.unsafe.Platform class InterpretedUnsafeProjection(expressions: Array[Expression]) extends UnsafeProjection { import InterpretedUnsafeProjection._ + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions.toSeq + } + /** Number of (top level) fields in the resulting row. */ private[this] val numFields = expressions.length @@ -63,17 +73,21 @@ class InterpretedUnsafeProjection(expressions: Array[Expression]) extends Unsafe } override def initialize(partitionIndex: Int): Unit = { - expressions.foreach(_.foreach { + exprs.foreach(_.foreach { case n: Nondeterministic => n.initialize(partitionIndex) case _ => }) } override def apply(row: InternalRow): UnsafeRow = { + if (subExprEliminationEnabled) { + runtime.setInput(row) + } + // Put the expression results in the intermediate row. 
var i = 0 while (i < numFields) { - values(i) = expressions(i).eval(row) + values(i) = exprs(i).eval(row) i += 1 } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala index 6f1d9d065ab1a..241c761624b76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala @@ -55,8 +55,8 @@ case class ProjectionOverSchema(schema: StructType) { getProjection(child).map { projection => MapKeys(projection) } case MapValues(child) => getProjection(child).map { projection => MapValues(projection) } - case GetMapValue(child, key) => - getProjection(child).map { projection => GetMapValue(projection, key) } + case GetMapValue(child, key, failOnError) => + getProjection(child).map { projection => GetMapValue(projection, key, failOnError) } case GetStructFieldObject(child, field: StructField) => getProjection(child).map(p => (p, p.dataType)).map { case (projection, projSchema: StructType) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 6e2bd96784b94..0a69d5aa6b9ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType, UserDefinedType} +import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType} import org.apache.spark.util.Utils /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala index adcc4be10687e..f2acb75ea6ac4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala @@ -91,7 +91,7 @@ object SelectedField { } val newField = StructField(field.name, newFieldDataType, field.nullable) selectField(child, Option(ArrayType(struct(newField), containsNull))) - case GetMapValue(child, _) => + case GetMapValue(child, _, _) => // GetMapValue does not select a field from a struct (i.e. prune the struct) so it can't be // the top-level extractor. However it can be part of an extractor chain. val MapType(keyType, _, valueContainsNull) = child.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala new file mode 100644 index 0000000000000..3189d81289903 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions + +import java.util.IdentityHashMap + +import scala.collection.JavaConverters._ + +import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} +import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException} + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.types.DataType + +/** + * This class helps subexpression elimination for interpreted evaluation + * such as `InterpretedUnsafeProjection`. It maintains an evaluation cache. + * This class wraps `ExpressionProxy` around given expressions. The `ExpressionProxy` + * intercepts expression evaluation and loads from the cache first. + */ +class SubExprEvaluationRuntime(cacheMaxEntries: Int) { + // The id assigned to `ExpressionProxy`. `SubExprEvaluationRuntime` will use assigned ids of + // `ExpressionProxy` to decide the equality when loading from cache. `SubExprEvaluationRuntime` + // won't be use by multi-threads so we don't need to consider concurrency here. + private var proxyExpressionCurrentId = 0 + + private[sql] val cache: LoadingCache[ExpressionProxy, ResultProxy] = CacheBuilder.newBuilder() + .maximumSize(cacheMaxEntries) + .build( + new CacheLoader[ExpressionProxy, ResultProxy]() { + override def load(expr: ExpressionProxy): ResultProxy = { + ResultProxy(expr.proxyEval(currentInput)) + } + }) + + private var currentInput: InternalRow = null + + def getEval(proxy: ExpressionProxy): Any = try { + cache.get(proxy).result + } catch { + // Cache.get() may wrap the original exception. See the following URL + // http://google.github.io/guava/releases/14.0/api/docs/com/google/common/cache/ + // Cache.html#get(K,%20java.util.concurrent.Callable) + case e @ (_: UncheckedExecutionException | _: ExecutionError) => + throw e.getCause + } + + /** + * Sets given input row as current row for evaluating expressions. This cleans up the cache + * too as new input comes. + */ + def setInput(input: InternalRow = null): Unit = { + currentInput = input + cache.invalidateAll() + } + + /** + * Recursively replaces expression with its proxy expression in `proxyMap`. + */ + private def replaceWithProxy( + expr: Expression, + proxyMap: IdentityHashMap[Expression, ExpressionProxy]): Expression = { + if (proxyMap.containsKey(expr)) { + proxyMap.get(expr) + } else { + expr.mapChildren(replaceWithProxy(_, proxyMap)) + } + } + + /** + * Finds subexpressions and wraps them with `ExpressionProxy`. 
+ */ + def proxyExpressions(expressions: Seq[Expression]): Seq[Expression] = { + val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions + + expressions.foreach(equivalentExpressions.addExprTree(_)) + + val proxyMap = new IdentityHashMap[Expression, ExpressionProxy] + + val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1) + commonExprs.foreach { e => + val expr = e.head + val proxy = ExpressionProxy(expr, proxyExpressionCurrentId, this) + proxyExpressionCurrentId += 1 + + proxyMap.putAll(e.map(_ -> proxy).toMap.asJava) + } + + // Only adding proxy if we find subexpressions. + if (!proxyMap.isEmpty) { + expressions.map(replaceWithProxy(_, proxyMap)) + } else { + expressions + } + } +} + +/** + * A proxy for an catalyst `Expression`. Given a runtime object `SubExprEvaluationRuntime`, + * when this is asked to evaluate, it will load from the evaluation cache in the runtime first. + */ +case class ExpressionProxy( + child: Expression, + id: Int, + runtime: SubExprEvaluationRuntime) extends Expression { + + final override def dataType: DataType = child.dataType + final override def nullable: Boolean = child.nullable + final override def children: Seq[Expression] = child :: Nil + + // `ExpressionProxy` is for interpreted expression evaluation only. So cannot `doGenCode`. + final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") + + def proxyEval(input: InternalRow = null): Any = child.eval(input) + + override def eval(input: InternalRow = null): Any = runtime.getEval(this) + + override def equals(obj: Any): Boolean = obj match { + case other: ExpressionProxy => this.id == other.id + case _ => false + } + + override def hashCode(): Int = this.id.hashCode() +} + +/** + * A simple wrapper for holding `Any` in the cache of `SubExprEvaluationRuntime`. + */ +case class ResultProxy(result: Any) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9aa827a58d87a..1ff4a93cf0acd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -38,9 +38,8 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData, SQLOrderingUtil} +import org.apache.spark.sql.catalyst.util.{ArrayData, MapData, SQLOrderingUtil} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform @@ -1555,8 +1554,8 @@ object CodeGenerator extends Logging { } /** - * Generates code creating a [[UnsafeArrayData]] or [[GenericArrayData]] based on - * given parameters. + * Generates code creating a [[UnsafeArrayData]] or + * [[org.apache.spark.sql.catalyst.util.GenericArrayData]] based on given parameters. 
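The interpreted projections above only engage this machinery when `SQLConf.get.subexpressionEliminationEnabled` is set. A hedged, self-contained sketch of the proxy flow against the new class; the column layout and expressions are invented for illustration:

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.catalyst.expressions._
    import org.apache.spark.sql.types.IntegerType

    val runtime = new SubExprEvaluationRuntime(cacheMaxEntries = 100)
    val a = BoundReference(0, IntegerType, nullable = false)
    val b = BoundReference(1, IntegerType, nullable = false)

    // `a + b` appears in both expressions, so both occurrences are wrapped in one ExpressionProxy.
    val proxied = runtime.proxyExpressions(Seq(Add(a, b), Multiply(Add(a, b), Literal(2))))

    val row = InternalRow(1, 2)
    runtime.setInput(row)        // switch to the new input row and invalidate the cache
    proxied.map(_.eval(row))     // Add(a, b) is evaluated once; the second use is served from the cache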
* * @param arrayName name of the array to create * @param elementType data type of the elements in source array diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 7404030b661c8..c246d07f189b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions.codegen -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index 070570d8f20b2..27b1f89f70870 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.catalyst.expressions.codegen -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeRow} import org.apache.spark.sql.types.StructType -import org.apache.spark.unsafe.Platform abstract class UnsafeRowJoiner { def join(row1: UnsafeRow, row2: UnsafeRow): UnsafeRow diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index ee98ebf5a8a50..0765bfdd78fa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1911,7 +1911,9 @@ case class ArrayPosition(left: Expression, right: Expression) If `spark.sql.ansi.enabled` is set to true, it throws ArrayIndexOutOfBoundsException for invalid indices. - _FUNC_(map, key) - Returns value for given key, or NULL if the key is not contained in the map + _FUNC_(map, key) - Returns value for given key. The function returns NULL + if the key is not contained in the map and `spark.sql.ansi.enabled` is set to false. + If `spark.sql.ansi.enabled` is set to true, it throws NoSuchElementException instead. 
""", examples = """ Examples: @@ -1931,6 +1933,9 @@ case class ElementAt( @transient private lazy val mapKeyType = left.dataType.asInstanceOf[MapType].keyType + @transient private lazy val mapValueContainsNull = + left.dataType.asInstanceOf[MapType].valueContainsNull + @transient private lazy val arrayContainsNull = left.dataType.asInstanceOf[ArrayType].containsNull @transient private lazy val ordering: Ordering[Any] = TypeUtils.getInterpretedOrdering(mapKeyType) @@ -1989,7 +1994,7 @@ case class ElementAt( override def nullable: Boolean = left.dataType match { case _: ArrayType => computeNullabilityFromArray(left, right, failOnError, nullability) - case _: MapType => true + case _: MapType => if (failOnError) mapValueContainsNull else true } override def nullSafeEval(value: Any, ordinal: Any): Any = doElementAt(value, ordinal) @@ -2022,7 +2027,7 @@ case class ElementAt( } } case _: MapType => - (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering) + (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering, failOnError) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -2069,7 +2074,7 @@ case class ElementAt( """.stripMargin }) case _: MapType => - doGetValueGenCode(ctx, ev, left.dataType.asInstanceOf[MapType]) + doGetValueGenCode(ctx, ev, left.dataType.asInstanceOf[MapType], failOnError) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 363d388692c9f..767650d022200 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -336,7 +336,12 @@ trait GetArrayItemUtil { trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { // todo: current search is O(n), improve it. 
- def getValueEval(value: Any, ordinal: Any, keyType: DataType, ordering: Ordering[Any]): Any = { + def getValueEval( + value: Any, + ordinal: Any, + keyType: DataType, + ordering: Ordering[Any], + failOnError: Boolean): Any = { val map = value.asInstanceOf[MapData] val length = map.numElements() val keys = map.keyArray() @@ -352,14 +357,24 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { } } - if (!found || values.isNullAt(i)) { + if (!found) { + if (failOnError) { + throw new NoSuchElementException(s"Key $ordinal does not exist.") + } else { + null + } + } else if (values.isNullAt(i)) { null } else { values.get(i, dataType) } } - def doGetValueGenCode(ctx: CodegenContext, ev: ExprCode, mapType: MapType): ExprCode = { + def doGetValueGenCode( + ctx: CodegenContext, + ev: ExprCode, + mapType: MapType, + failOnError: Boolean): ExprCode = { val index = ctx.freshName("index") val length = ctx.freshName("length") val keys = ctx.freshName("keys") @@ -368,12 +383,22 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { val values = ctx.freshName("values") val keyType = mapType.keyType val nullCheck = if (mapType.valueContainsNull) { - s" || $values.isNullAt($index)" + s"""else if ($values.isNullAt($index)) { + ${ev.isNull} = true; + } + """ } else { "" } + val keyJavaType = CodeGenerator.javaType(keyType) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + val keyNotFoundBranch = if (failOnError) { + s"""throw new NoSuchElementException("Key " + $eval2 + " does not exist.");""" + } else { + s"${ev.isNull} = true;" + } + s""" final int $length = $eval1.numElements(); final ArrayData $keys = $eval1.keyArray(); @@ -390,9 +415,9 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { } } - if (!$found$nullCheck) { - ${ev.isNull} = true; - } else { + if (!$found) { + $keyNotFoundBranch + } $nullCheck else { ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)}; } """ @@ -405,9 +430,14 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { * * We need to do type checking here as `key` expression maybe unresolved. */ -case class GetMapValue(child: Expression, key: Expression) +case class GetMapValue( + child: Expression, + key: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends GetMapValueUtil with ExtractValue with NullIntolerant { + def this(child: Expression, key: Expression) = this(child, key, SQLConf.get.ansiEnabled) + @transient private lazy val ordering: Ordering[Any] = TypeUtils.getInterpretedOrdering(keyType) @@ -442,10 +472,10 @@ case class GetMapValue(child: Expression, key: Expression) // todo: current search is O(n), improve it. 
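A hedged sketch of the behaviour change at the expression level, using literal-only input so that `eval` needs no input row:

    import org.apache.spark.sql.catalyst.expressions.{GetMapValue, Literal}
    import org.apache.spark.sql.types._

    val m = Literal.create(Map("a" -> 1), MapType(StringType, IntegerType))

    GetMapValue(m, Literal("b"), failOnError = false).eval()  // null (legacy behaviour)
    GetMapValue(m, Literal("b"), failOnError = true).eval()   // throws NoSuchElementException: Key b does not exist.
    GetMapValue(m, Literal("a"), failOnError = true).eval()   // 1, lookups of existing keys are unaffected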
override def nullSafeEval(value: Any, ordinal: Any): Any = { - getValueEval(value, ordinal, keyType, ordering) + getValueEval(value, ordinal, keyType, ordering, failOnError) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - doGetValueGenCode(ctx, ev, child.dataType.asInstanceOf[MapType]) + doGetValueGenCode(ctx, ev, child.dataType.asInstanceOf[MapType], failOnError) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 9fef8e9415e72..4454afb6c099b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicReference import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedException} +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedException} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index f440534745ba1..53d6394d0d1f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -18,14 +18,11 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet -import scala.collection.mutable import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference -import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LeafNode, LogicalPlan, Project} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index c9dd7c7acddde..b4d9921488d5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -20,10 +20,12 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale import java.util.regex.{Matcher, MatchResult, Pattern} +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -178,6 
+180,88 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } +/** + * Optimized version of LIKE ALL, when all pattern values are literal. + */ +abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { + + protected def patterns: Seq[UTF8String] + + protected def isNotLikeAll: Boolean + + override def inputTypes: Seq[DataType] = StringType :: Nil + + override def dataType: DataType = BooleanType + + override def nullable: Boolean = true + + private lazy val hasNull: Boolean = patterns.contains(null) + + private lazy val cache = patterns.filterNot(_ == null) + .map(s => Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'))) + + private lazy val matchFunc = if (isNotLikeAll) { + (p: Pattern, inputValue: String) => !p.matcher(inputValue).matches() + } else { + (p: Pattern, inputValue: String) => p.matcher(inputValue).matches() + } + + override def eval(input: InternalRow): Any = { + val exprValue = child.eval(input) + if (exprValue == null) { + null + } else { + if (cache.forall(matchFunc(_, exprValue.toString))) { + if (hasNull) null else true + } else { + false + } + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val eval = child.genCode(ctx) + val patternClass = classOf[Pattern].getName + val javaDataType = CodeGenerator.javaType(child.dataType) + val pattern = ctx.freshName("pattern") + val valueArg = ctx.freshName("valueArg") + val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) + + val checkNotMatchCode = if (isNotLikeAll) { + s"$pattern.matcher($valueArg.toString()).matches()" + } else { + s"!$pattern.matcher($valueArg.toString()).matches()" + } + + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = false; + |boolean ${ev.value} = true; + |if (${eval.isNull}) { + | ${ev.isNull} = true; + |} else { + | $javaDataType $valueArg = ${eval.value}; + | for ($patternClass $pattern: $patternCache) { + | if ($checkNotMatchCode) { + | ${ev.value} = false; + | break; + | } + | } + | if (${ev.value} && $hasNull) ${ev.isNull} = true; + |} + """.stripMargin) + } +} + +case class LikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { + override def isNotLikeAll: Boolean = false +} + +case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { + override def isNotLikeAll: Boolean = true +} + // scalastyle:off line.contains.tab @ExpressionDescription( usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala index d6adbe83584e3..0d5974af19ac3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.json import org.apache.spark.sql.catalyst.{InternalRow, StructFilters} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 7a21ce254a235..0ff11ca49f3d1 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.types.StructType /** * Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. @@ -71,7 +70,7 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { // out of bounds, mimic the runtime behavior and return null Literal(null, ga.dataType) } - case GetMapValue(CreateMap(elems, _), key) => CaseKeyWhen(key, elems) + case GetMapValue(CreateMap(elems, _), key, _) => CaseKeyWhen(key, elems) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala index bfc36ec477a73..4434c29cbb3c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, CreateStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery, Window} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Window} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e492d01650097..c4b9936fa4c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -85,7 +85,7 @@ abstract class Optimizer(catalogManager: CatalogManager) OptimizeWindowFunctions, CollapseWindow, CombineFilters, - CombineLimits, + EliminateLimits, CombineUnions, // Constant folding and strength reduction TransposeWindow, @@ -377,9 +377,8 @@ object SimpleTestOptimizer extends SimpleTestOptimizer class SimpleTestOptimizer extends Optimizer( new CatalogManager( - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true), FakeV2SessionCatalog, - new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, new SQLConf()))) + new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry))) /** * Remove redundant aliases from a query plan. 
A redundant alias is an alias that does not change @@ -1452,11 +1451,20 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } /** - * Combines two adjacent [[Limit]] operators into one, merging the - * expressions into one single expression. + * This rule optimizes Limit operators by: + * 1. Eliminate [[Limit]] operators if it's child max row <= limit. + * 2. Combines two adjacent [[Limit]] operators into one, merging the + * expressions into one single expression. */ -object CombineLimits extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { +object EliminateLimits extends Rule[LogicalPlan] { + private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { + limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().asInstanceOf[Int] } + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case Limit(l, child) if canEliminate(l, child) => + child + case GlobalLimit(le, GlobalLimit(ne, grandChild)) => GlobalLimit(Least(Seq(ne, le)), grandChild) case LocalLimit(le, LocalLimit(ne, grandChild)) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index 2627202c09c45..15d4561b47a23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf /** * Collapse plans consisting empty local relations generated by [[PruneFilters]]. 
@@ -47,8 +46,6 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper wit private def nullValueProjectList(plan: LogicalPlan): Seq[NamedExpression] = plan.output.map{ a => Alias(cast(Literal(null), a.dataType), a.name)(a.exprId) } - override def conf: SQLConf = SQLConf.get - def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case p: Union if p.children.exists(isEmptyLocalRelation) => val newChildren = p.children.filterNot(isEmptyLocalRelation) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 33b398e11cde9..ef3de4738c75c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFi import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala index 2aa762e2595ad..b65fc7f7e2bde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala @@ -19,18 +19,16 @@ package org.apache.spark.sql.catalyst.optimizer import scala.annotation.tailrec +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.internal.SQLConf /** * Encapsulates star-schema detection logic. 
*/ -object StarSchemaDetection extends PredicateHelper { - - private def conf = SQLConf.get +object StarSchemaDetection extends PredicateHelper with SQLConfHelper { /** * Star schema consists of one or more fact tables referencing a number of dimension diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 76b9bd03f216c..9aa7e3201ab1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import java.time.LocalDate - import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index cb076f6e35184..11532d22204a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.CleanupAliases import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a5b8c118d6c54..23de8ab09dd0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -28,7 +28,7 @@ import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode} import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, FunctionResource, FunctionResourceType} import org.apache.spark.sql.catalyst.expressions._ @@ -51,11 +51,9 @@ import org.apache.spark.util.random.RandomSampler * The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or * TableIdentifier. 
*/ -class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { +class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging { import ParserUtils._ - protected def conf: SQLConf = SQLConf.get - protected def typedVisit[T](ctx: ParseTree): T = { ctx.accept(this).asInstanceOf[T] } @@ -1408,7 +1406,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) => getLikeQuantifierExprs(ctx.expression).reduceLeft(Or) case Some(SqlBaseParser.ALL) => - getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = ctx.expression.asScala.map(expression) + if (expressions.size > SQLConf.get.optimizerLikeAllConversionThreshold && + expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAll or NotLikeAll instead. + val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAll(e, patterns.toSeq) + case _ => NotLikeAll(e, patterns.toSeq) + } + } else { + getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + } case _ => val escapeChar = Option(ctx.escapeChar).map(string).map { str => if (str.length != 1) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 73a58f79ff132..ac3fbbf6b0512 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -23,19 +23,16 @@ import org.antlr.v4.runtime.tree.TerminalNodeImpl import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.Origin -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} /** * Base SQL parsing infrastructure. */ -abstract class AbstractSqlParser extends ParserInterface with Logging { - - protected def conf: SQLConf = SQLConf.get +abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with Logging { /** Creates/Resolves DataType for a given SQL string. 
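At the SQL surface this parser change is semantically transparent; it only changes the plan shape once the number of foldable string patterns exceeds the configured conversion threshold. A hedged example, where `spark` is a SparkSession and `logs`/`message` are hypothetical names:

    // With enough literal patterns, the parser emits a single LikeAll(message, patterns)
    // instead of And(Like(message, p1), And(Like(message, p2), ...)), avoiding very deep
    // expression trees and the StackOverflowError mentioned above.
    spark.sql("""
      SELECT *
      FROM logs
      WHERE message LIKE ALL ('%error%', '%timeout%', '%retry%')
    """)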
*/ override def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index b1884eac27f73..864ca4f57483d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.plans import scala.collection.mutable import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode, TreeNodeTag} import org.apache.spark.sql.internal.SQLConf @@ -35,15 +36,10 @@ import org.apache.spark.sql.types.{DataType, StructType} * The tree traverse APIs like `transform`, `foreach`, `collect`, etc. that are * inherited from `TreeNode`, do not traverse into query plans inside subqueries. */ -abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanType] { +abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] + extends TreeNode[PlanType] with SQLConfHelper { self: PlanType => - /** - * The active config object within the current scope. - * See [[SQLConf.get]] for more information. - */ - def conf: SQLConf = SQLConf.get - def output: Seq[Attribute] /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index d8d18b46bcc74..2c6a716a2ed48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.CheckAnalysis import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} +import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.util.Utils @@ -33,7 +32,7 @@ import org.apache.spark.util.Utils * analyzed flag set to true. * * The analyzer rules should use the various resolve methods, in lieu of the various transform - * methods defined in [[TreeNode]] and [[QueryPlan]]. + * methods defined in [[org.apache.spark.sql.catalyst.trees.TreeNode]] and [[QueryPlan]]. * * To prevent accidental use of the transform methods, this trait also overrides the transform * methods to throw exceptions in test mode, if they are used in the analyzer. @@ -44,7 +43,8 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => /** * Recursively marks all nodes in this plan tree as analyzed. - * This should only be called by [[CheckAnalysis]]. + * This should only be called by + * [[org.apache.spark.sql.catalyst.analysis.CheckAnalysis]]. */ private[catalyst] def setAnalyzed(): Unit = { if (!_analyzed) { @@ -155,7 +155,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => * In analyzer, use [[resolveOperatorsDown()]] instead. If this is used in the analyzer, * an exception will be thrown in test mode. It is however OK to call this function within * the scope of a [[resolveOperatorsDown()]] call. 
- * @see [[TreeNode.transformDown()]]. + * @see [[org.apache.spark.sql.catalyst.trees.TreeNode.transformDown()]]. */ override def transformDown(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { assertNotAnalysisRule() @@ -164,7 +164,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => /** * Use [[resolveOperators()]] in the analyzer. - * @see [[TreeNode.transformUp()]] + * @see [[org.apache.spark.sql.catalyst.trees.TreeNode.transformUp()]] */ override def transformUp(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { assertNotAnalysisRule() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 48dfc5fd57e63..ad5c3fd74e9b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -33,6 +33,9 @@ abstract class LogicalPlan with QueryPlanConstraints with Logging { + /** Metadata fields that can be projected from this node */ + def metadataOutput: Seq[Attribute] = children.flatMap(_.metadataOutput) + /** Returns true if this subtree has data from a streaming data source. */ def isStreaming: Boolean = children.exists(_.isStreaming) @@ -86,7 +89,8 @@ abstract class LogicalPlan } } - private[this] lazy val childAttributes = AttributeSeq(children.flatMap(_.output)) + private[this] lazy val childAttributes = + AttributeSeq(children.flatMap(c => c.output ++ c.metadataOutput)) private[this] lazy val outputAttributes = AttributeSeq(output) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index 49f89bed154bb..1346f80247a1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -20,18 +20,10 @@ package org.apache.spark.sql.catalyst.plans.logical import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.math.{MathContext, RoundingMode} -import scala.util.control.NonFatal - import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream} -import org.apache.spark.internal.Logging -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.util.{ArrayData, DateTimeUtils} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 17bf704c6d67a..f96e07863fa69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -17,17 +17,14 @@ package org.apache.spark.sql.catalyst.plans.logical -import scala.collection.mutable - import 
org.apache.spark.sql.catalyst.AliasIdentifier -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation} +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.random.RandomSampler @@ -886,6 +883,12 @@ case class SubqueryAlias( val qualifierList = identifier.qualifier :+ alias child.output.map(_.withQualifier(qualifierList)) } + + override def metadataOutput: Seq[Attribute] = { + val qualifierList = identifier.qualifier :+ alias + child.metadataOutput.map(_.withQualifier(qualifierList)) + } + override def doCanonicalize(): LogicalPlan = child.canonicalized } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala index a325b61fcc5a9..4b5e278fccdfb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.util.Utils /** * A general hint for the child that is not yet resolved. 
This node is generated by the parser and diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala index 6925423f003ba..8e58c4f314df0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans.logical.statsEstimation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap} +import org.apache.spark.sql.catalyst.expressions.AttributeMap import org.apache.spark.sql.catalyst.plans.logical.{Project, Statistics} object ProjectEstimation { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala index a774217ecc832..4ef71bbc7c098 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala @@ -18,10 +18,10 @@ package org.apache.spark.sql.catalyst.rules import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.internal.SQLConf -abstract class Rule[TreeType <: TreeNode[_]] extends Logging { +abstract class Rule[TreeType <: TreeNode[_]] extends SQLConfHelper with Logging { /** Name for this rule, automatically inferred based on class name. */ val ruleName: String = { @@ -30,6 +30,4 @@ abstract class Rule[TreeType <: TreeNode[_]] extends Logging { } def apply(plan: TreeType): TreeType - - def conf: SQLConf = SQLConf.get } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 1ab7bbdcff697..ff2b366a9bc75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala index ebbf241088f80..44203316edd94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala @@ -22,7 +22,6 @@ import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, UnsafeArrayData} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.array.ByteArrayMethods object ArrayData { diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala index 1a78422e57a4c..46860ae1771de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.util -import java.time.{LocalDate, LocalDateTime, LocalTime, ZoneId} +import java.time.{LocalDate, LocalDateTime, LocalTime} import java.time.temporal.ChronoField import java.util.{Calendar, TimeZone} import java.util.Calendar.{DAY_OF_MONTH, DST_OFFSET, ERA, HOUR_OF_DAY, MINUTE, MONTH, SECOND, YEAR, ZONE_OFFSET} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala index fc2ab99a3da8c..0779bf53fe446 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.connector.catalog import scala.collection.mutable import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.internal.SQLConf @@ -37,9 +38,8 @@ import org.apache.spark.sql.internal.SQLConf // need to track current database at all. private[sql] class CatalogManager( - conf: SQLConf, defaultSessionCatalog: CatalogPlugin, - val v1SessionCatalog: SessionCatalog) extends Logging { + val v1SessionCatalog: SessionCatalog) extends SQLConfHelper with Logging { import CatalogManager.SESSION_CATALOG_NAME import CatalogV2Util._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala index dfacf6e83ef57..8d91ea7c50cde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala @@ -21,7 +21,9 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{PartitionSpec, ResolvedPartitionSpec, UnresolvedPartitionSpec} -import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsDelete, SupportsPartitionManagement, SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.connector.catalog.{MetadataColumn, SupportsAtomicPartitionManagement, SupportsDelete, SupportsPartitionManagement, SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap object DataSourceV2Implicits { @@ -78,6 +80,18 @@ object DataSourceV2Implicits { def supportsAny(capabilities: TableCapability*): Boolean = capabilities.exists(supports) } + implicit class MetadataColumnsHelper(metadata: Array[MetadataColumn]) { + def asStruct: StructType = { + val fields = metadata.map { metaCol => + val field = 
StructField(metaCol.name, metaCol.dataType, metaCol.isNullable) + Option(metaCol.comment).map(field.withComment).getOrElse(field) + } + StructType(fields) + } + + def toAttributes: Seq[AttributeReference] = asStruct.toAttributes + } + implicit class OptionsHelper(options: Map[String, String]) { def asOptions: CaseInsensitiveStringMap = { new CaseInsensitiveStringMap(options.asJava) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index 45d89498f5ae9..f541411daeff4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -21,10 +21,10 @@ import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelat import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCapability} -import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, Statistics => V2Statistics, SupportsReportStatistics} +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, MetadataColumn, SupportsMetadataColumns, Table, TableCapability} +import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics} import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} -import org.apache.spark.sql.connector.write.WriteBuilder +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -35,8 +35,9 @@ import org.apache.spark.util.Utils * @param output the output attributes of this relation. * @param catalog catalogPlugin for the table. None if no catalog is specified. * @param identifier the identifier for the table. None if no identifier is defined. - * @param options The options for this table operation. It's used to create fresh [[ScanBuilder]] - * and [[WriteBuilder]]. + * @param options The options for this table operation. It's used to create fresh + * [[org.apache.spark.sql.connector.read.ScanBuilder]] and + * [[org.apache.spark.sql.connector.write.WriteBuilder]]. */ case class DataSourceV2Relation( table: Table, @@ -48,6 +49,21 @@ case class DataSourceV2Relation( import DataSourceV2Implicits._ + override lazy val metadataOutput: Seq[AttributeReference] = table match { + case hasMeta: SupportsMetadataColumns => + val resolve = SQLConf.get.resolver + val outputNames = outputSet.map(_.name) + def isOutputColumn(col: MetadataColumn): Boolean = { + outputNames.exists(name => resolve(col.name, name)) + } + // filter out metadata columns that have names conflicting with output columns. if the table + // has a column "line" and the table can produce a metadata column called "line", then the + // data column should be returned, not the metadata column. 
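Editorial aside: a small, hedged sketch of what the `asStruct` helper introduced above builds from a table's metadata columns. `MetaCol` is a local stand-in for the connector's `MetadataColumn` interface (only the accessors used above), and the column names are purely illustrative.

```scala
import org.apache.spark.sql.types._

// Local stand-in for MetadataColumn, exposing just the accessors used above.
case class MetaCol(name: String, dataType: DataType, isNullable: Boolean, comment: String)

val metadata = Array(
  MetaCol("_partition", StringType, isNullable = false, comment = "source partition"),
  MetaCol("_row_index", LongType, isNullable = false, comment = null))

// Mirrors MetadataColumnsHelper.asStruct: one StructField per metadata column,
// attaching the comment when one is present.
val asStruct: StructType = StructType(metadata.map { metaCol =>
  val field = StructField(metaCol.name, metaCol.dataType, metaCol.isNullable)
  Option(metaCol.comment).map(field.withComment).getOrElse(field)
})

// asStruct.fieldNames == Array("_partition", "_row_index"). As the comment in
// DataSourceV2Relation.metadataOutput notes, any of these whose name collides
// with a regular output column is filtered out, so the data column wins.
```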
+ hasMeta.metadataColumns.filterNot(isOutputColumn).toAttributes + case _ => + Nil + } + override def name: String = table.name() override def skipSchemaResolution: Boolean = table.supports(TableCapability.ACCEPT_ANY_SCHEMA) @@ -78,6 +94,14 @@ case class DataSourceV2Relation( override def newInstance(): DataSourceV2Relation = { copy(output = output.map(_.newInstance())) } + + def withMetadataColumns(): DataSourceV2Relation = { + if (metadataOutput.nonEmpty) { + DataSourceV2Relation(table, output ++ metadataOutput, catalog, identifier, options) + } else { + this + } + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index f2e309013a5b6..fcf222c8fdab0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -216,6 +216,18 @@ object SQLConf { "for using switch statements in InSet must be non-negative and less than or equal to 600") .createWithDefault(400) + val OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD = + buildConf("spark.sql.optimizer.likeAllConversionThreshold") + .internal() + .doc("Configure the maximum size of the pattern sequence in like all. Spark will convert " + + "the logical combination of like to avoid StackOverflowError. 200 is an empirical value " + + "that will not cause StackOverflowError.") + .version("3.1.0") + .intConf + .checkValue(threshold => threshold >= 0, "The maximum size of pattern sequence " + + "in like all must be non-negative") + .createWithDefault(200) + val PLAN_CHANGE_LOG_LEVEL = buildConf("spark.sql.planChangeLog.level") .internal() .doc("Configures the log level for logging the change from the original plan to the new " + @@ -539,6 +551,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val SUBEXPRESSION_ELIMINATION_CACHE_MAX_ENTRIES = + buildConf("spark.sql.subexpressionElimination.cache.maxEntries") + .internal() + .doc("The maximum entries of the cache used for interpreted subexpression elimination.") + .version("3.1.0") + .intConf + .checkValue(_ >= 0, "The maximum must not be negative") + .createWithDefault(100) + val CASE_SENSITIVE = buildConf("spark.sql.caseSensitive") .internal() .doc("Whether the query analyzer should be case sensitive or not. " + @@ -815,6 +836,18 @@ object SQLConf { .booleanConf .createWithDefault(true) + val HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD = + buildConf("spark.sql.hive.metastorePartitionPruningInSetThreshold") + .doc("The threshold of set size for InSet predicate when pruning partitions through Hive " + + "Metastore. When the set size exceeds the threshold, we rewrite the InSet predicate " + + "to be greater than or equal to the minimum value in set and less than or equal to the " + + "maximum value in set. Larger values may cause Hive Metastore stack overflow.") + .version("3.1.0") + .internal() + .intConf + .checkValue(_ > 0, "The value of metastorePartitionPruningInSetThreshold must be positive") + .createWithDefault(1000) + val HIVE_MANAGE_FILESOURCE_PARTITIONS = buildConf("spark.sql.hive.manageFilesourcePartitions") .doc("When true, enable metastore partition management for file source tables as well. 
" + @@ -1256,7 +1289,7 @@ object SQLConf { val REMOVE_REDUNDANT_SORTS_ENABLED = buildConf("spark.sql.execution.removeRedundantSorts") .internal() .doc("Whether to remove redundant physical sort node") - .version("3.1.0") + .version("2.4.8") .booleanConf .createWithDefault(true) @@ -1882,7 +1915,7 @@ object SQLConf { "1. pyspark.sql.DataFrame.toPandas " + "2. pyspark.sql.SparkSession.createDataFrame when its input is a Pandas DataFrame " + "The following data types are unsupported: " + - "MapType, ArrayType of TimestampType, and nested StructType.") + "ArrayType of TimestampType, and nested StructType.") .version("3.0.0") .fallbackConf(ARROW_EXECUTION_ENABLED) @@ -1942,6 +1975,16 @@ object SQLConf { .version("3.0.0") .fallbackConf(BUFFER_SIZE) + val PYSPARK_SIMPLIFIEID_TRACEBACK = + buildConf("spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled") + .doc( + "When true, the traceback from Python UDFs is simplified. It hides " + + "the Python worker, (de)serialization, etc from PySpark in tracebacks, and only " + + "shows the exception messages from UDFs. Note that this works only with CPython 3.7+.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME = buildConf("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName") .internal() @@ -3006,6 +3049,8 @@ class SQLConf extends Serializable with Logging { def optimizerInSetSwitchThreshold: Int = getConf(OPTIMIZER_INSET_SWITCH_THRESHOLD) + def optimizerLikeAllConversionThreshold: Int = getConf(OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD) + def planChangeLogLevel: String = getConf(PLAN_CHANGE_LOG_LEVEL) def planChangeRules: Option[String] = getConf(PLAN_CHANGE_LOG_RULES) @@ -3142,6 +3187,9 @@ class SQLConf extends Serializable with Logging { def metastorePartitionPruning: Boolean = getConf(HIVE_METASTORE_PARTITION_PRUNING) + def metastorePartitionPruningInSetThreshold: Int = + getConf(HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD) + def manageFilesourcePartitions: Boolean = getConf(HIVE_MANAGE_FILESOURCE_PARTITIONS) def filesourcePartitionFileCacheSize: Long = getConf(HIVE_FILESOURCE_PARTITION_FILE_CACHE_SIZE) @@ -3233,6 +3281,9 @@ class SQLConf extends Serializable with Logging { def subexpressionEliminationEnabled: Boolean = getConf(SUBEXPRESSION_ELIMINATION_ENABLED) + def subexpressionEliminationCacheMaxEntries: Int = + getConf(SUBEXPRESSION_ELIMINATION_CACHE_MAX_ENTRIES) + def autoBroadcastJoinThreshold: Long = getConf(AUTO_BROADCASTJOIN_THRESHOLD) def limitScaleUpFactor: Int = getConf(LIMIT_SCALE_UP_FACTOR) @@ -3405,6 +3456,8 @@ class SQLConf extends Serializable with Logging { def pandasUDFBufferSize: Int = getConf(PANDAS_UDF_BUFFER_SIZE) + def pysparkSimplifiedTraceback: Boolean = getConf(PYSPARK_SIMPLIFIEID_TRACEBACK) + def pandasGroupedMapAssignColumnsByName: Boolean = getConf(SQLConf.PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 043c88f88843c..7556a19f0d316 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -31,7 +31,7 @@ import org.apache.spark.annotation.Stable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, 
ParseException} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 6be6d81ec3bb7..960e174f9c368 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.types -import java.lang.{Long => JLong} import java.math.{BigDecimal => JavaBigDecimal, BigInteger, MathContext, RoundingMode} import scala.util.Try diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala index ac18b0f79b5f3..1962fca66c059 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala @@ -17,14 +17,13 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} -import java.time.{Instant, LocalDate} +import java.time.LocalDate import org.json4s.JsonAST.{JArray, JBool, JDecimal, JDouble, JLong, JNull, JObject, JString, JValue} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.encoders.{ExamplePoint, ExamplePointUDT} import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index e8c7aed6d72ce..164bbd7f34d04 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -134,7 +134,6 @@ object ScroogeLikeExample { } trait ScroogeLikeExample extends Product1[Int] with Serializable { - import ScroogeLikeExample._ def x: Int diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExternalCatalogSuite.scala index 3dd38091051d8..df99cd851cc3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExternalCatalogSuite.scala @@ -27,13 +27,11 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType, ExternalCatalog, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class AnalysisExternalCatalogSuite extends AnalysisTest with Matchers { private def getAnalyzer(externCatalog: ExternalCatalog, databasePath: File): Analyzer = { - val conf = new SQLConf() - val catalog = new SessionCatalog(externCatalog, FunctionRegistry.builtin, conf) + val catalog = new 
SessionCatalog(externCatalog, FunctionRegistry.builtin) catalog.createDatabase( CatalogDatabase("default", "", databasePath.toURI, Map.empty), ignoreIfExists = false) @@ -44,7 +42,7 @@ class AnalysisExternalCatalogSuite extends AnalysisTest with Matchers { CatalogStorageFormat.empty, StructType(Seq(StructField("a", IntegerType, nullable = true)))), ignoreIfExists = false) - new Analyzer(catalog, conf) + new Analyzer(catalog) } test("query builtin functions don't call the external catalog") { @@ -66,7 +64,7 @@ class AnalysisExternalCatalogSuite extends AnalysisTest with Matchers { withTempDir { tempDir => val inMemoryCatalog = new InMemoryCatalog val externCatalog = spy(inMemoryCatalog) - val catalog = new SessionCatalog(externCatalog, FunctionRegistry.builtin, conf) + val catalog = new SessionCatalog(externCatalog, FunctionRegistry.builtin) catalog.createDatabase( CatalogDatabase("default", "", new URI(tempDir.toString), Map.empty), ignoreIfExists = false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 37dcee1e59ee8..f0a24d4a56048 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import java.util.{Locale, TimeZone} +import java.util.TimeZone import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag @@ -771,22 +771,23 @@ class AnalysisSuite extends AnalysisTest with Matchers { // RuleExecutor only throw exception or log warning when the rule is supposed to run // more than once. val maxIterations = 2 - val conf = new SQLConf().copy(SQLConf.ANALYZER_MAX_ITERATIONS -> maxIterations) - val testAnalyzer = new Analyzer( - new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf), conf) + withSQLConf(SQLConf.ANALYZER_MAX_ITERATIONS.key -> maxIterations.toString) { + val testAnalyzer = new Analyzer( + new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin)) - val plan = testRelation2.select( - $"a" / Literal(2) as "div1", - $"a" / $"b" as "div2", - $"a" / $"c" as "div3", - $"a" / $"d" as "div4", - $"e" / $"e" as "div5") + val plan = testRelation2.select( + $"a" / Literal(2) as "div1", + $"a" / $"b" as "div2", + $"a" / $"c" as "div3", + $"a" / $"d" as "div4", + $"e" / $"e" as "div5") - val message = intercept[TreeNodeException[LogicalPlan]] { - testAnalyzer.execute(plan) - }.getMessage - assert(message.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + - s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) + val message = intercept[TreeNodeException[LogicalPlan]] { + testAnalyzer.execute(plan) + }.getMessage + assert(message.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + + s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) + } } test("SPARK-30886 Deprecate two-parameter TRIM/LTRIM/RTRIM") { @@ -802,7 +803,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { withLogAppender(logAppender) { val testAnalyzer1 = new Analyzer( - new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf), conf) + new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin)) val plan1 = testRelation2.select( UnresolvedFunction(f, $"a" :: Nil, isDistinct = false)) @@ -824,7 +825,7 @@ class 
AnalysisSuite extends AnalysisTest with Matchers { // New analyzer from new SessionState val testAnalyzer2 = new Analyzer( - new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf), conf) + new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin)) val plan4 = testRelation2.select( UnresolvedFunction(f, $"c" :: $"d" :: Nil, isDistinct = false)) testAnalyzer2.execute(plan4) @@ -933,9 +934,8 @@ class AnalysisSuite extends AnalysisTest with Matchers { val maxIterations = 2 val maxIterationsEnough = 5 withSQLConf(SQLConf.ANALYZER_MAX_ITERATIONS.key -> maxIterations.toString) { - val conf = SQLConf.get val testAnalyzer = new Analyzer( - new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf), conf) + new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin)) val plan = testRelation2.select( $"a" / Literal(2) as "div1", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 8c14ffffa17a5..37db4be502a83 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -34,7 +34,7 @@ trait AnalysisTest extends PlanTest { protected def extendedAnalysisRules: Seq[Rule[LogicalPlan]] = Nil protected def getAnalyzer: Analyzer = { - val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) + val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin) catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), ignoreIfExists = false) @@ -43,7 +43,7 @@ trait AnalysisTest extends PlanTest { catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true) catalog.createGlobalTempView("TaBlE4", TestRelations.testRelation4, overrideIfExists = true) catalog.createGlobalTempView("TaBlE5", TestRelations.testRelation5, overrideIfExists = true) - new Analyzer(catalog, conf) { + new Analyzer(catalog) { override val extendedResolutionRules = EliminateSubqueryAliases +: extendedAnalysisRules } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala index f433229595e9e..1c849fa21e4ea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, LeafNode} import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} -import org.apache.spark.sql.connector.expressions.{Expressions, LogicalExpressions} +import org.apache.spark.sql.connector.expressions.Expressions import org.apache.spark.sql.types.{DoubleType, LongType, StringType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index 
349237c2aa893..67bafbd4a8122 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -223,11 +223,11 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { override def getAnalyzer: Analyzer = { - val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) + val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin) catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), ignoreIfExists = false) - new Analyzer(catalog, conf) { + new Analyzer(catalog) { override val extendedResolutionRules = EliminateSubqueryAliases :: Nil } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala index d5991ff10ce6c..9892e62a9ce19 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala @@ -24,15 +24,14 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project, Union} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class DecimalPrecisionSuite extends AnalysisTest with BeforeAndAfter { - private val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - private val analyzer = new Analyzer(catalog, conf) + private val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry) + private val analyzer = new Analyzer(catalog) private val relation = LocalRelation( AttributeReference("i", IntegerType)(), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala index cea0f2a9cbc97..e0f3c9a835b6e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/LookupFunctionsSuite.scala @@ -24,19 +24,17 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, import org.apache.spark.sql.catalyst.expressions.Alias import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.internal.SQLConf class LookupFunctionsSuite extends PlanTest { test("SPARK-23486: the functionExists for the Persistent function check") { val externalCatalog = new CustomInMemoryCatalog - val conf = new SQLConf() - val catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin, conf) + val catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin) val analyzer = { catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), ignoreIfExists = false) - new Analyzer(catalog, conf) + new Analyzer(catalog) } def table(ref: String): 
LogicalPlan = UnresolvedRelation(TableIdentifier(ref)) @@ -56,14 +54,13 @@ class LookupFunctionsSuite extends PlanTest { test("SPARK-23486: the functionExists for the Registered function check") { val externalCatalog = new InMemoryCatalog - val conf = new SQLConf() val customerFunctionReg = new CustomerFunctionRegistry - val catalog = new SessionCatalog(externalCatalog, customerFunctionReg, conf) + val catalog = new SessionCatalog(externalCatalog, customerFunctionReg) val analyzer = { catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), ignoreIfExists = false) - new Analyzer(catalog, conf) + new Analyzer(catalog) } def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala index e449b9669cc72..ea2284e5420bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala index 8cf41a02320d2..7566545f98355 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet} import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter, LeafNode, LocalRelation} +import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter, LeafNode} import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, TimestampType} class StreamingJoinHelperSuite extends AnalysisTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala index 06ea531833a43..3e9a8b71a8fb6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TableLookupCacheSuite.scala @@ -29,13 +29,11 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFor import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, V1Table} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class TableLookupCacheSuite extends AnalysisTest with Matchers { private def getAnalyzer(externalCatalog: ExternalCatalog, databasePath: File): Analyzer = { - 
val conf = new SQLConf() - val v1Catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin, conf) + val v1Catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin) v1Catalog.createDatabase( CatalogDatabase("default", "", databasePath.toURI, Map.empty), ignoreIfExists = false) @@ -64,7 +62,7 @@ class TableLookupCacheSuite extends AnalysisTest with Matchers { when(catalogManager.currentCatalog).thenReturn(v2Catalog) when(catalogManager.currentNamespace).thenReturn(Array("default")) - new Analyzer(catalogManager, conf) + new Analyzer(catalogManager) } test("table lookups to external catalog are cached") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 21dde3ca8ca51..3be417de472c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{IntegerType, LongType, MetadataBuilder} -import org.apache.spark.unsafe.types.CalendarInterval /** A dummy command for testing unsupported operations. */ case class DummyCommand() extends Command @@ -393,16 +392,14 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { testBinaryOperationInStreamingPlan( "single inner join in append mode", _.join(_, joinType = Inner), - outputMode = Append, - streamStreamSupported = true) + outputMode = Append) testBinaryOperationInStreamingPlan( "multiple inner joins in append mode", (x: LogicalPlan, y: LogicalPlan) => { x.join(y, joinType = Inner).join(streamRelation, joinType = Inner) }, - outputMode = Append, - streamStreamSupported = true) + outputMode = Append) testBinaryOperationInStreamingPlan( "inner join in update mode", @@ -419,209 +416,135 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { batchStreamSupported = false, streamBatchSupported = false) - // Left outer joins: *-stream not allowed + // Left outer, left semi, left anti join: *-stream not allowed + Seq((LeftOuter, "LeftOuter join"), (LeftSemi, "LeftSemi join"), (LeftAnti, "LeftAnti join")) + .foreach { case (joinType, name) => + testBinaryOperationInStreamingPlan( + name, + _.join(_, joinType = joinType), + batchStreamSupported = false, + streamStreamSupported = false, + expectedMsg = name) + } + + // Right outer joins: stream-* not allowed testBinaryOperationInStreamingPlan( - "left outer join", - _.join(_, joinType = LeftOuter), - batchStreamSupported = false, + "right outer join", + _.join(_, joinType = RightOuter), + streamBatchSupported = false, streamStreamSupported = false, expectedMsg = "outer join") - // Left outer joins: update and complete mode not allowed - assertNotSupportedInStreamingPlan( - s"left outer join with stream-stream relations and update mode", - streamRelation.join(streamRelation, joinType = LeftOuter, - condition = Some(attribute === attribute)), - OutputMode.Update(), - Seq("is not supported in Update output mode")) - assertNotSupportedInStreamingPlan( - s"left outer join with stream-stream relations and complete mode", - Aggregate(Nil, aggExprs("d"), streamRelation.join(streamRelation, joinType = LeftOuter, - 
condition = Some(attribute === attribute))), - OutputMode.Complete(), - Seq("is not supported in Complete output mode")) - - // Left outer joins: stream-stream allowed with join on watermark attribute - // Note that the attribute need not be watermarked on both sides. - assertSupportedInStreamingPlan( - s"left outer join with stream-stream relations and join on attribute with left watermark", - streamRelation.join(streamRelation, joinType = LeftOuter, - condition = Some(attributeWithWatermark === attribute)), - OutputMode.Append()) - assertSupportedInStreamingPlan( - s"left outer join with stream-stream relations and join on attribute with right watermark", - streamRelation.join(streamRelation, joinType = LeftOuter, - condition = Some(attribute === attributeWithWatermark)), - OutputMode.Append()) - assertNotSupportedInStreamingPlan( - s"left outer join with stream-stream relations and join on non-watermarked attribute", - streamRelation.join(streamRelation, joinType = LeftOuter, - condition = Some(attribute === attribute)), - OutputMode.Append(), - Seq("watermark in the join keys")) - - // Left outer joins: stream-stream allowed with range condition yielding state value watermark - assertSupportedInStreamingPlan( - s"left outer join with stream-stream relations and state value watermark", { - val leftRelation = streamRelation - val rightTimeWithWatermark = - AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) - val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) - leftRelation.join( - rightRelation, - joinType = LeftOuter, - condition = Some(attribute > rightTimeWithWatermark + 10)) - }, - OutputMode.Append()) - - // Left outer joins: stream-stream not allowed with insufficient range condition - assertNotSupportedInStreamingPlan( - s"left outer join with stream-stream relations and state value watermark", { - val leftRelation = streamRelation - val rightTimeWithWatermark = - AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) - val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) - leftRelation.join( - rightRelation, - joinType = LeftOuter, - condition = Some(attribute < rightTimeWithWatermark + 10)) - }, - OutputMode.Append(), - Seq("appropriate range condition")) - - // Left semi joins: stream-* not allowed - testBinaryOperationInStreamingPlan( - "left semi join", - _.join(_, joinType = LeftSemi), - streamStreamSupported = false, - batchStreamSupported = false, - expectedMsg = "LeftSemi join") + // Left outer, right outer, left semi joins + Seq(LeftOuter, RightOuter, LeftSemi).foreach { joinType => + // Update mode not allowed + assertNotSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and update mode", + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attribute === attribute)), + OutputMode.Update(), + Seq("is not supported in Update output mode")) - // Left semi joins: update and complete mode not allowed - assertNotSupportedInStreamingPlan( - "left semi join with stream-stream relations and update mode", - streamRelation.join(streamRelation, joinType = LeftSemi, - condition = Some(attribute === attribute)), - OutputMode.Update(), - Seq("is not supported in Update output mode")) - assertNotSupportedInStreamingPlan( - "left semi join with stream-stream relations and complete mode", - Aggregate(Nil, aggExprs("d"), streamRelation.join(streamRelation, joinType = LeftSemi, - condition = Some(attribute === attribute))), - OutputMode.Complete(), - Seq("is not supported 
in Complete output mode")) - - // Left semi joins: stream-stream allowed with join on watermark attribute - // Note that the attribute need not be watermarked on both sides. - assertSupportedInStreamingPlan( - "left semi join with stream-stream relations and join on attribute with left watermark", - streamRelation.join(streamRelation, joinType = LeftSemi, - condition = Some(attributeWithWatermark === attribute)), - OutputMode.Append()) - assertSupportedInStreamingPlan( - "left semi join with stream-stream relations and join on attribute with right watermark", - streamRelation.join(streamRelation, joinType = LeftSemi, - condition = Some(attribute === attributeWithWatermark)), - OutputMode.Append()) - assertNotSupportedInStreamingPlan( - "left semi join with stream-stream relations and join on non-watermarked attribute", - streamRelation.join(streamRelation, joinType = LeftSemi, - condition = Some(attribute === attribute)), - OutputMode.Append(), - Seq("without a watermark in the join keys")) + // Complete mode not allowed + assertNotSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and complete mode", + Aggregate(Nil, aggExprs("d"), streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attribute === attribute))), + OutputMode.Complete(), + Seq("is not supported in Complete output mode")) + + // Stream-stream allowed with join on watermark attribute + // Note that the attribute need not be watermarked on both sides. + assertSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and join on attribute with left watermark", + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attributeWithWatermark === attribute)), + OutputMode.Append()) + assertSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and join on attribute with right watermark", + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attribute === attributeWithWatermark)), + OutputMode.Append()) + assertNotSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and join on non-watermarked attribute", + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attribute === attribute)), + OutputMode.Append(), + Seq("without a watermark in the join keys")) + + val timeWithWatermark = + AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) + val relationWithWatermark = new TestStreamingRelation(timeWithWatermark) + val (leftRelation, rightRelation) = + if (joinType == RightOuter) { + (relationWithWatermark, streamRelation) + } else { + (streamRelation, relationWithWatermark) + } - // Left semi joins: stream-stream allowed with range condition yielding state value watermark - assertSupportedInStreamingPlan( - "left semi join with stream-stream relations and state value watermark", { - val leftRelation = streamRelation - val rightTimeWithWatermark = - AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) - val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) - leftRelation.join( - rightRelation, - joinType = LeftSemi, - condition = Some(attribute > rightTimeWithWatermark + 10)) - }, - OutputMode.Append()) + // stream-stream allowed with range condition yielding state value watermark + assertSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and state value watermark", + leftRelation.join(rightRelation, joinType = joinType, + condition = Some(attribute > timeWithWatermark + 10)), + 
OutputMode.Append()) - // Left semi joins: stream-stream not allowed with insufficient range condition - assertNotSupportedInStreamingPlan( - "left semi join with stream-stream relations and state value watermark", { - val leftRelation = streamRelation - val rightTimeWithWatermark = - AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) - val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) - leftRelation.join( - rightRelation, - joinType = LeftSemi, - condition = Some(attribute < rightTimeWithWatermark + 10)) - }, - OutputMode.Append(), - Seq("appropriate range condition")) + // stream-stream not allowed with insufficient range condition + assertNotSupportedInStreamingPlan( + s"$joinType join with stream-stream relations and state value watermark", + leftRelation.join(rightRelation, joinType = joinType, + condition = Some(attribute < timeWithWatermark + 10)), + OutputMode.Append(), + Seq("is not supported without a watermark in the join keys, or a watermark on " + + "the nullable side and an appropriate range condition")) + } - // Left anti joins: stream-* not allowed - testBinaryOperationInStreamingPlan( - "left anti join", - _.join(_, joinType = LeftAnti), - streamStreamSupported = false, - batchStreamSupported = false, - expectedMsg = "Left anti join") + // stream-stream inner join doesn't emit late rows, whereas outer joins could + Seq((Inner, false), (LeftOuter, true), (RightOuter, true)).map { + case (joinType, expectFailure) => + assertPassOnGlobalWatermarkLimit( + s"single $joinType join in Append mode", + streamRelation.join(streamRelation, joinType = RightOuter, + condition = Some(attributeWithWatermark === attribute)), + OutputMode.Append()) - // Right outer joins: stream-* not allowed - testBinaryOperationInStreamingPlan( - "right outer join", - _.join(_, joinType = RightOuter), - streamBatchSupported = false, - streamStreamSupported = false, - expectedMsg = "outer join") + testGlobalWatermarkLimit( + s"streaming aggregation after stream-stream $joinType join in Append mode", + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attributeWithWatermark === attribute)) + .groupBy("a")(count("*")), + OutputMode.Append(), + expectFailure = expectFailure) - // Right outer joins: stream-stream allowed with join on watermark attribute - // Note that the attribute need not be watermarked on both sides. 
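Editorial aside: a hedged, user-level illustration of one rule the consolidated loop above asserts, namely that stream-stream LeftOuter/RightOuter/LeftSemi joins are rejected in Update output mode. The rate source, console sink, and column names are illustrative; only the error-message fragment is taken from the assertions in this diff.

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]").appName("join-mode-sketch").getOrCreate()

val left = spark.readStream.format("rate").load().withColumnRenamed("value", "id")
val right = spark.readStream.format("rate").load().withColumnRenamed("value", "id")

// Stream-stream left semi join, no watermark, Update output mode.
val joined = left.join(right, Seq("id"), "left_semi")

// Starting this query is expected to fail analysis with a message containing
// "is not supported in Update output mode", matching the assertion above.
joined.writeStream
  .format("console")
  .outputMode("update")
  .start()
```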
- assertSupportedInStreamingPlan( - s"right outer join with stream-stream relations and join on attribute with left watermark", - streamRelation.join(streamRelation, joinType = RightOuter, - condition = Some(attributeWithWatermark === attribute)), - OutputMode.Append()) - assertSupportedInStreamingPlan( - s"right outer join with stream-stream relations and join on attribute with right watermark", - streamRelation.join(streamRelation, joinType = RightOuter, - condition = Some(attribute === attributeWithWatermark)), - OutputMode.Append()) - assertNotSupportedInStreamingPlan( - s"right outer join with stream-stream relations and join on non-watermarked attribute", - streamRelation.join(streamRelation, joinType = RightOuter, - condition = Some(attribute === attribute)), - OutputMode.Append(), - Seq("watermark in the join keys")) + Seq(Inner, LeftOuter, RightOuter).foreach { joinType2 => + testGlobalWatermarkLimit( + s"streaming-stream $joinType2 after stream-stream $joinType join in Append mode", + streamRelation.join( + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attributeWithWatermark === attribute)), + joinType = joinType2, + condition = Some(attributeWithWatermark === attribute)), + OutputMode.Append(), + expectFailure = expectFailure) + } - // Right outer joins: stream-stream allowed with range condition yielding state value watermark - assertSupportedInStreamingPlan( - s"right outer join with stream-stream relations and state value watermark", { - val leftTimeWithWatermark = - AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) - val leftRelation = new TestStreamingRelation(leftTimeWithWatermark) - val rightRelation = streamRelation - leftRelation.join( - rightRelation, - joinType = RightOuter, - condition = Some(leftTimeWithWatermark + 10 < attribute)) - }, - OutputMode.Append()) + testGlobalWatermarkLimit( + s"FlatMapGroupsWithState after stream-stream $joinType join in Append mode", + FlatMapGroupsWithState( + null, att, att, Seq(att), Seq(att), att, null, Append, + isMapGroupsWithState = false, null, + streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attributeWithWatermark === attribute))), + OutputMode.Append(), + expectFailure = expectFailure) - // Right outer joins: stream-stream not allowed with insufficient range condition - assertNotSupportedInStreamingPlan( - s"right outer join with stream-stream relations and state value watermark", { - val leftTimeWithWatermark = - AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) - val leftRelation = new TestStreamingRelation(leftTimeWithWatermark) - val rightRelation = streamRelation - leftRelation.join( - rightRelation, - joinType = RightOuter, - condition = Some(leftTimeWithWatermark + 10 > attribute)) - }, - OutputMode.Append(), - Seq("appropriate range condition")) + testGlobalWatermarkLimit( + s"deduplicate after stream-stream $joinType join in Append mode", + Deduplicate(Seq(attribute), streamRelation.join(streamRelation, joinType = joinType, + condition = Some(attributeWithWatermark === attribute))), + OutputMode.Append(), + expectFailure = expectFailure) + } // Cogroup: only batch-batch is allowed testBinaryOperationInStreamingPlan( @@ -744,53 +667,6 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { OutputMode.Append()) } - // stream-stream join - // stream-stream inner join doesn't emit late rows, whereas outer joins could - Seq((Inner, false), (LeftOuter, true), (RightOuter, true)).map { case (joinType, 
expectFailure) => - assertPassOnGlobalWatermarkLimit( - s"single $joinType join in Append mode", - streamRelation.join(streamRelation, joinType = RightOuter, - condition = Some(attributeWithWatermark === attribute)), - OutputMode.Append()) - - testGlobalWatermarkLimit( - s"streaming aggregation after stream-stream $joinType join in Append mode", - streamRelation.join(streamRelation, joinType = joinType, - condition = Some(attributeWithWatermark === attribute)) - .groupBy("a")(count("*")), - OutputMode.Append(), - expectFailure = expectFailure) - - Seq(Inner, LeftOuter, RightOuter).map { joinType2 => - testGlobalWatermarkLimit( - s"streaming-stream $joinType2 after stream-stream $joinType join in Append mode", - streamRelation.join( - streamRelation.join(streamRelation, joinType = joinType, - condition = Some(attributeWithWatermark === attribute)), - joinType = joinType2, - condition = Some(attributeWithWatermark === attribute)), - OutputMode.Append(), - expectFailure = expectFailure) - } - - testGlobalWatermarkLimit( - s"FlatMapGroupsWithState after stream-stream $joinType join in Append mode", - FlatMapGroupsWithState( - null, att, att, Seq(att), Seq(att), att, null, Append, - isMapGroupsWithState = false, null, - streamRelation.join(streamRelation, joinType = joinType, - condition = Some(attributeWithWatermark === attribute))), - OutputMode.Append(), - expectFailure = expectFailure) - - testGlobalWatermarkLimit( - s"deduplicate after stream-stream $joinType join in Append mode", - Deduplicate(Seq(attribute), streamRelation.join(streamRelation, joinType = joinType, - condition = Some(attributeWithWatermark === attribute))), - OutputMode.Append(), - expectFailure = expectFailure) - } - // FlatMapGroupsWithState { assertPassOnGlobalWatermarkLimit( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index ad40cc010361c..f30ae70dceffa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -1618,26 +1618,28 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { import org.apache.spark.sql.catalyst.dsl.plans._ Seq(true, false) foreach { caseSensitive => - val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) - val catalog = new SessionCatalog(newBasicCatalog(), new SimpleFunctionRegistry, conf) - catalog.setCurrentDatabase("db1") - try { - val analyzer = new Analyzer(catalog, conf) - - // The analyzer should report the undefined function rather than the undefined table first. - val cause = intercept[AnalysisException] { - analyzer.execute( - UnresolvedRelation(TableIdentifier("undefined_table")).select( - UnresolvedFunction("undefined_fn", Nil, isDistinct = false) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val catalog = new SessionCatalog(newBasicCatalog(), new SimpleFunctionRegistry) + catalog.setCurrentDatabase("db1") + try { + val analyzer = new Analyzer(catalog) + + // The analyzer should report the undefined function + // rather than the undefined table first. 
+        val cause = intercept[AnalysisException] {
+          analyzer.execute(
+            UnresolvedRelation(TableIdentifier("undefined_table")).select(
+              UnresolvedFunction("undefined_fn", Nil, isDistinct = false)
+            )
           )
-        )
-      }
+        }
-      assert(cause.getMessage.contains("Undefined function: 'undefined_fn'"))
-      // SPARK-21318: the error message should contains the current database name
-      assert(cause.getMessage.contains("db1"))
-    } finally {
-      catalog.reset()
+        assert(cause.getMessage.contains("Undefined function: 'undefined_fn'"))
+        // SPARK-21318: the error message should contain the current database name
+        assert(cause.getMessage.contains("db1"))
+      } finally {
+        catalog.reset()
+      }
     }
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 61133e2db5cbd..afb76d8a5a68c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -38,9 +38,6 @@ import org.apache.spark.unsafe.types.UTF8String
 abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
-  // Whether it is required to set SQLConf.ANSI_ENABLED as true for testing numeric overflow.
-  protected def requiredAnsiEnabledForOverflowTestCases: Boolean
-
   protected def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase
   // expected cannot be null
@@ -55,8 +52,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
   test("null cast") {
     import DataTypeTestUtils._
-    // follow [[org.apache.spark.sql.catalyst.expressions.Cast.canCast]] logic
-    // to ensure we test every possible cast situation here
     atomicTypes.zip(atomicTypes).foreach { case (from, to) =>
       checkNullCast(from, to)
     }
@@ -65,14 +60,10 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     atomicTypes.foreach(dt => checkNullCast(dt, StringType))
     checkNullCast(StringType, BinaryType)
     checkNullCast(StringType, BooleanType)
-    checkNullCast(DateType, BooleanType)
-    checkNullCast(TimestampType, BooleanType)
     numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
     checkNullCast(StringType, TimestampType)
-    checkNullCast(BooleanType, TimestampType)
     checkNullCast(DateType, TimestampType)
-    numericTypes.foreach(dt => checkNullCast(dt, TimestampType))
     checkNullCast(StringType, DateType)
     checkNullCast(TimestampType, DateType)
@@ -80,8 +71,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     checkNullCast(StringType, CalendarIntervalType)
     numericTypes.foreach(dt => checkNullCast(StringType, dt))
     numericTypes.foreach(dt => checkNullCast(BooleanType, dt))
-    numericTypes.foreach(dt => checkNullCast(DateType, dt))
-    numericTypes.foreach(dt => checkNullCast(TimestampType, dt))
     for (from <- numericTypes; to <- numericTypes) checkNullCast(from, to)
   }
@@ -215,6 +204,39 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(cast(cast(0, BooleanType), IntegerType), 0)
   }
+  test("cast from int") {
+    checkCast(0, false)
+    checkCast(1, true)
+    checkCast(-5, true)
+    checkCast(1, 1.toByte)
+    checkCast(1, 1.toShort)
+    checkCast(1, 1)
+    checkCast(1, 1.toLong)
+    checkCast(1, 1.0f)
+    checkCast(1, 1.0)
+    checkCast(123, "123")
+
+    checkEvaluation(cast(123, DecimalType.USER_DEFAULT), Decimal(123))
+    checkEvaluation(cast(123, DecimalType(3, 0)), Decimal(123))
+    checkEvaluation(cast(1, LongType),
1.toLong) + } + + test("cast from long") { + checkCast(0L, false) + checkCast(1L, true) + checkCast(-5L, true) + checkCast(1L, 1.toByte) + checkCast(1L, 1.toShort) + checkCast(1L, 1) + checkCast(1L, 1.toLong) + checkCast(1L, 1.0f) + checkCast(1L, 1.0) + checkCast(123L, "123") + + checkEvaluation(cast(123L, DecimalType.USER_DEFAULT), Decimal(123)) + checkEvaluation(cast(123L, DecimalType(3, 0)), Decimal(123)) + } + test("cast from float") { checkCast(0.0f, false) checkCast(0.5f, true) @@ -237,8 +259,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkCast(1.5, 1.toLong) checkCast(1.5, 1.5f) checkCast(1.5, "1.5") - - checkEvaluation(cast(cast(1.toDouble, TimestampType), DoubleType), 1.toDouble) } test("cast from string") { @@ -305,18 +325,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { cast(cast("5", ByteType), ShortType), IntegerType), FloatType), DoubleType), LongType), 5.toLong) - checkEvaluation( - cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType), - DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType), - 5.toShort) - checkEvaluation( - cast(cast(cast(cast(cast(cast("5", TimestampType, UTC_OPT), ByteType), - DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType), - null) - checkEvaluation(cast(cast(cast(cast(cast(cast("5", DecimalType.SYSTEM_DEFAULT), - ByteType), TimestampType), LongType), StringType), ShortType), - 5.toShort) - checkEvaluation(cast("23", DoubleType), 23d) checkEvaluation(cast("23", IntegerType), 23) checkEvaluation(cast("23", FloatType), 23f) @@ -350,58 +358,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkCast(Decimal(1.5), "1.5") } - test("cast from date") { - val d = Date.valueOf("1970-01-01") - checkEvaluation(cast(d, ShortType), null) - checkEvaluation(cast(d, IntegerType), null) - checkEvaluation(cast(d, LongType), null) - checkEvaluation(cast(d, FloatType), null) - checkEvaluation(cast(d, DoubleType), null) - checkEvaluation(cast(d, DecimalType.SYSTEM_DEFAULT), null) - checkEvaluation(cast(d, DecimalType(10, 2)), null) - checkEvaluation(cast(d, StringType), "1970-01-01") - - checkEvaluation( - cast(cast(d, TimestampType, UTC_OPT), StringType, UTC_OPT), - "1970-01-01 00:00:00") - } - - test("cast from timestamp") { - val millis = 15 * 1000 + 3 - val seconds = millis * 1000 + 3 - val ts = new Timestamp(millis) - val tss = new Timestamp(seconds) - checkEvaluation(cast(ts, ShortType), 15.toShort) - checkEvaluation(cast(ts, IntegerType), 15) - checkEvaluation(cast(ts, LongType), 15.toLong) - checkEvaluation(cast(ts, FloatType), 15.003f) - checkEvaluation(cast(ts, DoubleType), 15.003) - - checkEvaluation(cast(cast(tss, ShortType), TimestampType), - fromJavaTimestamp(ts) * MILLIS_PER_SECOND) - checkEvaluation(cast(cast(tss, IntegerType), TimestampType), - fromJavaTimestamp(ts) * MILLIS_PER_SECOND) - checkEvaluation(cast(cast(tss, LongType), TimestampType), - fromJavaTimestamp(ts) * MILLIS_PER_SECOND) - checkEvaluation( - cast(cast(millis.toFloat / MILLIS_PER_SECOND, TimestampType), FloatType), - millis.toFloat / MILLIS_PER_SECOND) - checkEvaluation( - cast(cast(millis.toDouble / MILLIS_PER_SECOND, TimestampType), DoubleType), - millis.toDouble / MILLIS_PER_SECOND) - checkEvaluation( - cast(cast(Decimal(1), TimestampType), DecimalType.SYSTEM_DEFAULT), - Decimal(1)) - - // A test for higher precision than millis - checkEvaluation(cast(cast(0.000001, TimestampType), DoubleType), 0.000001) - - checkEvaluation(cast(Double.NaN, 
TimestampType), null) - checkEvaluation(cast(1.0 / 0.0, TimestampType), null) - checkEvaluation(cast(Float.NaN, TimestampType), null) - checkEvaluation(cast(1.0f / 0.0f, TimestampType), null) - } - test("cast from array") { val array = Literal.create(Seq("123", "true", "f", null), ArrayType(StringType, containsNull = true)) @@ -635,16 +591,20 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast("", BooleanType), null) } + protected def checkInvalidCastFromNumericType(to: DataType): Unit = { + assert(cast(1.toByte, to).checkInputDataTypes().isFailure) + assert(cast(1.toShort, to).checkInputDataTypes().isFailure) + assert(cast(1, to).checkInputDataTypes().isFailure) + assert(cast(1L, to).checkInputDataTypes().isFailure) + assert(cast(1.0.toFloat, to).checkInputDataTypes().isFailure) + assert(cast(1.0, to).checkInputDataTypes().isFailure) + } + test("SPARK-16729 type checking for casting to date type") { assert(cast("1234", DateType).checkInputDataTypes().isSuccess) assert(cast(new Timestamp(1), DateType).checkInputDataTypes().isSuccess) assert(cast(false, DateType).checkInputDataTypes().isFailure) - assert(cast(1.toByte, DateType).checkInputDataTypes().isFailure) - assert(cast(1.toShort, DateType).checkInputDataTypes().isFailure) - assert(cast(1, DateType).checkInputDataTypes().isFailure) - assert(cast(1L, DateType).checkInputDataTypes().isFailure) - assert(cast(1.0.toFloat, DateType).checkInputDataTypes().isFailure) - assert(cast(1.0, DateType).checkInputDataTypes().isFailure) + checkInvalidCastFromNumericType(DateType) } test("SPARK-20302 cast with same structure") { @@ -686,117 +646,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(ctx.inlinedMutableStates.length == 0) } - test("SPARK-22825 Cast array to string") { - val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) - checkEvaluation(ret1, "[1, 2, 3, 4, 5]") - val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) - checkEvaluation(ret2, "[ab, cde, f]") - Seq(false, true).foreach { omitNull => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { - val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) - checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]") - } - } - val ret4 = - cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) - checkEvaluation(ret4, "[ab, cde, f]") - val ret5 = cast( - Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), - StringType) - checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") - val ret6 = cast( - Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00") - .map(Timestamp.valueOf)), - StringType) - checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") - val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) - checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") - val ret8 = cast( - Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), - StringType) - checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") - } - - test("SPARK-33291: Cast array with null elements to string") { - Seq(false, true).foreach { omitNull => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { - val ret1 = cast(Literal.create(Array(null, null)), StringType) - checkEvaluation( - ret1, - s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") - } - } - } - - test("SPARK-22973 Cast map to string") 
{ - Seq( - false -> ("{", "}"), - true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { - val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) - checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb") - val ret2 = cast( - Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), - StringType) - checkEvaluation(ret2, s"${lb}1 -> a, 2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb") - val ret3 = cast( - Literal.create(Map( - 1 -> Date.valueOf("2014-12-03"), - 2 -> Date.valueOf("2014-12-04"), - 3 -> Date.valueOf("2014-12-05"))), - StringType) - checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb") - val ret4 = cast( - Literal.create(Map( - 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), - 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), - StringType) - checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb") - val ret5 = cast( - Literal.create(Map( - 1 -> Array(1, 2, 3), - 2 -> Array(4, 5, 6))), - StringType) - checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb") - } - } - } - - test("SPARK-22981 Cast struct to string") { - Seq( - false -> ("{", "}"), - true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { - val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) - checkEvaluation(ret1, s"${lb}1, a, 0.1$rb") - val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) - checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb") - val ret3 = cast(Literal.create( - (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) - checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb") - val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) - checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb") - val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) - checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb") - val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) - checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb") - } - } - } - - test("SPARK-33291: Cast struct with null elements to string") { - Seq( - false -> ("{", "}"), - true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => - withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { - val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) - checkEvaluation( - ret1, - s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") - } - } - } - test("up-cast") { def isCastSafe(from: NumericType, to: NumericType): Boolean = (from, to) match { case (_, dt: DecimalType) => dt.isWiderThan(from) @@ -869,20 +718,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } - test("Throw exception on casting out-of-range value to decimal type") { - withSQLConf(SQLConf.ANSI_ENABLED.key -> requiredAnsiEnabledForOverflowTestCases.toString) { - checkExceptionInExpression[ArithmeticException]( - cast(Literal("134.12"), DecimalType(3, 2)), "cannot be represented") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(Timestamp.valueOf("2019-07-25 22:04:36")), DecimalType(3, 2)), - "cannot be represented") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(BigDecimal(134.12)), DecimalType(3, 2)), "cannot be 
represented") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(134.12), DecimalType(3, 2)), "cannot be represented") - } - } - test("Process Infinity, -Infinity, NaN in case insensitive manner") { Seq("inf", "+inf", "infinity", "+infiNity", " infinity ").foreach { value => checkEvaluation(cast(value, FloatType), Float.PositiveInfinity) @@ -903,14 +738,15 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(value, DoubleType), Double.NaN) } } +} + +abstract class AnsiCastSuiteBase extends CastSuiteBase { private def testIntMaxAndMin(dt: DataType): Unit = { assert(Seq(IntegerType, ShortType, ByteType).contains(dt)) Seq(Int.MaxValue + 1L, Int.MinValue - 1L).foreach { value => checkExceptionInExpression[ArithmeticException](cast(value, dt), "overflow") checkExceptionInExpression[ArithmeticException](cast(Decimal(value.toString), dt), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value * MICROS_PER_SECOND, TimestampType), dt), "overflow") checkExceptionInExpression[ArithmeticException]( cast(Literal(value * 1.5f, FloatType), dt), "overflow") checkExceptionInExpression[ArithmeticException]( @@ -930,151 +766,219 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } - test("Throw exception on casting out-of-range value to byte type") { - withSQLConf(SQLConf.ANSI_ENABLED.key -> requiredAnsiEnabledForOverflowTestCases.toString) { - testIntMaxAndMin(ByteType) - Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value => - checkExceptionInExpression[ArithmeticException](cast(value, ByteType), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value.toFloat, FloatType), ByteType), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value.toDouble, DoubleType), ByteType), "overflow") - } + test("ANSI mode: Throw exception on casting out-of-range value to byte type") { + testIntMaxAndMin(ByteType) + Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value => + checkExceptionInExpression[ArithmeticException](cast(value, ByteType), "overflow") + checkExceptionInExpression[ArithmeticException]( + cast(Literal(value.toFloat, FloatType), ByteType), "overflow") + checkExceptionInExpression[ArithmeticException]( + cast(Literal(value.toDouble, DoubleType), ByteType), "overflow") + } - Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value => - checkEvaluation(cast(value, ByteType), value) - checkEvaluation(cast(value.toString, ByteType), value) - checkEvaluation(cast(Decimal(value.toString), ByteType), value) - checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), value) - checkEvaluation(cast(Literal(value.toInt, DateType), ByteType), null) - checkEvaluation(cast(Literal(value.toFloat, FloatType), ByteType), value) - checkEvaluation(cast(Literal(value.toDouble, DoubleType), ByteType), value) - } + Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value => + checkEvaluation(cast(value, ByteType), value) + checkEvaluation(cast(value.toString, ByteType), value) + checkEvaluation(cast(Decimal(value.toString), ByteType), value) + checkEvaluation(cast(Literal(value.toFloat, FloatType), ByteType), value) + checkEvaluation(cast(Literal(value.toDouble, DoubleType), ByteType), value) } } - test("Throw exception on casting out-of-range value to short type") { - 
withSQLConf(SQLConf.ANSI_ENABLED.key -> requiredAnsiEnabledForOverflowTestCases.toString) { - testIntMaxAndMin(ShortType) - Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value => - checkExceptionInExpression[ArithmeticException](cast(value, ShortType), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value.toFloat, FloatType), ShortType), "overflow") - checkExceptionInExpression[ArithmeticException]( - cast(Literal(value.toDouble, DoubleType), ShortType), "overflow") - } + test("ANSI mode: Throw exception on casting out-of-range value to short type") { + testIntMaxAndMin(ShortType) + Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value => + checkExceptionInExpression[ArithmeticException](cast(value, ShortType), "overflow") + checkExceptionInExpression[ArithmeticException]( + cast(Literal(value.toFloat, FloatType), ShortType), "overflow") + checkExceptionInExpression[ArithmeticException]( + cast(Literal(value.toDouble, DoubleType), ShortType), "overflow") + } - Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value => - checkEvaluation(cast(value, ShortType), value) - checkEvaluation(cast(value.toString, ShortType), value) - checkEvaluation(cast(Decimal(value.toString), ShortType), value) - checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), value) - checkEvaluation(cast(Literal(value.toInt, DateType), ShortType), null) - checkEvaluation(cast(Literal(value.toFloat, FloatType), ShortType), value) - checkEvaluation(cast(Literal(value.toDouble, DoubleType), ShortType), value) - } + Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value => + checkEvaluation(cast(value, ShortType), value) + checkEvaluation(cast(value.toString, ShortType), value) + checkEvaluation(cast(Decimal(value.toString), ShortType), value) + checkEvaluation(cast(Literal(value.toFloat, FloatType), ShortType), value) + checkEvaluation(cast(Literal(value.toDouble, DoubleType), ShortType), value) } } - test("Throw exception on casting out-of-range value to int type") { - withSQLConf(SQLConf.ANSI_ENABLED.key -> requiredAnsiEnabledForOverflowTestCases.toString) { - testIntMaxAndMin(IntegerType) - testLongMaxAndMin(IntegerType) + test("ANSI mode: Throw exception on casting out-of-range value to int type") { + testIntMaxAndMin(IntegerType) + testLongMaxAndMin(IntegerType) - Seq(Int.MaxValue, 0, Int.MinValue).foreach { value => - checkEvaluation(cast(value, IntegerType), value) - checkEvaluation(cast(value.toString, IntegerType), value) - checkEvaluation(cast(Decimal(value.toString), IntegerType), value) - checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, TimestampType), IntegerType), value) - checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType), value) - } - checkEvaluation(cast(Int.MaxValue + 0.9D, IntegerType), Int.MaxValue) - checkEvaluation(cast(Int.MinValue - 0.9D, IntegerType), Int.MinValue) + Seq(Int.MaxValue, 0, Int.MinValue).foreach { value => + checkEvaluation(cast(value, IntegerType), value) + checkEvaluation(cast(value.toString, IntegerType), value) + checkEvaluation(cast(Decimal(value.toString), IntegerType), value) + checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType), value) } + checkEvaluation(cast(Int.MaxValue + 0.9D, IntegerType), Int.MaxValue) + checkEvaluation(cast(Int.MinValue - 0.9D, IntegerType), Int.MinValue) } - test("Throw exception on casting 
out-of-range value to long type") { - withSQLConf(SQLConf.ANSI_ENABLED.key -> requiredAnsiEnabledForOverflowTestCases.toString) { - testLongMaxAndMin(LongType) + test("ANSI mode: Throw exception on casting out-of-range value to long type") { + testLongMaxAndMin(LongType) - Seq(Long.MaxValue, 0, Long.MinValue).foreach { value => - checkEvaluation(cast(value, LongType), value) - checkEvaluation(cast(value.toString, LongType), value) - checkEvaluation(cast(Decimal(value.toString), LongType), value) - checkEvaluation(cast(Literal(value, TimestampType), LongType), - Math.floorDiv(value, MICROS_PER_SECOND)) - } - checkEvaluation(cast(Long.MaxValue + 0.9F, LongType), Long.MaxValue) - checkEvaluation(cast(Long.MinValue - 0.9F, LongType), Long.MinValue) - checkEvaluation(cast(Long.MaxValue + 0.9D, LongType), Long.MaxValue) - checkEvaluation(cast(Long.MinValue - 0.9D, LongType), Long.MinValue) + Seq(Long.MaxValue, 0, Long.MinValue).foreach { value => + checkEvaluation(cast(value, LongType), value) + checkEvaluation(cast(value.toString, LongType), value) + checkEvaluation(cast(Decimal(value.toString), LongType), value) } + checkEvaluation(cast(Long.MaxValue + 0.9F, LongType), Long.MaxValue) + checkEvaluation(cast(Long.MinValue - 0.9F, LongType), Long.MinValue) + checkEvaluation(cast(Long.MaxValue + 0.9D, LongType), Long.MaxValue) + checkEvaluation(cast(Long.MinValue - 0.9D, LongType), Long.MinValue) } -} -/** - * Test suite for data type casting expression [[Cast]]. - */ -class CastSuite extends CastSuiteBase { - // It is required to set SQLConf.ANSI_ENABLED as true for testing numeric overflow. - override protected def requiredAnsiEnabledForOverflowTestCases: Boolean = true + test("ANSI mode: Throw exception on casting out-of-range value to decimal type") { + checkExceptionInExpression[ArithmeticException]( + cast(Literal("134.12"), DecimalType(3, 2)), "cannot be represented") + checkExceptionInExpression[ArithmeticException]( + cast(Literal(BigDecimal(134.12)), DecimalType(3, 2)), "cannot be represented") + checkExceptionInExpression[ArithmeticException]( + cast(Literal(134.12), DecimalType(3, 2)), "cannot be represented") + } - override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { - v match { - case lit: Expression => Cast(lit, targetType, timeZoneId) - case _ => Cast(Literal(v), targetType, timeZoneId) + test("ANSI mode: disallow type conversions between Numeric types and Timestamp type") { + import DataTypeTestUtils.numericTypes + checkInvalidCastFromNumericType(TimestampType) + val timestampLiteral = Literal(1L, TimestampType) + numericTypes.foreach { numericType => + assert(cast(timestampLiteral, numericType).checkInputDataTypes().isFailure) } } - test("cast from int") { - checkCast(0, false) - checkCast(1, true) - checkCast(-5, true) - checkCast(1, 1.toByte) - checkCast(1, 1.toShort) - checkCast(1, 1) - checkCast(1, 1.toLong) - checkCast(1, 1.0f) - checkCast(1, 1.0) - checkCast(123, "123") + test("ANSI mode: disallow type conversions between Numeric types and Date type") { + import DataTypeTestUtils.numericTypes + checkInvalidCastFromNumericType(DateType) + val dateLiteral = Literal(1, DateType) + numericTypes.foreach { numericType => + assert(cast(dateLiteral, numericType).checkInputDataTypes().isFailure) + } + } - checkEvaluation(cast(123, DecimalType.USER_DEFAULT), Decimal(123)) - checkEvaluation(cast(123, DecimalType(3, 0)), Decimal(123)) - checkEvaluation(cast(123, DecimalType(3, 1)), null) - checkEvaluation(cast(123, DecimalType(2, 0)), null) + 
test("ANSI mode: disallow type conversions between Numeric types and Binary type") { + import DataTypeTestUtils.numericTypes + checkInvalidCastFromNumericType(BinaryType) + val binaryLiteral = Literal(new Array[Byte](1.toByte), BinaryType) + numericTypes.foreach { numericType => + assert(cast(binaryLiteral, numericType).checkInputDataTypes().isFailure) + } } - test("cast from long") { - checkCast(0L, false) - checkCast(1L, true) - checkCast(-5L, true) - checkCast(1L, 1.toByte) - checkCast(1L, 1.toShort) - checkCast(1L, 1) - checkCast(1L, 1.toLong) - checkCast(1L, 1.0f) - checkCast(1L, 1.0) - checkCast(123L, "123") + test("ANSI mode: disallow type conversions between Datatime types and Boolean types") { + val timestampLiteral = Literal(1L, TimestampType) + assert(cast(timestampLiteral, BooleanType).checkInputDataTypes().isFailure) + val dateLiteral = Literal(1, DateType) + assert(cast(dateLiteral, BooleanType).checkInputDataTypes().isFailure) - checkEvaluation(cast(123L, DecimalType.USER_DEFAULT), Decimal(123)) - checkEvaluation(cast(123L, DecimalType(3, 0)), Decimal(123)) - checkEvaluation(cast(123L, DecimalType(3, 1)), null) + val booleanLiteral = Literal(true, BooleanType) + assert(cast(booleanLiteral, TimestampType).checkInputDataTypes().isFailure) + assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure) + } - checkEvaluation(cast(123L, DecimalType(2, 0)), null) + test("ANSI mode: disallow casting complex types as String type") { + assert(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType).checkInputDataTypes().isFailure) + assert(cast(Literal.create(Map(1 -> "a")), StringType).checkInputDataTypes().isFailure) + assert(cast(Literal.create((1, "a", 0.1)), StringType).checkInputDataTypes().isFailure) } - test("cast from int 2") { - checkEvaluation(cast(1, LongType), 1.toLong) + test("cast from invalid string to numeric should throw NumberFormatException") { + // cast to IntegerType + Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => + val array = Literal.create(Seq("123", "true", "f", null), + ArrayType(StringType, containsNull = true)) + checkExceptionInExpression[NumberFormatException]( + cast(array, ArrayType(dataType, containsNull = true)), + "invalid input syntax for type numeric: true") + checkExceptionInExpression[NumberFormatException]( + cast("string", dataType), "invalid input syntax for type numeric: string") + checkExceptionInExpression[NumberFormatException]( + cast("123-string", dataType), "invalid input syntax for type numeric: 123-string") + checkExceptionInExpression[NumberFormatException]( + cast("2020-07-19", dataType), "invalid input syntax for type numeric: 2020-07-19") + checkExceptionInExpression[NumberFormatException]( + cast("1.23", dataType), "invalid input syntax for type numeric: 1.23") + } + + Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType => + checkExceptionInExpression[NumberFormatException]( + cast("string", dataType), "invalid input syntax for type numeric: string") + checkExceptionInExpression[NumberFormatException]( + cast("123.000.00", dataType), "invalid input syntax for type numeric: 123.000.00") + checkExceptionInExpression[NumberFormatException]( + cast("abc.com", dataType), "invalid input syntax for type numeric: abc.com") + } + } + + test("Fast fail for cast string type to decimal type in ansi mode") { + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 0)), + Decimal("12345678901234567890123456789012345678")) + 
checkExceptionInExpression[ArithmeticException]( + cast("123456789012345678901234567890123456789", DecimalType(38, 0)), + "out of decimal type range") + checkExceptionInExpression[ArithmeticException]( + cast("12345678901234567890123456789012345678", DecimalType(38, 1)), + "cannot be represented as Decimal(38, 1)") + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 18)), + Decimal("0E-18")) + checkEvaluation(cast("6E-120", DecimalType(38, 0)), + Decimal("0")) + + checkEvaluation(cast("6E+37", DecimalType(38, 0)), + Decimal("60000000000000000000000000000000000000")) + checkExceptionInExpression[ArithmeticException]( + cast("6E+38", DecimalType(38, 0)), + "out of decimal type range") + checkExceptionInExpression[ArithmeticException]( + cast("6E+37", DecimalType(38, 1)), + "cannot be represented as Decimal(38, 1)") + + checkExceptionInExpression[NumberFormatException]( + cast("abcd", DecimalType(38, 1)), + "invalid input syntax for type numeric") + } +} + +/** + * Test suite for data type casting expression [[Cast]]. + */ +class CastSuite extends CastSuiteBase { + + override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { + v match { + case lit: Expression => Cast(lit, targetType, timeZoneId) + case _ => Cast(Literal(v), targetType, timeZoneId) + } + } + + test("null cast #2") { + import DataTypeTestUtils._ + + checkNullCast(DateType, BooleanType) + checkNullCast(TimestampType, BooleanType) + checkNullCast(BooleanType, TimestampType) + numericTypes.foreach(dt => checkNullCast(dt, TimestampType)) + numericTypes.foreach(dt => checkNullCast(TimestampType, dt)) + numericTypes.foreach(dt => checkNullCast(DateType, dt)) + } + + test("cast from long #2") { + checkEvaluation(cast(123L, DecimalType(3, 1)), null) + checkEvaluation(cast(123L, DecimalType(2, 0)), null) + } + + test("cast from int #2") { checkEvaluation(cast(cast(1000, TimestampType), LongType), 1000.toLong) checkEvaluation(cast(cast(-1200, TimestampType), LongType), -1200.toLong) - checkEvaluation(cast(123, DecimalType.USER_DEFAULT), Decimal(123)) - checkEvaluation(cast(123, DecimalType(3, 0)), Decimal(123)) checkEvaluation(cast(123, DecimalType(3, 1)), null) checkEvaluation(cast(123, DecimalType(2, 0)), null) } @@ -1343,6 +1247,58 @@ class CastSuite extends CastSuiteBase { } } + test("cast from date") { + val d = Date.valueOf("1970-01-01") + checkEvaluation(cast(d, ShortType), null) + checkEvaluation(cast(d, IntegerType), null) + checkEvaluation(cast(d, LongType), null) + checkEvaluation(cast(d, FloatType), null) + checkEvaluation(cast(d, DoubleType), null) + checkEvaluation(cast(d, DecimalType.SYSTEM_DEFAULT), null) + checkEvaluation(cast(d, DecimalType(10, 2)), null) + checkEvaluation(cast(d, StringType), "1970-01-01") + + checkEvaluation( + cast(cast(d, TimestampType, UTC_OPT), StringType, UTC_OPT), + "1970-01-01 00:00:00") + } + + test("cast from timestamp") { + val millis = 15 * 1000 + 3 + val seconds = millis * 1000 + 3 + val ts = new Timestamp(millis) + val tss = new Timestamp(seconds) + checkEvaluation(cast(ts, ShortType), 15.toShort) + checkEvaluation(cast(ts, IntegerType), 15) + checkEvaluation(cast(ts, LongType), 15.toLong) + checkEvaluation(cast(ts, FloatType), 15.003f) + checkEvaluation(cast(ts, DoubleType), 15.003) + + 
checkEvaluation(cast(cast(tss, ShortType), TimestampType), + fromJavaTimestamp(ts) * MILLIS_PER_SECOND) + checkEvaluation(cast(cast(tss, IntegerType), TimestampType), + fromJavaTimestamp(ts) * MILLIS_PER_SECOND) + checkEvaluation(cast(cast(tss, LongType), TimestampType), + fromJavaTimestamp(ts) * MILLIS_PER_SECOND) + checkEvaluation( + cast(cast(millis.toFloat / MILLIS_PER_SECOND, TimestampType), FloatType), + millis.toFloat / MILLIS_PER_SECOND) + checkEvaluation( + cast(cast(millis.toDouble / MILLIS_PER_SECOND, TimestampType), DoubleType), + millis.toDouble / MILLIS_PER_SECOND) + checkEvaluation( + cast(cast(Decimal(1), TimestampType), DecimalType.SYSTEM_DEFAULT), + Decimal(1)) + + // A test for higher precision than millis + checkEvaluation(cast(cast(0.000001, TimestampType), DoubleType), 0.000001) + + checkEvaluation(cast(Double.NaN, TimestampType), null) + checkEvaluation(cast(1.0 / 0.0, TimestampType), null) + checkEvaluation(cast(Float.NaN, TimestampType), null) + checkEvaluation(cast(1.0f / 0.0f, TimestampType), null) + } + test("cast a timestamp before the epoch 1970-01-01 00:00:00Z") { withDefaultTimeZone(UTC) { val negativeTs = Timestamp.valueOf("1900-05-05 18:34:56.1") @@ -1396,93 +1352,199 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast("abcd", DecimalType(38, 1)), null) } + + test("SPARK-22825 Cast array to string") { + val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) + checkEvaluation(ret1, "[1, 2, 3, 4, 5]") + val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) + checkEvaluation(ret2, "[ab, cde, f]") + Seq(false, true).foreach { omitNull => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { + val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) + checkEvaluation(ret3, s"[ab,${if (omitNull) "" else " null"}, c]") + } + } + val ret4 = + cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) + checkEvaluation(ret4, "[ab, cde, f]") + val ret5 = cast( + Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), + StringType) + checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") + val ret6 = cast( + Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00") + .map(Timestamp.valueOf)), + StringType) + checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") + val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) + checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") + val ret8 = cast( + Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), + StringType) + checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") + } + + test("SPARK-33291: Cast array with null elements to string") { + Seq(false, true).foreach { omitNull => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { + val ret1 = cast(Literal.create(Array(null, null)), StringType) + checkEvaluation( + ret1, + s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") + } + } + } + + test("SPARK-22973 Cast map to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) + checkEvaluation(ret1, s"${lb}1 -> a, 2 -> b, 3 -> c$rb") + val ret2 = cast( + Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), + StringType) + checkEvaluation(ret2, s"${lb}1 -> a, 
2 ->${if (legacyCast) "" else " null"}, 3 -> c$rb") + val ret3 = cast( + Literal.create(Map( + 1 -> Date.valueOf("2014-12-03"), + 2 -> Date.valueOf("2014-12-04"), + 3 -> Date.valueOf("2014-12-05"))), + StringType) + checkEvaluation(ret3, s"${lb}1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05$rb") + val ret4 = cast( + Literal.create(Map( + 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), + 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), + StringType) + checkEvaluation(ret4, s"${lb}1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00$rb") + val ret5 = cast( + Literal.create(Map( + 1 -> Array(1, 2, 3), + 2 -> Array(4, 5, 6))), + StringType) + checkEvaluation(ret5, s"${lb}1 -> [1, 2, 3], 2 -> [4, 5, 6]$rb") + } + } + } + + test("SPARK-22981 Cast struct to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) + checkEvaluation(ret1, s"${lb}1, a, 0.1$rb") + val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) + checkEvaluation(ret2, s"${lb}1,${if (legacyCast) "" else " null"}, a$rb") + val ret3 = cast(Literal.create( + (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) + checkEvaluation(ret3, s"${lb}2014-12-03, 2014-12-03 15:05:00$rb") + val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) + checkEvaluation(ret4, s"$lb${lb}1, a$rb, 5, 0.1$rb") + val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) + checkEvaluation(ret5, s"$lb[1, 2, 3], a, 0.1$rb") + val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) + checkEvaluation(ret6, s"${lb}1, ${lb}1 -> a, 2 -> b, 3 -> c$rb$rb") + } + } + } + + test("SPARK-33291: Cast struct with null elements to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) + checkEvaluation( + ret1, + s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") + } + } + } + + test("data type casting II") { + checkEvaluation( + cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType), + DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType), + 5.toShort) + checkEvaluation( + cast(cast(cast(cast(cast(cast("5", TimestampType, UTC_OPT), ByteType), + DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType), + null) + checkEvaluation(cast(cast(cast(cast(cast(cast("5", DecimalType.SYSTEM_DEFAULT), + ByteType), TimestampType), LongType), StringType), ShortType), + 5.toShort) + } + + test("Cast from double II") { + checkEvaluation(cast(cast(1.toDouble, TimestampType), DoubleType), 1.toDouble) + } } /** - * Test suite for data type casting expression [[AnsiCast]]. + * Test suite for data type casting expression [[Cast]] with ANSI mode disabled. */ -class AnsiCastSuite extends CastSuiteBase { - // It is not required to set SQLConf.ANSI_ENABLED as true for testing numeric overflow. 
- override protected def requiredAnsiEnabledForOverflowTestCases: Boolean = false +class CastSuiteWithAnsiModeOn extends AnsiCastSuiteBase { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true) + } + + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) + } override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { v match { - case lit: Expression => AnsiCast(lit, targetType, timeZoneId) - case _ => AnsiCast(Literal(v), targetType, timeZoneId) + case lit: Expression => Cast(lit, targetType, timeZoneId) + case _ => Cast(Literal(v), targetType, timeZoneId) } } +} - test("cast from invalid string to numeric should throw NumberFormatException") { - // cast to IntegerType - Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => - val array = Literal.create(Seq("123", "true", "f", null), - ArrayType(StringType, containsNull = true)) - checkExceptionInExpression[NumberFormatException]( - cast(array, ArrayType(dataType, containsNull = true)), - "invalid input syntax for type numeric: true") - checkExceptionInExpression[NumberFormatException]( - cast("string", dataType), "invalid input syntax for type numeric: string") - checkExceptionInExpression[NumberFormatException]( - cast("123-string", dataType), "invalid input syntax for type numeric: 123-string") - checkExceptionInExpression[NumberFormatException]( - cast("2020-07-19", dataType), "invalid input syntax for type numeric: 2020-07-19") - checkExceptionInExpression[NumberFormatException]( - cast("1.23", dataType), "invalid input syntax for type numeric: 1.23") - } +/** + * Test suite for data type casting expression [[AnsiCast]] with ANSI mode enabled. + */ +class AnsiCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true) + } - Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType => - checkExceptionInExpression[NumberFormatException]( - cast("string", dataType), "invalid input syntax for type numeric: string") - checkExceptionInExpression[NumberFormatException]( - cast("123.000.00", dataType), "invalid input syntax for type numeric: 123.000.00") - checkExceptionInExpression[NumberFormatException]( - cast("abc.com", dataType), "invalid input syntax for type numeric: abc.com") - } + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) } - test("cast a timestamp before the epoch 1970-01-01 00:00:00Z") { - def errMsg(t: String): String = s"Casting -2198208303900000 to $t causes overflow" - withDefaultTimeZone(UTC) { - val negativeTs = Timestamp.valueOf("1900-05-05 18:34:56.1") - assert(negativeTs.getTime < 0) - val expectedSecs = Math.floorDiv(negativeTs.getTime, MILLIS_PER_SECOND) - checkExceptionInExpression[ArithmeticException](cast(negativeTs, ByteType), errMsg("byte")) - checkExceptionInExpression[ArithmeticException](cast(negativeTs, ShortType), errMsg("short")) - checkExceptionInExpression[ArithmeticException](cast(negativeTs, IntegerType), errMsg("int")) - checkEvaluation(cast(negativeTs, LongType), expectedSecs) + override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { + v match { + case lit: Expression => AnsiCast(lit, targetType, timeZoneId) + case _ => AnsiCast(Literal(v), targetType, timeZoneId) } } +} - test("Fast fail for cast string type to decimal type in ansi mode") { - 
checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 0)), - Decimal("12345678901234567890123456789012345678")) - checkExceptionInExpression[ArithmeticException]( - cast("123456789012345678901234567890123456789", DecimalType(38, 0)), - "out of decimal type range") - checkExceptionInExpression[ArithmeticException]( - cast("12345678901234567890123456789012345678", DecimalType(38, 1)), - "cannot be represented as Decimal(38, 1)") - - checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 0)), - Decimal("0")) - checkEvaluation(cast("0.00000000000000000000000000000000000000000001", DecimalType(38, 0)), - Decimal("0")) - checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 18)), - Decimal("0E-18")) - checkEvaluation(cast("6E-120", DecimalType(38, 0)), - Decimal("0")) +/** + * Test suite for data type casting expression [[AnsiCast]] with ANSI mode disabled. + */ +class AnsiCastSuiteWithAnsiModeOff extends AnsiCastSuiteBase { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConf(SQLConf.ANSI_ENABLED, false) + } - checkEvaluation(cast("6E+37", DecimalType(38, 0)), - Decimal("60000000000000000000000000000000000000")) - checkExceptionInExpression[ArithmeticException]( - cast("6E+38", DecimalType(38, 0)), - "out of decimal type range") - checkExceptionInExpression[ArithmeticException]( - cast("6E+37", DecimalType(38, 1)), - "cannot be represented as Decimal(38, 1)") + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) + } - checkExceptionInExpression[NumberFormatException]( - cast("abcd", DecimalType(38, 1)), - "invalid input syntax for type numeric") + override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { + v match { + case lit: Expression => AnsiCast(lit, targetType, timeZoneId) + case _ => AnsiCast(Literal(v), targetType, timeZoneId) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 6ee88c9eaef86..095894b9fffac 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1915,4 +1915,19 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } } } + + test("SPARK-33460: element_at NoSuchElementException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + val map = Literal.create(Map(1 -> "a", 2 -> "b"), MapType(IntegerType, StringType)) + val expr: Expression = ElementAt(map, Literal(5)) + if (ansiEnabled) { + val errMsg = "Key 5 does not exist." 
+ checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 67ab2071de037..3d6f6937e780b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -85,6 +85,23 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-33460: GetMapValue NoSuchElementException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + val map = Literal.create(Map(1 -> "a", 2 -> "b"), MapType(IntegerType, StringType)) + + if (ansiEnabled) { + checkExceptionInExpression[Exception]( + GetMapValue(map, Literal(5)), + "Key 5 does not exist." + ) + } else { + checkEvaluation(GetMapValue(map, Literal(5)), null) + } + } + } + } + test("SPARK-26637 handles GetArrayItem nullability correctly when input array size is constant") { // CreateArray case val a = AttributeReference("a", IntegerType, nullable = false)() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala index c31310bc54023..8f030b45e5d3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala @@ -80,4 +80,50 @@ class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { assert(errMsg.contains("MutableProjection cannot use UnsafeRow for output data types:")) } } + + test("SPARK-33473: subexpression elimination for interpreted MutableProjection") { + Seq("true", "false").foreach { enabled => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> enabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val one = BoundReference(0, DoubleType, true) + val two = BoundReference(1, DoubleType, true) + + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + val proj = MutableProjection.create(Seq(sum)) + val result = (d1: Double, d2: Double) => + ((d1 * d2) * (d1 * d2)) + Math.sqrt((d1 * d2) * (d1 * d2)) + + val inputRows = Seq( + InternalRow.fromSeq(Seq(1.0, 2.0)), + InternalRow.fromSeq(Seq(2.0, 3.0)), + InternalRow.fromSeq(Seq(1.0, null)), + InternalRow.fromSeq(Seq(null, 2.0)), + InternalRow.fromSeq(Seq(3.0, 4.0)), + InternalRow.fromSeq(Seq(null, null)) + ) + val expectedResults = Seq( + result(1.0, 2.0), + result(2.0, 3.0), + null, + null, + result(3.0, 4.0), + null + ) + + inputRows.zip(expectedResults).foreach { case (inputRow, expected) => + val projRow = proj.apply(inputRow) + if (expected != null) { + assert(projRow.getDouble(0) == expected) + } else { + assert(projRow.isNullAt(0)) + } + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index ff33324c3bb18..bc2b93e5390da 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -28,7 +28,7 @@ import scala.util.Random import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.sql.{RandomDataGenerator, Row} -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, JavaTypeInference, ScalaReflection} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.ScroogeLikeExample import org.apache.spark.sql.catalyst.analysis.{ResolveTimeZone, SimpleAnalyzer, UnresolvedDeserializer} import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -37,9 +37,8 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData, IntervalUtils} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String class InvokeTargetClass extends Serializable { def filterInt(e: Any): Any = e.asInstanceOf[Int] > 0 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 77a32a735f76d..cc5ab5dc7b4e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -48,6 +48,30 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(mkExpr(regex), expected, create_row(input)) // check row input } + test("LIKE ALL") { + checkEvaluation(Literal.create(null, StringType).likeAll("%foo%", "%oo"), null) + checkEvaluation(Literal.create("foo", StringType).likeAll("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", StringType).likeAll("%foo%", "%bar%"), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAll("%foo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAll(Literal.create(null, StringType), "%foo%"), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAll("%feo%", Literal.create(null, StringType)), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAll(Literal.create(null, StringType), "%feo%"), false) + checkEvaluation(Literal.create("foo", StringType).notLikeAll("tee", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAll("%oo%", "%yoo%"), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll("%foo%", Literal.create(null, StringType)), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll(Literal.create(null, StringType), "%foo%"), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll("%yoo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll(Literal.create(null, StringType), "%yoo%"), null) + } + test("LIKE Pattern") { // null handling diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala new file mode 100644 index 0000000000000..64b619ca7766b --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite + +class SubExprEvaluationRuntimeSuite extends SparkFunSuite { + + test("Evaluate ExpressionProxy should create cached result") { + val runtime = new SubExprEvaluationRuntime(1) + val proxy = ExpressionProxy(Literal(1), 0, runtime) + assert(runtime.cache.size() == 0) + proxy.eval() + assert(runtime.cache.size() == 1) + assert(runtime.cache.get(proxy) == ResultProxy(1)) + } + + test("SubExprEvaluationRuntime cannot exceed configured max entries") { + val runtime = new SubExprEvaluationRuntime(2) + assert(runtime.cache.size() == 0) + + val proxy1 = ExpressionProxy(Literal(1), 0, runtime) + proxy1.eval() + assert(runtime.cache.size() == 1) + assert(runtime.cache.get(proxy1) == ResultProxy(1)) + + val proxy2 = ExpressionProxy(Literal(2), 1, runtime) + proxy2.eval() + assert(runtime.cache.size() == 2) + assert(runtime.cache.get(proxy2) == ResultProxy(2)) + + val proxy3 = ExpressionProxy(Literal(3), 2, runtime) + proxy3.eval() + assert(runtime.cache.size() == 2) + assert(runtime.cache.get(proxy3) == ResultProxy(3)) + } + + test("setInput should empty cached result") { + val runtime = new SubExprEvaluationRuntime(2) + val proxy1 = ExpressionProxy(Literal(1), 0, runtime) + assert(runtime.cache.size() == 0) + proxy1.eval() + assert(runtime.cache.size() == 1) + assert(runtime.cache.get(proxy1) == ResultProxy(1)) + + val proxy2 = ExpressionProxy(Literal(2), 1, runtime) + proxy2.eval() + assert(runtime.cache.size() == 2) + assert(runtime.cache.get(proxy2) == ResultProxy(2)) + + runtime.setInput() + assert(runtime.cache.size() == 0) + } + + test("Wrap ExpressionProxy on subexpressions") { + val runtime = new SubExprEvaluationRuntime(1) + + val one = Literal(1) + val two = Literal(2) + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + // ( (one * two) * (one * two) ) + sqrt( (one * two) * (one * two) ) + val proxyExpressions = runtime.proxyExpressions(Seq(sum)) + val proxys = proxyExpressions.flatMap(_.collect { + case p: ExpressionProxy => p + }) + // ( (one * two) * (one * two) ) + assert(proxys.size == 2) + assert(proxys.forall(_.child == mul2)) + } + + test("ExpressionProxy won't be on non deterministic") { + val runtime = new 
SubExprEvaluationRuntime(1) + + val sum = Add(Rand(0), Rand(0)) + val proxys = runtime.proxyExpressions(Seq(sum, sum)).flatMap(_.collect { + case p: ExpressionProxy => p + }) + assert(proxys.isEmpty) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala index 303fa137d8925..53e8ee9fbe715 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.util.{ArrayData, QuantileSummaries} import org.apache.spark.sql.catalyst.util.QuantileSummaries.Stats -import org.apache.spark.sql.types.{ArrayType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType, LongType} +import org.apache.spark.sql.types.{ArrayType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType} import org.apache.spark.util.SizeEstimator class ApproximatePercentileSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala index 67e3bc69543e8..d660afb7f8a05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{BooleanType, IntegerType} +import org.apache.spark.sql.types.IntegerType class CodeBlockSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 4c9bcfe8f93a6..180665e653727 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -23,13 +23,14 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** * A test suite for generated projections */ -class GeneratedProjectionSuite extends SparkFunSuite { +class GeneratedProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { test("generated projections on wider table") { val N = 1000 @@ -246,4 +247,50 @@ class GeneratedProjectionSuite extends SparkFunSuite { val row2 = mutableProj(result) assert(result === row2) } + + test("SPARK-33473: subexpression elimination for interpreted SafeProjection") { + Seq("true", "false").foreach { enabled => + withSQLConf( + 
SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> enabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val one = BoundReference(0, DoubleType, true) + val two = BoundReference(1, DoubleType, true) + + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + val proj = SafeProjection.create(Seq(sum)) + val result = (d1: Double, d2: Double) => + ((d1 * d2) * (d1 * d2)) + Math.sqrt((d1 * d2) * (d1 * d2)) + + val inputRows = Seq( + InternalRow.fromSeq(Seq(1.0, 2.0)), + InternalRow.fromSeq(Seq(2.0, 3.0)), + InternalRow.fromSeq(Seq(1.0, null)), + InternalRow.fromSeq(Seq(null, 2.0)), + InternalRow.fromSeq(Seq(3.0, 4.0)), + InternalRow.fromSeq(Seq(null, null)) + ) + val expectedResults = Seq( + result(1.0, 2.0), + result(2.0, 3.0), + null, + null, + result(3.0, 4.0), + null + ) + + inputRows.zip(expectedResults).foreach { case (inputRow, expected) => + val projRow = proj.apply(inputRow) + if (expected != null) { + assert(projRow.getDouble(0) == expected) + } else { + assert(projRow.isNullAt(0)) + } + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 03d75340e31e9..04dcf50e0c3c5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.BooleanType class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper with PredicateHelper { @@ -188,25 +187,23 @@ class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper with checkCondition(!(('e || 'f) && ('g || 'h)), (!'e && !'f) || (!'g && !'h)) } - private val caseInsensitiveConf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> false) - private val caseInsensitiveAnalyzer = new Analyzer( - new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, caseInsensitiveConf), - caseInsensitiveConf) + private val analyzer = new Analyzer( + new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry)) test("(a && b) || (a && c) => a && (b || c) when case insensitive") { - val plan = caseInsensitiveAnalyzer.execute( + val plan = analyzer.execute( testRelation.where(('a > 2 && 'b > 3) || ('A > 2 && 'b < 5))) val actual = Optimize.execute(plan) - val expected = caseInsensitiveAnalyzer.execute( + val expected = analyzer.execute( testRelation.where('a > 2 && ('b > 3 || 'b < 5))) comparePlans(actual, expected) } test("(a || b) && (a || c) => a || (b && c) when case insensitive") { - val plan = caseInsensitiveAnalyzer.execute( + val plan = analyzer.execute( testRelation.where(('a > 2 || 'b > 3) && ('A > 2 || 'b < 5))) val actual = Optimize.execute(plan) - val expected = caseInsensitiveAnalyzer.execute( + val expected = analyzer.execute( testRelation.where('a > 2 || ('b > 3 && 'b < 5))) comparePlans(actual, expected) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index b190dd5a7c220..70f130f834c68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -30,8 +30,8 @@ class CombiningLimitsSuite extends PlanTest { Batch("Column Pruning", FixedPoint(100), ColumnPruning, RemoveNoopOperators) :: - Batch("Combine Limit", FixedPoint(10), - CombineLimits) :: + Batch("Eliminate Limit", FixedPoint(10), + EliminateLimits) :: Batch("Constant Folding", FixedPoint(10), NullPropagation, ConstantFolding, @@ -90,4 +90,31 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33442: Change Combine Limit to Eliminate limit using max row") { + // test child max row <= limit. + val query1 = testRelation.select().groupBy()(count(1)).limit(1).analyze + val optimized1 = Optimize.execute(query1) + val expected1 = testRelation.select().groupBy()(count(1)).analyze + comparePlans(optimized1, expected1) + + // test child max row > limit. + val query2 = testRelation.select().groupBy()(count(1)).limit(0).analyze + val optimized2 = Optimize.execute(query2) + comparePlans(optimized2, query2) + + // test child max row is none + val query3 = testRelation.select(Symbol("a")).limit(1).analyze + val optimized3 = Optimize.execute(query3) + comparePlans(optimized3, query3) + + // test sort after limit + val query4 = testRelation.select().groupBy()(count(1)) + .orderBy(count(1).asc).limit(1).analyze + val optimized4 = Optimize.execute(query4) + // the top project has been removed, so we need optimize expected too + val expected4 = Optimize.execute( + testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) + comparePlans(optimized4, expected4) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index f40691bd1a038..51c751923e414 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class EliminateDistinctSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala index 9f031358611b1..82db174ad41b0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsBeforeRepartitionSuite.scala @@ -27,8 +27,8 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor class EliminateSortsBeforeRepartitionSuite extends PlanTest { - val catalog = new SessionCatalog(new 
InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, conf) + val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry) + val analyzer = new Analyzer(catalog) val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val anotherTestRelation = LocalRelation('d.int, 'e.int) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 11ec037c94f73..c518fdded2112 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -25,8 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, TimestampType} +import org.apache.spark.sql.types.{IntegerType, StringType} import org.apache.spark.unsafe.types.CalendarInterval class FilterPushdownSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index d993aee3d7518..e365e3300096e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -33,7 +33,7 @@ class LimitPushdownSuite extends PlanTest { EliminateSubqueryAliases) :: Batch("Limit pushdown", FixedPoint(100), LimitPushDown, - CombineLimits, + EliminateLimits, ConstantFolding, BooleanSimplification) :: Nil } @@ -74,7 +74,7 @@ class LimitPushdownSuite extends PlanTest { Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).limit(2) val unionOptimized = Optimize.execute(unionQuery.analyze) val unionCorrectAnswer = - Limit(2, Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1))).analyze + Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).analyze comparePlans(unionOptimized, unionCorrectAnswer) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala index 5998437f11f4d..42ab43242a16b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerStructuralIntegrityCheckerSuite.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalRelation, LogicalPlan, OneRowRelation, Project} import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.internal.SQLConf class OptimizerStructuralIntegrityCheckerSuite extends PlanTest { @@ -45,9 +44,8 @@ class OptimizerStructuralIntegrityCheckerSuite extends PlanTest { object Optimize extends Optimizer( new CatalogManager( - new SQLConf(), FakeV2SessionCatalog, - new SessionCatalog(new InMemoryCatalog, 
EmptyFunctionRegistry, new SQLConf()))) { + new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry))) { val newBatch = Batch("OptimizeRuleBreakSI", Once, OptimizeRuleBreakSI) override def defaultBatches: Seq[Batch] = Seq(newBatch) ++ super.defaultBatches } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index 8785bc7cd36cb..17dfc7f3f18f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregatesSuite.scala index 8cb939e010c68..5d6abf516f288 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregatesSuite.scala @@ -16,23 +16,15 @@ */ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL} import org.apache.spark.sql.types.{IntegerType, StringType} class RewriteDistinctAggregatesSuite extends PlanTest { - override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false) - val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, conf) - val nullInt = Literal(null, IntegerType) val nullString = Literal(null, StringType) val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala index 0ccf8aea660b2..c981cee55d0fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.catalyst.optimizer -import 
org.apache.spark.sql.catalyst.dsl._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 9878969959bfd..dcd2fbbf00529 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Range} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index cddc392cfa2d7..f93c0dcf59f4c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -21,12 +21,11 @@ import java.util.Locale import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} -import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, FunctionResourceType, JarResource} +import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first} import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.unsafe.types.UTF8String @@ -1629,32 +1628,6 @@ class DDLParserSuite extends AnalysisTest { TruncateTableStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10")))) } - test("SHOW PARTITIONS") { - val sql1 = "SHOW PARTITIONS t1" - val sql2 = "SHOW PARTITIONS db1.t1" - val sql3 = "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" - val sql4 = "SHOW PARTITIONS a.b.c" - val sql5 = "SHOW PARTITIONS a.b.c PARTITION(ds='2017-06-10')" - - val parsed1 = parsePlan(sql1) - val expected1 = ShowPartitionsStatement(Seq("t1"), None) - val 
parsed2 = parsePlan(sql2) - val expected2 = ShowPartitionsStatement(Seq("db1", "t1"), None) - val parsed3 = parsePlan(sql3) - val expected3 = ShowPartitionsStatement(Seq("t1"), - Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))) - val parsed4 = parsePlan(sql4) - val expected4 = ShowPartitionsStatement(Seq("a", "b", "c"), None) - val parsed5 = parsePlan(sql5) - val expected5 = ShowPartitionsStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - comparePlans(parsed3, expected3) - comparePlans(parsed4, expected4) - comparePlans(parsed5, expected5) - } - test("REFRESH TABLE") { comparePlans( parsePlan("REFRESH TABLE a.b.c"), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index 6ad132cdfe449..7c70ab98e4183 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -22,6 +22,7 @@ import org.scalatest.Suite import org.scalatest.Tag import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode @@ -56,10 +57,7 @@ trait CodegenInterpretedPlanTest extends PlanTest { * Provides helper methods for comparing plans, but without the overhead of * mandating a FunSuite. */ -trait PlanTestBase extends PredicateHelper with SQLHelper { self: Suite => - - // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules - protected def conf = SQLConf.get +trait PlanTestBase extends PredicateHelper with SQLHelper with SQLConfHelper { self: Suite => /** * Since attribute references are given globally unique ids during analysis, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index 1cf888519077a..878fae4c547b3 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.LeftOuter import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.{ColumnStatsMap, FilterEstimation} +import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.ColumnStatsMap import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index b0325600e7530..3b47271a114e2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -27,6 +27,7 @@ import scala.collection.mutable import 
org.scalatest.Assertions._ import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, JoinedRow} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, HoursTransform, IdentityTransform, MonthsTransform, Transform, YearsTransform} @@ -34,8 +35,9 @@ import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.connector.write._ import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.sources.{And, EqualTo, Filter, IsNotNull} -import org.apache.spark.sql.types.{DataType, DateType, StructType, TimestampType} +import org.apache.spark.sql.types.{DataType, DateType, IntegerType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.unsafe.types.UTF8String /** * A simple in-memory table. Rows are stored as a buffered group produced by each output task. @@ -45,7 +47,24 @@ class InMemoryTable( val schema: StructType, override val partitioning: Array[Transform], override val properties: util.Map[String, String]) - extends Table with SupportsRead with SupportsWrite with SupportsDelete { + extends Table with SupportsRead with SupportsWrite with SupportsDelete + with SupportsMetadataColumns { + + private object PartitionKeyColumn extends MetadataColumn { + override def name: String = "_partition" + override def dataType: DataType = StringType + override def comment: String = "Partition key used to store the row" + } + + private object IndexColumn extends MetadataColumn { + override def name: String = "index" + override def dataType: DataType = IntegerType + override def comment: String = "Metadata column used to conflict with a data column" + } + + // purposely exposes a metadata column that conflicts with a data column in some tests + override val metadataColumns: Array[MetadataColumn] = Array(IndexColumn, PartitionKeyColumn) + private val metadataColumnNames = metadataColumns.map(_.name).toSet -- schema.map(_.name) private val allowUnsupportedTransforms = properties.getOrDefault("allow-unsupported-transforms", "false").toBoolean @@ -146,7 +165,7 @@ class InMemoryTable( val key = getKey(row) dataMap += dataMap.get(key) .map(key -> _.withRow(row)) - .getOrElse(key -> new BufferedRows().withRow(row)) + .getOrElse(key -> new BufferedRows(key.toArray.mkString("/")).withRow(row)) }) this } @@ -160,17 +179,38 @@ class InMemoryTable( TableCapability.TRUNCATE).asJava override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { - () => new InMemoryBatchScan(data.map(_.asInstanceOf[InputPartition])) + new InMemoryScanBuilder(schema) + } + + class InMemoryScanBuilder(tableSchema: StructType) extends ScanBuilder + with SupportsPushDownRequiredColumns { + private var schema: StructType = tableSchema + + override def build: Scan = + new InMemoryBatchScan(data.map(_.asInstanceOf[InputPartition]), schema) + + override def pruneColumns(requiredSchema: StructType): Unit = { + // if metadata columns are projected, return the table schema and metadata columns + val hasMetadataColumns = requiredSchema.map(_.name).exists(metadataColumnNames.contains) + if (hasMetadataColumns) { + schema = StructType(tableSchema ++ metadataColumnNames + .flatMap(name => metadataColumns.find(_.name == name)) + .map(col => StructField(col.name, col.dataType, col.isNullable))) + } + } } - 
class InMemoryBatchScan(data: Array[InputPartition]) extends Scan with Batch { + class InMemoryBatchScan(data: Array[InputPartition], schema: StructType) extends Scan with Batch { override def readSchema(): StructType = schema override def toBatch: Batch = this override def planInputPartitions(): Array[InputPartition] = data - override def createReaderFactory(): PartitionReaderFactory = BufferedRowsReaderFactory + override def createReaderFactory(): PartitionReaderFactory = { + val metadataColumns = schema.map(_.name).filter(metadataColumnNames.contains) + new BufferedRowsReaderFactory(metadataColumns) + } } override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { @@ -340,7 +380,8 @@ object InMemoryTable { } } -class BufferedRows extends WriterCommitMessage with InputPartition with Serializable { +class BufferedRows( + val key: String = "") extends WriterCommitMessage with InputPartition with Serializable { val rows = new mutable.ArrayBuffer[InternalRow]() def withRow(row: InternalRow): BufferedRows = { @@ -349,13 +390,24 @@ class BufferedRows extends WriterCommitMessage with InputPartition with Serializ } } -private object BufferedRowsReaderFactory extends PartitionReaderFactory { +private class BufferedRowsReaderFactory( + metadataColumns: Seq[String]) extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { - new BufferedRowsReader(partition.asInstanceOf[BufferedRows]) + new BufferedRowsReader(partition.asInstanceOf[BufferedRows], metadataColumns) } } -private class BufferedRowsReader(partition: BufferedRows) extends PartitionReader[InternalRow] { +private class BufferedRowsReader( + partition: BufferedRows, + metadataColumns: Seq[String]) extends PartitionReader[InternalRow] { + private def addMetadata(row: InternalRow): InternalRow = { + val metadataRow = new GenericInternalRow(metadataColumns.map { + case "index" => index + case "_partition" => UTF8String.fromString(partition.key) + }.toArray) + new JoinedRow(row, metadataRow) + } + private var index: Int = -1 override def next(): Boolean = { @@ -363,7 +415,7 @@ private class BufferedRowsReader(partition: BufferedRows) extends PartitionReade index < partition.rows.length } - override def get(): InternalRow = partition.rows(index) + override def get(): InternalRow = addMetadata(partition.rows(index)) override def close(): Unit = {} } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala index 7dd0753fcf777..aec361b9799cc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogManagerSuite.scala @@ -24,76 +24,77 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, FakeV2SessionCatalog, NoSuchNamespaceException} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap -class CatalogManagerSuite extends SparkFunSuite { +class CatalogManagerSuite extends SparkFunSuite with SQLHelper { - private def createSessionCatalog(conf: SQLConf): 
SessionCatalog = { + private def createSessionCatalog(): SessionCatalog = { val catalog = new InMemoryCatalog() catalog.createDatabase( CatalogDatabase(SessionCatalog.DEFAULT_DATABASE, "", new URI("fake"), Map.empty), ignoreIfExists = true) - new SessionCatalog(catalog, EmptyFunctionRegistry, conf) + new SessionCatalog(catalog, EmptyFunctionRegistry) } test("CatalogManager should reflect the changes of default catalog") { - val conf = new SQLConf - val catalogManager = new CatalogManager(conf, FakeV2SessionCatalog, createSessionCatalog(conf)) + val catalogManager = new CatalogManager(FakeV2SessionCatalog, createSessionCatalog()) assert(catalogManager.currentCatalog.name() == CatalogManager.SESSION_CATALOG_NAME) assert(catalogManager.currentNamespace.sameElements(Array("default"))) - conf.setConfString("spark.sql.catalog.dummy", classOf[DummyCatalog].getName) - conf.setConfString(SQLConf.DEFAULT_CATALOG.key, "dummy") - - // The current catalog should be changed if the default catalog is set. - assert(catalogManager.currentCatalog.name() == "dummy") - assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) + withSQLConf("spark.sql.catalog.dummy" -> classOf[DummyCatalog].getName, + SQLConf.DEFAULT_CATALOG.key -> "dummy") { + // The current catalog should be changed if the default catalog is set. + assert(catalogManager.currentCatalog.name() == "dummy") + assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) + } } test("CatalogManager should keep the current catalog once set") { - val conf = new SQLConf - val catalogManager = new CatalogManager(conf, FakeV2SessionCatalog, createSessionCatalog(conf)) + val catalogManager = new CatalogManager(FakeV2SessionCatalog, createSessionCatalog()) assert(catalogManager.currentCatalog.name() == CatalogManager.SESSION_CATALOG_NAME) - conf.setConfString("spark.sql.catalog.dummy", classOf[DummyCatalog].getName) - catalogManager.setCurrentCatalog("dummy") - assert(catalogManager.currentCatalog.name() == "dummy") - assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) - - conf.setConfString("spark.sql.catalog.dummy2", classOf[DummyCatalog].getName) - conf.setConfString(SQLConf.DEFAULT_CATALOG.key, "dummy2") - // The current catalog shouldn't be changed if it's set before. - assert(catalogManager.currentCatalog.name() == "dummy") + withSQLConf("spark.sql.catalog.dummy" -> classOf[DummyCatalog].getName) { + catalogManager.setCurrentCatalog("dummy") + assert(catalogManager.currentCatalog.name() == "dummy") + assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) + + withSQLConf("spark.sql.catalog.dummy2" -> classOf[DummyCatalog].getName, + SQLConf.DEFAULT_CATALOG.key -> "dummy2") { + // The current catalog shouldn't be changed if it's set before. + assert(catalogManager.currentCatalog.name() == "dummy") + } + } } test("current namespace should be updated when switching current catalog") { - val conf = new SQLConf - val catalogManager = new CatalogManager(conf, FakeV2SessionCatalog, createSessionCatalog(conf)) - conf.setConfString("spark.sql.catalog.dummy", classOf[DummyCatalog].getName) - catalogManager.setCurrentCatalog("dummy") - assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) - catalogManager.setCurrentNamespace(Array("a")) - assert(catalogManager.currentNamespace.sameElements(Array("a"))) - - // If we set current catalog to the same catalog, current namespace should stay the same. 
- catalogManager.setCurrentCatalog("dummy") - assert(catalogManager.currentNamespace.sameElements(Array("a"))) - - // If we switch to a different catalog, current namespace should be reset. - conf.setConfString("spark.sql.catalog.dummy2", classOf[DummyCatalog].getName) - catalogManager.setCurrentCatalog("dummy2") - assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) + val catalogManager = new CatalogManager(FakeV2SessionCatalog, createSessionCatalog()) + withSQLConf("spark.sql.catalog.dummy" -> classOf[DummyCatalog].getName) { + catalogManager.setCurrentCatalog("dummy") + assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) + catalogManager.setCurrentNamespace(Array("a")) + assert(catalogManager.currentNamespace.sameElements(Array("a"))) + + // If we set current catalog to the same catalog, current namespace should stay the same. + catalogManager.setCurrentCatalog("dummy") + assert(catalogManager.currentNamespace.sameElements(Array("a"))) + + // If we switch to a different catalog, current namespace should be reset. + withSQLConf("spark.sql.catalog.dummy2" -> classOf[DummyCatalog].getName) { + catalogManager.setCurrentCatalog("dummy2") + assert(catalogManager.currentNamespace.sameElements(Array("a", "b"))) + } + } } test("set current namespace") { - val conf = new SQLConf - val v1SessionCatalog = createSessionCatalog(conf) + val v1SessionCatalog = createSessionCatalog() v1SessionCatalog.createDatabase( CatalogDatabase( "test", "", v1SessionCatalog.getDefaultDBPath("test"), Map.empty), ignoreIfExists = false) - val catalogManager = new CatalogManager(conf, FakeV2SessionCatalog, v1SessionCatalog) + val catalogManager = new CatalogManager(FakeV2SessionCatalog, v1SessionCatalog) // If the current catalog is session catalog, setting current namespace actually sets // `SessionCatalog.currentDb`. @@ -106,23 +107,25 @@ class CatalogManagerSuite extends SparkFunSuite { } // when switching current catalog, `SessionCatalog.currentDb` should be reset. - conf.setConfString("spark.sql.catalog.dummy", classOf[DummyCatalog].getName) - catalogManager.setCurrentCatalog("dummy") - assert(v1SessionCatalog.getCurrentDatabase == "default") - catalogManager.setCurrentNamespace(Array("test2")) - assert(v1SessionCatalog.getCurrentDatabase == "default") - - // Check namespace existence if currentCatalog implements SupportsNamespaces. - conf.setConfString("spark.sql.catalog.testCatalog", classOf[InMemoryTableCatalog].getName) - catalogManager.setCurrentCatalog("testCatalog") - catalogManager.currentCatalog.asInstanceOf[InMemoryTableCatalog] - .createNamespace(Array("test3"), Map.empty[String, String].asJava) - assert(v1SessionCatalog.getCurrentDatabase == "default") - catalogManager.setCurrentNamespace(Array("test3")) - assert(v1SessionCatalog.getCurrentDatabase == "default") - - intercept[NoSuchNamespaceException] { - catalogManager.setCurrentNamespace(Array("ns1", "ns2")) + withSQLConf("spark.sql.catalog.dummy" -> classOf[DummyCatalog].getName) { + catalogManager.setCurrentCatalog("dummy") + assert(v1SessionCatalog.getCurrentDatabase == "default") + catalogManager.setCurrentNamespace(Array("test2")) + assert(v1SessionCatalog.getCurrentDatabase == "default") + + // Check namespace existence if currentCatalog implements SupportsNamespaces. 
+ withSQLConf("spark.sql.catalog.testCatalog" -> classOf[InMemoryTableCatalog].getName) { + catalogManager.setCurrentCatalog("testCatalog") + catalogManager.currentCatalog.asInstanceOf[InMemoryTableCatalog] + .createNamespace(Array("test3"), Map.empty[String, String].asJava) + assert(v1SessionCatalog.getCurrentDatabase == "default") + catalogManager.setCurrentNamespace(Array("test3")) + assert(v1SessionCatalog.getCurrentDatabase == "default") + + intercept[NoSuchNamespaceException] { + catalogManager.setCurrentNamespace(Array("ns1", "ns2")) + } + } } } } diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index 49dc7adccbf3c..3d2b2e5c8edba 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -7,9 +7,9 @@ OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -subexpressionElimination off, codegen on 26809 27731 898 0.0 268094225.4 1.0X -subexpressionElimination off, codegen off 25117 26612 1357 0.0 251166638.4 1.1X -subexpressionElimination on, codegen on 2582 2906 282 0.0 25819408.7 10.4X -subexpressionElimination on, codegen off 25635 26131 804 0.0 256346873.1 1.0X +subexpressionElimination off, codegen on 25932 26908 916 0.0 259320042.3 1.0X +subexpressionElimination off, codegen off 26085 26159 65 0.0 260848905.0 1.0X +subexpressionElimination on, codegen on 2860 2939 72 0.0 28603312.9 9.1X +subexpressionElimination on, codegen off 2517 2617 93 0.0 25165157.7 10.3X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index 3f131726bc53d..ca2a9c6497500 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -7,9 +7,9 @@ OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -subexpressionElimination off, codegen on 24841 25365 803 0.0 248412787.5 1.0X -subexpressionElimination off, codegen off 25344 26205 941 0.0 253442656.5 1.0X -subexpressionElimination on, codegen on 2883 3019 119 0.0 28833086.8 8.6X -subexpressionElimination on, codegen off 24707 25688 903 0.0 247068775.9 1.0X +subexpressionElimination off, codegen on 26503 27622 1937 0.0 265033362.4 1.0X +subexpressionElimination off, codegen off 24920 25376 430 0.0 249196978.2 1.1X +subexpressionElimination on, codegen on 2421 2466 39 0.0 24213606.1 10.9X +subexpressionElimination on, codegen off 2360 2435 87 0.0 23604320.7 11.2X diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 30792c9bacd53..c164835c753e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql import scala.collection.JavaConverters._ -import scala.language.implicitConversions import 
org.apache.spark.annotation.Stable import org.apache.spark.internal.Logging diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 991f02d43bc47..31b4c158aa67b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -470,7 +470,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { def insertInto(tableName: String): Unit = { import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier, SessionCatalogAndIdentifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import org.apache.spark.sql.connector.catalog.CatalogV2Util._ assertNotBucketed("insertInto") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 3d431d6ff13a9..2c38a65ac2106 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -21,7 +21,6 @@ import java.io.{ByteArrayOutputStream, CharArrayWriter, DataOutputStream} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal @@ -63,7 +62,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils private[sql] object Dataset { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 7e430b682faf4..c40ce0f4777c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import java.util.Locale import scala.collection.JavaConverters._ -import scala.language.implicitConversions import org.apache.spark.annotation.Stable import org.apache.spark.api.python.PythonEvalType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index e9bc25d489718..2f46fa8073bbc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -18,10 +18,8 @@ package org.apache.spark.sql import org.apache.spark.annotation.Stable -import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{ConfigEntry, OptionalConfigEntry} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.{DeprecatedConfig, RemovedConfig} /** * Runtime configuration interface for Spark. To access this, use `SparkSession.conf`. 
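The `RuntimeConfig` scaladoc above directs callers to `SparkSession.conf`. As a brief usage sketch (not part of this patch; the `RuntimeConfSketch` object and the local session are illustrative assumptions only), a runtime-settable SQL option can be read and overridden like this:

import org.apache.spark.sql.SparkSession

object RuntimeConfSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical local session, used only to demonstrate the API.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("runtime-conf-sketch")
      .getOrCreate()

    // SparkSession.conf exposes the RuntimeConfig interface described above.
    val before = spark.conf.get("spark.sql.shuffle.partitions")
    spark.conf.set("spark.sql.shuffle.partitions", "4")
    val after = spark.conf.get("spark.sql.shuffle.partitions")
    println(s"shuffle partitions: $before -> $after")

    spark.stop()
  }
}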
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 0f6ae9c5d44e1..cceb38558946e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -30,9 +30,9 @@ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} -import org.apache.spark.sql.execution.aggregate.{ScalaAggregator, ScalaUDAF} +import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.execution.python.UserDefinedPythonFunction -import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} +import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.types.DataType import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala index c6a644f9f2e29..1436574c0d90a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalog import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Evolving, Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset} import org.apache.spark.sql.types.StructType import org.apache.spark.storage.StorageLevel diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index bd9120a1fbe78..303ae47f06b84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index fa41e865444da..3ba8745be995f 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -16,8 +16,8 @@ */ package org.apache.spark.sql.execution -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, NamedExpression, SortOrder} -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeMap, AttributeReference, Expression, NamedExpression, SortOrder} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning /** * A trait that provides functionality to handle aliases in the `outputExpressions`. @@ -25,19 +25,15 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition trait AliasAwareOutputExpression extends UnaryExecNode { protected def outputExpressions: Seq[NamedExpression] - protected def hasAlias: Boolean = outputExpressions.collectFirst { case _: Alias => }.isDefined + private lazy val aliasMap = AttributeMap(outputExpressions.collect { + case a @ Alias(child: AttributeReference, _) => (child, a.toAttribute) + }) - protected def replaceAliases(exprs: Seq[Expression]): Seq[Expression] = { - exprs.map { - case a: AttributeReference => replaceAlias(a).getOrElse(a) - case other => other - } - } + protected def hasAlias: Boolean = aliasMap.nonEmpty - protected def replaceAlias(attr: AttributeReference): Option[Attribute] = { - outputExpressions.collectFirst { - case a @ Alias(child: AttributeReference, _) if child.semanticEquals(attr) => - a.toAttribute + protected def normalizeExpression(exp: Expression): Expression = { + exp.transform { + case attr: AttributeReference => aliasMap.getOrElse(attr, attr) } } } @@ -50,7 +46,8 @@ trait AliasAwareOutputPartitioning extends AliasAwareOutputExpression { final override def outputPartitioning: Partitioning = { if (hasAlias) { child.outputPartitioning match { - case h: HashPartitioning => h.copy(expressions = replaceAliases(h.expressions)) + case e: Expression => + normalizeExpression(e).asInstanceOf[Partitioning] case other => other } } else { @@ -68,11 +65,10 @@ trait AliasAwareOutputOrdering extends AliasAwareOutputExpression { final override def outputOrdering: Seq[SortOrder] = { if (hasAlias) { - orderingExpressions.map { s => - s.child match { - case a: AttributeReference => s.copy(child = replaceAlias(a).getOrElse(a)) - case _ => s - } + orderingExpressions.map { sortOrder => + val newSortOrder = normalizeExpression(sortOrder).asInstanceOf[SortOrder] + val newSameOrderExpressions = newSortOrder.sameOrderExpressions.map(normalizeExpression) + newSortOrder.copy(sameOrderExpressions = newSameOrderExpressions) } } else { orderingExpressions diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 5f72d6005a8dd..f163d85914bc9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import 
org.apache.spark.sql.execution.columnar.{DefaultCachedBatchSerializer, InMemoryRelation} +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala index e1b9c8f430c56..b0bbb52bc4990 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.execution -import scala.collection.mutable - import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 45d28ddb42fc3..44636beeec7fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -25,7 +25,6 @@ import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index dcec0b019da28..08950c827f5aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, ZoneOffset} import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand} import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec} import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 77f7a4e553f06..040d1f36ed8a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -343,8 +343,10 @@ object QueryExecution { PlanDynamicPruningFilters(sparkSession), PlanSubqueries(sparkSession), RemoveRedundantProjects, - RemoveRedundantSorts, EnsureRequirements, + // `RemoveRedundantSorts` needs to be added after `EnsureRequirements` to guarantee the same + // number of partitions when instantiating PartitioningCollection. 
+ RemoveRedundantSorts, DisableUnnecessaryBucketedScan, ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), CollapseCodegenStages(), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index 8746cc6f650d7..bbe3f50492d9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, PartialMerge} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExecBase -import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.internal.SQLConf /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index ead8c00031112..062aa69b3adb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -135,7 +135,12 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ def longMetric(name: String): SQLMetric = metrics(name) // TODO: Move to `DistributedPlan` - /** Specifies how data is partitioned across different nodes in the cluster. */ + /** + * Specifies how data is partitioned across different nodes in the cluster. + * Note this method may fail if it is invoked before `EnsureRequirements` is applied + * since `PartitioningCollection` requires all its partitionings to have + * the same number of partitions. + */ def outputPartitioning: Partitioning = UnknownPartitioning(0) // TODO: WRONG WIDTH! 
/** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala index 895eeedd86b8b..c88fcecc9983b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala @@ -18,18 +18,15 @@ package org.apache.spark.sql.execution import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.adaptive.LogicalQueryStageStrategy import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, FileSourceStrategy} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy -import org.apache.spark.sql.internal.SQLConf -class SparkPlanner( - val session: SparkSession, - val conf: SQLConf, - val experimentalMethods: ExperimentalMethods) - extends SparkStrategies { +class SparkPlanner(val session: SparkSession, val experimentalMethods: ExperimentalMethods) + extends SparkStrategies with SQLConfHelper { def numPartitions: Int = conf.numShufflePartitions @@ -40,7 +37,7 @@ class SparkPlanner( PythonEvals :: new DataSourceV2Strategy(session) :: FileSourceStrategy :: - DataSourceStrategy(conf) :: + DataSourceStrategy :: SpecialLimits :: Aggregation :: Window :: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 6c42c051fbba6..85476bcd21e19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index ba3d83714c302..e9b1aa81895f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemoryPlan import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery} +import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType /** @@ -312,8 +312,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { /** * Used to plan streaming aggregation queries that are computed incrementally as part of a - * [[StreamingQuery]]. Currently this rule is injected into the planner - * on-demand, only when planning in a [[org.apache.spark.sql.execution.streaming.StreamExecution]] + * [[org.apache.spark.sql.streaming.StreamingQuery]]. 
Currently this rule is injected into the + * planner on-demand, only when planning in a + * [[org.apache.spark.sql.execution.streaming.StreamExecution]] */ object StatefulAggregationStrategy extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index a8905ca530005..b2963457e22db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.HashAggregateExec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 0865e42b440db..570edbf5f78a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -88,8 +88,8 @@ case class AdaptiveSparkPlanExec( // Exchange nodes) after running these rules. private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( RemoveRedundantProjects, - RemoveRedundantSorts, EnsureRequirements, + RemoveRedundantSorts, DisableUnnecessaryBucketedScan ) ++ context.session.sessionState.queryStagePrepRules diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala index 011acbf1b22a4..3760782515e97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Join, LogicalPlan, NO_BROADCAST_HASH} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf /** * This optimization rule detects a join child that has a high ratio of empty partitions and diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala index 9914eddd53a3d..bff142315f8ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.SparkPlan /** diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala index c3c7358641fcb..71540dbd39f95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala @@ -21,7 +21,6 @@ import scala.collection.concurrent.TrieMap import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, ReusedSubqueryExec, SparkPlan} -import org.apache.spark.sql.internal.SQLConf case class ReuseAdaptiveSubquery( reuseMap: TrieMap[SparkPlan, BaseSubqueryExec]) extends Rule[SparkPlan] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala index cdc57dbc7dcc2..aae3d922b28a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.exchange.{ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike /** * A simple implementation of [[Cost]], which takes a number of [[Long]] as the cost value. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala index 75651500954cf..1c140d7b6955f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.unsafe.KVIterator -import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter class ObjectAggregationIterator( partIndex: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala index b5372bcca89dd..9f2cf84a6d7e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection, U import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, TypedImperativeAggregate} import org.apache.spark.sql.execution.UnsafeKVExternalSorter import org.apache.spark.sql.types.StructType -import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter /** * An aggregation map that supports using safe `SpecificInternalRow`s aggregation buffers, so that diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala index 492b0f2da77cb..deb9e76c51760 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala @@ -19,12 +19,13 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.execution.metric.SQLMetric /** - * An iterator used to evaluate [[AggregateFunction]]. It assumes the input rows have been - * sorted by values of [[groupingExpressions]]. + * An iterator used to evaluate + * [[org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction]]. + * It assumes the input rows have been sorted by values of [[groupingExpressions]]. */ class SortBasedAggregationIterator( partIndex: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 44bc9c2e3a9d0..41e247a02759b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -17,16 +17,12 @@ package org.apache.spark.sql.execution.aggregate -import scala.reflect.runtime.universe.TypeTag - import org.apache.spark.internal.Logging -import org.apache.spark.sql.{Column, Row} +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _} -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateMutableProjection, GenerateSafeProjection} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala index 501e1c460f9c9..f62aa5db0872f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala @@ -63,10 +63,10 @@ object ArrowWriter { val elementVector = createFieldWriter(vector.getDataVector()) new ArrayWriter(vector, elementVector) case (MapType(_, _, _), vector: MapVector) => - val entryWriter = createFieldWriter(vector.getDataVector).asInstanceOf[StructWriter] - val keyWriter = createFieldWriter(entryWriter.valueVector.getChild(MapVector.KEY_NAME)) - val valueWriter = createFieldWriter(entryWriter.valueVector.getChild(MapVector.VALUE_NAME)) - new MapWriter(vector, keyWriter, valueWriter) + val structVector = vector.getDataVector.asInstanceOf[StructVector] + val keyWriter = createFieldWriter(structVector.getChild(MapVector.KEY_NAME)) + val valueWriter = createFieldWriter(structVector.getChild(MapVector.VALUE_NAME)) + new MapWriter(vector, structVector, keyWriter, 
valueWriter) case (StructType(_), vector: StructVector) => val children = (0 until vector.size()).map { ordinal => createFieldWriter(vector.getChildByOrdinal(ordinal)) @@ -331,11 +331,11 @@ private[arrow] class StructWriter( override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { val struct = input.getStruct(ordinal, children.length) var i = 0 + valueVector.setIndexDefined(count) while (i < struct.numFields) { children(i).write(struct, i) i += 1 } - valueVector.setIndexDefined(count) } override def finish(): Unit = { @@ -351,6 +351,7 @@ private[arrow] class StructWriter( private[arrow] class MapWriter( val valueVector: MapVector, + val structVector: StructVector, val keyWriter: ArrowFieldWriter, val valueWriter: ArrowFieldWriter) extends ArrowFieldWriter { @@ -363,6 +364,7 @@ private[arrow] class MapWriter( val values = map.valueArray() var i = 0 while (i < map.numElements()) { + structVector.setIndexDefined(keyWriter.count) keyWriter.write(keys, i) valueWriter.write(values, i) i += 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 7334ea1e27284..006fa0fba4138 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -21,7 +21,7 @@ import java.util.concurrent.{Future => JFuture} import java.util.concurrent.TimeUnit._ import scala.collection.mutable -import scala.concurrent.{ExecutionContext} +import scala.concurrent.ExecutionContext import scala.concurrent.duration.Duration import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{LongType, StructType} -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.ThreadUtils import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler} /** Physical plan for Project. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala index 40a2a7a2359e0..a4e5be01b45a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SparkPlan} import org.apache.spark.sql.execution.joins.{BaseJoinExec, ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SQLConf /** * This rule coalesces one side of the `SortMergeJoin` and `ShuffledHashJoin` diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index bb59f44abc761..6b195b3b49f09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.exchange.Exchange -import org.apache.spark.sql.internal.SQLConf /** * Disable unnecessary bucketed table scan based on actual physical query plan. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala index 45557bfbada6c..d2f65b745f35a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String class ColumnStatisticsSchema(a: Attribute) extends Serializable { val upperBound = AttributeReference(a.name + ".upperBound", a.dataType, nullable = true)() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandCheck.scala index dedace4af4d14..216636c7ea14f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandCheck.scala @@ -17,14 +17,14 @@ package org.apache.spark.sql.execution.command +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.SchemaUtils /** * Checks legitimization of various execution commands. 
*/ -case class CommandCheck(conf: SQLConf) extends (LogicalPlan => Unit) { +object CommandCheck extends (LogicalPlan => Unit) with SQLConfHelper { override def apply(plan: LogicalPlan): Unit = { plan.foreach { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 8bf7504716f79..f86f62bbf853b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, CatalogTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -163,7 +163,7 @@ object CommandUtils extends Logging { .getConfString("hive.exec.stagingdir", ".hive-staging") val filter = new PathFilterIgnoreNonData(stagingDir) val sizes = InMemoryFileIndex.bulkListLeafFiles(paths.flatten, - sparkSession.sessionState.newHadoopConf(), filter, sparkSession, isRootLevel = true).map { + sparkSession.sessionState.newHadoopConf(), filter, sparkSession).map { case (_, files) => files.map(_.getLen).sum } // the size is 0 where paths(i) is not defined and sizes(i) where it is defined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala index a1bb5af1ab723..a56007f5d5d95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker -import org.apache.spark.sql.execution.datasources.FileFormatWriter import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.util.SerializableConfiguration @@ -35,7 +34,7 @@ trait DataWritingCommand extends Command { /** * The input query plan that produces the data to be written. * IMPORTANT: the input query plan MUST be analyzed, so that we can carry its output columns - * to [[FileFormatWriter]]. + * to [[org.apache.spark.sql.execution.datasources.FileFormatWriter]]. 
*/ def query: LogicalPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index 61ee6d7f4a299..00accedf21556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.types.{StringType, StructField, StructType} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index ef6b0bba1628e..f99dc8d9f1a8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -21,7 +21,6 @@ import java.util.Locale import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 68c47d6a6dfaa..6ed40aacd1125 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -21,7 +21,6 @@ import java.net.URI import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index d76b4b8894783..330a503e5f8e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException} import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource} -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.types.{StringType, StructField, StructType} diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index b55bed9cd7fc0..34ded5d456d09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.datasources import java.util.{Locale, ServiceConfigurationError, ServiceLoader} import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.util.{Failure, Success, Try} import org.apache.hadoop.conf.Configuration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 822bdbdad8f00..361d1fab03421 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources import java.util.Locale -import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.hadoop.fs.Path @@ -27,7 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, QualifiedTableName} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, QualifiedTableName, SQLConfHelper} import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog._ @@ -42,9 +41,7 @@ import org.apache.spark.sql.connector.catalog.SupportsRead import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.FileSourceStrategy.{extractPredicatesWithinOutputSet, logInfo} import org.apache.spark.sql.execution.streaming.StreamingRelation -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ @@ -314,8 +311,8 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] /** * A Strategy for planning scans over data sources defined using the sources API. */ -case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with CastSupport { - import DataSourceStrategy._ +object DataSourceStrategy + extends Strategy with Logging with CastSupport with PredicateHelper with SQLConfHelper { def apply(plan: LogicalPlan): Seq[execution.SparkPlan] = plan match { case ScanOperation(projects, filters, l @ LogicalRelation(t: CatalystScan, _, _, _)) => @@ -466,9 +463,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with private[this] def toCatalystRDD(relation: LogicalRelation, rdd: RDD[Row]): RDD[InternalRow] = { toCatalystRDD(relation, relation.output, rdd) } -} -object DataSourceStrategy extends PredicateHelper { /** * The attribute name may differ from the one in the schema if the query analyzer * is case insensitive. 
We should change attribute names to match the ones in the schema, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 28a63c26604ec..1149767bdade2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -22,11 +22,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileDataSourceV2, FileTable} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} /** * Replace the File source V2 table in [[InsertIntoStatement]] to V1 [[FileFormat]]. - * E.g, with temporary view `t` using [[FileDataSourceV2]], inserting into view `t` fails + * E.g, with temporary view `t` using + * [[org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2]], inserting into view `t` fails * since there is no corresponding physical plan. * This is a temporary hack for making current data source V2 work. It should be * removed when Catalog support of file data source v2 is finished. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala index d278802e6c9f2..a0b191e60f376 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala @@ -17,10 +17,6 @@ package org.apache.spark.sql.execution.datasources -import java.util.Locale - -import scala.collection.mutable - import org.apache.spark.sql.{SparkSession, SQLContext} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.execution.FileRelation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index 130894e9bc025..21275951b5603 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -128,7 +128,7 @@ class InMemoryFileIndex( } val filter = FileInputFormat.getInputPathFilter(new JobConf(hadoopConf, this.getClass)) val discovered = InMemoryFileIndex.bulkListLeafFiles( - pathsToFetch.toSeq, hadoopConf, filter, sparkSession, isRootLevel = true) + pathsToFetch.toSeq, hadoopConf, filter, sparkSession) discovered.foreach { case (path, leafFiles) => HiveCatalogMetrics.incrementFilesDiscovered(leafFiles.size) fileStatusCache.putLeafFiles(path, leafFiles.toArray) @@ -146,20 +146,17 @@ object InMemoryFileIndex extends Logging { paths: Seq[Path], hadoopConf: Configuration, filter: PathFilter, - sparkSession: SparkSession, - isRootLevel: Boolean): Seq[(Path, Seq[FileStatus])] = { + sparkSession: SparkSession): Seq[(Path, Seq[FileStatus])] = { HadoopFSUtils.parallelListLeafFiles( sc = sparkSession.sparkContext, paths = paths, hadoopConf = hadoopConf, - filter = filter, - isRootLevel = isRootLevel, + filter = new 
PathFilterWrapper(filter), ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles, ignoreLocality = sparkSession.sessionState.conf.ignoreDataLocality, parallelismThreshold = sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold, - parallelismMax = sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism, - filterFun = Some(shouldFilterOut)) - } + parallelismMax = sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism) + } /** Checks if we should filter out this path name. */ def shouldFilterOut(pathName: String): Boolean = { @@ -175,3 +172,9 @@ object InMemoryFileIndex extends Logging { exclude && !include } } + +private class PathFilterWrapper(val filter: PathFilter) extends PathFilter with Serializable { + override def accept(path: Path): Boolean = { + (filter == null || filter.accept(path)) && !InMemoryFileIndex.shouldFilterOut(path.getName) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala index 868e5371426c0..1d7abe5b938c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql.execution.datasources import org.apache.hadoop.mapreduce.TaskAttemptContext -import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 5341e22f5e670..fed9614347f6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} -import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.sql.types.StructType /** * An abstract class that represents [[FileIndex]]s that are aware of partitioned tables. 
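For readers following the InMemoryFileIndex hunk above: the dropped `isRootLevel`/`filterFun` arguments are replaced by wrapping the caller's PathFilter, so HadoopFSUtils sees a single serializable filter that applies both the user filter and `shouldFilterOut`. A small sketch of the combined predicate; the paths are made up, and PathFilterWrapper itself is private to InMemoryFileIndex.scala, so its logic is written out inline here:

  import org.apache.hadoop.fs.{Path, PathFilter}
  import org.apache.spark.sql.execution.datasources.InMemoryFileIndex

  // Same check as PathFilterWrapper.accept.
  def accept(filter: PathFilter, path: Path): Boolean =
    (filter == null || filter.accept(path)) && !InMemoryFileIndex.shouldFilterOut(path.getName)

  val parquetOnly: PathFilter = (p: Path) => p.getName.endsWith(".parquet")

  accept(parquetOnly, new Path("/data/part-00000.parquet"))   // true: passes both checks
  accept(parquetOnly, new Path("/data/.part-00000.parquet"))  // false: hidden file, dropped by shouldFilterOut
  accept(null, new Path("/data/part-00000.orc"))              // true: no user filter, not a hidden/metadata file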
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 4087efc486a4f..796c23c7337d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Cast, Literal} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala index c3dd6939ec5bd..0959d8799f5a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala @@ -21,8 +21,6 @@ import java.io.Closeable import org.apache.hadoop.mapreduce.RecordReader -import org.apache.spark.sql.catalyst.InternalRow - /** * An adaptor from a Hadoop [[RecordReader]] to an [[Iterator]] over the values returned. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index 61e0154a0ffe8..76a6a48ca0b0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} /** * Prunes unnecessary physical columns given a [[PhysicalOperation]] over a data source relation. 
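A pattern that recurs in this patch (the SparkPlanner, CommandCheck, and DataSourceStrategy hunks above, and the V2SessionCatalog hunk below) is dropping the explicit `conf: SQLConf` constructor parameter in favour of the SQLConfHelper mix-in, which resolves `conf` from the active session at call time. A minimal sketch of the pattern; `ExampleCheck` and its body are made up, and only the shape mirrors the CommandCheck change:

  import org.apache.spark.sql.catalyst.SQLConfHelper
  import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

  // No SQLConf is threaded through the constructor, so the check can be a singleton
  // object; `conf` comes from SQLConfHelper and reflects the session invoking it.
  object ExampleCheck extends (LogicalPlan => Unit) with SQLConfHelper {
    override def apply(plan: LogicalPlan): Unit = {
      if (!conf.caseSensitiveAnalysis) {
        // e.g. run a case-insensitive validation over `plan` here (elided in this sketch)
      }
    }
  }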
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala index 637ce68ec05a2..b241243363746 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala @@ -22,14 +22,14 @@ import java.sql.Timestamp import com.google.common.io.{ByteStreams, Closeables} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, GlobFilter, Path} +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriterFactory, PartitionedFile} import org.apache.spark.sql.internal.SQLConf.SOURCES_BINARY_FILE_MAX_LENGTH import org.apache.spark.sql.sources.{And, DataSourceRegister, EqualTo, Filter, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Not, Or} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index 10146be44e8bf..d8fa768a604f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -23,8 +23,6 @@ import java.nio.charset.{Charset, StandardCharsets} import com.univocity.parsers.csv.CsvParser import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hadoop.io.{LongWritable, Text} -import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.lib.input.FileInputFormat diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index e25ce53941ff6..87ca78db59b29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, PreparedStatement, ResultSet, SQLException} +import java.sql.{Connection, PreparedStatement, ResultSet} import scala.util.control.NonFatal @@ -46,8 +46,8 @@ object JDBCRDD extends Logging { * @param options - JDBC options that contains url, table and other information. * * @return A StructType giving the table's Catalyst schema. - * @throws SQLException if the table specification is garbage. - * @throws SQLException if the table contains an unsupported type. + * @throws java.sql.SQLException if the table specification is garbage. + * @throws java.sql.SQLException if the table contains an unsupported type. 
*/ def resolveTable(options: JDBCOptions): StructType = { val url = options.url diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala index e0fa4584185e9..f2f6f60cb1dde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.execution.datasources.json -import java.nio.charset.{Charset, StandardCharsets} - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} -import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.ExprUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala index 4ab009c6bd014..32ce7185f7381 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala @@ -23,7 +23,6 @@ import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.RebaseDateTime.rebaseJulianToGregorianDays import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 95f19f9dcee64..1901f5575470e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -35,7 +35,6 @@ import org.apache.parquet.hadoop._ import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel import org.apache.parquet.hadoop.codec.CodecConfig import org.apache.parquet.hadoop.util.ContextUtil -import org.apache.parquet.schema.MessageType import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.internal.Logging @@ -504,7 +503,8 @@ object ParquetFileFormat extends Logging { /** * Reads Spark SQL schema from a Parquet footer. If a valid serialized Spark SQL schema string * can be found in the file metadata, returns the deserialized [[StructType]], otherwise, returns - * a [[StructType]] converted from the [[MessageType]] stored in this footer. + * a [[StructType]] converted from the [[org.apache.parquet.schema.MessageType]] stored in this + * footer. 
*/ def readSchemaFromFooter( footer: Footer, converter: ParquetToSparkSchemaConverter): StructType = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala index e7753cec681cf..70f6726c581a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala @@ -21,7 +21,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ import org.apache.parquet.hadoop.ParquetOutputFormat -import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.OutputWriter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 6ef56af927129..f65aef95b6c38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -26,9 +26,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.parquet.column.Dictionary import org.apache.parquet.io.api.{Binary, Converter, GroupConverter, PrimitiveConverter} -import org.apache.parquet.schema.{GroupType, MessageType, OriginalType, Type} -import org.apache.parquet.schema.OriginalType.{INT_32, LIST, UTF8} -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, DOUBLE, FIXED_LEN_BYTE_ARRAY, INT32, INT64, INT96} +import org.apache.parquet.schema.{GroupType, OriginalType, Type} +import org.apache.parquet.schema.OriginalType.LIST +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, FIXED_LEN_BYTE_ARRAY, INT32, INT64, INT96} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow @@ -107,11 +107,15 @@ private[parquet] class ParquetPrimitiveConverter(val updater: ParentContainerUpd * }}} * 5 converters will be created: * - * - a root [[ParquetRowConverter]] for [[MessageType]] `root`, which contains: - * - a [[ParquetPrimitiveConverter]] for required [[INT_32]] field `f1`, and + * - a root [[ParquetRowConverter]] for [[org.apache.parquet.schema.MessageType]] `root`, + * which contains: + * - a [[ParquetPrimitiveConverter]] for required + * [[org.apache.parquet.schema.OriginalType.INT_32]] field `f1`, and * - a nested [[ParquetRowConverter]] for optional [[GroupType]] `f2`, which contains: - * - a [[ParquetPrimitiveConverter]] for required [[DOUBLE]] field `f21`, and - * - a [[ParquetStringConverter]] for optional [[UTF8]] string field `f22` + * - a [[ParquetPrimitiveConverter]] for required + * [[org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE]] field `f21`, and + * - a [[ParquetStringConverter]] for optional [[org.apache.parquet.schema.OriginalType.UTF8]] + * string field `f22` * * When used as a root converter, [[NoopUpdater]] should be used since root converters don't have * any "parent" container. 
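The rewritten scaladoc above walks through the converter tree for a small Parquet schema (the `{{{ ... }}}` block sits just above this hunk's context). A sketch rebuilding that schema with parquet-mr's Types builder, so the five converters can be mapped to concrete fields; it uses the pre-LogicalTypeAnnotation OriginalType API that the surrounding imports rely on and is illustrative only:

  import org.apache.parquet.schema.{MessageType, OriginalType, Types}
  import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, DOUBLE, INT32}

  val root: MessageType = Types.buildMessage()
    .required(INT32).named("f1")                            // ParquetPrimitiveConverter (the doc's INT_32 field)
    .optionalGroup()                                        // nested ParquetRowConverter for group f2
      .required(DOUBLE).named("f21")                        // ParquetPrimitiveConverter
      .optional(BINARY).as(OriginalType.UTF8).named("f22")  // ParquetStringConverter
      .named("f2")
    .named("root")                                          // root ParquetRowConverter, used with NoopUpdater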
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index e45514385e292..3a2a642b870f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -29,8 +29,6 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSc import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.{AtomicType, StructType} import org.apache.spark.sql.util.SchemaUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 81b1c81499c74..0ca442baeea2f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} import org.apache.spark.sql.types.StructType case class DescribeTableExec( @@ -41,6 +41,7 @@ case class DescribeTableExec( addPartitioning(rows) if (isExtended) { + addMetadataColumns(rows) addTableDetails(rows) } rows.toSeq @@ -72,6 +73,19 @@ case class DescribeTableExec( } } + private def addMetadataColumns(rows: ArrayBuffer[InternalRow]): Unit = table match { + case hasMeta: SupportsMetadataColumns if hasMeta.metadataColumns.nonEmpty => + rows += emptyRow() + rows += toCatalystRow("# Metadata Columns", "", "") + rows ++= hasMeta.metadataColumns.map { column => + toCatalystRow( + column.name, + column.dataType.simpleString, + Option(column.comment()).getOrElse("")) + } + case _ => + } + private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = { rows += emptyRow() rows += toCatalystRow("# Partitioning", "", "") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala index f7b4317ad65e2..777ee9d385f12 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, SupportsNamespaces} +import org.apache.spark.sql.connector.catalog.CatalogPlugin /** * Physical plan node for dropping a 
namespace. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala index e4de70d4ee88f..8cf59f3a59323 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.catalog.{Table, TableProvider} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 7f6ae20d5cd0b..ce8edce6f08d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -96,13 +96,11 @@ object PushDownUtils extends PredicateHelper { val exprs = projects ++ filters val requiredColumns = AttributeSet(exprs.flatMap(_.references)) val neededOutput = relation.output.filter(requiredColumns.contains) - if (neededOutput != relation.output) { - r.pruneColumns(neededOutput.toStructType) - val scan = r.build() - scan -> toOutputAttrs(scan.readSchema(), relation) - } else { - r.build() -> relation.output - } + r.pruneColumns(neededOutput.toStructType) + val scan = r.build() + // always project, in case the relation's output has been updated and doesn't match + // the underlying table schema + scan -> toOutputAttrs(scan.readSchema(), relation) case _ => scanBuilder.build() -> relation.output } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 95715fd1af56e..7ceee1edee180 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** * Physical plan node for showing table properties. 
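The DescribeTableExec hunk above makes DESCRIBE TABLE EXTENDED print a `# Metadata Columns` section for DSv2 tables that implement SupportsMetadataColumns. A hypothetical usage sketch; the catalog, table, and `_partition` column are made up, and only the section header plus the name/type/comment layout come from addMetadataColumns:

  // Assumes a registered DSv2 catalog `testcat` whose tables expose metadata columns.
  spark.sql("DESCRIBE TABLE EXTENDED testcat.ns.events").show(truncate = false)

  // Rows emitted by addMetadataColumns for such a table take the form:
  //   # Metadata Columns
  //   <column name>   <dataType.simpleString>   <comment, or empty string when null>
  //   e.g. _partition  string                   partition key of the row (hypothetical)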
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala index 5dfd2e52706d0..cb4a2994de1f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.types.BooleanType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala index 1ca3fd42c0597..f24fb95acb922 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala @@ -22,8 +22,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex -import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index 6dda1d4aaf37e..9ee145580ce6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -23,22 +23,21 @@ import java.util import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.{SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogDatabase, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog} import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.NamespaceChange.RemoveProperty import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, Transform} import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * A [[TableCatalog]] that translates calls to the v1 SessionCatalog. 
*/ -class V2SessionCatalog(catalog: SessionCatalog, conf: SQLConf) - extends TableCatalog with SupportsNamespaces { +class V2SessionCatalog(catalog: SessionCatalog) + extends TableCatalog with SupportsNamespaces with SQLConfHelper { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import V2SessionCatalog._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala index 2f9387532c25c..0dbc74395afb1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala @@ -19,10 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2.orc import scala.collection.JavaConverters._ -import org.apache.orc.mapreduce.OrcInputFormat - import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.orc.OrcFilters diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 6973f55e8dca0..93d7db44f2285 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeSeq, BindReferences, DynamicPruningExpression, DynamicPruningSubquery, Expression, ListQuery, Literal, PredicateHelper} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{InSubqueryExec, QueryExecution, SparkPlan, SubqueryBroadcastExec} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index cf38fee055ca5..ebbc8a4df5643 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SQLConf /** * Ensures that the [[org.apache.spark.sql.catalyst.plans.physical.Partitioning Partitioning]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index aeaf59b7f0f4a..e58733b35990a 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expre import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala index b44b13c8de0da..7171c7f7f9746 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala @@ -46,6 +46,8 @@ class ArrowPythonRunner( extends BasePythonRunner[Iterator[InternalRow], ColumnarBatch](funcs, evalType, argOffsets) with PythonArrowOutput { + override val simplifiedTraceback: Boolean = SQLConf.get.pysparkSimplifiedTraceback + override val bufferSize: Int = SQLConf.get.pandasUDFBufferSize require( bufferSize >= 4, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala index 25ce16db264ac..e3d8a943d8cf2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala @@ -27,6 +27,7 @@ import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python.{BasePythonRunner, ChainedPythonFunctions, PythonRDD} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.arrow.ArrowWriter +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.ArrowUtils import org.apache.spark.sql.vectorized.ColumnarBatch @@ -49,6 +50,8 @@ class CoGroupedArrowPythonRunner( (Iterator[InternalRow], Iterator[InternalRow]), ColumnarBatch](funcs, evalType, argOffsets) with PythonArrowOutput { + override val simplifiedTraceback: Boolean = SQLConf.get.pysparkSimplifiedTraceback + protected def newWriterThread( env: SparkEnv, worker: Socket, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 298d63478b63e..7c476ab03c002 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -26,7 +26,7 @@ import org.apache.spark.api.python.ChainedPythonFunctions import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala index 
d341d7019f0ac..d9fe07214d061 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala @@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicBoolean import org.apache.spark._ import org.apache.spark.api.python._ +import org.apache.spark.sql.internal.SQLConf /** * A helper class to run Python UDFs in Spark. @@ -34,6 +35,8 @@ class PythonUDFRunner( extends BasePythonRunner[Array[Byte], Array[Byte]]( funcs, evalType, argOffsets) { + override val simplifiedTraceback: Boolean = SQLConf.get.pysparkSimplifiedTraceback + protected override def newWriterThread( env: SparkEnv, worker: Socket, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala index 59f5a7078a151..ae7b7ef23512c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala @@ -26,7 +26,7 @@ import org.apache.arrow.vector.VectorSchemaRoot import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter} import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel -import org.apache.spark.{SparkException, TaskContext} +import org.apache.spark.TaskContext import org.apache.spark.api.r._ import org.apache.spark.api.r.SpecialLengths import org.apache.spark.broadcast.Broadcast diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index eb8b8af7950b2..747094b7791c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, AttributeReference, Expression, Literal, SortOrder, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, SortOrder, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution} import org.apache.spark.sql.execution._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index c2278e8659147..893639a86c88c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -19,14 +19,12 @@ package org.apache.spark.sql.execution.streaming import java.io._ import java.nio.charset.StandardCharsets -import java.util.{ConcurrentModificationException, EnumSet, UUID} +import java.util.ConcurrentModificationException import scala.reflect.ClassTag import org.apache.commons.io.IOUtils -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.hadoop.fs.permission.FsPermission import org.json4s.NoTypeHints import org.json4s.jackson.Serialization diff 
--git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index bfa60cf7dfd78..b871874f52967 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -21,14 +21,13 @@ import java.util.UUID import java.util.concurrent.atomic.AtomicInteger import org.apache.spark.internal.Logging -import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} +import org.apache.spark.sql.{SparkSession, Strategy} import org.apache.spark.sql.catalyst.QueryPlanningTracker import org.apache.spark.sql.catalyst.expressions.{CurrentBatchTimestamp, ExpressionWithRandomSeed} import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, HashPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.{LeafExecNode, LocalLimitExec, QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode} -import org.apache.spark.sql.execution.exchange.{ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.{LocalLimitExec, QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.util.Utils @@ -51,7 +50,6 @@ class IncrementalExecution( // Modified planner with stateful operations. override val planner: SparkPlanner = new SparkPlanner( sparkSession, - sparkSession.sessionState.conf, sparkSession.sessionState.experimentalMethods) { override def strategies: Seq[Strategy] = extraPlanningStrategies ++ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index aba0463f56cd7..d6be33c76e937 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -40,7 +40,6 @@ import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, SparkDataStream} import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate} import org.apache.spark.sql.connector.write.streaming.StreamingWrite -import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala index 516afbea5d9de..fc0cfc30ff2fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala @@ -31,14 +31,14 @@ import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream -import 
org.apache.spark.sql.streaming.StreamingQuery /** - * Contains metadata associated with a [[StreamingQuery]]. This information is written - * in the checkpoint location the first time a query is started and recovered every time the query - * is restarted. + * Contains metadata associated with a [[org.apache.spark.sql.streaming.StreamingQuery]]. + * This information is written in the checkpoint location the first time a query is started + * and recovered every time the query is restarted. * - * @param id unique id of the [[StreamingQuery]] that needs to be persisted across restarts + * @param id unique id of the [[org.apache.spark.sql.streaming.StreamingQuery]] + * that needs to be persisted across restarts */ case class StreamMetadata(id: String) { def json: String = Serialization.write(this)(StreamMetadata.format) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala index 71792facf698a..2f62dbd7ec578 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala @@ -21,13 +21,13 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging -import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition, ZippedPartitionsRDD2} +import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition} import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeSet, BoundReference, Expression, NamedExpression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.streaming.WatermarkSupport.watermarkExpression -import org.apache.spark.sql.execution.streaming.state.{StateStoreCoordinatorRef, StateStoreProvider, StateStoreProviderId} +import org.apache.spark.sql.execution.streaming.state.{StateStoreCoordinatorRef, StateStoreProviderId} /** @@ -200,8 +200,8 @@ object StreamingSymmetricHashJoinHelper extends Logging { /** * A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks' * preferred location is based on which executors have the required join state stores already - * loaded. This class is a variant of [[ZippedPartitionsRDD2]] which only changes signature - * of `f`. + * loaded. This class is a variant of [[org.apache.spark.rdd.ZippedPartitionsRDD2]] which only + * changes signature of `f`. 
*/ class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala index 6d5e7fd5c5cf3..60c66d863a3c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.streaming.sources -import org.apache.spark.api.python.PythonException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.execution.streaming.Sink diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala index 507f860e0452a..fa51dd61a939b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala @@ -21,12 +21,13 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} import org.apache.spark.sql.connector.write.streaming.StreamingDataWriterFactory /** - * A simple [[DataWriterFactory]] whose tasks just pack rows into the commit message for delivery - * to a [[BatchWrite]] on the driver. + * A simple [[org.apache.spark.sql.connector.write.DataWriterFactory]] whose tasks just pack rows + * into the commit message for delivery to a + * [[org.apache.spark.sql.connector.write.BatchWrite]] on the driver. * * Note that, because it sends all rows to the driver, this factory will generally be unsuitable * for production-quality sinks. It's intended for use in tests. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala index a6ac6f2da8e41..778cfeda68af0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala @@ -35,13 +35,12 @@ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUti import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, SupportsTruncate, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} -import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend import org.apache.spark.sql.types.StructType /** - * A sink that stores the results in memory. This [[Sink]] is primarily intended for use in unit - * tests and does not provide durability. + * A sink that stores the results in memory. 
This [[org.apache.spark.sql.execution.streaming.Sink]] + * is primarily intended for use in unit tests and does not provide durability. */ class MemorySink extends Table with SupportsWrite with Logging { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 7b99ceeb612ee..084ddf8077a15 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkContext, SparkEnv, SparkException} +import org.apache.spark.{SparkContext, SparkEnv} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.UnsafeRowUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index b894e771a6fe2..f21e2ffb80a7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -23,8 +23,6 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, TaskContext} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.execution.streaming.continuous.EpochTracker import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index c1954e1d3858e..8cf3739e11150 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -25,14 +25,14 @@ import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, JoinedRow, Literal, SpecificInternalRow, UnsafeProjection, UnsafeRow} -import org.apache.spark.sql.execution.streaming.{StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec} +import org.apache.spark.sql.execution.streaming.StatefulOperatorStateInfo import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper._ -import org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager.KeyToValuePair import org.apache.spark.sql.types.{BooleanType, LongType, StructField, StructType} import org.apache.spark.util.NextIterator /** - * Helper class to manage state required by a single side of [[StreamingSymmetricHashJoinExec]]. + * Helper class to manage state required by a single side of + * [[org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinExec]]. 
* The interface of this class is basically that of a multi-map: * - Get: Returns an iterator of multiple values for given key * - Append: Append a new value to the given key diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 639e862fea1da..9a5183a22d23d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution.streaming.state._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{OutputMode, StateOperatorProgress} import org.apache.spark.sql.types._ import org.apache.spark.util.{CompletionIterator, NextIterator, Utils} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala index b19540253d7eb..e53e0644eb268 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala @@ -22,7 +22,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, SortOrder, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, Partitioning} -import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.execution.{LimitExec, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.streaming.state.StateStoreOps import org.apache.spark.sql.streaming.OutputMode diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index b693cae824bf9..6e0e36cbe5901 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -17,17 +17,11 @@ package org.apache.spark.sql.execution.window -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan, UnaryExecNode} -import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} +import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan} /** * This class calculates and outputs (windowed) aggregates over the rows in a single (sorted) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index a6a3f3d7384bf..c6b98d48d7dde 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} trait WindowExecBase extends UnaryExecNode { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 0cef33509a175..80dd3cf8bc840 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -17,16 +17,13 @@ package org.apache.spark.sql.expressions -import scala.reflect.runtime.universe.TypeTag - -import org.apache.spark.annotation.{Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{Column, Encoder} -import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.execution.aggregate.ScalaAggregator -import org.apache.spark.sql.types.{AnyDataType, DataType} +import org.apache.spark.sql.types.DataType /** * A user-defined function. To create one, use the `udf` functions in `functions`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala index f7591e4d265e0..4e3c5586209e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala @@ -44,8 +44,6 @@ object typed { override protected def _sqlContext: SQLContext = null } - import implicits._ - /** * Average aggregate function. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 8d6281882f188..9861d21d3a430 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql import scala.collection.JavaConverters._ -import scala.language.implicitConversions -import scala.reflect.runtime.universe.{typeTag, TypeTag} +import scala.reflect.runtime.universe.TypeTag import scala.util.Try import org.apache.spark.annotation.Stable @@ -4151,7 +4150,7 @@ object functions { /** * Parses a JSON string and infers its schema in DDL format. * - * @param json a string literal containing a JSON string. + * @param json a foldable string column containing a JSON string. * * @group collection_funcs * @since 2.4.0 @@ -4161,7 +4160,7 @@ object functions { /** * Parses a JSON string and infers its schema in DDL format using options. * - * @param json a string column containing JSON data. + * @param json a foldable string column containing JSON data. 
* @param options options to control how the json is parsed. accepts the same options and the * json data source. See [[DataFrameReader#json]]. * @return a column with string literal containing schema in DDL format. @@ -4426,7 +4425,7 @@ object functions { /** * Parses a CSV string and infers its schema in DDL format. * - * @param csv a string literal containing a CSV string. + * @param csv a foldable string column containing a CSV string. * * @group collection_funcs * @since 3.0.0 @@ -4436,7 +4435,7 @@ object functions { /** * Parses a CSV string and infers its schema in DDL format using options. * - * @param csv a string literal containing a CSV string. + * @param csv a foldable string column containing a CSV string. * @param options options to control how the CSV is parsed. accepts the same options and the * json data source. See [[DataFrameReader#csv]]. * @return a column with string literal containing schema in DDL format. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 33c15ec76654d..538a5408723bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -150,7 +150,6 @@ abstract class BaseSessionStateBuilder( () => session.sharedState.externalCatalog, () => session.sharedState.globalTempViewManager, functionRegistry, - conf, SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf), sqlParser, resourceLoader) @@ -158,9 +157,9 @@ abstract class BaseSessionStateBuilder( catalog } - protected lazy val v2SessionCatalog = new V2SessionCatalog(catalog, conf) + protected lazy val v2SessionCatalog = new V2SessionCatalog(catalog) - protected lazy val catalogManager = new CatalogManager(conf, v2SessionCatalog, catalog) + protected lazy val catalogManager = new CatalogManager(v2SessionCatalog, catalog) /** * Interface exposed to the user for registering user-defined functions. @@ -175,7 +174,7 @@ abstract class BaseSessionStateBuilder( * * Note: this depends on the `conf` and `catalog` fields. */ - protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { + protected def analyzer: Analyzer = new Analyzer(catalogManager) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new FindDataSourceTable(session) +: new ResolveSQLOnFile(session) +: @@ -197,7 +196,7 @@ abstract class BaseSessionStateBuilder( PreReadCheck +: HiveOnlyCheck +: TableCapabilityCheck +: - CommandCheck(conf) +: + CommandCheck +: customCheckRules } @@ -270,7 +269,7 @@ abstract class BaseSessionStateBuilder( * Note: this depends on the `conf` and `experimentalMethods` fields. 
*/ protected def planner: SparkPlanner = { - new SparkPlanner(session, conf, experimentalMethods) { + new SparkPlanner(session, experimentalMethods) { override def extraPlanningStrategies: Seq[Strategy] = super.extraPlanningStrategies ++ customPlanningStrategies } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 0f9a89741c192..48d8c3d325347 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution._ import org.apache.spark.sql.streaming.StreamingQueryManager -import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListener} +import org.apache.spark.sql.util.ExecutionListenerManager /** * A class that holds all session-specific state in a given [[SparkSession]]. @@ -52,7 +52,8 @@ import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListen * @param planner Planner that converts optimized logical plans to physical plans. * @param streamingQueryManagerBuilder A function to create a streaming query manager to * start and stop streaming queries. - * @param listenerManager Interface to register custom [[QueryExecutionListener]]s. + * @param listenerManager Interface to register custom + * [[org.apache.spark.sql.util.QueryExecutionListener]]s. * @param resourceLoaderBuilder a function to create a session shared resource loader to load JARs, * files, etc. * @param createQueryExecution Function used to create QueryExecution objects.
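Several hunks in this patch (`V2SessionCatalog` and `BaseSessionStateBuilder` above, `VariableSubstitution` below) follow the same refactoring pattern: the explicit `SQLConf` constructor parameter is dropped and the class mixes in `org.apache.spark.sql.catalyst.SQLConfHelper`, whose `conf` member resolves the currently active `SQLConf`. A minimal sketch of the pattern — `NameNormalizer` is a hypothetical class used only for illustration:

```scala
import java.util.Locale

import org.apache.spark.sql.catalyst.SQLConfHelper

// Hypothetical helper showing the refactoring pattern: no SQLConf constructor
// argument; `conf` is provided by the SQLConfHelper trait at call time.
class NameNormalizer extends SQLConfHelper {
  def normalize(name: String): String =
    if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
}
```

Reading `conf` at call time is what lets classes such as `V2SessionCatalog` and `VariableSubstitution` drop the constructor parameter: they now pick up whichever `SQLConf` is active for the current session rather than one captured at construction time.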
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 1acdc4bd5f0e3..89aceacac6007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -31,13 +31,11 @@ import org.apache.hadoop.fs.FsUrlStreamHandlerFactory import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} import org.apache.spark.sql.internal.StaticSQLConf._ -import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} import org.apache.spark.status.ElementTrackingStore import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala index 2b9c574aaaf0c..248dfa107bc4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.internal import org.apache.spark.internal.config._ +import org.apache.spark.sql.catalyst.SQLConfHelper /** * A helper class that enables substitution using syntax like @@ -25,9 +26,7 @@ import org.apache.spark.internal.config._ * * Variable substitution is controlled by `SQLConf.variableSubstituteEnabled`. 
*/ -class VariableSubstitution { - - private def conf = SQLConf.get +class VariableSubstitution extends SQLConfHelper { private val provider = new ConfigProvider { override def get(key: String): Option[String] = Option(conf.getConfString(key, "")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 0fe2d0be966d0..ffdbe9d4e4915 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala index cdad5ed9942b5..1f7e65dede170 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.streaming.ui import java.text.SimpleDateFormat -import java.util.Locale import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql new file mode 100644 index 0000000000000..23e5b9562973b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql @@ -0,0 +1 @@ +--IMPORT map.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql index a084dbef61a0c..f83277376e680 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql @@ -1,3 +1,7 @@ +-- test cases for like all +--CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=0 +--CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=200 + CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/map.sql b/sql/core/src/test/resources/sql-tests/inputs/map.sql new file mode 100644 index 0000000000000..e2d855fba154e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/map.sql @@ -0,0 +1,5 @@ +-- test cases for map functions + +-- key does not exist +select element_at(map(1, 'a', 2, 'b'), 5); +select map(1, 'a', 2, 'b')[5]; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out new file mode 100644 index 0000000000000..12c599812cdee --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -0,0 +1,20 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 2 + + +-- !query +select element_at(map(1, 'a', 2, 'b'), 5) +-- !query schema +struct<> +-- !query output +java.util.NoSuchElementException +Key 5 does not exist. 
+ + +-- !query +select map(1, 'a', 2, 'b')[5] +-- !query schema +struct<> +-- !query output +java.util.NoSuchElementException +Key 5 does not exist. diff --git a/sql/core/src/test/resources/sql-tests/results/map.sql.out b/sql/core/src/test/resources/sql-tests/results/map.sql.out new file mode 100644 index 0000000000000..7a0c0d776ca2b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/map.sql.out @@ -0,0 +1,18 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 2 + + +-- !query +select element_at(map(1, 'a', 2, 'b'), 5) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select map(1, 'a', 2, 'b')[5] +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 509fb0133095b..a446163e3d29d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (44) +* Sort (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) TakeOrderedAndProject +(44) Sort [codegen id : 12] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index ea9a0b27ff700..73a9b58010f58 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (12) +WholeStageCodegen (12) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 2ae939cfe41f3..ea7e298393e4c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate 
Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index a044b05365f8e..169f07c2d85e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index fe5966bb4dfb3..61e5ae0121819 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -1,46 +1,45 @@ == Physical Plan == -* Sort (42) -+- Exchange (41) - +- * Project (40) - +- * SortMergeJoin Inner (39) - :- * Sort (27) - : +- Exchange (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- Union (9) - : : : :- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- * Project (8) - : : : +- * Filter (7) - : : : +- * ColumnarToRow (6) - : : : +- Scan parquet default.catalog_sales (5) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) - : +- BroadcastExchange (23) - : +- * Project (22) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.date_dim (19) - +- * Sort (38) - +- Exchange (37) - +- * Project (36) - +- * BroadcastHashJoin Inner BuildRight (35) - :- * HashAggregate (29) - : +- ReusedExchange (28) - +- BroadcastExchange (34) - +- * Project (33) - +- * Filter (32) - +- * ColumnarToRow (31) - +- Scan parquet default.date_dim (30) +* Sort (41) ++- Exchange (40) + +- * Project (39) + +- * SortMergeJoin Inner (38) + :- * Sort (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + 
: : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- * Sort (37) + +- Exchange (36) + +- * Project (35) + +- * BroadcastHashJoin Inner BuildRight (34) + :- * HashAggregate (28) + : +- ReusedExchange (27) + +- BroadcastExchange (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.date_dim (29) (1) Scan parquet default.web_sales @@ -157,77 +156,73 @@ Join condition: None Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] -(26) Exchange -Input [8]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] -Arguments: hashpartitioning(d_week_seq1#45, 5), true, [id=#53] - -(27) Sort [codegen id : 7] +(26) Sort [codegen id : 6] Input [8]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] Arguments: [d_week_seq1#45 ASC NULLS FIRST], false, 0 -(28) ReusedExchange [Reuses operator id: 17] -Output [8]: [d_week_seq#10, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +(27) ReusedExchange [Reuses operator id: 17] +Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -(29) HashAggregate [codegen id : 13] -Input [8]: [d_week_seq#10, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +(28) HashAggregate [codegen id : 12] +Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#66, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#67] -Results [8]: 
[d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#61,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#62,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#63,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#64,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#65,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#66,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#67,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] -(30) Scan parquet default.date_dim -Output [2]: [d_week_seq#68, d_year#69] +(29) Scan parquet default.date_dim +Output [2]: [d_week_seq#67, d_year#68] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] ReadSchema: struct -(31) ColumnarToRow [codegen id : 12] -Input [2]: [d_week_seq#68, d_year#69] +(30) ColumnarToRow [codegen id : 11] +Input [2]: [d_week_seq#67, d_year#68] -(32) Filter [codegen id : 12] -Input [2]: [d_week_seq#68, d_year#69] -Condition : ((isnotnull(d_year#69) AND (d_year#69 = 2002)) AND isnotnull(d_week_seq#68)) +(31) Filter [codegen id : 11] +Input [2]: [d_week_seq#67, d_year#68] +Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) -(33) Project [codegen id : 12] -Output [1]: [d_week_seq#68] -Input [2]: [d_week_seq#68, d_year#69] +(32) Project [codegen id : 11] +Output [1]: [d_week_seq#67] +Input [2]: [d_week_seq#67, d_year#68] -(34) BroadcastExchange -Input [1]: 
[d_week_seq#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#70] +(33) BroadcastExchange +Input [1]: [d_week_seq#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] -(35) BroadcastHashJoin [codegen id : 13] +(34) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#68] +Right keys [1]: [d_week_seq#67] Join condition: None -(36) Project [codegen id : 13] -Output [8]: [d_week_seq#10 AS d_week_seq2#71, sun_sales#35 AS sun_sales2#72, mon_sales#36 AS mon_sales2#73, tue_sales#37 AS tue_sales2#74, wed_sales#38 AS wed_sales2#75, thu_sales#39 AS thu_sales2#76, fri_sales#40 AS fri_sales2#77, sat_sales#41 AS sat_sales2#78] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#68] +(35) Project [codegen id : 12] +Output [8]: [d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] +Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] -(37) Exchange -Input [8]: [d_week_seq2#71, sun_sales2#72, mon_sales2#73, tue_sales2#74, wed_sales2#75, thu_sales2#76, fri_sales2#77, sat_sales2#78] -Arguments: hashpartitioning((d_week_seq2#71 - 53), 5), true, [id=#79] +(36) Exchange +Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] +Arguments: hashpartitioning((d_week_seq2#70 - 53), 5), true, [id=#78] -(38) Sort [codegen id : 14] -Input [8]: [d_week_seq2#71, sun_sales2#72, mon_sales2#73, tue_sales2#74, wed_sales2#75, thu_sales2#76, fri_sales2#77, sat_sales2#78] -Arguments: [(d_week_seq2#71 - 53) ASC NULLS FIRST], false, 0 +(37) Sort [codegen id : 13] +Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] +Arguments: [(d_week_seq2#70 - 53) ASC NULLS FIRST], false, 0 -(39) SortMergeJoin [codegen id : 15] +(38) SortMergeJoin [codegen id : 14] Left keys [1]: [d_week_seq1#45] -Right keys [1]: [(d_week_seq2#71 - 53)] +Right keys [1]: [(d_week_seq2#70 - 53)] Join condition: None -(40) Project [codegen id : 15] -Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#72)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#80, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#73)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#81, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#74)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#82, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#75)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#83, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#76)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#84, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#77)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#85, round(CheckOverflow((promote_precision(sat_sales1#52) / 
promote_precision(sat_sales2#78)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#86] -Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#71, sun_sales2#72, mon_sales2#73, tue_sales2#74, wed_sales2#75, thu_sales2#76, fri_sales2#77, sat_sales2#78] +(39) Project [codegen id : 14] +Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#85] +Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -(41) Exchange -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#80, round((mon_sales1 / mon_sales2), 2)#81, round((tue_sales1 / tue_sales2), 2)#82, round((wed_sales1 / wed_sales2), 2)#83, round((thu_sales1 / thu_sales2), 2)#84, round((fri_sales1 / fri_sales2), 2)#85, round((sat_sales1 / sat_sales2), 2)#86] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#87] +(40) Exchange +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] -(42) Sort [codegen id : 16] -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#80, round((mon_sales1 / mon_sales2), 2)#81, round((tue_sales1 / tue_sales2), 2)#82, round((wed_sales1 / wed_sales2), 2)#83, round((thu_sales1 / thu_sales2), 2)#84, round((fri_sales1 / fri_sales2), 2)#85, round((sat_sales1 / sat_sales2), 2)#86] +(41) Sort [codegen id : 15] +Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt index 3df7e4c8e6f3f..3389774c46469 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt @@ -1,68 +1,65 @@ -WholeStageCodegen (16) +WholeStageCodegen (15) Sort [d_week_seq1] InputAdapter Exchange [d_week_seq1] #1 - WholeStageCodegen (15) + WholeStageCodegen (14) Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] SortMergeJoin [d_week_seq1,d_week_seq2] InputAdapter - WholeStageCodegen (7) + WholeStageCodegen (6) Sort [d_week_seq1] - InputAdapter - Exchange [d_week_seq1] #2 - WholeStageCodegen (6) - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] - InputAdapter - Exchange [d_week_seq] #3 - WholeStageCodegen (4) - HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [sold_date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [ws_sold_date_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] - WholeStageCodegen (2) - Project [cs_sold_date_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Filter [d_date_sk,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (5) - Project [d_week_seq] - Filter [d_year,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null 
END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] InputAdapter - WholeStageCodegen (14) + WholeStageCodegen (13) Sort [d_week_seq2] InputAdapter - Exchange [d_week_seq2] #6 - WholeStageCodegen (13) + Exchange [d_week_seq2] #5 + WholeStageCodegen (12) Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #3 + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter - BroadcastExchange #7 - WholeStageCodegen (12) + BroadcastExchange #6 + WholeStageCodegen (11) Project [d_week_seq] Filter [d_year,d_week_seq] ColumnarToRow diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index c5988072f758d..85f71b6cd9388 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -1,96 +1,103 @@ == Physical Plan == -CollectLimit (92) -+- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- Union (88) - :- * Project (60) - : +- * BroadcastHashJoin Inner BuildRight (59) - : :- * Project (53) - : : +- SortMergeJoin LeftSemi (52) - : : :- * Sort (34) - : : : +- Exchange (33) - : : : +- * Project (32) - : : : +- SortMergeJoin LeftSemi (31) - : : : :- * Sort (5) - : : : : +- Exchange (4) - : : : : +- * Filter (3) - : : 
: : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- * Sort (30) - : : : +- Exchange (29) - : : : +- * Project (28) - : : : +- * Filter (27) - : : : +- * HashAggregate (26) - : : : +- * HashAggregate (25) - : : : +- * Project (24) - : : : +- * SortMergeJoin Inner (23) - : : : :- * Sort (17) - : : : : +- Exchange (16) - : : : : +- * Project (15) - : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.store_sales (6) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.date_dim (9) - : : : +- * Sort (22) - : : : +- Exchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.item (18) - : : +- * Sort (51) - : : +- * Project (50) - : : +- * Filter (49) - : : +- * HashAggregate (48) - : : +- * HashAggregate (47) - : : +- * Project (46) - : : +- * SortMergeJoin Inner (45) - : : :- * Sort (39) - : : : +- Exchange (38) - : : : +- * Filter (37) - : : : +- * ColumnarToRow (36) - : : : +- Scan parquet default.store_sales (35) - : : +- * Sort (44) - : : +- Exchange (43) - : : +- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.customer (40) - : +- BroadcastExchange (58) - : +- * Project (57) - : +- * Filter (56) - : +- * ColumnarToRow (55) - : +- Scan parquet default.date_dim (54) - +- * Project (87) - +- * BroadcastHashJoin Inner BuildRight (86) - :- * Project (84) - : +- SortMergeJoin LeftSemi (83) - : :- * Sort (71) - : : +- Exchange (70) - : : +- * Project (69) - : : +- SortMergeJoin LeftSemi (68) - : : :- * Sort (65) - : : : +- Exchange (64) - : : : +- * Filter (63) - : : : +- * ColumnarToRow (62) - : : : +- Scan parquet default.web_sales (61) - : : +- * Sort (67) - : : +- ReusedExchange (66) - : +- * Sort (82) - : +- * Project (81) - : +- * Filter (80) - : +- * HashAggregate (79) - : +- * HashAggregate (78) - : +- * Project (77) - : +- * SortMergeJoin Inner (76) - : :- * Sort (73) - : : +- ReusedExchange (72) - : +- * Sort (75) - : +- ReusedExchange (74) - +- ReusedExchange (85) +* HashAggregate (99) ++- Exchange (98) + +- * HashAggregate (97) + +- Union (96) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (52) + : : +- SortMergeJoin LeftSemi (51) + : : :- * Sort (33) + : : : +- Exchange (32) + : : : +- * Project (31) + : : : +- SortMergeJoin LeftSemi (30) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (50) + : : +- * Project (49) + : : +- * Filter (48) + : : +- * HashAggregate 
(47) + : : +- * HashAggregate (46) + : : +- * Project (45) + : : +- * SortMergeJoin Inner (44) + : : :- * Sort (38) + : : : +- Exchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.store_sales (34) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer (39) + : +- BroadcastExchange (57) + : +- * Project (56) + : +- * Filter (55) + : +- * ColumnarToRow (54) + : +- Scan parquet default.date_dim (53) + +- * Project (95) + +- * BroadcastHashJoin Inner BuildRight (94) + :- * Project (92) + : +- SortMergeJoin LeftSemi (91) + : :- * Sort (79) + : : +- Exchange (78) + : : +- * Project (77) + : : +- SortMergeJoin LeftSemi (76) + : : :- * Sort (64) + : : : +- Exchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet default.web_sales (60) + : : +- * Sort (75) + : : +- * Project (74) + : : +- * Filter (73) + : : +- * HashAggregate (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * SortMergeJoin Inner (69) + : : :- * Sort (66) + : : : +- ReusedExchange (65) + : : +- * Sort (68) + : : +- ReusedExchange (67) + : +- * Sort (90) + : +- * Project (89) + : +- * Filter (88) + : +- * HashAggregate (87) + : +- * HashAggregate (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (81) + : : +- ReusedExchange (80) + : +- * Sort (83) + : +- ReusedExchange (82) + +- ReusedExchange (93) (1) Scan parquet default.catalog_sales @@ -221,435 +228,469 @@ Condition : (count(1)#22 > 4) Output [1]: [item_sk#21] Input [2]: [item_sk#21, count(1)#22] -(29) Exchange -Input [1]: [item_sk#21] -Arguments: hashpartitioning(item_sk#21, 5), true, [id=#23] - -(30) Sort [codegen id : 9] +(29) Sort [codegen id : 8] Input [1]: [item_sk#21] Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin +(30) SortMergeJoin Left keys [1]: [cs_item_sk#3] Right keys [1]: [item_sk#21] Join condition: None -(32) Project [codegen id : 10] +(31) Project [codegen id : 9] Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] -(33) Exchange +(32) Exchange Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] -Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#24] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#23] -(34) Sort [codegen id : 11] +(33) Sort [codegen id : 10] Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 -(35) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(34) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct -(36) ColumnarToRow [codegen id : 12] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(35) ColumnarToRow [codegen id : 11] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(37) Filter [codegen id : 12] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Condition : isnotnull(ss_customer_sk#25) +(36) Filter [codegen id : 11] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Condition : 
isnotnull(ss_customer_sk#24) -(38) Exchange -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#28] +(37) Exchange +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#27] -(39) Sort [codegen id : 13] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(38) Sort [codegen id : 12] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(40) Scan parquet default.customer -Output [1]: [c_customer_sk#29] +(39) Scan parquet default.customer +Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(41) ColumnarToRow [codegen id : 14] -Input [1]: [c_customer_sk#29] +(40) ColumnarToRow [codegen id : 13] +Input [1]: [c_customer_sk#28] -(42) Filter [codegen id : 14] -Input [1]: [c_customer_sk#29] -Condition : isnotnull(c_customer_sk#29) +(41) Filter [codegen id : 13] +Input [1]: [c_customer_sk#28] +Condition : isnotnull(c_customer_sk#28) -(43) Exchange -Input [1]: [c_customer_sk#29] -Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#30] +(42) Exchange +Input [1]: [c_customer_sk#28] +Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#29] -(44) Sort [codegen id : 15] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(43) Sort [codegen id : 14] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(45) SortMergeJoin [codegen id : 16] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(44) SortMergeJoin [codegen id : 15] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(46) Project [codegen id : 16] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(47) HashAggregate [codegen id : 16] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] - -(48) HashAggregate [codegen id : 16] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), 
true))#36] - -(49) Filter [codegen id : 16] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) - -(50) Project [codegen id : 16] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] - -(51) Sort [codegen id : 16] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 - -(52) SortMergeJoin +(45) Project [codegen id : 15] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(46) HashAggregate [codegen id : 15] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#30, isEmpty#31] +Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] + +(47) HashAggregate [codegen id : 15] +Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#34] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#34 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] + +(48) Filter [codegen id : 15] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 
as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#36, [id=#37] as decimal(32,6)))), DecimalType(38,8), true))) + +(49) Project [codegen id : 15] +Output [1]: [c_customer_sk#28] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] + +(50) Sort [codegen id : 15] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 + +(51) SortMergeJoin Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#28] Join condition: None -(53) Project [codegen id : 18] +(52) Project [codegen id : 17] Output [3]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5] Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] -(54) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#11, d_moy#39] +(53) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#11, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct -(55) ColumnarToRow [codegen id : 17] -Input [3]: [d_date_sk#9, d_year#11, d_moy#39] +(54) ColumnarToRow [codegen id : 16] +Input [3]: [d_date_sk#9, d_year#11, d_moy#38] -(56) Filter [codegen id : 17] -Input [3]: [d_date_sk#9, d_year#11, d_moy#39] -Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#39)) AND (d_year#11 = 2000)) AND (d_moy#39 = 2)) AND isnotnull(d_date_sk#9)) +(55) Filter [codegen id : 16] +Input [3]: [d_date_sk#9, d_year#11, d_moy#38] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#38)) AND (d_year#11 = 2000)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#9)) -(57) Project [codegen id : 17] +(56) Project [codegen id : 16] Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#11, d_moy#39] +Input [3]: [d_date_sk#9, d_year#11, d_moy#38] -(58) BroadcastExchange +(57) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] -(59) BroadcastHashJoin [codegen id : 18] +(58) BroadcastHashJoin [codegen id : 17] Left keys [1]: [cs_sold_date_sk#1] Right keys [1]: [d_date_sk#9] Join condition: None -(60) Project [codegen id : 18] -Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true) AS sales#41] +(59) Project [codegen id : 17] +Output [1]: [CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true) AS sales#40] Input [4]: [cs_sold_date_sk#1, cs_quantity#4, cs_list_price#5, d_date_sk#9] -(61) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +(60) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(62) ColumnarToRow [codegen id : 
19] -Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +(61) ColumnarToRow [codegen id : 18] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] + +(62) Filter [codegen id : 18] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Condition : isnotnull(ws_sold_date_sk#41) + +(63) Exchange +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Arguments: hashpartitioning(ws_item_sk#42, 5), true, [id=#46] + +(64) Sort [codegen id : 19] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Arguments: [ws_item_sk#42 ASC NULLS FIRST], false, 0 + +(65) ReusedExchange [Reuses operator id: 16] +Output [2]: [ss_item_sk#8, d_date#10] + +(66) Sort [codegen id : 22] +Input [2]: [ss_item_sk#8, d_date#10] +Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 + +(67) ReusedExchange [Reuses operator id: 21] +Output [2]: [i_item_sk#14, i_item_desc#15] + +(68) Sort [codegen id : 24] +Input [2]: [i_item_sk#14, i_item_desc#15] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin [codegen id : 25] +Left keys [1]: [ss_item_sk#8] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(70) Project [codegen id : 25] +Output [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Input [4]: [ss_item_sk#8, d_date#10, i_item_sk#14, i_item_desc#15] -(63) Filter [codegen id : 19] -Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] -Condition : isnotnull(ws_sold_date_sk#42) +(71) HashAggregate [codegen id : 25] +Input [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Keys [3]: [substr(i_item_desc#15, 1, 30) AS substr(i_item_desc#15, 1, 30)#47, i_item_sk#14, d_date#10] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#48] +Results [4]: [substr(i_item_desc#15, 1, 30)#47, i_item_sk#14, d_date#10, count#49] -(64) Exchange -Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] -Arguments: hashpartitioning(ws_item_sk#43, 5), true, [id=#47] +(72) HashAggregate [codegen id : 25] +Input [4]: [substr(i_item_desc#15, 1, 30)#47, i_item_sk#14, d_date#10, count#49] +Keys [3]: [substr(i_item_desc#15, 1, 30)#47, i_item_sk#14, d_date#10] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#50] +Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#50 AS count(1)#51] -(65) Sort [codegen id : 20] -Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] -Arguments: [ws_item_sk#43 ASC NULLS FIRST], false, 0 +(73) Filter [codegen id : 25] +Input [2]: [item_sk#21, count(1)#51] +Condition : (count(1)#51 > 4) -(66) ReusedExchange [Reuses operator id: 29] +(74) Project [codegen id : 25] Output [1]: [item_sk#21] +Input [2]: [item_sk#21, count(1)#51] -(67) Sort [codegen id : 27] +(75) Sort [codegen id : 25] Input [1]: [item_sk#21] Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 -(68) SortMergeJoin -Left keys [1]: [ws_item_sk#43] +(76) SortMergeJoin +Left keys [1]: [ws_item_sk#42] Right keys [1]: [item_sk#21] Join condition: None -(69) Project [codegen id : 28] -Output [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] -Input [5]: [ws_sold_date_sk#42, ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +(77) Project [codegen id : 26] 
+Output [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Input [5]: [ws_sold_date_sk#41, ws_item_sk#42, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] -(70) Exchange -Input [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] -Arguments: hashpartitioning(ws_bill_customer_sk#44, 5), true, [id=#48] +(78) Exchange +Input [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Arguments: hashpartitioning(ws_bill_customer_sk#43, 5), true, [id=#52] -(71) Sort [codegen id : 29] -Input [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] -Arguments: [ws_bill_customer_sk#44 ASC NULLS FIRST], false, 0 +(79) Sort [codegen id : 27] +Input [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] +Arguments: [ws_bill_customer_sk#43 ASC NULLS FIRST], false, 0 -(72) ReusedExchange [Reuses operator id: 38] -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(80) ReusedExchange [Reuses operator id: 37] +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(73) Sort [codegen id : 31] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(81) Sort [codegen id : 29] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(74) ReusedExchange [Reuses operator id: 43] -Output [1]: [c_customer_sk#29] +(82) ReusedExchange [Reuses operator id: 42] +Output [1]: [c_customer_sk#28] -(75) Sort [codegen id : 33] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(83) Sort [codegen id : 31] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(76) SortMergeJoin [codegen id : 34] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(84) SortMergeJoin [codegen id : 32] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(77) Project [codegen id : 34] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(78) HashAggregate [codegen id : 34] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#49, isEmpty#50] -Results [3]: [c_customer_sk#29, sum#51, isEmpty#52] - -(79) HashAggregate [codegen id : 34] -Input [3]: [c_customer_sk#29, sum#51, isEmpty#52] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#53] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#53 AS 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54] - -(80) Filter [codegen id : 34] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) - -(81) Project [codegen id : 34] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#54] - -(82) Sort [codegen id : 34] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 - -(83) SortMergeJoin -Left keys [1]: [ws_bill_customer_sk#44] -Right keys [1]: [c_customer_sk#29] +(85) Project [codegen id : 32] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(86) HashAggregate [codegen id : 32] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#53, isEmpty#54] +Results [3]: [c_customer_sk#28, sum#55, isEmpty#56] + +(87) HashAggregate [codegen id : 32] +Input [3]: [c_customer_sk#28, sum#55, isEmpty#56] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#57] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#57 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#58] + +(88) Filter [codegen id : 32] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#58] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#36, [id=#37] as decimal(32,6)))), DecimalType(38,8), true))) + +(89) Project [codegen id : 32] +Output [1]: [c_customer_sk#28] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#58] + +(90) Sort [codegen id : 32] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 + +(91) SortMergeJoin +Left keys [1]: [ws_bill_customer_sk#43] +Right keys [1]: [c_customer_sk#28] Join condition: None -(84) Project [codegen id : 36] -Output [3]: [ws_sold_date_sk#42, ws_quantity#45, ws_list_price#46] -Input [4]: [ws_sold_date_sk#42, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +(92) Project [codegen id : 34] +Output [3]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45] +Input [4]: [ws_sold_date_sk#41, ws_bill_customer_sk#43, ws_quantity#44, ws_list_price#45] -(85) ReusedExchange [Reuses operator id: 58] +(93) ReusedExchange [Reuses operator id: 57] Output [1]: [d_date_sk#9] -(86) BroadcastHashJoin [codegen id : 36] -Left keys [1]: [ws_sold_date_sk#42] +(94) BroadcastHashJoin [codegen id : 34] +Left keys [1]: [ws_sold_date_sk#41] Right keys [1]: [d_date_sk#9] Join condition: None -(87) Project [codegen id : 36] -Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2), true) AS sales#55] -Input [4]: [ws_sold_date_sk#42, ws_quantity#45, ws_list_price#46, d_date_sk#9] +(95) Project [codegen id : 34] +Output [1]: [CheckOverflow((promote_precision(cast(cast(ws_quantity#44 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#45 as decimal(12,2)))), DecimalType(18,2), true) AS sales#59] +Input [4]: [ws_sold_date_sk#41, ws_quantity#44, ws_list_price#45, d_date_sk#9] -(88) Union +(96) Union -(89) HashAggregate [codegen id : 37] -Input [1]: [sales#41] +(97) HashAggregate [codegen id : 35] +Input [1]: [sales#40] Keys: [] -Functions [1]: [partial_sum(sales#41)] -Aggregate Attributes [2]: [sum#56, isEmpty#57] -Results [2]: [sum#58, isEmpty#59] +Functions [1]: [partial_sum(sales#40)] +Aggregate Attributes [2]: [sum#60, isEmpty#61] +Results [2]: [sum#62, isEmpty#63] -(90) Exchange -Input [2]: [sum#58, isEmpty#59] -Arguments: SinglePartition, true, [id=#60] +(98) Exchange +Input [2]: [sum#62, isEmpty#63] +Arguments: SinglePartition, true, [id=#64] -(91) HashAggregate [codegen id : 38] -Input [2]: [sum#58, isEmpty#59] +(99) HashAggregate [codegen id : 36] +Input [2]: [sum#62, isEmpty#63] Keys: [] -Functions [1]: [sum(sales#41)] -Aggregate Attributes [1]: [sum(sales#41)#61] -Results [1]: [sum(sales#41)#61 AS sum(sales)#62] - -(92) CollectLimit -Input [1]: [sum(sales)#62] -Arguments: 100 +Functions [1]: [sum(sales#40)] +Aggregate Attributes [1]: [sum(sales#40)#65] +Results [1]: [sum(sales#40)#65 AS sum(sales)#66] ===== Subqueries ===== -Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#37, [id=#38] -* HashAggregate (116) -+- 
Exchange (115) - +- * HashAggregate (114) - +- * HashAggregate (113) - +- * HashAggregate (112) - +- * Project (111) - +- * SortMergeJoin Inner (110) - :- * Sort (104) - : +- Exchange (103) - : +- * Project (102) - : +- * BroadcastHashJoin Inner BuildRight (101) - : :- * Filter (95) - : : +- * ColumnarToRow (94) - : : +- Scan parquet default.store_sales (93) - : +- BroadcastExchange (100) - : +- * Project (99) - : +- * Filter (98) - : +- * ColumnarToRow (97) - : +- Scan parquet default.date_dim (96) - +- * Sort (109) - +- Exchange (108) - +- * Filter (107) - +- * ColumnarToRow (106) - +- Scan parquet default.customer (105) - - -(93) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Subquery:1 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#36, [id=#37] +* HashAggregate (123) ++- Exchange (122) + +- * HashAggregate (121) + +- * HashAggregate (120) + +- * HashAggregate (119) + +- * Project (118) + +- * SortMergeJoin Inner (117) + :- * Sort (111) + : +- Exchange (110) + : +- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * Filter (102) + : : +- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- BroadcastExchange (107) + : +- * Project (106) + : +- * Filter (105) + : +- * ColumnarToRow (104) + : +- Scan parquet default.date_dim (103) + +- * Sort (116) + +- Exchange (115) + +- * Filter (114) + +- * ColumnarToRow (113) + +- Scan parquet default.customer (112) + + +(100) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(94) ColumnarToRow [codegen id : 2] -Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(101) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(95) Filter [codegen id : 2] -Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Condition : (isnotnull(ss_customer_sk#25) AND isnotnull(ss_sold_date_sk#7)) +(102) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Condition : (isnotnull(ss_customer_sk#24) AND isnotnull(ss_sold_date_sk#7)) -(96) Scan parquet default.date_dim +(103) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(97) ColumnarToRow [codegen id : 1] +(104) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(98) Filter [codegen id : 1] +(105) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(99) Project [codegen id : 1] +(106) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(100) BroadcastExchange +(107) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#67] -(101) BroadcastHashJoin [codegen id : 2] +(108) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: 
[d_date_sk#9] Join condition: None -(102) Project [codegen id : 2] -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Input [5]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, d_date_sk#9] +(109) Project [codegen id : 2] +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Input [5]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, d_date_sk#9] -(103) Exchange -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#64] +(110) Exchange +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#68] -(104) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(111) Sort [codegen id : 3] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(105) Scan parquet default.customer -Output [1]: [c_customer_sk#29] +(112) Scan parquet default.customer +Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(106) ColumnarToRow [codegen id : 4] -Input [1]: [c_customer_sk#29] +(113) ColumnarToRow [codegen id : 4] +Input [1]: [c_customer_sk#28] -(107) Filter [codegen id : 4] -Input [1]: [c_customer_sk#29] -Condition : isnotnull(c_customer_sk#29) +(114) Filter [codegen id : 4] +Input [1]: [c_customer_sk#28] +Condition : isnotnull(c_customer_sk#28) -(108) Exchange -Input [1]: [c_customer_sk#29] -Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#65] +(115) Exchange +Input [1]: [c_customer_sk#28] +Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#69] -(109) Sort [codegen id : 5] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(116) Sort [codegen id : 5] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(110) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(117) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(111) Project [codegen id : 6] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(112) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#66, isEmpty#67] -Results [3]: [c_customer_sk#29, sum#68, isEmpty#69] - -(113) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#29, sum#68, isEmpty#69] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#70] 
-Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#70 AS csales#71] - -(114) HashAggregate [codegen id : 6] -Input [1]: [csales#71] +(118) Project [codegen id : 6] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(119) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#70, isEmpty#71] +Results [3]: [c_customer_sk#28, sum#72, isEmpty#73] + +(120) HashAggregate [codegen id : 6] +Input [3]: [c_customer_sk#28, sum#72, isEmpty#73] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74 AS csales#75] + +(121) HashAggregate [codegen id : 6] +Input [1]: [csales#75] Keys: [] -Functions [1]: [partial_max(csales#71)] -Aggregate Attributes [1]: [max#72] -Results [1]: [max#73] +Functions [1]: [partial_max(csales#75)] +Aggregate Attributes [1]: [max#76] +Results [1]: [max#77] -(115) Exchange -Input [1]: [max#73] -Arguments: SinglePartition, true, [id=#74] +(122) Exchange +Input [1]: [max#77] +Arguments: SinglePartition, true, [id=#78] -(116) HashAggregate [codegen id : 7] -Input [1]: [max#73] +(123) HashAggregate [codegen id : 7] +Input [1]: [max#77] Keys: [] -Functions [1]: [max(csales#71)] -Aggregate Attributes [1]: [max(csales#71)#75] -Results [1]: [max(csales#71)#75 AS tpcds_cmax#76] +Functions [1]: [max(csales#75)] +Aggregate Attributes [1]: [max(csales#75)#79] +Results [1]: [max(csales#75)#79 AS tpcds_cmax#80] -Subquery:2 Hosting operator id = 80 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:2 Hosting operator id = 88 Hosting Expression = ReusedSubquery Subquery scalar-subquery#36, [id=#37] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 9ee444cdd988c..5bb8bc5b99d0c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -1,198 +1,208 @@ -CollectLimit - WholeStageCodegen (38) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (37) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (18) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - 
Project [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (11) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (10) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_item_sk,item_sk] - WholeStageCodegen (2) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (1) - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (9) - Sort [item_sk] - InputAdapter - Exchange [item_sk] #4 - WholeStageCodegen (8) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #5 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (16) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (7) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #10 - WholeStageCodegen (6) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] +WholeStageCodegen (36) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (35) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (17) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (9) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter 
[cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (8) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] + WholeStageCodegen (5) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #11 - WholeStageCodegen (2) - Project [ss_customer_sk,ss_quantity,ss_sales_price] + Exchange [ss_item_sk] #4 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - BroadcastExchange #12 - WholeStageCodegen (1) - Project [d_date_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_date] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_date,d_year] InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] + WholeStageCodegen (7) + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #13 - WholeStageCodegen (4) - Filter [c_customer_sk] + Exchange [i_item_sk] #6 + WholeStageCodegen (6) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (13) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #8 - WholeStageCodegen (12) - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - WholeStageCodegen (15) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #9 - WholeStageCodegen (14) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (17) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (36) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (29) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #15 - WholeStageCodegen (28) - Project 
[ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (15) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (20) - Sort [ws_item_sk] + Exchange #9 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #10 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #12 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ss_customer_sk] InputAdapter - Exchange [ws_item_sk] #16 - WholeStageCodegen (19) - Filter [ws_sold_date_sk] + Exchange [ss_customer_sk] #7 + WholeStageCodegen (11) + Filter [ss_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (27) - Sort [item_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] InputAdapter - ReusedExchange [item_sk] #4 - WholeStageCodegen (34) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (31) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 - InputAdapter - WholeStageCodegen (33) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #9 + Exchange [c_customer_sk] #8 + WholeStageCodegen (13) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (34) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] #14 + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + WholeStageCodegen (27) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #14 + WholeStageCodegen (26) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + SortMergeJoin [ws_item_sk,item_sk] + WholeStageCodegen (19) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #15 + WholeStageCodegen (18) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + WholeStageCodegen (25) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (22) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,d_date] #4 + InputAdapter + WholeStageCodegen (24) + Sort [i_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_desc] #6 + WholeStageCodegen (32) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 + 
InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 6d2b5b0013d8f..15ae5bfe24303 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -1,76 +1,75 @@ == Physical Plan == -CollectLimit (72) -+- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- Union (68) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Project (44) - : : +- * BroadcastHashJoin LeftSemi BuildRight (43) - : : :- * Project (27) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (25) - : : : +- * Project (24) - : : : +- * Filter (23) - : : : +- * HashAggregate (22) - : : : +- Exchange (21) - : : : +- * HashAggregate (20) - : : : +- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (17) - : : : +- * Filter (16) - : : : +- * ColumnarToRow (15) - : : : +- Scan parquet default.item (14) - : : +- BroadcastExchange (42) - : : +- * Project (41) - : : +- * Filter (40) - : : +- * HashAggregate (39) - : : +- Exchange (38) - : : +- * HashAggregate (37) - : : +- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.store_sales (28) - : : +- BroadcastExchange (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer (31) - : +- BroadcastExchange (49) - : +- * Project (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.date_dim (45) - +- * Project (67) - +- * BroadcastHashJoin Inner BuildRight (66) - :- * Project (64) - : +- * BroadcastHashJoin LeftSemi BuildRight (63) - : :- * Project (57) - : : +- * BroadcastHashJoin LeftSemi BuildRight (56) - : : :- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.web_sales (52) - : : +- ReusedExchange (55) - : +- BroadcastExchange (62) - : +- * Project (61) - : +- * Filter (60) - : +- * HashAggregate (59) - : +- ReusedExchange (58) - +- ReusedExchange (65) +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- Union (68) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (44) + : : +- * BroadcastHashJoin LeftSemi BuildRight (43) + : : :- * Project (27) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * Filter (23) + : : : +- * HashAggregate (22) + : : : +- Exchange (21) + : : : +- * HashAggregate (20) + : : : +- * Project (19) + : : : +- * 
BroadcastHashJoin Inner BuildRight (18) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (17) + : : : +- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- BroadcastExchange (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer (31) + : +- BroadcastExchange (49) + : +- * Project (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.date_dim (45) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin LeftSemi BuildRight (63) + : :- * Project (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.web_sales (52) + : : +- ReusedExchange (55) + : +- BroadcastExchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- * HashAggregate (59) + : +- ReusedExchange (58) + +- ReusedExchange (65) (1) Scan parquet default.catalog_sales @@ -398,139 +397,135 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#57] Results [1]: [sum(sales#40)#57 AS sum(sales)#58] -(72) CollectLimit -Input [1]: [sum(sales)#58] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (94) -+- Exchange (93) - +- * HashAggregate (92) - +- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- * Project (88) - +- * BroadcastHashJoin Inner BuildRight (87) - :- * Project (81) - : +- * BroadcastHashJoin Inner BuildRight (80) - : :- * Filter (75) - : : +- * ColumnarToRow (74) - : : +- Scan parquet default.store_sales (73) - : +- BroadcastExchange (79) - : +- * Filter (78) - : +- * ColumnarToRow (77) - : +- Scan parquet default.customer (76) - +- BroadcastExchange (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.date_dim (82) - - -(73) Scan parquet default.store_sales +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.store_sales (72) + : +- BroadcastExchange (78) + : +- * Filter (77) + : +- * ColumnarToRow (76) + : +- Scan parquet default.customer (75) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.date_dim (81) + + +(72) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: 
[IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(74) ColumnarToRow [codegen id : 3] +(73) ColumnarToRow [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -(75) Filter [codegen id : 3] +(74) Filter [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) -(76) Scan parquet default.customer +(75) Scan parquet default.customer Output [1]: [c_customer_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(77) ColumnarToRow [codegen id : 1] +(76) ColumnarToRow [codegen id : 1] Input [1]: [c_customer_sk#26] -(78) Filter [codegen id : 1] +(77) Filter [codegen id : 1] Input [1]: [c_customer_sk#26] Condition : isnotnull(c_customer_sk#26) -(79) BroadcastExchange +(78) BroadcastExchange Input [1]: [c_customer_sk#26] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] -(80) BroadcastHashJoin [codegen id : 3] +(79) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#23] Right keys [1]: [c_customer_sk#26] Join condition: None -(81) Project [codegen id : 3] +(80) Project [codegen id : 3] Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -(82) Scan parquet default.date_dim +(81) Scan parquet default.date_dim Output [2]: [d_date_sk#8, d_year#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 2] +(82) ColumnarToRow [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] -(84) Filter [codegen id : 2] +(83) Filter [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) -(85) Project [codegen id : 2] +(84) Project [codegen id : 2] Output [1]: [d_date_sk#8] Input [2]: [d_date_sk#8, d_year#10] -(86) BroadcastExchange +(85) BroadcastExchange Input [1]: [d_date_sk#8] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] -(87) BroadcastHashJoin [codegen id : 3] +(86) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#6] Right keys [1]: [d_date_sk#8] Join condition: None -(88) Project [codegen id : 3] +(87) Project [codegen id : 3] Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] -(89) HashAggregate [codegen id : 3] +(88) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Keys [1]: [c_customer_sk#26] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#61, isEmpty#62] Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] -(90) Exchange +(89) Exchange Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] -(91) HashAggregate [codegen id : 4] +(90) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Keys [1]: 
[c_customer_sk#26] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] -(92) HashAggregate [codegen id : 4] +(91) HashAggregate [codegen id : 4] Input [1]: [csales#67] Keys: [] Functions [1]: [partial_max(csales#67)] Aggregate Attributes [1]: [max#68] Results [1]: [max#69] -(93) Exchange +(92) Exchange Input [1]: [max#69] Arguments: SinglePartition, true, [id=#70] -(94) HashAggregate [codegen id : 5] +(93) HashAggregate [codegen id : 5] Input [1]: [max#69] Keys: [] Functions [1]: [max(csales#67)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index d860e18574f2a..aebe2bd3e1a6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -1,143 +1,142 @@ -CollectLimit - WholeStageCodegen (20) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (19) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (9) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_item_sk,item_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (4) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - InputAdapter - Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 - WholeStageCodegen (3) - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] +WholeStageCodegen (20) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales 
[cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + InputAdapter + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (7) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (5) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (4) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #10 - WholeStageCodegen (3) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - 
Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (6) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (5) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (8) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (18) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [item_sk] #2 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (16) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - ReusedExchange [c_customer_sk,sum,isEmpty] #7 - InputAdapter - ReusedExchange [d_date_sk] #13 + Scan parquet default.customer 
[c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (18) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt index 51b85142f37ff..9a4c2b064d091 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt @@ -1,134 +1,140 @@ == Physical Plan == -TakeOrderedAndProject (130) -+- Union (129) - :- * HashAggregate (82) - : +- Exchange (81) - : +- * HashAggregate (80) - : +- * Project (79) - : +- * SortMergeJoin Inner (78) - : :- * Project (59) - : : +- * BroadcastHashJoin Inner BuildRight (58) - : : :- SortMergeJoin 
LeftSemi (52) - : : : :- * Sort (34) - : : : : +- Exchange (33) - : : : : +- * Project (32) - : : : : +- SortMergeJoin LeftSemi (31) +TakeOrderedAndProject (136) ++- Union (135) + :- * HashAggregate (80) + : +- Exchange (79) + : +- * HashAggregate (78) + : +- * Project (77) + : +- * SortMergeJoin Inner (76) + : :- * Project (58) + : : +- * BroadcastHashJoin Inner BuildRight (57) + : : :- SortMergeJoin LeftSemi (51) + : : : :- * Sort (33) + : : : : +- Exchange (32) + : : : : +- * Project (31) + : : : : +- SortMergeJoin LeftSemi (30) : : : : :- * Sort (5) : : : : : +- Exchange (4) : : : : : +- * Filter (3) : : : : : +- * ColumnarToRow (2) : : : : : +- Scan parquet default.catalog_sales (1) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Project (28) - : : : : +- * Filter (27) - : : : : +- * HashAggregate (26) - : : : : +- * HashAggregate (25) - : : : : +- * Project (24) - : : : : +- * SortMergeJoin Inner (23) - : : : : :- * Sort (17) - : : : : : +- Exchange (16) - : : : : : +- * Project (15) - : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : :- * Filter (8) - : : : : : : +- * ColumnarToRow (7) - : : : : : : +- Scan parquet default.store_sales (6) - : : : : : +- BroadcastExchange (13) - : : : : : +- * Project (12) - : : : : : +- * Filter (11) - : : : : : +- * ColumnarToRow (10) - : : : : : +- Scan parquet default.date_dim (9) - : : : : +- * Sort (22) - : : : : +- Exchange (21) - : : : : +- * Filter (20) - : : : : +- * ColumnarToRow (19) - : : : : +- Scan parquet default.item (18) - : : : +- * Sort (51) - : : : +- * Project (50) - : : : +- * Filter (49) - : : : +- * HashAggregate (48) - : : : +- * HashAggregate (47) - : : : +- * Project (46) - : : : +- * SortMergeJoin Inner (45) - : : : :- * Sort (39) - : : : : +- Exchange (38) - : : : : +- * Filter (37) - : : : : +- * ColumnarToRow (36) - : : : : +- Scan parquet default.store_sales (35) - : : : +- * Sort (44) - : : : +- Exchange (43) - : : : +- * Filter (42) - : : : +- * ColumnarToRow (41) - : : : +- Scan parquet default.customer (40) - : : +- BroadcastExchange (57) - : : +- * Project (56) - : : +- * Filter (55) - : : +- * ColumnarToRow (54) - : : +- Scan parquet default.date_dim (53) - : +- SortMergeJoin LeftSemi (77) - : :- * Sort (64) - : : +- Exchange (63) - : : +- * Filter (62) - : : +- * ColumnarToRow (61) - : : +- Scan parquet default.customer (60) - : +- * Sort (76) - : +- Exchange (75) - : +- * Project (74) - : +- * Filter (73) - : +- * HashAggregate (72) - : +- * HashAggregate (71) - : +- * Project (70) - : +- * SortMergeJoin Inner (69) - : :- * Sort (66) - : : +- ReusedExchange (65) - : +- * Sort (68) - : +- ReusedExchange (67) - +- * HashAggregate (128) - +- Exchange (127) - +- * HashAggregate (126) - +- * Project (125) - +- * SortMergeJoin Inner (124) - :- * Project (108) - : +- * BroadcastHashJoin Inner BuildRight (107) - : :- SortMergeJoin LeftSemi (105) - : : :- * Sort (93) - : : : +- Exchange (92) - : : : +- * Project (91) - : : : +- SortMergeJoin LeftSemi (90) - : : : :- * Sort (87) - : : : : +- Exchange (86) - : : : : +- * Filter (85) - : : : : +- * ColumnarToRow (84) - : : : : +- Scan parquet default.web_sales (83) - : : : +- * Sort (89) - : : : +- ReusedExchange (88) - : : +- * Sort (104) - : : +- * Project (103) - : : +- * Filter (102) - : : +- * HashAggregate (101) - : : +- * HashAggregate (100) - : : +- * Project (99) - : : +- * SortMergeJoin Inner (98) - : : :- * Sort (95) - : : : +- ReusedExchange (94) - : : +- * Sort (97) - : : +- ReusedExchange (96) - : +- ReusedExchange (106) - 
+- SortMergeJoin LeftSemi (123) - :- * Sort (110) - : +- ReusedExchange (109) - +- * Sort (122) - +- Exchange (121) - +- * Project (120) - +- * Filter (119) - +- * HashAggregate (118) - +- * HashAggregate (117) - +- * Project (116) - +- * SortMergeJoin Inner (115) - :- * Sort (112) - : +- ReusedExchange (111) - +- * Sort (114) - +- ReusedExchange (113) + : : : : +- * Sort (29) + : : : : +- * Project (28) + : : : : +- * Filter (27) + : : : : +- * HashAggregate (26) + : : : : +- * HashAggregate (25) + : : : : +- * Project (24) + : : : : +- * SortMergeJoin Inner (23) + : : : : :- * Sort (17) + : : : : : +- Exchange (16) + : : : : : +- * Project (15) + : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : :- * Filter (8) + : : : : : : +- * ColumnarToRow (7) + : : : : : : +- Scan parquet default.store_sales (6) + : : : : : +- BroadcastExchange (13) + : : : : : +- * Project (12) + : : : : : +- * Filter (11) + : : : : : +- * ColumnarToRow (10) + : : : : : +- Scan parquet default.date_dim (9) + : : : : +- * Sort (22) + : : : : +- Exchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.item (18) + : : : +- * Sort (50) + : : : +- * Project (49) + : : : +- * Filter (48) + : : : +- * HashAggregate (47) + : : : +- * HashAggregate (46) + : : : +- * Project (45) + : : : +- * SortMergeJoin Inner (44) + : : : :- * Sort (38) + : : : : +- Exchange (37) + : : : : +- * Filter (36) + : : : : +- * ColumnarToRow (35) + : : : : +- Scan parquet default.store_sales (34) + : : : +- * Sort (43) + : : : +- Exchange (42) + : : : +- * Filter (41) + : : : +- * ColumnarToRow (40) + : : : +- Scan parquet default.customer (39) + : : +- BroadcastExchange (56) + : : +- * Project (55) + : : +- * Filter (54) + : : +- * ColumnarToRow (53) + : : +- Scan parquet default.date_dim (52) + : +- SortMergeJoin LeftSemi (75) + : :- * Sort (63) + : : +- Exchange (62) + : : +- * Filter (61) + : : +- * ColumnarToRow (60) + : : +- Scan parquet default.customer (59) + : +- * Sort (74) + : +- * Project (73) + : +- * Filter (72) + : +- * HashAggregate (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * SortMergeJoin Inner (68) + : :- * Sort (65) + : : +- ReusedExchange (64) + : +- * Sort (67) + : +- ReusedExchange (66) + +- * HashAggregate (134) + +- Exchange (133) + +- * HashAggregate (132) + +- * Project (131) + +- * SortMergeJoin Inner (130) + :- * Project (115) + : +- * BroadcastHashJoin Inner BuildRight (114) + : :- SortMergeJoin LeftSemi (112) + : : :- * Sort (100) + : : : +- Exchange (99) + : : : +- * Project (98) + : : : +- SortMergeJoin LeftSemi (97) + : : : :- * Sort (85) + : : : : +- Exchange (84) + : : : : +- * Filter (83) + : : : : +- * ColumnarToRow (82) + : : : : +- Scan parquet default.web_sales (81) + : : : +- * Sort (96) + : : : +- * Project (95) + : : : +- * Filter (94) + : : : +- * HashAggregate (93) + : : : +- * HashAggregate (92) + : : : +- * Project (91) + : : : +- * SortMergeJoin Inner (90) + : : : :- * Sort (87) + : : : : +- ReusedExchange (86) + : : : +- * Sort (89) + : : : +- ReusedExchange (88) + : : +- * Sort (111) + : : +- * Project (110) + : : +- * Filter (109) + : : +- * HashAggregate (108) + : : +- * HashAggregate (107) + : : +- * Project (106) + : : +- * SortMergeJoin Inner (105) + : : :- * Sort (102) + : : : +- ReusedExchange (101) + : : +- * Sort (104) + : : +- ReusedExchange (103) + : +- ReusedExchange (113) + +- SortMergeJoin LeftSemi (129) + :- * Sort (117) + : +- ReusedExchange (116) + +- * Sort (128) + +- * Project (127) + +- * 
Filter (126) + +- * HashAggregate (125) + +- * HashAggregate (124) + +- * Project (123) + +- * SortMergeJoin Inner (122) + :- * Sort (119) + : +- ReusedExchange (118) + +- * Sort (121) + +- ReusedExchange (120) (1) Scan parquet default.catalog_sales @@ -259,612 +265,642 @@ Condition : (count(1)#22 > 4) Output [1]: [item_sk#21] Input [2]: [item_sk#21, count(1)#22] -(29) Exchange -Input [1]: [item_sk#21] -Arguments: hashpartitioning(item_sk#21, 5), true, [id=#23] - -(30) Sort [codegen id : 9] +(29) Sort [codegen id : 8] Input [1]: [item_sk#21] Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin +(30) SortMergeJoin Left keys [1]: [cs_item_sk#3] Right keys [1]: [item_sk#21] Join condition: None -(32) Project [codegen id : 10] +(31) Project [codegen id : 9] Output [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5] -(33) Exchange +(32) Exchange Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] -Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#24] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#23] -(34) Sort [codegen id : 11] +(33) Sort [codegen id : 10] Input [4]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] Arguments: [cs_bill_customer_sk#2 ASC NULLS FIRST], false, 0 -(35) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(34) Scan parquet default.store_sales +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct -(36) ColumnarToRow [codegen id : 12] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(35) ColumnarToRow [codegen id : 11] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(37) Filter [codegen id : 12] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Condition : isnotnull(ss_customer_sk#25) +(36) Filter [codegen id : 11] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Condition : isnotnull(ss_customer_sk#24) -(38) Exchange -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#28] +(37) Exchange +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#27] -(39) Sort [codegen id : 13] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(38) Sort [codegen id : 12] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(40) Scan parquet default.customer -Output [1]: [c_customer_sk#29] +(39) Scan parquet default.customer +Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(41) ColumnarToRow [codegen id : 14] -Input [1]: [c_customer_sk#29] +(40) ColumnarToRow [codegen id : 13] +Input [1]: [c_customer_sk#28] -(42) Filter [codegen id : 14] -Input [1]: [c_customer_sk#29] -Condition : isnotnull(c_customer_sk#29) +(41) Filter [codegen id : 13] +Input [1]: [c_customer_sk#28] +Condition : isnotnull(c_customer_sk#28) -(43) Exchange -Input [1]: [c_customer_sk#29] 
-Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#30] +(42) Exchange +Input [1]: [c_customer_sk#28] +Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#29] -(44) Sort [codegen id : 15] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(43) Sort [codegen id : 14] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(45) SortMergeJoin [codegen id : 16] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(44) SortMergeJoin [codegen id : 15] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(46) Project [codegen id : 16] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(47) HashAggregate [codegen id : 16] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] - -(48) HashAggregate [codegen id : 16] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] - -(49) Filter [codegen id : 16] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) - -(50) Project [codegen id : 16] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] - -(51) Sort [codegen id : 16] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 - -(52) SortMergeJoin +(45) Project [codegen id : 15] 
+Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(46) HashAggregate [codegen id : 15] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#30, isEmpty#31] +Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] + +(47) HashAggregate [codegen id : 15] +Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#34] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#34 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] + +(48) Filter [codegen id : 15] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#36, [id=#37] as decimal(32,6)))), DecimalType(38,8), true))) + +(49) Project [codegen id : 15] +Output [1]: [c_customer_sk#28] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] + +(50) Sort [codegen id : 15] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 + +(51) SortMergeJoin Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#28] Join condition: None -(53) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#11, d_moy#39] +(52) Scan parquet default.date_dim +Output [3]: [d_date_sk#9, d_year#11, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct -(54) ColumnarToRow [codegen id : 17] -Input [3]: [d_date_sk#9, d_year#11, d_moy#39] +(53) ColumnarToRow [codegen id : 16] +Input [3]: [d_date_sk#9, d_year#11, d_moy#38] -(55) Filter [codegen id : 17] 
-Input [3]: [d_date_sk#9, d_year#11, d_moy#39] -Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#39)) AND (d_year#11 = 2000)) AND (d_moy#39 = 2)) AND isnotnull(d_date_sk#9)) +(54) Filter [codegen id : 16] +Input [3]: [d_date_sk#9, d_year#11, d_moy#38] +Condition : ((((isnotnull(d_year#11) AND isnotnull(d_moy#38)) AND (d_year#11 = 2000)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#9)) -(56) Project [codegen id : 17] +(55) Project [codegen id : 16] Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#11, d_moy#39] +Input [3]: [d_date_sk#9, d_year#11, d_moy#38] -(57) BroadcastExchange +(56) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] -(58) BroadcastHashJoin [codegen id : 18] +(57) BroadcastHashJoin [codegen id : 17] Left keys [1]: [cs_sold_date_sk#1] Right keys [1]: [d_date_sk#9] Join condition: None -(59) Project [codegen id : 18] +(58) Project [codegen id : 17] Output [3]: [cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5] Input [5]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, d_date_sk#9] -(60) Scan parquet default.customer -Output [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +(59) Scan parquet default.customer +Output [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(61) ColumnarToRow [codegen id : 19] -Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +(60) ColumnarToRow [codegen id : 18] +Input [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] -(62) Filter [codegen id : 19] -Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] -Condition : isnotnull(c_customer_sk#29) +(61) Filter [codegen id : 18] +Input [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] +Condition : isnotnull(c_customer_sk#28) -(63) Exchange -Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] -Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#43] +(62) Exchange +Input [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] +Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#42] -(64) Sort [codegen id : 20] -Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(63) Sort [codegen id : 19] +Input [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(65) ReusedExchange [Reuses operator id: 38] -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(64) ReusedExchange [Reuses operator id: 37] +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(66) Sort [codegen id : 22] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(65) Sort [codegen id : 21] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(67) ReusedExchange [Reuses operator id: 43] -Output [1]: [c_customer_sk#29] +(66) ReusedExchange [Reuses operator id: 42] +Output [1]: [c_customer_sk#28] -(68) Sort [codegen id : 24] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(67) Sort [codegen id : 23] +Input [1]: [c_customer_sk#28] +Arguments: 
[c_customer_sk#28 ASC NULLS FIRST], false, 0 -(69) SortMergeJoin [codegen id : 25] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(68) SortMergeJoin [codegen id : 24] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(70) Project [codegen id : 25] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(71) HashAggregate [codegen id : 25] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] - -(72) HashAggregate [codegen id : 25] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#35 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] - -(73) Filter [codegen id : 25] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) - -(74) Project [codegen id : 25] -Output [1]: [c_customer_sk#29 AS c_customer_sk#29#44] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#36] - -(75) Exchange -Input [1]: [c_customer_sk#29#44] -Arguments: hashpartitioning(c_customer_sk#29#44, 5), true, [id=#45] - -(76) Sort [codegen id : 26] -Input [1]: [c_customer_sk#29#44] -Arguments: [c_customer_sk#29#44 ASC NULLS FIRST], false, 0 - -(77) SortMergeJoin -Left keys [1]: [c_customer_sk#29] -Right keys [1]: [c_customer_sk#29#44] +(69) Project [codegen id : 24] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, 
ss_sales_price#26, c_customer_sk#28] + +(70) HashAggregate [codegen id : 24] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#30, isEmpty#31] +Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] + +(71) HashAggregate [codegen id : 24] +Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#34] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#34 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] + +(72) Filter [codegen id : 24] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#36, [id=#37] as decimal(32,6)))), DecimalType(38,8), true))) + +(73) Project [codegen id : 24] +Output [1]: [c_customer_sk#28 AS c_customer_sk#28#43] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#35] + +(74) Sort [codegen id : 24] +Input [1]: [c_customer_sk#28#43] +Arguments: [c_customer_sk#28#43 ASC NULLS FIRST], false, 0 + +(75) SortMergeJoin +Left keys [1]: [c_customer_sk#28] +Right keys [1]: [c_customer_sk#28#43] Join condition: None -(78) SortMergeJoin [codegen id : 27] +(76) SortMergeJoin [codegen id : 25] Left keys [1]: [cs_bill_customer_sk#2] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#28] Join condition: None -(79) Project [codegen id : 27] -Output [4]: [cs_quantity#4, cs_list_price#5, c_first_name#41, c_last_name#42] -Input [6]: [cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, c_customer_sk#29, c_first_name#41, c_last_name#42] +(77) Project [codegen id : 25] +Output [4]: [cs_quantity#4, cs_list_price#5, c_first_name#40, c_last_name#41] +Input [6]: [cs_bill_customer_sk#2, cs_quantity#4, cs_list_price#5, c_customer_sk#28, c_first_name#40, c_last_name#41] -(80) HashAggregate 
[codegen id : 27] -Input [4]: [cs_quantity#4, cs_list_price#5, c_first_name#41, c_last_name#42] -Keys [2]: [c_last_name#42, c_first_name#41] +(78) HashAggregate [codegen id : 25] +Input [4]: [cs_quantity#4, cs_list_price#5, c_first_name#40, c_last_name#41] +Keys [2]: [c_last_name#41, c_first_name#40] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [4]: [c_last_name#42, c_first_name#41, sum#48, isEmpty#49] +Aggregate Attributes [2]: [sum#44, isEmpty#45] +Results [4]: [c_last_name#41, c_first_name#40, sum#46, isEmpty#47] -(81) Exchange -Input [4]: [c_last_name#42, c_first_name#41, sum#48, isEmpty#49] -Arguments: hashpartitioning(c_last_name#42, c_first_name#41, 5), true, [id=#50] +(79) Exchange +Input [4]: [c_last_name#41, c_first_name#40, sum#46, isEmpty#47] +Arguments: hashpartitioning(c_last_name#41, c_first_name#40, 5), true, [id=#48] -(82) HashAggregate [codegen id : 28] -Input [4]: [c_last_name#42, c_first_name#41, sum#48, isEmpty#49] -Keys [2]: [c_last_name#42, c_first_name#41] +(80) HashAggregate [codegen id : 26] +Input [4]: [c_last_name#41, c_first_name#40, sum#46, isEmpty#47] +Keys [2]: [c_last_name#41, c_first_name#40] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#51] -Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#51 AS sales#52] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#49] +Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#4 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#5 as decimal(12,2)))), DecimalType(18,2), true))#49 AS sales#50] -(83) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +(81) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#51, ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_bill_customer_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(84) ColumnarToRow [codegen id : 29] -Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +(82) ColumnarToRow [codegen id : 27] +Input [5]: [ws_sold_date_sk#51, ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] + +(83) Filter [codegen id : 27] +Input [5]: [ws_sold_date_sk#51, ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] +Condition : (isnotnull(ws_bill_customer_sk#53) AND isnotnull(ws_sold_date_sk#51)) + +(84) Exchange +Input [5]: [ws_sold_date_sk#51, ws_item_sk#52, 
ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] +Arguments: hashpartitioning(ws_item_sk#52, 5), true, [id=#56] -(85) Filter [codegen id : 29] -Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Condition : (isnotnull(ws_bill_customer_sk#55) AND isnotnull(ws_sold_date_sk#53)) +(85) Sort [codegen id : 28] +Input [5]: [ws_sold_date_sk#51, ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] +Arguments: [ws_item_sk#52 ASC NULLS FIRST], false, 0 -(86) Exchange -Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Arguments: hashpartitioning(ws_item_sk#54, 5), true, [id=#58] +(86) ReusedExchange [Reuses operator id: 16] +Output [2]: [ss_item_sk#8, d_date#10] + +(87) Sort [codegen id : 31] +Input [2]: [ss_item_sk#8, d_date#10] +Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 + +(88) ReusedExchange [Reuses operator id: 21] +Output [2]: [i_item_sk#14, i_item_desc#15] + +(89) Sort [codegen id : 33] +Input [2]: [i_item_sk#14, i_item_desc#15] +Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 + +(90) SortMergeJoin [codegen id : 34] +Left keys [1]: [ss_item_sk#8] +Right keys [1]: [i_item_sk#14] +Join condition: None + +(91) Project [codegen id : 34] +Output [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Input [4]: [ss_item_sk#8, d_date#10, i_item_sk#14, i_item_desc#15] + +(92) HashAggregate [codegen id : 34] +Input [3]: [d_date#10, i_item_sk#14, i_item_desc#15] +Keys [3]: [substr(i_item_desc#15, 1, 30) AS substr(i_item_desc#15, 1, 30)#57, i_item_sk#14, d_date#10] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#58] +Results [4]: [substr(i_item_desc#15, 1, 30)#57, i_item_sk#14, d_date#10, count#59] + +(93) HashAggregate [codegen id : 34] +Input [4]: [substr(i_item_desc#15, 1, 30)#57, i_item_sk#14, d_date#10, count#59] +Keys [3]: [substr(i_item_desc#15, 1, 30)#57, i_item_sk#14, d_date#10] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#60] +Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#60 AS count(1)#61] -(87) Sort [codegen id : 30] -Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Arguments: [ws_item_sk#54 ASC NULLS FIRST], false, 0 +(94) Filter [codegen id : 34] +Input [2]: [item_sk#21, count(1)#61] +Condition : (count(1)#61 > 4) -(88) ReusedExchange [Reuses operator id: 29] +(95) Project [codegen id : 34] Output [1]: [item_sk#21] +Input [2]: [item_sk#21, count(1)#61] -(89) Sort [codegen id : 37] +(96) Sort [codegen id : 34] Input [1]: [item_sk#21] Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 -(90) SortMergeJoin -Left keys [1]: [ws_item_sk#54] +(97) SortMergeJoin +Left keys [1]: [ws_item_sk#52] Right keys [1]: [item_sk#21] Join condition: None -(91) Project [codegen id : 38] -Output [4]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Input [5]: [ws_sold_date_sk#53, ws_item_sk#54, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] +(98) Project [codegen id : 35] +Output [4]: [ws_sold_date_sk#51, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] +Input [5]: [ws_sold_date_sk#51, ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] -(92) Exchange -Input [4]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Arguments: hashpartitioning(ws_bill_customer_sk#55, 5), true, [id=#59] +(99) Exchange +Input [4]: [ws_sold_date_sk#51, ws_bill_customer_sk#53, ws_quantity#54, 
ws_list_price#55] +Arguments: hashpartitioning(ws_bill_customer_sk#53, 5), true, [id=#62] -(93) Sort [codegen id : 39] -Input [4]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Arguments: [ws_bill_customer_sk#55 ASC NULLS FIRST], false, 0 +(100) Sort [codegen id : 36] +Input [4]: [ws_sold_date_sk#51, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] +Arguments: [ws_bill_customer_sk#53 ASC NULLS FIRST], false, 0 -(94) ReusedExchange [Reuses operator id: 38] -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(101) ReusedExchange [Reuses operator id: 37] +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(95) Sort [codegen id : 41] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(102) Sort [codegen id : 38] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(96) ReusedExchange [Reuses operator id: 43] -Output [1]: [c_customer_sk#29] +(103) ReusedExchange [Reuses operator id: 42] +Output [1]: [c_customer_sk#28] -(97) Sort [codegen id : 43] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(104) Sort [codegen id : 40] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(98) SortMergeJoin [codegen id : 44] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(105) SortMergeJoin [codegen id : 41] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(99) Project [codegen id : 44] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(100) HashAggregate [codegen id : 44] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#60, isEmpty#61] -Results [3]: [c_customer_sk#29, sum#62, isEmpty#63] - -(101) HashAggregate [codegen id : 44] -Input [3]: [c_customer_sk#29, sum#62, isEmpty#63] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] - -(102) Filter [codegen id : 44] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] -Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) - -(103) Project [codegen id : 44] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] - -(104) Sort [codegen id : 44] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 - -(105) SortMergeJoin -Left keys [1]: [ws_bill_customer_sk#55] -Right keys [1]: [c_customer_sk#29] +(106) Project [codegen id : 41] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(107) HashAggregate [codegen id : 41] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#63, isEmpty#64] +Results [3]: [c_customer_sk#28, sum#65, isEmpty#66] + +(108) HashAggregate [codegen id : 41] +Input [3]: [c_customer_sk#28, sum#65, isEmpty#66] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#67] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#67 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68] + +(109) Filter [codegen id : 41] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#36, [id=#37] as decimal(32,6)))), DecimalType(38,8), true))) + +(110) 
Project [codegen id : 41] +Output [1]: [c_customer_sk#28] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68] + +(111) Sort [codegen id : 41] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 + +(112) SortMergeJoin +Left keys [1]: [ws_bill_customer_sk#53] +Right keys [1]: [c_customer_sk#28] Join condition: None -(106) ReusedExchange [Reuses operator id: 57] +(113) ReusedExchange [Reuses operator id: 56] Output [1]: [d_date_sk#9] -(107) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_sold_date_sk#53] +(114) BroadcastHashJoin [codegen id : 43] +Left keys [1]: [ws_sold_date_sk#51] Right keys [1]: [d_date_sk#9] Join condition: None -(108) Project [codegen id : 46] -Output [3]: [ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57] -Input [5]: [ws_sold_date_sk#53, ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57, d_date_sk#9] +(115) Project [codegen id : 43] +Output [3]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55] +Input [5]: [ws_sold_date_sk#51, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, d_date_sk#9] -(109) ReusedExchange [Reuses operator id: 63] -Output [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] +(116) ReusedExchange [Reuses operator id: 62] +Output [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] -(110) Sort [codegen id : 48] -Input [3]: [c_customer_sk#29, c_first_name#41, c_last_name#42] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(117) Sort [codegen id : 45] +Input [3]: [c_customer_sk#28, c_first_name#40, c_last_name#41] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(111) ReusedExchange [Reuses operator id: 38] -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(118) ReusedExchange [Reuses operator id: 37] +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(112) Sort [codegen id : 50] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(119) Sort [codegen id : 47] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(113) ReusedExchange [Reuses operator id: 43] -Output [1]: [c_customer_sk#29] +(120) ReusedExchange [Reuses operator id: 42] +Output [1]: [c_customer_sk#28] -(114) Sort [codegen id : 52] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(121) Sort [codegen id : 49] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(115) SortMergeJoin [codegen id : 53] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(122) SortMergeJoin [codegen id : 50] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(116) Project [codegen id : 53] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(117) HashAggregate [codegen id : 53] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#60, 
isEmpty#61] -Results [3]: [c_customer_sk#29, sum#62, isEmpty#63] - -(118) HashAggregate [codegen id : 53] -Input [3]: [c_customer_sk#29, sum#62, isEmpty#63] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] - -(119) Filter [codegen id : 53] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8), true))) - -(120) Project [codegen id : 53] -Output [1]: [c_customer_sk#29 AS c_customer_sk#29#66] -Input [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#65] - -(121) Exchange -Input [1]: [c_customer_sk#29#66] -Arguments: hashpartitioning(c_customer_sk#29#66, 5), true, [id=#67] - -(122) Sort [codegen id : 54] -Input [1]: [c_customer_sk#29#66] -Arguments: [c_customer_sk#29#66 ASC NULLS FIRST], false, 0 - -(123) SortMergeJoin -Left keys [1]: [c_customer_sk#29] -Right keys [1]: [c_customer_sk#29#66] +(123) Project [codegen id : 50] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(124) HashAggregate [codegen id : 50] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#63, isEmpty#64] +Results [3]: [c_customer_sk#28, sum#65, isEmpty#66] + +(125) HashAggregate [codegen id : 50] +Input [3]: [c_customer_sk#28, sum#65, isEmpty#66] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#67] +Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#67 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68] + +(126) Filter [codegen id : 50] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#36, [id=#37] as decimal(32,6)))), DecimalType(38,8), true))) + +(127) Project [codegen id : 50] +Output [1]: [c_customer_sk#28 AS c_customer_sk#28#69] +Input [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#68] + +(128) Sort [codegen id : 50] +Input [1]: [c_customer_sk#28#69] +Arguments: [c_customer_sk#28#69 ASC NULLS FIRST], false, 0 + +(129) SortMergeJoin +Left keys [1]: [c_customer_sk#28] +Right keys [1]: [c_customer_sk#28#69] Join condition: None -(124) SortMergeJoin [codegen id : 55] -Left keys [1]: [ws_bill_customer_sk#55] -Right keys [1]: [c_customer_sk#29] +(130) SortMergeJoin [codegen id : 51] +Left keys [1]: [ws_bill_customer_sk#53] +Right keys [1]: [c_customer_sk#28] Join condition: None -(125) Project [codegen id : 55] -Output [4]: [ws_quantity#56, ws_list_price#57, c_first_name#41, c_last_name#42] -Input [6]: [ws_bill_customer_sk#55, ws_quantity#56, ws_list_price#57, c_customer_sk#29, c_first_name#41, c_last_name#42] +(131) Project [codegen id : 51] +Output [4]: [ws_quantity#54, ws_list_price#55, c_first_name#40, c_last_name#41] +Input [6]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, c_customer_sk#28, c_first_name#40, c_last_name#41] -(126) HashAggregate [codegen id : 55] -Input [4]: [ws_quantity#56, ws_list_price#57, c_first_name#41, c_last_name#42] -Keys [2]: [c_last_name#42, c_first_name#41] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#68, isEmpty#69] -Results [4]: [c_last_name#42, c_first_name#41, sum#70, isEmpty#71] +(132) HashAggregate [codegen id : 51] +Input [4]: [ws_quantity#54, ws_list_price#55, c_first_name#40, c_last_name#41] +Keys [2]: [c_last_name#41, c_first_name#40] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#70, isEmpty#71] +Results [4]: [c_last_name#41, c_first_name#40, sum#72, isEmpty#73] -(127) Exchange -Input [4]: [c_last_name#42, c_first_name#41, sum#70, isEmpty#71] -Arguments: hashpartitioning(c_last_name#42, c_first_name#41, 5), true, [id=#72] +(133) Exchange +Input [4]: [c_last_name#41, c_first_name#40, sum#72, isEmpty#73] +Arguments: hashpartitioning(c_last_name#41, c_first_name#40, 5), true, [id=#74] -(128) HashAggregate [codegen id : 56] -Input [4]: [c_last_name#42, c_first_name#41, sum#70, isEmpty#71] -Keys [2]: [c_last_name#42, c_first_name#41] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))#73] -Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#56 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#57 as decimal(12,2)))), DecimalType(18,2), true))#73 AS sales#74] +(134) HashAggregate [codegen id : 52] +Input [4]: [c_last_name#41, c_first_name#40, sum#72, isEmpty#73] +Keys [2]: [c_last_name#41, c_first_name#40] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#75] +Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#54 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2), true))#75 AS sales#76] -(129) Union +(135) Union -(130) TakeOrderedAndProject -Input [3]: [c_last_name#42, c_first_name#41, sales#52] -Arguments: 100, [c_last_name#42 ASC NULLS FIRST, c_first_name#41 ASC NULLS FIRST, sales#52 ASC NULLS FIRST], [c_last_name#42, c_first_name#41, sales#52] +(136) TakeOrderedAndProject +Input [3]: [c_last_name#41, c_first_name#40, sales#50] +Arguments: 100, [c_last_name#41 ASC NULLS FIRST, c_first_name#40 ASC NULLS FIRST, sales#50 ASC NULLS FIRST], [c_last_name#41, c_first_name#40, sales#50] ===== Subqueries ===== -Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#37, [id=#38] -* HashAggregate (154) -+- Exchange (153) - +- * HashAggregate (152) - +- * HashAggregate (151) - +- * HashAggregate (150) - +- * Project (149) - +- * SortMergeJoin Inner (148) - :- * Sort (142) - : +- Exchange (141) - : +- * Project (140) - : +- * BroadcastHashJoin Inner BuildRight (139) - : :- * Filter (133) - : : +- * ColumnarToRow (132) - : : +- Scan parquet default.store_sales (131) - : +- BroadcastExchange (138) - : +- * Project (137) - : +- * Filter (136) - : +- * ColumnarToRow (135) - : +- Scan parquet default.date_dim (134) - +- * Sort (147) - +- Exchange (146) - +- * Filter (145) - +- * ColumnarToRow (144) - +- Scan parquet default.customer (143) - - -(131) Scan parquet default.store_sales -Output [4]: [ss_sold_date_sk#7, 
ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +Subquery:1 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#36, [id=#37] +* HashAggregate (160) ++- Exchange (159) + +- * HashAggregate (158) + +- * HashAggregate (157) + +- * HashAggregate (156) + +- * Project (155) + +- * SortMergeJoin Inner (154) + :- * Sort (148) + : +- Exchange (147) + : +- * Project (146) + : +- * BroadcastHashJoin Inner BuildRight (145) + : :- * Filter (139) + : : +- * ColumnarToRow (138) + : : +- Scan parquet default.store_sales (137) + : +- BroadcastExchange (144) + : +- * Project (143) + : +- * Filter (142) + : +- * ColumnarToRow (141) + : +- Scan parquet default.date_dim (140) + +- * Sort (153) + +- Exchange (152) + +- * Filter (151) + +- * ColumnarToRow (150) + +- Scan parquet default.customer (149) + + +(137) Scan parquet default.store_sales +Output [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(132) ColumnarToRow [codegen id : 2] -Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] +(138) ColumnarToRow [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(133) Filter [codegen id : 2] -Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Condition : (isnotnull(ss_customer_sk#25) AND isnotnull(ss_sold_date_sk#7)) +(139) Filter [codegen id : 2] +Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Condition : (isnotnull(ss_customer_sk#24) AND isnotnull(ss_sold_date_sk#7)) -(134) Scan parquet default.date_dim +(140) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(135) ColumnarToRow [codegen id : 1] +(141) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(136) Filter [codegen id : 1] +(142) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(137) Project [codegen id : 1] +(143) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(138) BroadcastExchange +(144) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#77] -(139) BroadcastHashJoin [codegen id : 2] +(145) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: [d_date_sk#9] Join condition: None -(140) Project [codegen id : 2] -Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Input [5]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, d_date_sk#9] +(146) Project [codegen id : 2] +Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Input [5]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, d_date_sk#9] -(141) Exchange -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#76] +(147) Exchange +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: 
hashpartitioning(ss_customer_sk#24, 5), true, [id=#78] -(142) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +(148) Sort [codegen id : 3] +Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(143) Scan parquet default.customer -Output [1]: [c_customer_sk#29] +(149) Scan parquet default.customer +Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(144) ColumnarToRow [codegen id : 4] -Input [1]: [c_customer_sk#29] +(150) ColumnarToRow [codegen id : 4] +Input [1]: [c_customer_sk#28] -(145) Filter [codegen id : 4] -Input [1]: [c_customer_sk#29] -Condition : isnotnull(c_customer_sk#29) +(151) Filter [codegen id : 4] +Input [1]: [c_customer_sk#28] +Condition : isnotnull(c_customer_sk#28) -(146) Exchange -Input [1]: [c_customer_sk#29] -Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#77] +(152) Exchange +Input [1]: [c_customer_sk#28] +Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#79] -(147) Sort [codegen id : 5] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +(153) Sort [codegen id : 5] +Input [1]: [c_customer_sk#28] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(148) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#29] +(154) SortMergeJoin [codegen id : 6] +Left keys [1]: [ss_customer_sk#24] +Right keys [1]: [c_customer_sk#28] Join condition: None -(149) Project [codegen id : 6] -Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] - -(150) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#78, isEmpty#79] -Results [3]: [c_customer_sk#29, sum#80, isEmpty#81] - -(151) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#29, sum#80, isEmpty#81] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#82] -Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#82 AS csales#83] - -(152) HashAggregate [codegen id : 6] -Input [1]: [csales#83] +(155) Project [codegen id : 6] +Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] + +(156) HashAggregate [codegen id : 6] +Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Keys [1]: [c_customer_sk#28] +Functions [1]: 
[partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#80, isEmpty#81] +Results [3]: [c_customer_sk#28, sum#82, isEmpty#83] + +(157) HashAggregate [codegen id : 6] +Input [3]: [c_customer_sk#28, sum#82, isEmpty#83] +Keys [1]: [c_customer_sk#28] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#84] +Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#84 AS csales#85] + +(158) HashAggregate [codegen id : 6] +Input [1]: [csales#85] Keys: [] -Functions [1]: [partial_max(csales#83)] -Aggregate Attributes [1]: [max#84] -Results [1]: [max#85] +Functions [1]: [partial_max(csales#85)] +Aggregate Attributes [1]: [max#86] +Results [1]: [max#87] -(153) Exchange -Input [1]: [max#85] -Arguments: SinglePartition, true, [id=#86] +(159) Exchange +Input [1]: [max#87] +Arguments: SinglePartition, true, [id=#88] -(154) HashAggregate [codegen id : 7] -Input [1]: [max#85] +(160) HashAggregate [codegen id : 7] +Input [1]: [max#87] Keys: [] -Functions [1]: [max(csales#83)] -Aggregate Attributes [1]: [max(csales#83)#87] -Results [1]: [max(csales#83)#87 AS tpcds_cmax#88] +Functions [1]: [max(csales#85)] +Aggregate Attributes [1]: [max(csales#85)#89] +Results [1]: [max(csales#85)#89 AS tpcds_cmax#90] -Subquery:2 Hosting operator id = 73 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:2 Hosting operator id = 72 Hosting Expression = ReusedSubquery Subquery scalar-subquery#36, [id=#37] -Subquery:3 Hosting operator id = 102 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:3 Hosting operator id = 109 Hosting Expression = ReusedSubquery Subquery scalar-subquery#36, [id=#37] -Subquery:4 Hosting operator id = 119 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:4 Hosting operator id = 126 Hosting Expression = ReusedSubquery Subquery scalar-subquery#36, [id=#37] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt index e8891f032a091..4279bf3e16a82 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt @@ -1,24 +1,24 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Union - WholeStageCodegen (28) + WholeStageCodegen (26) HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter Exchange [c_last_name,c_first_name] #1 - WholeStageCodegen (27) + WholeStageCodegen (25) HashAggregate 
[c_last_name,c_first_name,cs_quantity,cs_list_price] [sum,isEmpty,sum,isEmpty] Project [cs_quantity,cs_list_price,c_first_name,c_last_name] SortMergeJoin [cs_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (18) + WholeStageCodegen (17) Project [cs_bill_customer_sk,cs_quantity,cs_list_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] InputAdapter SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (11) + WholeStageCodegen (10) Sort [cs_bill_customer_sk] InputAdapter Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (10) + WholeStageCodegen (9) Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] InputAdapter SortMergeJoin [cs_item_sk,item_sk] @@ -31,48 +31,45 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] ColumnarToRow InputAdapter Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (9) + WholeStageCodegen (8) Sort [item_sk] - InputAdapter - Exchange [item_sk] #4 - WholeStageCodegen (8) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_item_sk] InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #5 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] + Exchange [ss_item_sk] #4 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (16) + Exchange [i_item_sk] #6 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (15) Sort [c_customer_sk] Project [c_customer_sk] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] @@ -80,7 +77,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] WholeStageCodegen 
(7) HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - Exchange #10 + Exchange #9 WholeStageCodegen (6) HashAggregate [csales] [max,max] HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] @@ -91,7 +88,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] WholeStageCodegen (3) Sort [ss_customer_sk] InputAdapter - Exchange [ss_customer_sk] #11 + Exchange [ss_customer_sk] #10 WholeStageCodegen (2) Project [ss_customer_sk,ss_quantity,ss_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] @@ -100,7 +97,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] InputAdapter - BroadcastExchange #12 + BroadcastExchange #11 WholeStageCodegen (1) Project [d_date_sk] Filter [d_year,d_date_sk] @@ -111,7 +108,7 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] WholeStageCodegen (5) Sort [c_customer_sk] InputAdapter - Exchange [c_customer_sk] #13 + Exchange [c_customer_sk] #12 WholeStageCodegen (4) Filter [c_customer_sk] ColumnarToRow @@ -122,28 +119,28 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (13) + WholeStageCodegen (12) Sort [ss_customer_sk] InputAdapter - Exchange [ss_customer_sk] #8 - WholeStageCodegen (12) + Exchange [ss_customer_sk] #7 + WholeStageCodegen (11) Filter [ss_customer_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] InputAdapter - WholeStageCodegen (15) + WholeStageCodegen (14) Sort [c_customer_sk] InputAdapter - Exchange [c_customer_sk] #9 - WholeStageCodegen (14) + Exchange [c_customer_sk] #8 + WholeStageCodegen (13) Filter [c_customer_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk] InputAdapter - BroadcastExchange #14 - WholeStageCodegen (17) + BroadcastExchange #13 + WholeStageCodegen (16) Project [d_date_sk] Filter [d_year,d_moy,d_date_sk] ColumnarToRow @@ -151,73 +148,84 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter SortMergeJoin [c_customer_sk,c_customer_sk] - WholeStageCodegen (20) + WholeStageCodegen (19) Sort [c_customer_sk] InputAdapter - Exchange [c_customer_sk] #15 - WholeStageCodegen (19) + Exchange [c_customer_sk] #14 + WholeStageCodegen (18) Filter [c_customer_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - WholeStageCodegen (26) + WholeStageCodegen (24) Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #16 - WholeStageCodegen (25) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), 
DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (21) + Sort [ss_customer_sk] InputAdapter - WholeStageCodegen (22) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 + InputAdapter + WholeStageCodegen (23) + Sort [c_customer_sk] InputAdapter - WholeStageCodegen (24) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #9 - WholeStageCodegen (56) + ReusedExchange [c_customer_sk] #8 + WholeStageCodegen (52) HashAggregate [c_last_name,c_first_name,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sales,sum,isEmpty] InputAdapter - Exchange [c_last_name,c_first_name] #17 - WholeStageCodegen (55) + Exchange [c_last_name,c_first_name] #15 + WholeStageCodegen (51) HashAggregate [c_last_name,c_first_name,ws_quantity,ws_list_price] [sum,isEmpty,sum,isEmpty] Project [ws_quantity,ws_list_price,c_first_name,c_last_name] SortMergeJoin [ws_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (46) + WholeStageCodegen (43) Project [ws_bill_customer_sk,ws_quantity,ws_list_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] InputAdapter SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (39) + WholeStageCodegen (36) Sort [ws_bill_customer_sk] InputAdapter - Exchange [ws_bill_customer_sk] #18 - WholeStageCodegen (38) + Exchange [ws_bill_customer_sk] #16 + WholeStageCodegen (35) Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] InputAdapter SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (30) + WholeStageCodegen (28) Sort [ws_item_sk] InputAdapter - Exchange [ws_item_sk] #19 - WholeStageCodegen (29) + Exchange [ws_item_sk] #17 + WholeStageCodegen (27) Filter [ws_bill_customer_sk,ws_sold_date_sk] ColumnarToRow InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (37) + WholeStageCodegen (34) Sort [item_sk] - InputAdapter - ReusedExchange [item_sk] #4 - WholeStageCodegen (44) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] 
+ SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (31) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,d_date] #4 + InputAdapter + WholeStageCodegen (33) + Sort [i_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_desc] #6 + WholeStageCodegen (41) Sort [c_customer_sk] Project [c_customer_sk] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] @@ -227,42 +235,39 @@ TakeOrderedAndProject [c_last_name,c_first_name,sales] Project [ss_quantity,ss_sales_price,c_customer_sk] SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (41) + WholeStageCodegen (38) Sort [ss_customer_sk] InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 InputAdapter - WholeStageCodegen (43) + WholeStageCodegen (40) Sort [c_customer_sk] InputAdapter - ReusedExchange [c_customer_sk] #9 + ReusedExchange [c_customer_sk] #8 InputAdapter - ReusedExchange [d_date_sk] #14 + ReusedExchange [d_date_sk] #13 InputAdapter SortMergeJoin [c_customer_sk,c_customer_sk] - WholeStageCodegen (48) + WholeStageCodegen (45) Sort [c_customer_sk] InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #15 - WholeStageCodegen (54) + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #14 + WholeStageCodegen (50) Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #20 - WholeStageCodegen (53) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (47) + Sort [ss_customer_sk] InputAdapter - WholeStageCodegen (50) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + ReusedExchange 
[ss_customer_sk,ss_quantity,ss_sales_price] #7 + InputAdapter + WholeStageCodegen (49) + Sort [c_customer_sk] InputAdapter - WholeStageCodegen (52) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #9 + ReusedExchange [c_customer_sk] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 92b9c26825e51..7465ddae84e8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -1,72 +1,71 @@ == Physical Plan == -CollectLimit (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- * HashAggregate (63) - +- * HashAggregate (62) - +- * HashAggregate (61) - +- * HashAggregate (60) - +- Exchange (59) - +- * HashAggregate (58) - +- SortMergeJoin LeftSemi (57) - :- SortMergeJoin LeftSemi (39) - : :- * Sort (21) - : : +- Exchange (20) - : : +- * Project (19) - : : +- * SortMergeJoin Inner (18) - : : :- * Sort (12) - : : : +- Exchange (11) - : : : +- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- * Sort (17) - : : +- Exchange (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.customer (13) - : +- * Sort (38) - : +- Exchange (37) - : +- * HashAggregate (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- * Project (33) - : +- * SortMergeJoin Inner (32) - : :- * Sort (29) - : : +- Exchange (28) - : : +- * Project (27) - : : +- * BroadcastHashJoin Inner BuildRight (26) - : : :- * Filter (24) - : : : +- * ColumnarToRow (23) - : : : +- Scan parquet default.catalog_sales (22) - : : +- ReusedExchange (25) - : +- * Sort (31) - : +- ReusedExchange (30) - +- * Sort (56) - +- Exchange (55) - +- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * Project (51) - +- * SortMergeJoin Inner (50) - :- * Sort (47) - : +- Exchange (46) - : +- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.web_sales (40) - : +- ReusedExchange (43) - +- * Sort (49) - +- ReusedExchange (48) +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftSemi (57) + :- SortMergeJoin LeftSemi (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + 
: +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) (1) Scan parquet default.store_sales @@ -387,7 +386,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#37] Results [1]: [count(1)#37 AS count(1)#38] -(68) CollectLimit -Input [1]: [count(1)#38] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt index 5bcd7dbb93022..8dd59340cf069 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -1,118 +1,117 @@ -CollectLimit - WholeStageCodegen (26) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (25) - HashAggregate [count,count] +WholeStageCodegen (26) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (25) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (24) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - WholeStageCodegen (7) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (6) - Project [d_date,c_first_name,c_last_name] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #4 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim 
[d_date_sk,d_date,d_month_seq] - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - WholeStageCodegen (15) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #7 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] + WholeStageCodegen (7) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + WholeStageCodegen (6) + Project [d_date,c_first_name,c_last_name] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (13) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #9 - WholeStageCodegen (9) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (12) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 - WholeStageCodegen (23) + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) Sort [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (22) + Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (14) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (21) + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (13) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (18) - Sort [ws_bill_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] InputAdapter - Exchange [ws_bill_customer_sk] #12 - WholeStageCodegen (17) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - 
Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (20) + WholeStageCodegen (12) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index 09ab60c7cf651..74454cf32afd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -1,59 +1,58 @@ == Physical Plan == -CollectLimit (55) -+- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftSemi BuildRight (44) - :- * BroadcastHashJoin LeftSemi BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) +* HashAggregate (54) ++- Exchange (53) 
+ +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) (1) Scan parquet default.store_sales @@ -322,7 +321,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#33] Results [1]: [count(1)#33 AS count(1)#34] -(55) CollectLimit -Input [1]: [count(1)#34] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 10a2166ce761d..a5b57a4ac9450 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -1,81 +1,80 @@ -CollectLimit - WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin 
[ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) + BroadcastExchange #5 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + 
BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index dc4665185b014..99459bfe9a049 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 7fd1cd3637a09..0721155286d17 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index b17a48db8baac..8a441392f4165 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 652b2e36cf781..1f24a7c964f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 7720d9dee4170..43390c5048a6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (47) +* Sort (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) TakeOrderedAndProject +(47) Sort [codegen id : 14] Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] +Arguments: [order count #32 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 128a8179ac10b..7b3d461b9e80f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (14) +WholeStageCodegen (14) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index a94e74f66b201..2abbe4f9b8390 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] 
-(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9d30b998fe174..5e7d7db5c0a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index eae118d46245d..547792f3d7ae4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,67 +1,65 @@ == Physical Plan == -TakeOrderedAndProject (63) -+- * HashAggregate (62) - +- Exchange (61) - +- * HashAggregate (60) - +- * HashAggregate (59) - +- Exchange (58) - +- * HashAggregate (57) - +- * Project (56) - +- * BroadcastHashJoin Inner BuildRight (55) - :- * Project (49) - : +- * BroadcastHashJoin Inner BuildRight (48) - : :- * Project (42) - : : +- * BroadcastHashJoin Inner BuildRight (41) - : : :- SortMergeJoin LeftSemi (35) - : : : :- * Sort (19) - : : : : +- Exchange (18) - : : : : +- SortMergeJoin LeftSemi (17) +* Sort (61) ++- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- * HashAggregate (57) + +- Exchange (56) + +- * HashAggregate (55) + +- * Project (54) + +- * BroadcastHashJoin Inner BuildRight (53) + :- * Project (47) + : +- * BroadcastHashJoin Inner BuildRight (46) + : :- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) + : : :- SortMergeJoin LeftSemi (33) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- SortMergeJoin LeftSemi (15) : : : : :- * Sort (5) : : : : : +- Exchange (4) : : : : : +- * Filter (3) : : : : : +- * ColumnarToRow (2) : : : : : +- Scan parquet default.web_sales (1) - : : : : +- * Sort (16) - : : : : +- Exchange (15) - : : : : +- * Project (14) - : : : : +- * SortMergeJoin Inner (13) - : : : : :- * Sort (10) - : : : : : +- Exchange (9) - : : : : : +- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.web_sales (6) - : : : : +- * Sort (12) - : : : : +- ReusedExchange (11) - : : : +- * Project (34) - : : : +- * SortMergeJoin Inner (33) - : : : :- * Sort (27) - : : : : +- Exchange (26) - : : : : +- * Project (25) - : : : : +- * SortMergeJoin Inner (24) - : : : : :- * Sort (21) - : : : : : +- ReusedExchange (20) - : : : : +- * Sort (23) - : : : : +- ReusedExchange (22) - : : : +- * Sort (32) - : : : +- Exchange (31) - : : : +- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.web_returns (28) - : : +- BroadcastExchange 
(40) - : : +- * Project (39) - : : +- * Filter (38) - : : +- * ColumnarToRow (37) - : : +- Scan parquet default.customer_address (36) - : +- BroadcastExchange (47) - : +- * Project (46) - : +- * Filter (45) - : +- * ColumnarToRow (44) - : +- Scan parquet default.web_site (43) - +- BroadcastExchange (54) - +- * Project (53) - +- * Filter (52) - +- * ColumnarToRow (51) - +- Scan parquet default.date_dim (50) + : : : : +- * Project (14) + : : : : +- * SortMergeJoin Inner (13) + : : : : :- * Sort (10) + : : : : : +- Exchange (9) + : : : : : +- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.web_sales (6) + : : : : +- * Sort (12) + : : : : +- ReusedExchange (11) + : : : +- * Project (32) + : : : +- * SortMergeJoin Inner (31) + : : : :- * Sort (25) + : : : : +- Exchange (24) + : : : : +- * Project (23) + : : : : +- * SortMergeJoin Inner (22) + : : : : :- * Sort (19) + : : : : : +- ReusedExchange (18) + : : : : +- * Sort (21) + : : : : +- ReusedExchange (20) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.web_returns (26) + : : +- BroadcastExchange (38) + : : +- * Project (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet default.customer_address (34) + : +- BroadcastExchange (45) + : +- * Project (44) + : +- * Filter (43) + : +- * ColumnarToRow (42) + : +- Scan parquet default.web_site (41) + +- BroadcastExchange (52) + +- * Project (51) + +- * Filter (50) + +- * ColumnarToRow (49) + +- Scan parquet default.date_dim (48) (1) Scan parquet default.web_sales @@ -124,224 +122,216 @@ Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#10) Output [1]: [ws_order_number#4 AS ws_order_number#4#12] Input [4]: [ws_warehouse_sk#8, ws_order_number#4, ws_warehouse_sk#10, ws_order_number#11] -(15) Exchange -Input [1]: [ws_order_number#4#12] -Arguments: hashpartitioning(ws_order_number#4#12, 5), true, [id=#13] - -(16) Sort [codegen id : 8] -Input [1]: [ws_order_number#4#12] -Arguments: [ws_order_number#4#12 ASC NULLS FIRST], false, 0 - -(17) SortMergeJoin +(15) SortMergeJoin Left keys [1]: [ws_order_number#4] Right keys [1]: [ws_order_number#4#12] Join condition: None -(18) Exchange +(16) Exchange Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Arguments: hashpartitioning(cast(ws_order_number#4 as bigint), 5), true, [id=#14] +Arguments: hashpartitioning(cast(ws_order_number#4 as bigint), 5), true, [id=#13] -(19) Sort [codegen id : 9] +(17) Sort [codegen id : 8] Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] Arguments: [cast(ws_order_number#4 as bigint) ASC NULLS FIRST], false, 0 -(20) ReusedExchange [Reuses operator id: 9] +(18) ReusedExchange [Reuses operator id: 9] Output [2]: [ws_warehouse_sk#8, ws_order_number#4] -(21) Sort [codegen id : 11] +(19) Sort [codegen id : 10] Input [2]: [ws_warehouse_sk#8, ws_order_number#4] Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 -(22) ReusedExchange [Reuses operator id: 9] -Output [2]: [ws_warehouse_sk#15, ws_order_number#16] +(20) ReusedExchange [Reuses operator id: 9] +Output [2]: [ws_warehouse_sk#14, ws_order_number#15] -(23) Sort [codegen id : 13] -Input [2]: [ws_warehouse_sk#15, ws_order_number#16] -Arguments: [ws_order_number#16 ASC NULLS FIRST], false, 0 +(21) Sort [codegen id : 12] +Input [2]: [ws_warehouse_sk#14, ws_order_number#15] +Arguments: 
[ws_order_number#15 ASC NULLS FIRST], false, 0 -(24) SortMergeJoin [codegen id : 14] +(22) SortMergeJoin [codegen id : 13] Left keys [1]: [ws_order_number#4] -Right keys [1]: [ws_order_number#16] -Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#15) +Right keys [1]: [ws_order_number#15] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#14) -(25) Project [codegen id : 14] +(23) Project [codegen id : 13] Output [1]: [ws_order_number#4] -Input [4]: [ws_warehouse_sk#8, ws_order_number#4, ws_warehouse_sk#15, ws_order_number#16] +Input [4]: [ws_warehouse_sk#8, ws_order_number#4, ws_warehouse_sk#14, ws_order_number#15] -(26) Exchange +(24) Exchange Input [1]: [ws_order_number#4] -Arguments: hashpartitioning(cast(ws_order_number#4 as bigint), 5), true, [id=#17] +Arguments: hashpartitioning(cast(ws_order_number#4 as bigint), 5), true, [id=#16] -(27) Sort [codegen id : 15] +(25) Sort [codegen id : 14] Input [1]: [ws_order_number#4] Arguments: [cast(ws_order_number#4 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.web_returns -Output [1]: [wr_order_number#18] +(26) Scan parquet default.web_returns +Output [1]: [wr_order_number#17] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number)] ReadSchema: struct -(29) ColumnarToRow [codegen id : 16] -Input [1]: [wr_order_number#18] +(27) ColumnarToRow [codegen id : 15] +Input [1]: [wr_order_number#17] -(30) Filter [codegen id : 16] -Input [1]: [wr_order_number#18] -Condition : isnotnull(wr_order_number#18) +(28) Filter [codegen id : 15] +Input [1]: [wr_order_number#17] +Condition : isnotnull(wr_order_number#17) -(31) Exchange -Input [1]: [wr_order_number#18] -Arguments: hashpartitioning(wr_order_number#18, 5), true, [id=#19] +(29) Exchange +Input [1]: [wr_order_number#17] +Arguments: hashpartitioning(wr_order_number#17, 5), true, [id=#18] -(32) Sort [codegen id : 17] -Input [1]: [wr_order_number#18] -Arguments: [wr_order_number#18 ASC NULLS FIRST], false, 0 +(30) Sort [codegen id : 16] +Input [1]: [wr_order_number#17] +Arguments: [wr_order_number#17 ASC NULLS FIRST], false, 0 -(33) SortMergeJoin [codegen id : 18] +(31) SortMergeJoin [codegen id : 17] Left keys [1]: [cast(ws_order_number#4 as bigint)] -Right keys [1]: [wr_order_number#18] +Right keys [1]: [wr_order_number#17] Join condition: None -(34) Project [codegen id : 18] -Output [1]: [wr_order_number#18] -Input [2]: [ws_order_number#4, wr_order_number#18] +(32) Project [codegen id : 17] +Output [1]: [wr_order_number#17] +Input [2]: [ws_order_number#4, wr_order_number#17] -(35) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [cast(ws_order_number#4 as bigint)] -Right keys [1]: [wr_order_number#18] +Right keys [1]: [wr_order_number#17] Join condition: None -(36) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_state#21] +(34) Scan parquet default.customer_address +Output [2]: [ca_address_sk#19, ca_state#20] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] ReadSchema: struct -(37) ColumnarToRow [codegen id : 19] -Input [2]: [ca_address_sk#20, ca_state#21] +(35) ColumnarToRow [codegen id : 18] +Input [2]: [ca_address_sk#19, ca_state#20] -(38) Filter [codegen id : 19] -Input [2]: [ca_address_sk#20, ca_state#21] -Condition : ((isnotnull(ca_state#21) AND (ca_state#21 = IL)) AND isnotnull(ca_address_sk#20)) +(36) Filter [codegen id : 18] +Input [2]: 
[ca_address_sk#19, ca_state#20] +Condition : ((isnotnull(ca_state#20) AND (ca_state#20 = IL)) AND isnotnull(ca_address_sk#19)) -(39) Project [codegen id : 19] -Output [1]: [ca_address_sk#20] -Input [2]: [ca_address_sk#20, ca_state#21] +(37) Project [codegen id : 18] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_state#20] -(40) BroadcastExchange -Input [1]: [ca_address_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +(38) BroadcastExchange +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] -(41) BroadcastHashJoin [codegen id : 22] +(39) BroadcastHashJoin [codegen id : 21] Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#19] Join condition: None -(42) Project [codegen id : 22] +(40) Project [codegen id : 21] Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#20] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#19] -(43) Scan parquet default.web_site -Output [2]: [web_site_sk#23, web_company_name#24] +(41) Scan parquet default.web_site +Output [2]: [web_site_sk#22, web_company_name#23] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri), IsNotNull(web_site_sk)] ReadSchema: struct -(44) ColumnarToRow [codegen id : 20] -Input [2]: [web_site_sk#23, web_company_name#24] +(42) ColumnarToRow [codegen id : 19] +Input [2]: [web_site_sk#22, web_company_name#23] -(45) Filter [codegen id : 20] -Input [2]: [web_site_sk#23, web_company_name#24] -Condition : ((isnotnull(web_company_name#24) AND (web_company_name#24 = pri)) AND isnotnull(web_site_sk#23)) +(43) Filter [codegen id : 19] +Input [2]: [web_site_sk#22, web_company_name#23] +Condition : ((isnotnull(web_company_name#23) AND (web_company_name#23 = pri)) AND isnotnull(web_site_sk#22)) -(46) Project [codegen id : 20] -Output [1]: [web_site_sk#23] -Input [2]: [web_site_sk#23, web_company_name#24] +(44) Project [codegen id : 19] +Output [1]: [web_site_sk#22] +Input [2]: [web_site_sk#22, web_company_name#23] -(47) BroadcastExchange -Input [1]: [web_site_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +(45) BroadcastExchange +Input [1]: [web_site_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] -(48) BroadcastHashJoin [codegen id : 22] +(46) BroadcastHashJoin [codegen id : 21] Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#23] +Right keys [1]: [web_site_sk#22] Join condition: None -(49) Project [codegen id : 22] +(47) Project [codegen id : 21] Output [4]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#23] +Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#22] -(50) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_date#27] +(48) Scan parquet default.date_dim +Output [2]: [d_date_sk#25, d_date#26] Batched: true Location 
[not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] ReadSchema: struct -(51) ColumnarToRow [codegen id : 21] -Input [2]: [d_date_sk#26, d_date#27] +(49) ColumnarToRow [codegen id : 20] +Input [2]: [d_date_sk#25, d_date#26] -(52) Filter [codegen id : 21] -Input [2]: [d_date_sk#26, d_date#27] -Condition : (((isnotnull(d_date#27) AND (d_date#27 >= 10623)) AND (d_date#27 <= 10683)) AND isnotnull(d_date_sk#26)) +(50) Filter [codegen id : 20] +Input [2]: [d_date_sk#25, d_date#26] +Condition : (((isnotnull(d_date#26) AND (d_date#26 >= 10623)) AND (d_date#26 <= 10683)) AND isnotnull(d_date_sk#25)) -(53) Project [codegen id : 21] -Output [1]: [d_date_sk#26] -Input [2]: [d_date_sk#26, d_date#27] +(51) Project [codegen id : 20] +Output [1]: [d_date_sk#25] +Input [2]: [d_date_sk#25, d_date#26] -(54) BroadcastExchange -Input [1]: [d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] +(52) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] -(55) BroadcastHashJoin [codegen id : 22] +(53) BroadcastHashJoin [codegen id : 21] Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#26] +Right keys [1]: [d_date_sk#25] Join condition: None -(56) Project [codegen id : 22] +(54) Project [codegen id : 21] Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [5]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#26] +Input [5]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#25] -(57) HashAggregate [codegen id : 22] +(55) HashAggregate [codegen id : 21] Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] Keys [1]: [ws_order_number#4] Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30] -Results [3]: [ws_order_number#4, sum#31, sum#32] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29] +Results [3]: [ws_order_number#4, sum#30, sum#31] -(58) Exchange -Input [3]: [ws_order_number#4, sum#31, sum#32] -Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#33] +(56) Exchange +Input [3]: [ws_order_number#4, sum#30, sum#31] +Arguments: hashpartitioning(ws_order_number#4, 5), true, [id=#32] -(59) HashAggregate [codegen id : 23] -Input [3]: [ws_order_number#4, sum#31, sum#32] +(57) HashAggregate [codegen id : 22] +Input [3]: [ws_order_number#4, sum#30, sum#31] Keys [1]: [ws_order_number#4] Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30] -Results [3]: [ws_order_number#4, sum#31, sum#32] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29] +Results [3]: [ws_order_number#4, sum#30, sum#31] -(60) HashAggregate [codegen id : 23] -Input [3]: [ws_order_number#4, sum#31, sum#32] +(58) HashAggregate [codegen id : 22] +Input [3]: [ws_order_number#4, sum#30, sum#31] Keys: [] Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), 
partial_count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] -Results [3]: [sum#31, sum#32, count#35] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29, count(ws_order_number#4)#33] +Results [3]: [sum#30, sum#31, count#34] -(61) Exchange -Input [3]: [sum#31, sum#32, count#35] -Arguments: SinglePartition, true, [id=#36] +(59) Exchange +Input [3]: [sum#30, sum#31, count#34] +Arguments: SinglePartition, true, [id=#35] -(62) HashAggregate [codegen id : 24] -Input [3]: [sum#31, sum#32, count#35] +(60) HashAggregate [codegen id : 23] +Input [3]: [sum#30, sum#31, count#34] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] -Results [3]: [count(ws_order_number#4)#34 AS order count #37, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#29,17,2) AS total shipping cost #38, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#30,17,2) AS total net profit #39] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29, count(ws_order_number#4)#33] +Results [3]: [count(ws_order_number#4)#33 AS order count #36, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#28,17,2) AS total shipping cost #37, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#29,17,2) AS total net profit #38] -(63) TakeOrderedAndProject -Input [3]: [order count #37, total shipping cost #38, total net profit #39] -Arguments: 100, [order count #37 ASC NULLS FIRST], [order count #37, total shipping cost #38, total net profit #39] +(61) Sort [codegen id : 23] +Input [3]: [order count #36, total shipping cost #37, total net profit #38] +Arguments: [order count #36 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index bdcbb87b372dc..7213a9f58d3f8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,14 +1,14 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (24) +WholeStageCodegen (23) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 - WholeStageCodegen (23) + WholeStageCodegen (22) HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),sum,sum,count,sum,sum,count] HashAggregate [ws_order_number] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] InputAdapter Exchange [ws_order_number] #2 - WholeStageCodegen (22) + WholeStageCodegen (21) HashAggregate [ws_order_number,ws_ext_ship_cost,ws_net_profit] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),sum,sum,sum,sum] Project [ws_order_number,ws_ext_ship_cost,ws_net_profit] BroadcastHashJoin [ws_ship_date_sk,d_date_sk] @@ -18,7 +18,7 @@ 
TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] BroadcastHashJoin [ws_ship_addr_sk,ca_address_sk] InputAdapter SortMergeJoin [ws_order_number,wr_order_number] - WholeStageCodegen (9) + WholeStageCodegen (8) Sort [ws_order_number] InputAdapter Exchange [ws_order_number] #3 @@ -32,78 +32,74 @@ TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] ColumnarToRow InputAdapter Scan parquet default.web_sales [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] - WholeStageCodegen (8) - Sort [ws_order_number] - InputAdapter - Exchange [ws_order_number] #5 - WholeStageCodegen (7) - Project [ws_order_number] - SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ws_order_number] - InputAdapter - Exchange [ws_order_number] #6 - WholeStageCodegen (3) - Filter [ws_order_number,ws_warehouse_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] - InputAdapter - WholeStageCodegen (6) - Sort [ws_order_number] - InputAdapter - ReusedExchange [ws_warehouse_sk,ws_order_number] #6 - WholeStageCodegen (18) + WholeStageCodegen (7) + Project [ws_order_number] + SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] + InputAdapter + WholeStageCodegen (4) + Sort [ws_order_number] + InputAdapter + Exchange [ws_order_number] #5 + WholeStageCodegen (3) + Filter [ws_order_number,ws_warehouse_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_warehouse_sk,ws_order_number] + InputAdapter + WholeStageCodegen (6) + Sort [ws_order_number] + InputAdapter + ReusedExchange [ws_warehouse_sk,ws_order_number] #5 + WholeStageCodegen (17) Project [wr_order_number] SortMergeJoin [ws_order_number,wr_order_number] InputAdapter - WholeStageCodegen (15) + WholeStageCodegen (14) Sort [ws_order_number] InputAdapter - Exchange [ws_order_number] #7 - WholeStageCodegen (14) + Exchange [ws_order_number] #6 + WholeStageCodegen (13) Project [ws_order_number] SortMergeJoin [ws_order_number,ws_order_number,ws_warehouse_sk,ws_warehouse_sk] InputAdapter - WholeStageCodegen (11) + WholeStageCodegen (10) Sort [ws_order_number] InputAdapter - ReusedExchange [ws_warehouse_sk,ws_order_number] #6 + ReusedExchange [ws_warehouse_sk,ws_order_number] #5 InputAdapter - WholeStageCodegen (13) + WholeStageCodegen (12) Sort [ws_order_number] InputAdapter - ReusedExchange [ws_warehouse_sk,ws_order_number] #6 + ReusedExchange [ws_warehouse_sk,ws_order_number] #5 InputAdapter - WholeStageCodegen (17) + WholeStageCodegen (16) Sort [wr_order_number] InputAdapter - Exchange [wr_order_number] #8 - WholeStageCodegen (16) + Exchange [wr_order_number] #7 + WholeStageCodegen (15) Filter [wr_order_number] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_order_number] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (19) + BroadcastExchange #8 + WholeStageCodegen (18) Project [ca_address_sk] Filter [ca_state,ca_address_sk] ColumnarToRow InputAdapter Scan parquet default.customer_address [ca_address_sk,ca_state] InputAdapter - BroadcastExchange #10 - WholeStageCodegen (20) + BroadcastExchange #9 + WholeStageCodegen (19) Project [web_site_sk] Filter [web_company_name,web_site_sk] ColumnarToRow InputAdapter Scan parquet default.web_site [web_site_sk,web_company_name] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (21) + BroadcastExchange #10 + WholeStageCodegen (20) Project [d_date_sk] 
Filter [d_date,d_date_sk] ColumnarToRow diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 3a24e83aff256..1cc99e296383f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (56) +* Sort (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) TakeOrderedAndProject +(56) Sort [codegen id : 11] Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 6d35311c810f5..191ff22c1961f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (11) +WholeStageCodegen (11) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index d00029f985471..5ae0e1632f15b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index 1355caffbbfe8..d9ee3e09481ed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject 
[count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 3561eff8f57ef..6729910d9cb4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index b13f28bf69cfd..45400b6c512f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * 
BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN 
(isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + 
+- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange 
[d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 321f4966178d7..d34dcb4fe0c01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicLong import scala.reflect.runtime.universe.TypeTag import scala.util.Random -import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.apache.spark.SparkException diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala index 8b0f46b9d1ddb..4fdaeb57ad50e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import org.scalatest.BeforeAndAfterEach - import org.apache.spark.sql.catalyst.plans.logical.Expand import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index 8c998290b5044..fd408c37ef6cd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import java.sql.Date - import 
org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index de791383326f1..35e732e0840e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index 2be86b9ad6208..ac51634febc99 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import scala.collection.immutable.{HashSet => HSet} import scala.collection.immutable.Queue import scala.collection.mutable.{LinkedHashMap => LHMap} -import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index 80346b350c142..861a001b190aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -27,7 +27,6 @@ import org.scalatest.Assertions._ import org.apache.spark.TestUtils import org.apache.spark.api.python.{PythonBroadcast, PythonEvalType, PythonFunction, PythonUtils} import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.config.Tests import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.python.UserDefinedPythonFunction diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index c2aee0ad4c9a1..76204c504c0ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.tags.ExtendedSQLTest // scalastyle:off line.size.limit /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index b016cc3f57e0d..65377594f083c 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -25,7 +25,6 @@ import java.util.concurrent.TimeUnit import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 6b25d7c61663c..46112d40f08ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap class DataSourceV2DataFrameSessionCatalogSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index db3f11dbda51a..ddafa1bb5070a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionsAlreadyExistException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME @@ -139,6 +139,10 @@ class DataSourceV2SQLSuite Array("# Partitioning", "", ""), Array("Part 0", "id", ""), Array("", "", ""), + Array("# Metadata Columns", "", ""), + Array("index", "string", "Metadata column used to conflict with a data column"), + Array("_partition", "string", "Partition key used to store the row"), + Array("", "", ""), Array("# Detailed Table Information", "", ""), Array("Name", "testcat.table_name", ""), Array("Comment", "this is a test table", ""), @@ -1909,21 +1913,6 @@ class DataSourceV2SQLSuite } } - test("SHOW PARTITIONS") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql( - s""" - |CREATE TABLE $t (id bigint, data string) - |USING foo - |PARTITIONED BY (id) - """.stripMargin) - - testV1Command("SHOW PARTITIONS", t) - testV1Command("SHOW PARTITIONS", s"$t PARTITION(id='1')") 
- } - } - test("LOAD DATA INTO TABLE") { val t = "testcat.ns1.ns2.tbl" withTable(t) { @@ -2485,6 +2474,45 @@ class DataSourceV2SQLSuite } } + test("SPARK-31255: Project a metadata column") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + + "PARTITIONED BY (bucket(4, id), id)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") + + checkAnswer( + spark.sql(s"SELECT id, data, _partition FROM $t1"), + Seq(Row(1, "a", "3/1"), Row(2, "b", "2/2"), Row(3, "c", "2/3"))) + } + } + + test("SPARK-31255: Projects data column when metadata column has the same name") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (index bigint, data string) USING $v2Format " + + "PARTITIONED BY (bucket(4, index), index)") + sql(s"INSERT INTO $t1 VALUES (3, 'c'), (2, 'b'), (1, 'a')") + + checkAnswer( + spark.sql(s"SELECT index, data, _partition FROM $t1"), + Seq(Row(3, "c", "2/3"), Row(2, "b", "2/2"), Row(1, "a", "3/1"))) + } + } + + test("SPARK-31255: * expansion does not include metadata columns") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + + "PARTITIONED BY (bucket(4, id), id)") + sql(s"INSERT INTO $t1 VALUES (3, 'c'), (2, 'b'), (1, 'a')") + + checkAnswer( + spark.sql(s"SELECT * FROM $t1"), + Seq(Row(3, "c"), Row(2, "b"), Row(1, "a"))) + } + } + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index eacdb9e2fcd7b..3aad644655aa6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} import org.apache.spark.sql.connector.catalog.{Identifier, SupportsCatalogOptions, TableCatalog} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME -import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index 2d75a35215866..bad21aac41712 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.{AnalysisSuite, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import 
org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, TableCapabilityCheck} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala index 74f2ca14234d2..9beef690cba32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession, SQLContext} -import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns, V1Scan} import org.apache.spark.sql.execution.RowDataSourceScanExec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index e6029400997a2..81f292809df4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -28,7 +28,6 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, TaskContext, TestUtils} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow} import org.apache.spark.sql.catalyst.plans.physical.Partitioning diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index be29acb6d3a7c..6de81cc414d7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.{execution, DataFrame, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Sort, Union} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Union} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecution} import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} @@ -895,6 +895,201 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } + test("SPARK-33399: aliases should be handled properly in PartitioningCollection output" 
+ + " partitioning") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2", "t3") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + spark.range(30).repartition($"id").createTempView("t3") + val planned = sql( + """ + |SELECT t3.id as t3id + |FROM ( + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1, t2 + | WHERE t1.id = t2.id + |) t12, t3 + |WHERE t1id = t3.id + """.stripMargin).queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.size == 3) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputPartitioning match { + case PartitioningCollection(Seq(HashPartitioning(Seq(k1: AttributeReference), _), + HashPartitioning(Seq(k2: AttributeReference), _))) if k1.name == "t1id" => + true + case _ => false + })) + } + } + } + + test("SPARK-33399: aliases should be handled properly in HashPartitioning") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2", "t3") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + spark.range(30).repartition($"id").createTempView("t3") + val planned = sql( + """ + |SELECT t1id, t3.id as t3id + |FROM ( + | SELECT t1.id as t1id + | FROM t1 LEFT SEMI JOIN t2 + | ON t1.id = t2.id + |) t12 INNER JOIN t3 + |WHERE t1id = t3.id + """.stripMargin).queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.size == 3) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputPartitioning match { + case HashPartitioning(Seq(a: AttributeReference), _) => a.name == "t1id" + case _ => false + })) + } + } + } + + test("SPARK-33399: alias handling should happen properly for RangePartitioning") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val df = spark.range(1, 100) + .select(col("id").as("id1")).groupBy("id1").count() + // Plan for this will be Range -> ProjectWithAlias -> HashAggregate -> HashAggregate + // if Project normalizes alias in its Range outputPartitioning, then no Exchange should come + // in between HashAggregates + val planned = df.queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.isEmpty) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputPartitioning match { + case RangePartitioning(Seq(SortOrder(ar: AttributeReference, _, _, _)), _) => + ar.name == "id1" + case _ => false + })) + } + } + + test("SPARK-33399: aliased should be handled properly " + + "for partitioning and sortorder involving complex expressions") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2", "t3") { + spark.range(10).select(col("id").as("id1")).createTempView("t1") + spark.range(20).select(col("id").as("id2")).createTempView("t2") + spark.range(30).select(col("id").as("id3")).createTempView("t3") + val planned = sql( + """ + |SELECT t3.id3 as t3id + |FROM ( + | SELECT t1.id1 as t1id, t2.id2 as t2id + | FROM t1, t2 + | WHERE t1.id1 * 10 = t2.id2 * 10 + |) t12, t3 + |WHERE t1id * 10 = t3.id3 * 10 + """.stripMargin).queryExecution.executedPlan + val sortNodes = planned.collect { case s: SortExec => s } + assert(sortNodes.size == 3) + val exchangeNodes = planned.collect { case e: ShuffleExchangeExec => 
e } + assert(exchangeNodes.size == 3) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputPartitioning match { + case PartitioningCollection(Seq(HashPartitioning(Seq(Multiply(ar1, _, _)), _), + HashPartitioning(Seq(Multiply(ar2, _, _)), _))) => + Seq(ar1, ar2) match { + case Seq(ar1: AttributeReference, ar2: AttributeReference) => + ar1.name == "t1id" && ar2.name == "id2" + case _ => + false + } + case _ => false + })) + + } + } + } + + test("SPARK-33399: alias handling should happen properly for SinglePartition") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val df = spark.range(1, 100, 1, 1) + .select(col("id").as("id1")).groupBy("id1").count() + val planned = df.queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.isEmpty) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputPartitioning match { + case SinglePartition => true + case _ => false + })) + } + } + + test("SPARK-33399: No extra exchanges in case of" + + " [Inner Join -> Project with aliases -> HashAggregate]") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + val planned = sql( + """ + |SELECT t1id, t2id + |FROM ( + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1 INNER JOIN t2 + | WHERE t1.id = t2.id + |) t12 + |GROUP BY t1id, t2id + """.stripMargin).queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.size == 2) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputPartitioning match { + case PartitioningCollection(Seq(HashPartitioning(Seq(k1: AttributeReference), _), + HashPartitioning(Seq(k2: AttributeReference), _))) => + k1.name == "t1id" && k2.name == "t2id" + case _ => false + })) + } + } + } + + test("SPARK-33400: Normalization of sortOrder should take care of sameOrderExprs") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2", "t3") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + spark.range(30).repartition($"id").createTempView("t3") + val planned = sql( + """ + |SELECT t2id, t3.id as t3id + |FROM ( + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1, t2 + | WHERE t1.id = t2.id + |) t12, t3 + |WHERE t2id = t3.id + """.stripMargin).queryExecution.executedPlan + + val sortNodes = planned.collect { case s: SortExec => s } + assert(sortNodes.size == 3) + + val projects = planned.collect { case p: ProjectExec => p } + assert(projects.exists(_.outputOrdering match { + case Seq(SortOrder(_, Ascending, NullsFirst, sameOrderExprs)) => + sameOrderExprs.size == 1 && sameOrderExprs.head.isInstanceOf[AttributeReference] && + sameOrderExprs.head.asInstanceOf[AttributeReference].name == "t2id" + case _ => false + })) + } + } + } + test("aliases to expressions should not be replaced") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { withTempView("df1", "df2") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala index 54c5a33441900..751078d08fda9 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.plans.physical.{RangePartitioning, UnknownPartitioning} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.execution.joins.ShuffledJoin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -135,6 +137,32 @@ abstract class RemoveRedundantSortsSuiteBase } } } + + test("SPARK-33472: shuffled join with different left and right side partition numbers") { + withTempView("t1", "t2") { + spark.range(0, 100, 1, 2).select('id as "key").createOrReplaceTempView("t1") + (0 to 100).toDF("key").createOrReplaceTempView("t2") + + val queryTemplate = """ + |SELECT /*+ %s(t1) */ t1.key + |FROM t1 JOIN t2 ON t1.key = t2.key + |WHERE t1.key > 10 AND t2.key < 50 + |ORDER BY t1.key ASC + """.stripMargin + + Seq(("MERGE", 3), ("SHUFFLE_HASH", 1)).foreach { case (hint, count) => + val query = queryTemplate.format(hint) + val df = sql(query) + val sparkPlan = df.queryExecution.sparkPlan + val join = sparkPlan.collect { case j: ShuffledJoin => j }.head + val leftPartitioning = join.left.outputPartitioning + assert(leftPartitioning.isInstanceOf[RangePartitioning]) + assert(leftPartitioning.numPartitions == 2) + assert(join.right.outputPartitioning == UnknownPartitioning(0)) + checkSorts(query, count, count) + } + } + } } class RemoveRedundantSortsSuite extends RemoveRedundantSortsSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 87a5cb9f73355..792f920ee0217 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -181,7 +181,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql(s"SHOW CREATE TABLE $viewName") }.getMessage assert(e3.contains(s"$viewName is a temp view not table or permanent view")) - assertNoSuchTable(s"SHOW PARTITIONS $viewName") val e4 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala index ddaa2687eaf1a..18d36670306b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} -import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan} +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala index 7ddf9d87a6aca..f1fcf3bc5125e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution -import scala.language.implicitConversions import scala.util.control.NonFatal import org.apache.spark.SparkFunSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala index 48f85ae76cd8c..ad3ec85e984c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.adaptive -import java.io.{PrintWriter, StringWriter} - import org.scalactic.source.Position import org.scalatest.Tag diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index 9ade8b14f59b0..a98ca7f5d8f88 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.functions.{monotonically_increasing_id, timestamp_seconds} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType -import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, TimestampType} +import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType} /** * Benchmark to measure read performance with Filter pushdown. 
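The PlannerSuite additions above (SPARK-33399/SPARK-33400) all follow the same pattern: build a small query whose Project aliases a partitioning or ordering column, then assert on the number of ShuffleExchangeExec/SortExec nodes in the executed plan. Below is a minimal standalone sketch of that pattern, not part of the patch itself; it assumes only a local SparkSession, and the object name and app name are made up for illustration.

// Illustrative sketch only -- not part of the diff above. It mirrors the
// "SPARK-33399: alias handling should happen properly for RangePartitioning"
// test added to PlannerSuite: group by an aliased column and check that no
// extra ShuffleExchangeExec is planned once aliases are normalized in the
// Project's outputPartitioning.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
import org.apache.spark.sql.functions.col

object AliasPartitioningCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("alias-partitioning-check")
      .getOrCreate()

    // Physical plan: Range -> Project (id AS id1) -> partial/final HashAggregate.
    // With the normalization exercised by the new tests, the Project reports its
    // child's partitioning under the alias id1, so no Exchange is needed between
    // the two HashAggregates.
    val df = spark.range(1, 100).select(col("id").as("id1")).groupBy("id1").count()
    val exchanges = df.queryExecution.executedPlan.collect {
      case e: ShuffleExchangeExec => e
    }
    println(s"ShuffleExchangeExec nodes: ${exchanges.size}") // expected 0 with SPARK-33399
    spark.stop()
  }
}

The same collect-and-count approach is what the RemoveRedundantSortsSuite change below uses for ShuffledJoin and sort nodes, so this one sketch covers both families of assertions.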
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala index d2bd962b50654..f89fe2e64c778 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.execution.benchmark -import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} +import org.apache.spark.sql.{DataFrame, SaveMode} import org.apache.spark.sql.internal.SQLConf /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 43bc7c12937ec..f931914b19c6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala index 847e0ec4f3195..0abb3cb6a2ed0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.CalendarInterval class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[BooleanColumnStats], BOOLEAN, Array(true, false, 0)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 7fd7040f0f51d..8ce4bcbadc223 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -359,14 +359,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(e.contains("Found duplicate keys 'a'")) } - test("empty values in non-optional partition specs") { - val e = intercept[ParseException] { - parser.parsePlan( - "SHOW PARTITIONS dbx.tab1 PARTITION (a='1', b)") - }.getMessage - assert(e.contains("Found an empty partition key 'b'")) - } - test("Test CTAS #1") { val s1 = """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 348cf94dfc629..9d0147048dbb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
@@ -2026,7 +2026,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } test("SPARK-30312: truncate table - keep acl/permission") { - import testImplicits._ val ignorePermissionAcl = Seq(true, false) ignorePermissionAcl.foreach { ignore => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index d5820b016736a..fd1978c5137a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -26,7 +26,7 @@ import org.mockito.invocation.InvocationOnMock import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -151,7 +151,7 @@ class PlanResolutionSuite extends AnalysisTest { } else { catalogManagerWithoutDefault } - val analyzer = new Analyzer(catalogManager, conf) + val analyzer = new Analyzer(catalogManager) // TODO: run the analyzer directly. val rules = Seq( CTESubstitution, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala new file mode 100644 index 0000000000000..bc75528b9644c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.AnalysisTest +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.ShowPartitionsStatement +import org.apache.spark.sql.execution.SparkSqlParser +import org.apache.spark.sql.test.SharedSparkSession + +class ShowPartitionsParserSuite extends AnalysisTest with SharedSparkSession { + test("SHOW PARTITIONS") { + Seq( + "SHOW PARTITIONS t1" -> ShowPartitionsStatement(Seq("t1"), None), + "SHOW PARTITIONS db1.t1" -> ShowPartitionsStatement(Seq("db1", "t1"), None), + "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" -> + ShowPartitionsStatement( + Seq("t1"), + Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))), + "SHOW PARTITIONS a.b.c" -> ShowPartitionsStatement(Seq("a", "b", "c"), None), + "SHOW PARTITIONS a.b.c PARTITION(ds='2017-06-10')" -> + ShowPartitionsStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))) + ).foreach { case (sql, expected) => + val parsed = parsePlan(sql) + comparePlans(parsed, expected) + } + } + + test("empty values in non-optional partition specs") { + val e = intercept[ParseException] { + new SparkSqlParser().parsePlan( + "SHOW PARTITIONS dbx.tab1 PARTITION (a='1', b)") + }.getMessage + assert(e.contains("Found an empty partition key 'b'")) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala new file mode 100644 index 0000000000000..413e170326eea --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SQLTestUtils + +trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { + protected def version: String + protected def catalog: String + protected def defaultNamespace: Seq[String] + protected def defaultUsing: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"SHOW PARTITIONS $version: " + testName, testTags: _*)(testFun) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala new file mode 100644 index 0000000000000..bcc71e9b7241c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.{AnalysisException, Row, SaveMode} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultNamespace: Seq[String] = Seq("default") + override def defaultUsing: String = "USING parquet" + + private def createDateTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $table PARTITION(year = 2015, month = 1) SELECT 1, 1") + sql(s"INSERT INTO $table PARTITION(year = 2015, month = 2) SELECT 2, 2") + sql(s"INSERT INTO $table PARTITION(year = 2016, month = 2) SELECT 3, 3") + sql(s"INSERT INTO $table PARTITION(year = 2016, month = 3) SELECT 3, 3") + } + + test("show everything") { + val table = "dateTable" + withTable(table) { + createDateTable(table) + checkAnswer( + sql(s"show partitions $table"), + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) + + checkAnswer( + sql(s"show partitions default.$table"), + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) + } + } + + test("filter by partitions") { + val table = "dateTable" + withTable(table) { + createDateTable(table) + checkAnswer( + sql(s"show partitions default.$table PARTITION(year=2015)"), + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: Nil) + checkAnswer( + sql(s"show partitions default.$table PARTITION(year=2015, month=1)"), + Row("year=2015/month=1") :: Nil) + checkAnswer( + sql(s"show partitions default.$table PARTITION(month=2)"), + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: Nil) + } + } + + test("show everything more than 5 part keys") { + val table = "wideTable" + withTable(table) { + sql(s""" + |CREATE TABLE $table ( + | price int, qty int, + | year int, month int, hour int, minute int, sec int, extra int) + |$defaultUsing + |PARTITIONED BY (year, month, hour, minute, sec, extra)""".stripMargin) + sql(s""" + |INSERT INTO $table + |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 + """.stripMargin) + sql(s""" + |INSERT INTO $table + |PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 + """.stripMargin) + checkAnswer( + sql(s"show partitions $table"), + Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: + Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) + } + } + + test("non-partitioning columns") { + val table = "dateTable" + withTable(table) { + createDateTable(table) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") + }.getMessage + assert(errMsg.contains("Non-partitioning column(s) [abcd, xyz] are specified")) + } + } + + test("show partitions of non-partitioned table") { + val table = "not_partitioned_table" + withTable(table) { + sql(s"CREATE TABLE $table (col1 int) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table") + }.getMessage + 
assert(errMsg.contains("not allowed on a table that is not partitioned")) + } + } + + test("show partitions of a view") { + val table = "dateTable" + withTable(table) { + createDateTable(table) + val view = "view1" + withView(view) { + sql(s"CREATE VIEW $view as select * from $table") + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $view") + }.getMessage + assert(errMsg.contains("is not allowed on a view")) + } + } + } + + test("show partitions of a temporary view") { + val viewName = "test_view" + withTempView(viewName) { + spark.range(10).createTempView(viewName) + val errMsg = intercept[NoSuchTableException] { + sql(s"SHOW PARTITIONS $viewName") + }.getMessage + assert(errMsg.contains(s"Table or view '$viewName' not found")) + } + } +} + +class ShowPartitionsSuite extends ShowPartitionsSuiteBase with SharedSparkSession { + // The test is placed here because it fails with `USING HIVE`: + // org.apache.spark.sql.AnalysisException: + // Hive data source can only be used with tables, you can't use it with CREATE TEMP VIEW USING + test("issue exceptions on the temporary view") { + val viewName = "test_view" + withTempView(viewName) { + sql(s""" + |CREATE TEMPORARY VIEW $viewName (c1 INT, c2 STRING) + |$defaultUsing""".stripMargin) + val errMsg = intercept[NoSuchTableException] { + sql(s"SHOW PARTITIONS $viewName") + }.getMessage + assert(errMsg.contains(s"Table or view '$viewName' not found")) + } + } + + test("show partitions from a datasource") { + import testImplicits._ + withTable("part_datasrc") { + val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") + df.write + .partitionBy("a") + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable("part_datasrc") + + assert(sql("SHOW PARTITIONS part_datasrc").count() == 3) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala new file mode 100644 index 0000000000000..8a63cd49e89e9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.InMemoryTableCatalog +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSparkSession { + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultNamespace: Seq[String] = Nil + override def defaultUsing: String = "USING _" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) + + // TODO(SPARK-33452): Create a V2 SHOW PARTITIONS execution node + test("not supported SHOW PARTITIONS") { + def testV1Command(sqlCommand: String, sqlParams: String): Unit = { + val e = intercept[AnalysisException] { + sql(s"$sqlCommand $sqlParams") + } + assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) + } + val t = s"$catalog.ns1.ns2.tbl" + withTable(t) { + sql( + s""" + |CREATE TABLE $t (id bigint, data string) + |$defaultUsing + |PARTITIONED BY (id) + """.stripMargin) + + testV1Command("SHOW PARTITIONS", t) + testV1Command("SHOW PARTITIONS", s"$t PARTITION(id='1')") + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala index fd70b6529ff51..22db55afc27c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala @@ -21,7 +21,7 @@ import java.io.File import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession /** * The reader schema is said to be evolved (or projected) when it changed after the data is diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala index 6420081a9757b..3e8a4fe290502 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala @@ -22,15 +22,10 @@ import java.util.Properties import org.scalatest.BeforeAndAfter -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SharedSparkSession with BeforeAndAfter { - import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index 233978289f068..e843d1d328425 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.datasources -import org.apache.spark.SparkConf import org.apache.spark.sql.SaveMode import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 8462916daaab8..86ff026d7b1e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf.SOURCES_BINARY_FILE_MAX_LENGTH import org.apache.spark.sql.sources._ -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index d27b5c4737a11..7cc3a1cf9f3b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.execution.datasources.json import org.apache.spark.sql.{QueryTest, Row} -import org.apache.spark.sql.catalyst.json.JSONOptions import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{DoubleType, StringType, StructType} +import org.apache.spark.sql.types.{StringType, StructType} /** - * Test cases for various [[JSONOptions]]. + * Test cases for various [[org.apache.spark.sql.catalyst.json.JSONOptions]]. 
*/ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index 6c9bd32913178..378b52f9c6c8c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.orc import org.apache.spark.SparkConf -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.SchemaPruningSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala index 4b2437803d645..7f408dbba5099 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.io.FileNotFoundException - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} @@ -149,7 +147,7 @@ private object MarkingFileOutput { * @param outputPath destination directory * @param conf configuration to create the FS with * @return the status of the marker - * @throws FileNotFoundException if the marker is absent + * @throws java.io.FileNotFoundException if the marker is absent */ def checkMarker(outputPath: Path, conf: Configuration): FileStatus = { outputPath.getFileSystem(conf).getFileStatus(new Path(outputPath, "marker")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 34bdef7bdb402..d13b3e58a30ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.parquet import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.{Date, Timestamp} -import java.time._ import java.util.Locale import scala.collection.JavaConverters._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 8c4eedfde76cd..8c5f7bed7c50d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -21,7 +21,7 @@ import java.io.File import java.time.ZoneOffset import org.apache.commons.io.FileUtils -import org.apache.hadoop.fs.{FileSystem, Path, 
PathFilter} +import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER import org.apache.parquet.hadoop.ParquetFileReader import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index accd04592bec5..5c41614c45b6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -23,8 +23,6 @@ import java.sql.{Date, Timestamp} import java.time.{ZoneId, ZoneOffset} import java.util.{Calendar, Locale} -import scala.collection.mutable.ArrayBuffer - import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetOutputFormat diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 7990b1c27437a..e97c6cd29709c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -23,7 +23,7 @@ import scala.reflect.runtime.universe.TypeTag import org.apache.parquet.io.ParquetDecodingException import org.apache.parquet.schema.{MessageType, MessageTypeParser} -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index c3bcf86c1ed27..1a4f08418f8d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -46,7 +46,7 @@ abstract class V2SessionCatalogBaseSuite extends SharedSparkSession with BeforeA val testIdent: Identifier = Identifier.of(testNs, "test_table") def newCatalog(): V2SessionCatalog = { - val newCatalog = new 
V2SessionCatalog(spark.sessionState.catalog, spark.sessionState.conf) + val newCatalog = new V2SessionCatalog(spark.sessionState.catalog) newCatalog.initialize("test", CaseInsensitiveStringMap.empty()) newCatalog } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index c53617b40e09d..622d69e188821 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -22,7 +22,6 @@ import java.lang.{Long => JLong} import java.net.URI import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.ConcurrentHashMap -import java.util.concurrent.atomic.AtomicLong import scala.util.Random diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala index 3ead91fcf712a..014840d758c0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.streaming.sources._ -import org.apache.spark.sql.streaming.{OutputMode, StreamTest} +import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types.{IntegerType, StructField, StructType} import org.apache.spark.util.Utils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala index dec30fd01f7e2..ea6fd8ab312c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala @@ -23,7 +23,6 @@ import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.execution.streaming.GroupStateImpl._ -import org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite._ import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala index 298820349b683..6eb070138c3b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalog.{Column, Database, Function, Table} import org.apache.spark.sql.catalyst.{FunctionIdentifier, ScalaReflection, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.Range import 
org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 77a5d12cd8c95..580e7df6ef63e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import scala.language.reflectiveCalls - import org.apache.hadoop.fs.Path import org.apache.log4j.Level diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 4832386e553db..167e87dd3d5cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning -import org.apache.spark.sql.execution.{DataSourceScanExec, FileSourceScanExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.{FileSourceScanExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 1fdd3be88f782..179cdeb976391 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} -import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 4686a0c69de63..aaf8765c04425 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -756,6 +756,47 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } } + test("SPARK-33354: Throw exceptions on inserting invalid cast with ANSI casting policy") { + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.ANSI.toString) { + withTable("t") { + sql("CREATE TABLE t(i int, t timestamp) USING parquet") + val msg = intercept[AnalysisException] { + sql("INSERT INTO t VALUES (TIMESTAMP('2010-09-02 14:10:10'), 1)") + }.getMessage + 
assert(msg.contains("Cannot safely cast 'i': timestamp to int")) + assert(msg.contains("Cannot safely cast 't': int to timestamp")) + } + + withTable("t") { + sql("CREATE TABLE t(i int, d date) USING parquet") + val msg = intercept[AnalysisException] { + sql("INSERT INTO t VALUES (date('2010-09-02'), 1)") + }.getMessage + assert(msg.contains("Cannot safely cast 'i': date to int")) + assert(msg.contains("Cannot safely cast 'd': int to date")) + } + + withTable("t") { + sql("CREATE TABLE t(b boolean, t timestamp) USING parquet") + val msg = intercept[AnalysisException] { + sql("INSERT INTO t VALUES (TIMESTAMP('2010-09-02 14:10:10'), true)") + }.getMessage + assert(msg.contains("Cannot safely cast 'b': timestamp to boolean")) + assert(msg.contains("Cannot safely cast 't': boolean to timestamp")) + } + + withTable("t") { + sql("CREATE TABLE t(b boolean, d date) USING parquet") + val msg = intercept[AnalysisException] { + sql("INSERT INTO t VALUES (date('2010-09-02'), true)") + }.getMessage + assert(msg.contains("Cannot safely cast 'b': date to boolean")) + assert(msg.contains("Cannot safely cast 'd': boolean to date")) + } + } + } + test("SPARK-30844: static partition should also follow StoreAssignmentPolicy") { SQLConf.StoreAssignmentPolicy.values.foreach { policy => withSQLConf( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala index 9b26a5659df49..48d717daf00d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.sources import java.net.URI -import org.apache.hadoop.fs.Path - import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogUtils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index f97c9386f9488..788be539fe073 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -21,7 +21,6 @@ import java.io.File import java.sql.Date import org.apache.commons.io.FileUtils -import org.scalatest.BeforeAndAfterAll import org.scalatest.exceptions.TestFailedException import org.apache.spark.SparkException @@ -34,7 +33,7 @@ import org.apache.spark.sql.catalyst.plans.physical.UnknownPartitioning import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.RDDScanExec import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore, StateStoreId, StateStoreMetrics, UnsafeRowPair} +import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore} import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 8797e5ad64149..e64d5f6f3587e 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1134,7 +1134,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( - inputDF.groupBy().count().limit(1), + inputDF.groupBy("value").count().limit(1), expectStreamingLimit = false, outputMode = OutputMode.Complete()) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 7a2e29f1258ae..624b630401f47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.streaming import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import scala.language.experimental.macros import scala.reflect.ClassTag import scala.util.Random import scala.util.control.NonFatal diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 4a57cc27b1d59..0524e29662014 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.streaming import java.io.File import java.util.{Locale, TimeZone} -import scala.collection.mutable - import org.apache.commons.io.FileUtils import org.scalatest.Assertions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index e1505acf3ecda..ac9cd1a12d06f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql.streaming -import org.scalatest.BeforeAndAfterAll - -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning, SinglePartition} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ -import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingDeduplicateExec} -import org.apache.spark.sql.execution.streaming.state.StateStore +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b2bb00b704a69..a25616af360b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -26,19 +26,11 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter -import org.apache.spark.SparkContext import org.apache.spark.scheduler.ExecutorCacheTaskLocation -import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} -import 
org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter} -import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.execution.{FileSourceScanExec, LogicalRDD} -import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper} import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreProviderId} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types._ import org.apache.spark.util.Utils abstract class StreamingJoinSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 0d17f2e0bc7fb..02f91399fce1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -22,7 +22,6 @@ import java.sql.Timestamp import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ -import org.apache.spark.sql.execution.datasources.v2.ContinuousScanExec import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 8d39704c61d4e..bdc714d49fcc9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -89,8 +89,6 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { override def getOffset: Option[Offset] = Some(new LongOffset(0)) override def getBatch(start: Option[Offset], end: Offset): DataFrame = { - import spark.implicits._ - spark.internalCreateDataFrame(spark.sparkContext.emptyRDD, schema, isStreaming = true) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala index 1b6724054a3ad..d15e5c42732d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.test import org.scalatest.funspec.AnyFunSpec -import org.apache.spark.sql.Dataset - /** * The purpose of this suite is to make sure that generic FunSpec-based scala * tests work with a shared spark session diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index e58357a415545..45cfa86ba9343 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.thriftserver -import java.util.UUID import java.util.regex.Pattern -import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetSchemasOperation @@ -29,7 +27,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext -import org.apache.spark.util.{Utils => SparkUtils} /** * Spark's own GetSchemasOperation diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index bccad865be27a..bddf5eb82012f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -30,7 +30,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ -import org.apache.spark.sql.hive.HiveUtils /** * Spark's own GetTablesOperation diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index c39d2ecdd7923..df0fa514ccff3 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -24,7 +24,6 @@ import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import scala.util.control.NonFatal -import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.shims.Utils @@ -37,7 +36,6 @@ import org.apache.hive.service.server.HiveServer2 import org.slf4j.Logger import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index e4559e69e7585..856edede0b85f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -17,11 +17,7 @@ package org.apache.spark.sql.hive.thriftserver -import java.util.concurrent.Executors - -import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.SessionHandle import 
org.apache.hive.service.cli.session.SessionManager import org.apache.hive.service.rpc.thrift.TProtocolVersion diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 8efbdb30c605c..54a40e3990f09 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.thriftserver.ui import java.net.URLEncoder import java.nio.charset.StandardCharsets.UTF_8 -import java.util.Calendar import javax.servlet.http.HttpServletRequest import scala.xml.Node diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index f5ce21f2af335..d39b94503fe40 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -27,7 +27,7 @@ import scala.concurrent.Promise import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} +import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index be42497113469..4a87be5f61195 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -23,7 +23,6 @@ import java.util.{Locale, MissingFormatArgumentException} import scala.util.control.NonFatal -import org.apache.commons.io.FileUtils import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.SparkException diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index a685549290f0e..d9b6bb43c2b47 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -22,7 +22,6 @@ import java.io.File import org.scalatest.BeforeAndAfter import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index f01a03996821a..907bb86ad0c1c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -41,7 +41,6 @@ import 
org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 8a248a251820f..f60bad180a710 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DecimalType, DoubleType} import org.apache.spark.util.Utils @@ -44,7 +43,6 @@ private[sql] class HiveSessionCatalog( globalTempViewManagerBuilder: () => GlobalTempViewManager, val metastoreCatalog: HiveMetastoreCatalog, functionRegistry: FunctionRegistry, - conf: SQLConf, hadoopConf: Configuration, parser: ParserInterface, functionResourceLoader: FunctionResourceLoader) @@ -52,7 +50,6 @@ private[sql] class HiveSessionCatalog( externalCatalogBuilder, globalTempViewManagerBuilder, functionRegistry, - conf, hadoopConf, parser, functionResourceLoader) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index f79aaa464de81..b30492802495f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -62,7 +62,6 @@ class HiveSessionStateBuilder( () => session.sharedState.globalTempViewManager, new HiveMetastoreCatalog(session), functionRegistry, - conf, SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf), sqlParser, resourceLoader) @@ -73,7 +72,7 @@ class HiveSessionStateBuilder( /** * A logical query plan `Analyzer` with rules specific to Hive. */ - override protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { + override protected def analyzer: Analyzer = new Analyzer(catalogManager) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new ResolveHiveSerdeTable(session) +: new FindDataSourceTable(session) +: @@ -98,7 +97,7 @@ class HiveSessionStateBuilder( PreWriteCheck +: PreReadCheck +: TableCapabilityCheck +: - CommandCheck(conf) +: + CommandCheck +: customCheckRules } @@ -109,7 +108,7 @@ class HiveSessionStateBuilder( * Planner that takes into account Hive-specific strategies. 
 */ override protected def planner: SparkPlanner = { - new SparkPlanner(session, conf, experimentalMethods) with HiveStrategies { + new SparkPlanner(session, experimentalMethods) with HiveStrategies { override val sparkSession: SparkSession = session override def extraPlanningStrategies: Seq[Strategy] = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 399f8911ef679..46a8e9660a207 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -24,7 +24,6 @@ import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap -import scala.language.implicitConversions import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.conf.Configuration @@ -96,17 +95,18 @@ private[spark] object HiveUtils extends Logging { .createWithDefault("builtin") val HIVE_METASTORE_JARS_PATH = buildStaticConf("spark.sql.hive.metastore.jars.path") - .doc(s"Comma separated URL of Hive jars, support both local and remote paths," + - s"Such as: " + - s" 1. file://path/to/jar/xxx.jar\n" + - s" 2. hdfs://nameservice/path/to/jar/xxx.jar\n" + - s" 3. /path/to/jar/ (path without URI scheme follow conf `fs.defaultFS`'s URI schema)\n" + - s" 4. [http/https/ftp]://path/to/jar/xxx.jar\n" + - s"Notice: `http/https/ftp` doesn't support wildcard, but other URLs support" + - s"nested path wildcard, Such as: " + - s" 1. file://path/to/jar/*, file://path/to/jar/*/*\n" + - s" 2. hdfs://nameservice/path/to/jar/*, hdfs://nameservice/path/to/jar/*/*\n" + - s"When ${HIVE_METASTORE_JARS.key} is set to `path`, we will use Hive jars configured by this") + .doc(s""" + | Comma-separated paths of the jars that are used to instantiate the HiveMetastoreClient. + | This configuration is useful only when `${HIVE_METASTORE_JARS.key}` is set as `path`. + | The paths can be any of the following formats: + | 1. file://path/to/jar/foo.jar + | 2. hdfs://nameservice/path/to/jar/foo.jar + | 3. /path/to/jar/ (path without URI scheme follow conf `fs.defaultFS`'s URI schema) + | 4. [http/https/ftp]://path/to/jar/foo.jar + | Note that 1, 2, and 3 support wildcard. For example: + | 1. file://path/to/jar/*,file://path2/to/jar/*/*.jar + | 2. 
hdfs://nameservice/path/to/jar/*,hdfs://nameservice2/path/to/jar/*/*.jar + """.stripMargin) .version("3.1.0") .stringConf .toSequence diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 3e0d44160c8a1..eb9ce877fc8d2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -39,7 +39,7 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} import org.apache.spark.sql.catalyst.analysis.CastSupport import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -68,7 +68,7 @@ class HadoopTableReader( @transient private val tableDesc: TableDesc, @transient private val sparkSession: SparkSession, hadoopConf: Configuration) - extends TableReader with CastSupport with Logging { + extends TableReader with CastSupport with SQLConfHelper with Logging { // Hadoop honors "mapreduce.job.maps" as hint, // but will ignore when mapreduce.jobtracker.address is "local". diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index a78e1cebc588c..9bc99b08c2cc8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -57,7 +57,6 @@ import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index bf67ae6bfe92e..d989f0154ea95 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -41,11 +41,11 @@ import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, CatalogTablePartition, CatalogUtils, FunctionResource, FunctionResourceType} import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AtomicType, IntegralType, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -724,12 +724,13 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } val useAdvanced = SQLConf.get.advancedPartitionPredicatePushdownEnabled + val inSetThreshold = 
SQLConf.get.metastorePartitionPruningInSetThreshold object ExtractAttribute { def unapply(expr: Expression): Option[Attribute] = { expr match { case attr: Attribute => Some(attr) - case Cast(child @ AtomicType(), dt: AtomicType, _) + case Cast(child @ IntegralType(), dt: IntegralType, _) if Cast.canUpCast(child.dataType.asInstanceOf[AtomicType], dt) => unapply(child) case _ => None } @@ -741,6 +742,12 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { if useAdvanced => Some(convertInToOr(name, values)) + case InSet(child, values) if useAdvanced && values.size > inSetThreshold => + val dataType = child.dataType + val sortedValues = values.toSeq.sorted(TypeUtils.getInterpretedOrdering(dataType)) + convert(And(GreaterThanOrEqual(child, Literal(sortedValues.head, dataType)), + LessThanOrEqual(child, Literal(sortedValues.last, dataType)))) + case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values)) if useAdvanced => Some(convertInToOr(name, values)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala index 4be3cd45454c6..c712a4a2b7c23 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive.execution -import java.io.{File, IOException} +import java.io.IOException import java.net.URI import java.text.SimpleDateFormat import java.util.{Date, Locale, Random} diff --git a/sql/hive/src/test/resources/data/scripts/test_transform.py b/sql/hive/src/test/resources/data/scripts/test_transform.py index ac6d11d8b919c..dedb370f6c90e 100755 --- a/sql/hive/src/test/resources/data/scripts/test_transform.py +++ b/sql/hive/src/test/resources/data/scripts/test_transform.py @@ -1,3 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# import sys delim = sys.argv[1] diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala index da34c54cb36a2..e71b11e7a3f41 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 270595b0011e9..e413e0ee73cb9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive -import java.net.URI - import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index 86fc32cd8ca63..b3ea54a7bc931 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * A suite of tests for the Parquet support through the data sources API. 
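The HiveShim hunk above (SPARK-33416) stops expanding a very large InSet predicate into a long chain of OR-ed equalities: once the set size exceeds the HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD config, the values are sorted and the predicate is collapsed into a single closed range, so the filter string pushed to the Hive metastore stays small. The following is a minimal standalone Scala sketch of that idea only; the object and method names are hypothetical, and the string output merely mimics the shape of the metastore filter rather than using Spark's internal Shim_v0_13 conversion.

// Sketch (hypothetical names): collapse a large IN-set into one closed range.
object InSetPruningSketch {

  // Below `threshold`, emit an OR of equality checks; above it, emit a single
  // "col >= min and col <= max" range over the sorted values.
  def collapseInSet(col: String, values: Set[Int], threshold: Int): String = {
    if (values.size > threshold) {
      val sorted = values.toSeq.sorted
      s"($col >= ${sorted.head} and $col <= ${sorted.last})"
    } else {
      values.toSeq.sorted.map(v => s"$col = $v").mkString("(", " or ", ")")
    }
  }

  def main(args: Array[String]): Unit = {
    // 19 values with a threshold of 15 collapse into a range, matching the shape
    // asserted by the FiltersSuite test further down in this diff.
    println(collapseInSet("intcol", (1 until 20).toSet, threshold = 15))
    // (intcol >= 1 and intcol <= 19)

    // A small set stays as an OR of equalities.
    println(collapseInSet("intcol", Set(1, 2, 3), threshold = 15))
    // (intcol = 1 or intcol = 2 or intcol = 3)
  }
}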
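The InsertSuite test added earlier in this diff (SPARK-33354) checks that, under the ANSI store assignment policy, an INSERT whose values cannot be safely cast to the target column types fails analysis instead of being silently coerced. Below is a small, self-contained sketch of that behaviour; the local SparkSession setup and the table name are illustrative assumptions, not part of the patch.

// Sketch: with spark.sql.storeAssignmentPolicy=ANSI, an unsafe insert cast is rejected
// at analysis time instead of being applied implicitly.
import org.apache.spark.sql.{AnalysisException, SparkSession}

object AnsiInsertSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("ansi-insert-sketch")
      .config("spark.sql.storeAssignmentPolicy", "ANSI")
      .getOrCreate()

    spark.sql("CREATE TABLE t(i int, t timestamp) USING parquet")
    try {
      // The values are swapped relative to the declared schema, so ANSI assignment refuses
      // the timestamp-to-int and int-to-timestamp casts, as the new test expects.
      spark.sql("INSERT INTO t VALUES (TIMESTAMP('2010-09-02 14:10:10'), 1)")
    } catch {
      case e: AnalysisException => println(e.getMessage)
    } finally {
      spark.sql("DROP TABLE IF EXISTS t")
      spark.stop()
    }
  }
}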
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index 54c64a4eeb190..89131a79e59de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql.hive -import scala.collection.JavaConverters._ -import scala.language.implicitConversions - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.serde2.ColumnProjectionUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 446923ad23201..3e7c3e6799724 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} +import org.apache.spark.sql.internal.HiveSerDe class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 501a877e8b7fb..77d54ed45a5de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -770,8 +770,6 @@ object SPARK_14244 extends QueryTest { val hiveContext = new TestHiveContext(sparkContext) spark = hiveContext.sparkSession - import hiveContext.implicits._ - try { val window = Window.orderBy("id") val df = spark.range(2).select(cume_dist().over(window).as("cdist")).orderBy("cdist") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala index ca1af73b038a7..d0af8dc7ae49f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.hive import scala.collection.JavaConverters._ -import scala.util.Random import org.apache.hadoop.hive.ql.udf.generic.GenericUDF import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, StandardListObjectInspector} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 4ad97eaa2b1c8..d8e1e01292820 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -23,9 +23,8 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SQLTestUtils} +import 
org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.ChildFirstURLClassLoader class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 1e396553c9c52..483622b16762a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.hive -import java.io.File import java.sql.Timestamp -import com.google.common.io.Files -import org.apache.hadoop.fs.FileSystem - import org.apache.spark.internal.config._ import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 7d5a200606356..43d1ba04c561d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, HiveTableRelation} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, HistogramBin, HistogramSerializer} +import org.apache.spark.sql.catalyst.plans.logical.HistogramBin import org.apache.spark.sql.catalyst.util.{DateTimeUtils, StringUtils} import org.apache.spark.sql.execution.command.{AnalyzeColumnCommand, CommandUtils, DDLUtils} import org.apache.spark.sql.execution.datasources.LogicalRelation diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 2a4efd0cce6e0..12b409e487061 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -114,5 +114,33 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { } } + test("SPARK-33416: Avoid Hive metastore stack overflow when InSet predicate have many values") { + def checkConverted(inSet: InSet, result: String): Unit = { + assert(shim.convertFilters(testTable, inSet :: Nil) == result) + } + + withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "15") { + checkConverted( + InSet(a("intcol", IntegerType), + Range(1, 20).map(s => Literal(s).eval(EmptyRow)).toSet), + "(intcol >= 1 and intcol <= 19)") + + checkConverted( + InSet(a("stringcol", StringType), + Range(1, 20).map(s => Literal(s.toString).eval(EmptyRow)).toSet), + "(stringcol >= \"1\" and stringcol <= \"9\")") + + checkConverted( + InSet(a("intcol", IntegerType).cast(LongType), + Range(1, 20).map(s => Literal(s.toLong).eval(EmptyRow)).toSet), + "(intcol >= 1 and intcol <= 19)") + + checkConverted( + InSet(a("doublecol", DoubleType), + Range(1, 20).map(s => Literal(s.toDouble).eval(EmptyRow)).toSet), + "") + } + } + private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala index 77956f4fe69da..b94d517e89e30 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala @@ -21,7 +21,6 @@ import java.security.PrivilegedExceptionAction import org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.UserGroupInformation -import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.apache.spark.util.Utils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index daa785bf110c5..81186909bb167 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StructType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StringType, StructType} import org.apache.spark.util.Utils class HivePartitionFilteringSuite(version: String) @@ -290,6 +290,13 @@ class HivePartitionFilteringSuite(version: String) (20170101 to 20170103, 0 to 4, Seq("ab", "bb")) :: Nil) } + test("getPartitionsByFilter: chunk in ('ab', 'ba') and ((cast(ds as string)>'20170102')") { + val day = (20170101 to 20170103, 0 to 4, Seq("ab", "ba")) + testMetastorePartitionFiltering( + attr("chunk").in("ab", "ba") && (attr("ds").cast(StringType) > "20170102"), + day :: Nil) + } + private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index a78fd506b752e..d3398842afb21 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -22,9 +22,8 @@ import java.io.File import com.google.common.io.Files import org.apache.hadoop.fs.{FileContext, FsConstants, Path} -import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.execution.command.LoadDataCommand import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -33,7 +32,6 @@ import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { - import testImplicits._ protected override def beforeAll(): Unit = { super.beforeAll() @@ -58,27 +56,11 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto |STORED AS PARQUET |TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val") """.stripMargin) - sql("CREATE TABLE parquet_tab3(col1 int, `col 2` 
int) USING hive") sql("CREATE TABLE parquet_tab4 (price int, qty int) partitioned by (year int, month int)") sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 1) SELECT 1, 1") sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 2) SELECT 2, 2") sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 2) SELECT 3, 3") sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 3) SELECT 3, 3") - sql( - """ - |CREATE TABLE parquet_tab5 (price int, qty int) - |PARTITIONED BY (year int, month int, hour int, minute int, sec int, extra int) - """.stripMargin) - sql( - """ - |INSERT INTO parquet_tab5 - |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - """.stripMargin) - sql( - """ - |INSERT INTO parquet_tab5 - |PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - """.stripMargin) sql("CREATE VIEW parquet_view1 as select * from parquet_tab4") } @@ -86,10 +68,8 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto try { sql("DROP TABLE IF EXISTS parquet_tab1") sql("DROP TABLE IF EXISTS parquet_tab2") - sql("DROP TABLE IF EXISTS parquet_tab3") sql("DROP VIEW IF EXISTS parquet_view1") sql("DROP TABLE IF EXISTS parquet_tab4") - sql("DROP TABLE IF EXISTS parquet_tab5") } finally { super.afterAll() } @@ -393,88 +373,6 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } } - - test("show partitions - show everything") { - checkAnswer( - sql("show partitions parquet_tab4"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - - checkAnswer( - sql("show partitions default.parquet_tab4"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - } - - test("show partitions - show everything more than 5 part keys") { - checkAnswer( - sql("show partitions parquet_tab5"), - Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: - Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) - } - - test("show partitions - filter") { - checkAnswer( - sql("show partitions default.parquet_tab4 PARTITION(year=2015)"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: Nil) - - checkAnswer( - sql("show partitions default.parquet_tab4 PARTITION(year=2015, month=1)"), - Row("year=2015/month=1") :: Nil) - - checkAnswer( - sql("show partitions default.parquet_tab4 PARTITION(month=2)"), - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: Nil) - } - - test("show partitions - empty row") { - withTempView("parquet_temp") { - sql( - """ - |CREATE TEMPORARY VIEW parquet_temp (c1 INT, c2 STRING) - |USING org.apache.spark.sql.parquet.DefaultSource - """.stripMargin) - // An empty sequence of row is returned for session temporary table. 
- intercept[NoSuchTableException] { - sql("SHOW PARTITIONS parquet_temp") - } - - val message1 = intercept[AnalysisException] { - sql("SHOW PARTITIONS parquet_tab3") - }.getMessage - assert(message1.contains("not allowed on a table that is not partitioned")) - - val message2 = intercept[AnalysisException] { - sql("SHOW PARTITIONS parquet_tab4 PARTITION(abcd=2015, xyz=1)") - }.getMessage - assert(message2.contains("Non-partitioning column(s) [abcd, xyz] are specified")) - - val message3 = intercept[AnalysisException] { - sql("SHOW PARTITIONS parquet_view1") - }.getMessage - assert(message3.contains("is not allowed on a view")) - } - } - - test("show partitions - datasource") { - withTable("part_datasrc") { - val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") - df.write - .partitionBy("a") - .format("parquet") - .mode(SaveMode.Overwrite) - .saveAsTable("part_datasrc") - - assert(sql("SHOW PARTITIONS part_datasrc").count() == 3) - } - } - test("SPARK-25918: LOAD DATA LOCAL INPATH should handle a relative path") { val localFS = FileContext.getLocalFSFileContext() val workingDir = localFS.getWorkingDirectory diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index cea7c5686054a..1cabf6033e8d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec -import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala index da7dfd05f33d6..8aae7a1545b1a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.hive.execution -import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession} +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.execution.SQLViewSuite -import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} +import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types.{NullType, StructType} /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index a8b10fc94d880..1018ae5b68895 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.execution -import java.io.File import java.sql.Timestamp -import 
org.apache.commons.io.FileUtils import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.scalatest.exceptions.TestFailedException @@ -28,7 +26,6 @@ import org.apache.spark.{SparkException, TestUtils} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} import org.apache.spark.sql.execution._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index ba6dbb01d5901..4a50621d89d4e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -21,7 +21,6 @@ import java.io.{File, IOException} import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index dd797b39e0939..9e8046b9ef544 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.functions.max -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index 993a730524f6f..8e35cd034311d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, EqualTo, Expression, IsNotNull, Literal} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, Expression, IsNotNull, Literal} import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a69a949e3a3a2..712f81d98753e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -45,7 +45,6 @@ import 
org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ import org.apache.spark.tags.SlowHiveTest -import org.apache.spark.util.Utils case class Nested1(f1: Nested2) case class Nested2(f2: Nested3) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala index 1f1a5568b0201..50f13efccc915 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala @@ -17,23 +17,15 @@ package org.apache.spark.sql.hive.execution -import java.lang.{Double => jlDouble, Integer => jlInt, Long => jlLong} - -import scala.collection.JavaConverters._ -import scala.util.Random - -import test.org.apache.spark.sql.MyDoubleAvg -import test.org.apache.spark.sql.MyDoubleSum +import java.lang.{Double => jlDouble, Long => jlLong} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.GenericInternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.expressions.{Aggregator} +import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala new file mode 100644 index 0000000000000..a92478faf0e16 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with TestHiveSingleton { + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 5669cb757a678..f7c13ea047da7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -23,7 +23,6 @@ import java.util.{Set => JavaSet} import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.language.implicitConversions import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path diff --git a/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala b/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala index a2571b910f615..99d59e4a1447a 100644 --- a/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala +++ b/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala @@ -23,9 +23,7 @@ import javax.ws.rs.core.MediaType import org.apache.spark.status.api.v1.NotFoundException import org.apache.spark.streaming.Time -import org.apache.spark.streaming.ui.StreamingJobProgressListener import org.apache.spark.streaming.ui.StreamingJobProgressListener._ -import org.apache.spark.ui.SparkUI @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class ApiStreamingRootResource extends BaseStreamingAppResource { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index 734c6ef42696e..c4cd1a9dc336b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -17,8 +17,6 @@ package org.apache.spark.streaming -import scala.language.implicitConversions - import org.apache.spark.annotation.Experimental /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index d038021e93e73..4ac1c62822e7a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -21,7 +21,6 @@ import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.language.implicitConversions import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala index 6b332206e8f6d..9d4b67bccecaf 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala @@ -22,7 +22,6 @@ import scala.util.Random import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.rdd.BlockRDD import 
org.apache.spark.storage.{StorageLevel, StreamBlockId} -import org.apache.spark.streaming.StreamingConf.RECEIVER_WAL_ENABLE_CONF_KEY import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD import org.apache.spark.streaming.receiver.{BlockManagerBasedStoreResult, Receiver, WriteAheadLogBasedStoreResult} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index 55c2950261a07..7ce4343acbdac 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -17,11 +17,10 @@ package org.apache.spark.streaming -import java.io.{File, IOException, ObjectInputStream} +import java.io.{IOException, ObjectInputStream} import java.util.concurrent.{ConcurrentLinkedQueue, TimeUnit} import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.reflect.ClassTag import org.scalatest.BeforeAndAfterEach diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala index cd867aa8132bc..31456b0b95b18 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala @@ -25,7 +25,6 @@ import scala.collection.mutable import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.concurrent.Eventually._ -import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.SpanSugar._ diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index 293498ae5c37b..c2b039244d01f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.streaming.scheduler import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} -import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} +import org.scalatest.PrivateMethodTester import org.scalatest.concurrent.Eventually.{eventually, timeout} import org.scalatest.time.SpanSugar._ import org.scalatestplus.mockito.MockitoSugar
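Note (not part of the patch): a minimal, self-contained sketch of the SHOW PARTITIONS behaviour that the tests removed from HiveCommandSuite above cover, and that the new ShowPartitionsSuite presumably continues to exercise through v1.ShowPartitionsSuiteBase. The table and partition values mirror parquet_tab4 from the suite's beforeAll; the local Hive-enabled SparkSession and the object name ShowPartitionsSketch are illustrative assumptions, not code from this change.

// Sketch only: assumes spark-hive is on the classpath so enableHiveSupport() works,
// and mirrors the parquet_tab4 setup used by the suite above.
import org.apache.spark.sql.SparkSession

object ShowPartitionsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("show-partitions-sketch")
      .enableHiveSupport()
      .getOrCreate()

    // Same shape as the suite's table: two data columns, partitioned by (year, month).
    spark.sql("CREATE TABLE parquet_tab4 (price int, qty int) PARTITIONED BY (year int, month int)")
    spark.sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 1) SELECT 1, 1")
    spark.sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 2) SELECT 2, 2")

    // Lists every partition as 'year=.../month=...' rows.
    spark.sql("SHOW PARTITIONS parquet_tab4").show()

    // A partial partition spec narrows the listing, which is what the filter tests assert.
    spark.sql("SHOW PARTITIONS parquet_tab4 PARTITION(year = 2015)").show()

    spark.stop()
  }
}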