apache
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala‎
Lines changed: 20 additions & 6 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala‎
Lines changed: 20 additions & 6 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala‎
Lines changed: 6 additions & 1 deletion b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala‎
Lines changed: 6 additions & 1 deletion b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala‎
Lines changed: 3 additions & 1 deletion b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.0.crc‎
12 Bytes b/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.0.crc‎
12 Bytes
diff --git a/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.1.crc‎
12 Bytes b/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.1.crc‎
12 Bytes
diff --git a/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/0‎
Lines changed: 2 additions & 0 deletions b/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/0‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/1‎
Lines changed: 2 additions & 0 deletions b/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/1‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/metadata‎
Lines changed: 1 addition & 0 deletions b/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/metadata‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/.0.crc‎
16 Bytes b/‎sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/.0.crc‎
16 Bytes
@@ -41,20 +41,34 @@ class StateSchemaCompatibilityChecker(
   fm.mkdirs(schemaFileLocation.getParent)
 
   def check(keySchema: StructType, valueSchema: StructType): Unit = {
+    check(keySchema, valueSchema, ignoreValueSchema = false)
+  }
+
+  def check(keySchema: StructType, valueSchema: StructType, ignoreValueSchema: Boolean): Unit = {
     if (fm.exists(schemaFileLocation)) {
       logDebug(s"Schema file for provider $providerId exists. Comparing with provided schema.")
       val (storedKeySchema, storedValueSchema) = readSchemaFile()
-      if (storedKeySchema.equals(keySchema) && storedValueSchema.equals(valueSchema)) {
+      if (storedKeySchema.equals(keySchema) &&
+        (ignoreValueSchema || storedValueSchema.equals(valueSchema))) {
         // schema is exactly same
       } else if (!schemasCompatible(storedKeySchema, keySchema) ||
-        !schemasCompatible(storedValueSchema, valueSchema)) {
+        (!ignoreValueSchema && !schemasCompatible(storedValueSchema, valueSchema))) {
+        val errorMsgForKeySchema = s"- Provided key schema: $keySchema\n" +
+          s"- Existing key schema: $storedKeySchema\n"
+
+        // If it is requested to skip checking the value schema, we also don't expose the value
+        // schema information in the error message.
+        val errorMsgForValueSchema = if (!ignoreValueSchema) {
+          s"- Provided value schema: $valueSchema\n" +
+            s"- Existing value schema: $storedValueSchema\n"
+        } else {
+          ""
+        }
         val errorMsg = "Provided schema doesn't match to the schema for existing state! " +
           "Please note that Spark allow difference of field name: check count of fields " +
           "and data type of each field.\n" +
-          s"- Provided key schema: $keySchema\n" +
-          s"- Provided value schema: $valueSchema\n" +
-          s"- Existing key schema: $storedKeySchema\n" +
-          s"- Existing value schema: $storedValueSchema\n" +
+          errorMsgForKeySchema +
+          errorMsgForValueSchema +
           s"If you want to force running query without schema validation, please set " +
           s"${SQLConf.STATE_SCHEMA_CHECK_ENABLED.key} to false.\n" +
           "Please note running query with incompatible schema could cause indeterministic" +
 
@@ -511,7 +511,12 @@ object StateStore extends Logging {
           val checker = new StateSchemaCompatibilityChecker(storeProviderId, hadoopConf)
           // regardless of configuration, we check compatibility to at least write schema file
           // if necessary
-          val ret = Try(checker.check(keySchema, valueSchema)).toEither.fold(Some(_), _ => None)
+          // If format validation for the value schema is disabled, the schema compatibility
+          // check for the value schema is disabled as well.
+          val ret = Try(
+            checker.check(keySchema, valueSchema,
+              ignoreValueSchema = !storeConf.formatValidationCheckValue)
+          ).toEither.fold(Some(_), _ => None)
           if (storeConf.stateSchemaCheckEnabled) {
             ret
           } else {
 
@@ -48,7 +48,12 @@ class StateStoreConf(
   /** Whether validate the underlying format or not. */
   val formatValidationEnabled: Boolean = sqlConf.stateStoreFormatValidationEnabled
 
-  /** Whether validate the value format when the format invalidation enabled. */
+  /**
+   * Whether to validate the value side. This config is applied to both validators as below:
+   *
+   * - whether to validate the value format when the format validation is enabled.
+   * - whether to validate the value schema when the state schema check is enabled.
+   */
   val formatValidationCheckValue: Boolean =
     extraOptions.getOrElse(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG, "true") == "true"
 
 
@@ -783,13 +783,15 @@ case class StreamingDeduplicateExec(
       keyExpressions, getStateInfo, conf) :: Nil
   }
 
+  private val schemaForEmptyRow: StructType = StructType(Array(StructField("__dummy__", NullType)))
+
   override protected def doExecute(): RDD[InternalRow] = {
     metrics // force lazy init at driver
 
     child.execute().mapPartitionsWithStateStore(
       getStateInfo,
       keyExpressions.toStructType,
-      child.output.toStructType,
+      schemaForEmptyRow,
       numColsPrefixKey = 0,
       session.sessionState,
       Some(session.streams.stateStoreCoordinator),
 
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
@@ -0,0 +1 @@
+{"id":"33e8de33-00b8-4b60-8246-df2f433257ff"}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"id":"33e8de33-00b8-4b60-8246-df2f433257ff"}`