Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/user-guide/latest/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ Comet supports using the following aggregate functions within window contexts wi
| ArrayRemove | Yes | |
| ArrayRepeat | No | |
| ArrayUnion | No | Behaves differently than spark. Comet sorts the input arrays before performing the union, while Spark preserves the order of the first array and appends unique elements from the second. |
| ArraysOverlap | No | |
| ArraysOverlap | No | Null handling differs: Comet returns `false` when no common elements exist and null elements are present, while Spark returns `null` (three-valued logic). Example: `arrays_overlap(array(1, null, 3), array(4, 5))` returns `null` in Spark but `false` in Comet. |
| CreateArray | Yes | |
| ElementAt | Yes | Input must be an array. Map inputs are not supported. |
| Flatten | Yes | |
Expand Down
6 changes: 5 additions & 1 deletion spark/src/main/scala/org/apache/comet/serde/arrays.scala
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,11 @@ object CometArrayMin extends CometExpressionSerde[ArrayMin] {

object CometArraysOverlap extends CometExpressionSerde[ArraysOverlap] {

override def getSupportLevel(expr: ArraysOverlap): SupportLevel = Incompatible(None)
override def getSupportLevel(expr: ArraysOverlap): SupportLevel = Incompatible(Some(
"Null handling differs from Spark: DataFusion's array_has_any returns false when no " +
"common elements are found, even if null elements exist. Spark returns null in such " +
"cases following three-valued logic (SQL standard). Example: " +
"arrays_overlap(array(1, null, 3), array(4, 5)) returns null in Spark but false in Comet."))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for writing this. I would probably keep the message short and route to the documentation to help declutter the logs :)


override def convert(
expr: ArraysOverlap,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,43 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
}
}

test("arrays_overlap - null handling behavior verification") {
withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[ArraysOverlap]) -> "true") {
withTempDir { dir =>
withTempView("t1") {
val path = new Path(dir.toURI.toString, "test.parquet")
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = false, 100)
spark.read.parquet(path.toString).createOrReplaceTempView("t1")

// Test case 1: Common element exists - should return true
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) from t1 limit 1"))

// Test case 2: No common elements, no nulls - should return false
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(1, 2), array(3, 4)) from t1 limit 1"))

// Test case 3: No common elements, but null exists - Spark returns null (three-valued logic)
// This is the key incompatibility case documented in issue #3175
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(1, null, 3), array(4, 5)) from t1 limit 1"))

// Test case 4: Common element exists even with null - should return true
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(1, null, 3), array(1, 4)) from t1 limit 1"))

// Test case 5: Both arrays have null but no common non-null elements
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(1, null), array(2, null)) from t1 limit 1"))

// Test case 6: Empty arrays
checkSparkAnswerAndOperator(sql(
"SELECT arrays_overlap(array(), array(1, 2)) from t1 limit 1"))
}
}
}
}

test("array_compact") {
// TODO fix for Spark 4.0.0
assume(!isSpark40Plus)
Expand Down
Loading