Skip to content

Commit

Permalink
[SPARK-49719][SQL] Make UUID and SHUFFLE accept integer seed
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Make `UUID` and `SHUFFLE` accept an integer `seed` in addition to a long `seed`.

### Why are the changes needed?
In most cases, a `seed` parameter accepts both int and long values, but `UUID` and `SHUFFLE` only accept a long seed:

```py
In [1]: spark.sql("SELECT RAND(1L), RAND(1), SHUFFLE(array(1, 20, 3, 5), 1L), UUID(1L)").show()
+------------------+------------------+---------------------------+--------------------+
|           rand(1)|           rand(1)|shuffle(array(1, 20, 3, 5))|              uuid()|
+------------------+------------------+---------------------------+--------------------+
|0.6363787615254752|0.6363787615254752|              [20, 1, 3, 5]|1ced31d7-59ef-4bb...|
+------------------+------------------+---------------------------+--------------------+

In [2]: spark.sql("SELECT UUID(1)").show()
...
AnalysisException: [INVALID_PARAMETER_VALUE.LONG] The value of parameter(s) `seed` in `UUID` is invalid: expects a long literal, but got "1". SQLSTATE: 22023; line 1 pos 7
...

In [3]: spark.sql("SELECT SHUFFLE(array(1, 20, 3, 5), 1)").show()
...
AnalysisException: [INVALID_PARAMETER_VALUE.LONG] The value of parameter(s) `seed` in `shuffle` is invalid: expects a long literal, but got "1". SQLSTATE: 22023; line 1 pos 7
...
```

### Does this PR introduce _any_ user-facing change?
Yes.

After this fix, `UUID` and `SHUFFLE` accept both int and long seeds:
```py
In [2]: spark.sql("SELECT SHUFFLE(array(1, 20, 3, 5), 1L), SHUFFLE(array(1, 20, 3, 5), 1), UUID(1L), UUID(1)").show()
+---------------------------+---------------------------+--------------------+--------------------+
|shuffle(array(1, 20, 3, 5))|shuffle(array(1, 20, 3, 5))|              uuid()|              uuid()|
+---------------------------+---------------------------+--------------------+--------------------+
|              [20, 1, 3, 5]|              [20, 1, 3, 5]|1ced31d7-59ef-4bb...|1ced31d7-59ef-4bb...|
+---------------------------+---------------------------+--------------------+--------------------+
```

### How was this patch tested?
Added tests that pass an integer seed literal to `Shuffle` and `Uuid`.

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #48166 from zhengruifeng/int_seed.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
zhengruifeng authored and dongjoon-hyun committed Sep 19, 2024
1 parent 94dca78 commit f0fb0c8
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ trait ExpressionWithRandomSeed extends Expression {

private[catalyst] object ExpressionWithRandomSeed {
def expressionToSeed(e: Expression, source: String): Option[Long] = e match {
case IntegerLiteral(seed) => Some(seed)
case LongLiteral(seed) => Some(seed)
case Literal(null, _) => None
case _ => throw QueryCompilationErrors.invalidRandomSeedParameter(source, e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2293,6 +2293,14 @@ class CollectionExpressionsSuite
evaluateWithMutableProjection(Shuffle(ai0, seed2)))
assert(evaluateWithUnsafeProjection(Shuffle(ai0, seed1)) !==
evaluateWithUnsafeProjection(Shuffle(ai0, seed2)))

val seed3 = Literal.create(r.nextInt())
assert(evaluateWithoutCodegen(new Shuffle(ai0, seed3)) ===
evaluateWithoutCodegen(new Shuffle(ai0, seed3)))
assert(evaluateWithMutableProjection(new Shuffle(ai0, seed3)) ===
evaluateWithMutableProjection(new Shuffle(ai0, seed3)))
assert(evaluateWithUnsafeProjection(new Shuffle(ai0, seed3)) ===
evaluateWithUnsafeProjection(new Shuffle(ai0, seed3)))
}

test("Array Except") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
evaluateWithMutableProjection(Uuid(seed2)))
assert(evaluateWithUnsafeProjection(Uuid(seed1)) !==
evaluateWithUnsafeProjection(Uuid(seed2)))

val seed3 = Literal.create(r.nextInt())
assert(evaluateWithoutCodegen(new Uuid(seed3)) === evaluateWithoutCodegen(new Uuid(seed3)))
assert(evaluateWithMutableProjection(new Uuid(seed3)) ===
evaluateWithMutableProjection(new Uuid(seed3)))
assert(evaluateWithUnsafeProjection(new Uuid(seed3)) ===
evaluateWithUnsafeProjection(new Uuid(seed3)))
}

test("PrintToStderr") {
Expand Down

0 comments on commit f0fb0c8

Please sign in to comment.