Skip to content

Commit 500877e

Browse files
TJX2014dongjoon-hyun
authored andcommitted
[SPARK-32133][SQL] Forbid time field steps for date start/end in Sequence
### What changes were proposed in this pull request? 1.Add time field steps check for date start/end in Sequence at `org.apache.spark.sql.catalyst.expressions.Sequence.TemporalSequenceImpl` 2.Add a UT:`SPARK-32133: Sequence step must be a day interval if start and end values are dates` at `org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite` ### Why are the changes needed? **Sequence time field steps for date start/end looks strange in spark as follows:** ``` scala> sql("select explode(sequence(cast('2011-03-01' as date), cast('2011-03-02' as date), interval 1 hour))").head(3) res0: Array[org.apache.spark.sql.Row] = _Array([2011-03-01], [2011-03-01], [2011-03-01])_ **<- strange result.** scala> sql("select explode(sequence(cast('2011-03-01' as date), cast('2011-03-02' as date), interval 1 day))").head(3) res1: Array[org.apache.spark.sql.Row] = Array([2011-03-01], [2011-03-02]) ``` **While this behavior in Prosto make sense:** ``` presto> select sequence(date('2011-03-01'),date('2011-03-02'),interval '1' hour); Query 20200624_122744_00002_pehix failed: sequence step must be a day interval if start and end values are dates presto> select sequence(date('2011-03-01'),date('2011-03-02'),interval '1' day); _col0 [2011-03-01, 2011-03-02] ``` ### Does this PR introduce _any_ user-facing change? Yes, after this patch, users will get informed `sequence step must be a day interval if start and end values are dates` when use time field steps for date start/end in Sequence. ### How was this patch tested? Unit test. Closes #28926 from TJX2014/master-SPARK-31982-sequence-cross-dst-follow-presto. Authored-by: TJX2014 <xiaoxingstack@gmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent 560fe1f commit 500877e

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,10 +2612,17 @@ object Sequence {
26122612
val stepDays = step.days
26132613
val stepMicros = step.microseconds
26142614

2615+
if (scale == MICROS_PER_DAY && stepMonths == 0 && stepDays == 0) {
2616+
throw new IllegalArgumentException(
2617+
"sequence step must be a day interval if start and end values are dates")
2618+
}
2619+
26152620
if (stepMonths == 0 && stepMicros == 0 && scale == MICROS_PER_DAY) {
2621+
// Adding pure days to date start/end
26162622
backedSequenceImpl.eval(start, stop, fromLong(stepDays))
26172623

26182624
} else if (stepMonths == 0 && stepDays == 0 && scale == 1) {
2625+
// Adding pure microseconds to timestamp start/end
26192626
backedSequenceImpl.eval(start, stop, fromLong(stepMicros))
26202627

26212628
} else {
@@ -2674,11 +2681,24 @@ object Sequence {
26742681
|${genSequenceLengthCode(ctx, startMicros, stopMicros, intervalInMicros, arrLength)}
26752682
""".stripMargin
26762683

2684+
val check = if (scale == MICROS_PER_DAY) {
2685+
s"""
2686+
|if ($stepMonths == 0 && $stepDays == 0) {
2687+
| throw new IllegalArgumentException(
2688+
| "sequence step must be a day interval if start and end values are dates");
2689+
|}
2690+
""".stripMargin
2691+
} else {
2692+
""
2693+
}
2694+
26772695
s"""
26782696
|final int $stepMonths = $step.months;
26792697
|final int $stepDays = $step.days;
26802698
|final long $stepMicros = $step.microseconds;
26812699
|
2700+
|$check
2701+
|
26822702
|if ($stepMonths == 0 && $stepMicros == 0 && ${scale}L == ${MICROS_PER_DAY}L) {
26832703
| ${backedSequenceImpl.genCode(ctx, start, stop, stepDays, arr, elemType)};
26842704
|

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,13 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
933933
Literal(negateExact(stringToInterval("interval 1 month")))),
934934
EmptyRow,
935935
s"sequence boundaries: 0 to 2678400000000 by -${28 * MICROS_PER_DAY}")
936+
937+
// SPARK-32133: Sequence step must be a day interval if start and end values are dates
938+
checkExceptionInExpression[IllegalArgumentException](Sequence(
939+
Cast(Literal("2011-03-01"), DateType),
940+
Cast(Literal("2011-04-01"), DateType),
941+
Option(Literal(stringToInterval("interval 1 hour")))), null,
942+
"sequence step must be a day interval if start and end values are dates")
936943
}
937944
}
938945

0 commit comments

Comments
 (0)