Skip to content

Commit

Permalink
Fix Spark unit tests (date_trunc / TruncTimestamp)
Browse files Browse the repository at this point in the history
  • Loading branch information
zml1206 committed Oct 19, 2024
1 parent 738b8f2 commit a250d0f
Show file tree
Hide file tree
Showing 9 changed files with 297 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1438,4 +1438,37 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
checkGlutenOperatorMatch[FilterExecTransformer](df)
}
}

test("date_trunc") {
  withTempPath {
    path =>
      // Two sample rows with sub-second precision exercise every truncation level.
      val input = Seq(
        (1, Timestamp.valueOf("2015-07-22 10:01:40.123456")),
        (2, Timestamp.valueOf("2014-12-31 05:29:06.123456")))
      val parquetPath = path.getCanonicalPath
      input.toDF("a", "b").write.parquet(parquetPath)

      spark.read.parquet(parquetPath).createOrReplaceTempView("view")

      // Every date_trunc granularity alias Velox is expected to support.
      val query = """
                    |SELECT
                    | date_trunc('yy', b) as t1,
                    | date_trunc('yyyy', b) as t2,
                    | date_trunc('year', b) as t3,
                    | date_trunc('quarter', b) as t4,
                    | date_trunc('mon', b) as t5,
                    | date_trunc('month', b) as t6,
                    | date_trunc('mm', b) as t7,
                    | date_trunc('dd', b) as t8,
                    | date_trunc('day', b) as t9,
                    | date_trunc('hour', b) as t10,
                    | date_trunc('minute', b) as t11,
                    | date_trunc('second', b) as t12
                    |FROM view
                    |""".stripMargin

      // The whole projection must be offloaded to Gluten's ProjectExecTransformer.
      runQueryAndCompare(query) {
        checkGlutenOperatorMatch[ProjectExecTransformer]
      }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Replaced by a gluten test to pass timezone through config.
.exclude("Hour")
// Replaced by a gluten test to pass timezone through config.
.exclude("TruncTimestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
Expand Down Expand Up @@ -477,4 +477,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
}
}
}

/**
 * Checks TruncTimestamp three ways: with a foldable literal format, with a
 * non-foldable format, and with bound references fed from an input row.
 */
private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
  val fmtLiteral = Literal.create(fmt, StringType)
  val tsLiteral = Literal.create(input, TimestampType)
  checkEvaluation(TruncTimestamp(fmtLiteral, tsLiteral), expected)
  checkEvaluation(
    TruncTimestamp(NonFoldableLiteral.create(fmt, StringType), tsLiteral),
    expected)
  // SPARK-38990: ensure that evaluation with input rows also works.
  val row = InternalRow(
    UTF8String.fromString(fmt),
    CatalystTypeConverters.convertToCatalyst(input))
  checkEvaluation(
    TruncTimestamp(
      BoundReference(ordinal = 0, dataType = StringType, nullable = true),
      BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
    expected,
    row)
}

testGluten("TruncTimestamp") {
  withDefaultTimeZone(UTC) {
    // 2015-07-22 is a Wednesday, so week truncation lands on Monday 07-20.
    val ts = Timestamp.valueOf("2015-07-22 05:30:06")

    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
      // Each group of equivalent format aliases maps to one expected result.
      val expectedByFormats = Seq(
        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY") -> "2015-01-01 00:00:00",
        Seq("month", "MONTH", "mon", "MON", "mm", "MM") -> "2015-07-01 00:00:00",
        Seq("DAY", "day", "DD", "dd") -> "2015-07-22 00:00:00",
        Seq("HOUR", "hour") -> "2015-07-22 05:00:00",
        Seq("MINUTE", "minute") -> "2015-07-22 05:30:00",
        Seq("SECOND", "second") -> "2015-07-22 05:30:06",
        Seq("WEEK", "week") -> "2015-07-20 00:00:00",
        Seq("QUARTER", "quarter") -> "2015-07-01 00:00:00")

      for {
        (formats, expected) <- expectedByFormats
        fmt <- formats
      } testTruncTimestamp(ts, fmt, Timestamp.valueOf(expected))

      // A null timestamp input must yield a null result.
      testTruncTimestamp(null, "MON", null)
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Replaced by a gluten test to pass timezone through config.
.exclude("Hour")
// Replaced by a gluten test to pass timezone through config.
.exclude("TruncTimestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
Expand Down Expand Up @@ -497,4 +497,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
TimestampAdd("YEAR", Literal(1), Literal(Timestamp.valueOf("2022-02-15 12:57:00"))),
Timestamp.valueOf("2023-02-15 12:57:00"))
}

/**
 * Checks TruncTimestamp three ways: with a foldable literal format, with a
 * non-foldable format, and with bound references fed from an input row.
 */
private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
  val fmtLiteral = Literal.create(fmt, StringType)
  val tsLiteral = Literal.create(input, TimestampType)
  checkEvaluation(TruncTimestamp(fmtLiteral, tsLiteral), expected)
  checkEvaluation(
    TruncTimestamp(NonFoldableLiteral.create(fmt, StringType), tsLiteral),
    expected)
  // SPARK-38990: ensure that evaluation with input rows also works.
  val row = InternalRow(
    UTF8String.fromString(fmt),
    CatalystTypeConverters.convertToCatalyst(input))
  checkEvaluation(
    TruncTimestamp(
      BoundReference(ordinal = 0, dataType = StringType, nullable = true),
      BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
    expected,
    row)
}

testGluten("TruncTimestamp") {
  withDefaultTimeZone(UTC) {
    // 2015-07-22 is a Wednesday, so week truncation lands on Monday 07-20.
    val ts = Timestamp.valueOf("2015-07-22 05:30:06")

    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
      // Each group of equivalent format aliases maps to one expected result.
      val expectedByFormats = Seq(
        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY") -> "2015-01-01 00:00:00",
        Seq("month", "MONTH", "mon", "MON", "mm", "MM") -> "2015-07-01 00:00:00",
        Seq("DAY", "day", "DD", "dd") -> "2015-07-22 00:00:00",
        Seq("HOUR", "hour") -> "2015-07-22 05:00:00",
        Seq("MINUTE", "minute") -> "2015-07-22 05:30:00",
        Seq("SECOND", "second") -> "2015-07-22 05:30:06",
        Seq("WEEK", "week") -> "2015-07-20 00:00:00",
        Seq("QUARTER", "quarter") -> "2015-07-01 00:00:00")

      for {
        (formats, expected) <- expectedByFormats
        fmt <- formats
      } testTruncTimestamp(ts, fmt, Timestamp.valueOf(expected))

      // A null timestamp input must yield a null result.
      testTruncTimestamp(null, "MON", null)
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Replaced by a gluten test to pass timezone through config.
.exclude("Hour")
// Replaced by a gluten test to pass timezone through config.
.exclude("TruncTimestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
Expand Down Expand Up @@ -475,4 +475,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
}
}
}

/**
 * Checks TruncTimestamp three ways: with a foldable literal format, with a
 * non-foldable format, and with bound references fed from an input row.
 */
private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
  val fmtLiteral = Literal.create(fmt, StringType)
  val tsLiteral = Literal.create(input, TimestampType)
  checkEvaluation(TruncTimestamp(fmtLiteral, tsLiteral), expected)
  checkEvaluation(
    TruncTimestamp(NonFoldableLiteral.create(fmt, StringType), tsLiteral),
    expected)
  // SPARK-38990: ensure that evaluation with input rows also works.
  val row = InternalRow(
    UTF8String.fromString(fmt),
    CatalystTypeConverters.convertToCatalyst(input))
  checkEvaluation(
    TruncTimestamp(
      BoundReference(ordinal = 0, dataType = StringType, nullable = true),
      BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
    expected,
    row)
}

testGluten("TruncTimestamp") {
  withDefaultTimeZone(UTC) {
    // 2015-07-22 is a Wednesday, so week truncation lands on Monday 07-20.
    val ts = Timestamp.valueOf("2015-07-22 05:30:06")

    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
      // Each group of equivalent format aliases maps to one expected result.
      val expectedByFormats = Seq(
        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY") -> "2015-01-01 00:00:00",
        Seq("month", "MONTH", "mon", "MON", "mm", "MM") -> "2015-07-01 00:00:00",
        Seq("DAY", "day", "DD", "dd") -> "2015-07-22 00:00:00",
        Seq("HOUR", "hour") -> "2015-07-22 05:00:00",
        Seq("MINUTE", "minute") -> "2015-07-22 05:30:00",
        Seq("SECOND", "second") -> "2015-07-22 05:30:06",
        Seq("WEEK", "week") -> "2015-07-20 00:00:00",
        Seq("QUARTER", "quarter") -> "2015-07-01 00:00:00")

      for {
        (formats, expected) <- expectedByFormats
        fmt <- formats
      } testTruncTimestamp(ts, fmt, Timestamp.valueOf(expected))

      // A null timestamp input must yield a null result.
      testTruncTimestamp(null, "MON", null)
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Replaced by a gluten test to pass timezone through config.
.exclude("Hour")
// Replaced by a gluten test to pass timezone through config.
.exclude("TruncTimestamp")
// Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
.exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
// Replaced by a gluten test to pass timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
Expand Down Expand Up @@ -475,4 +475,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
}
}
}

/**
 * Checks TruncTimestamp three ways: with a foldable literal format, with a
 * non-foldable format, and with bound references fed from an input row.
 */
private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
  val fmtLiteral = Literal.create(fmt, StringType)
  val tsLiteral = Literal.create(input, TimestampType)
  checkEvaluation(TruncTimestamp(fmtLiteral, tsLiteral), expected)
  checkEvaluation(
    TruncTimestamp(NonFoldableLiteral.create(fmt, StringType), tsLiteral),
    expected)
  // SPARK-38990: ensure that evaluation with input rows also works.
  val row = InternalRow(
    UTF8String.fromString(fmt),
    CatalystTypeConverters.convertToCatalyst(input))
  checkEvaluation(
    TruncTimestamp(
      BoundReference(ordinal = 0, dataType = StringType, nullable = true),
      BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
    expected,
    row)
}

testGluten("TruncTimestamp") {
  withDefaultTimeZone(UTC) {
    // 2015-07-22 is a Wednesday, so week truncation lands on Monday 07-20.
    val ts = Timestamp.valueOf("2015-07-22 05:30:06")

    withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
      // Each group of equivalent format aliases maps to one expected result.
      val expectedByFormats = Seq(
        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY") -> "2015-01-01 00:00:00",
        Seq("month", "MONTH", "mon", "MON", "mm", "MM") -> "2015-07-01 00:00:00",
        Seq("DAY", "day", "DD", "dd") -> "2015-07-22 00:00:00",
        Seq("HOUR", "hour") -> "2015-07-22 05:00:00",
        Seq("MINUTE", "minute") -> "2015-07-22 05:30:00",
        Seq("SECOND", "second") -> "2015-07-22 05:30:06",
        Seq("WEEK", "week") -> "2015-07-20 00:00:00",
        Seq("QUARTER", "quarter") -> "2015-07-01 00:00:00")

      for {
        (formats, expected) <- expectedByFormats
        fmt <- formats
      } testTruncTimestamp(ts, fmt, Timestamp.valueOf(expected))

      // A null timestamp input must yield a null result.
      testTruncTimestamp(null, "MON", null)
    }
  }
}
}

0 comments on commit a250d0f

Please sign in to comment.