# Patch summary (text before the first "diff --git" header is ignored by patch tools):
#   * backends-velox ScalarFunctionsValidateSuite: adds a "date_trunc" test that writes two
#     timestamps to parquet, queries date_trunc with twelve unit spellings and checks that
#     the plan contains ProjectExecTransformer.
#   * gluten-ut spark32/33/34/35 VeloxTestSettings: excludes the inherited "TruncTimestamp" test.
#   * gluten-ut spark32/33/34/35 GlutenDateExpressionsSuite: adds a testGluten("TruncTimestamp")
#     replacement that runs with default time zone UTC and SESSION_LOCAL_TIMEZONE set to UTC.
# NOTE(review): this patch was restored from a newline-collapsed copy. Hunk line counts were
# re-checked against every @@ header, but leading indentation had to be reconstructed; confirm
# the context lines against the target tree (or apply with --ignore-whitespace).
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 479139f7a932..cf015a38b144 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -1438,4 +1438,37 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
       checkGlutenOperatorMatch[FilterExecTransformer](df)
     }
   }
+
+  test("date_trunc") {
+    withTempPath {
+      path =>
+        Seq(
+          (1, Timestamp.valueOf("2015-07-22 10:01:40.123456")),
+          (2, Timestamp.valueOf("2014-12-31 05:29:06.123456")))
+          .toDF("a", "b")
+          .write
+          .parquet(path.getCanonicalPath)
+
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+
+        runQueryAndCompare("""
+                             |SELECT
+                             | date_trunc('yy', b) as t1,
+                             | date_trunc('yyyy', b) as t2,
+                             | date_trunc('year', b) as t3,
+                             | date_trunc('quarter', b) as t4,
+                             | date_trunc('mon', b) as t5,
+                             | date_trunc('month', b) as t6,
+                             | date_trunc('mm', b) as t7,
+                             | date_trunc('dd', b) as t8,
+                             | date_trunc('day', b) as t9,
+                             | date_trunc('hour', b) as t10,
+                             | date_trunc('minute', b) as t11,
+                             | date_trunc('second', b) as t12
+                             |FROM view
+                             |""".stripMargin) {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
 }
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 337aa5025ff9..ebfd6162392e 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -210,6 +210,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_unix_timestamp")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("Hour")
+    // Replaced by a gluten test to pass timezone through config.
+    .exclude("TruncTimestamp")
     // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
     // Replaced by a gluten test to pass timezone through config.
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
index 44d4502aedac..3928458f52fe 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.GlutenTestsTrait
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -477,4 +477,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
       }
     }
   }
+
+  private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
+    checkEvaluation(
+      TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
+      expected)
+    checkEvaluation(
+      TruncTimestamp(
+        NonFoldableLiteral.create(fmt, StringType),
+        Literal.create(input, TimestampType)),
+      expected)
+    // SPARK-38990: ensure that evaluation with input rows also works
+    val catalystInput = CatalystTypeConverters.convertToCatalyst(input)
+    val inputRow = InternalRow(UTF8String.fromString(fmt), catalystInput)
+    checkEvaluation(
+      TruncTimestamp(
+        BoundReference(ordinal = 0, dataType = StringType, nullable = true),
+        BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
+      expected,
+      inputRow
+    )
+  }
+
+  testGluten("TruncTimestamp") {
+    withDefaultTimeZone(UTC) {
+      val inputDate = Timestamp.valueOf("2015-07-22 05:30:06")
+
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
+        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-01-01 00:00:00"))
+        }
+
+        Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        Seq("DAY", "day", "DD", "dd").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 00:00:00"))
+        }
+
+        Seq("HOUR", "hour").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:00:00"))
+        }
+
+        Seq("MINUTE", "minute").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:00"))
+        }
+
+        Seq("SECOND", "second").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:06"))
+        }
+
+        Seq("WEEK", "week").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-20 00:00:00"))
+        }
+
+        Seq("QUARTER", "quarter").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        testTruncTimestamp(null, "MON", null)
+      }
+    }
+  }
 }
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index caa91891cf02..1b62a449e6e7 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -126,6 +126,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_unix_timestamp")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("Hour")
+    // Replaced by a gluten test to pass timezone through config.
+    .exclude("TruncTimestamp")
     // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
     // Replaced by a gluten test to pass timezone through config.
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
index 234537feef8a..f8579510de48 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.GlutenTestsTrait
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -497,4 +497,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
       TimestampAdd("YEAR", Literal(1), Literal(Timestamp.valueOf("2022-02-15 12:57:00"))),
       Timestamp.valueOf("2023-02-15 12:57:00"))
   }
+
+  private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
+    checkEvaluation(
+      TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
+      expected)
+    checkEvaluation(
+      TruncTimestamp(
+        NonFoldableLiteral.create(fmt, StringType),
+        Literal.create(input, TimestampType)),
+      expected)
+    // SPARK-38990: ensure that evaluation with input rows also works
+    val catalystInput = CatalystTypeConverters.convertToCatalyst(input)
+    val inputRow = InternalRow(UTF8String.fromString(fmt), catalystInput)
+    checkEvaluation(
+      TruncTimestamp(
+        BoundReference(ordinal = 0, dataType = StringType, nullable = true),
+        BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
+      expected,
+      inputRow
+    )
+  }
+
+  testGluten("TruncTimestamp") {
+    withDefaultTimeZone(UTC) {
+      val inputDate = Timestamp.valueOf("2015-07-22 05:30:06")
+
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
+        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-01-01 00:00:00"))
+        }
+
+        Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        Seq("DAY", "day", "DD", "dd").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 00:00:00"))
+        }
+
+        Seq("HOUR", "hour").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:00:00"))
+        }
+
+        Seq("MINUTE", "minute").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:00"))
+        }
+
+        Seq("SECOND", "second").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:06"))
+        }
+
+        Seq("WEEK", "week").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-20 00:00:00"))
+        }
+
+        Seq("QUARTER", "quarter").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        testTruncTimestamp(null, "MON", null)
+      }
+    }
+  }
 }
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 963fb79a3504..af925d11521c 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -105,6 +105,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_unix_timestamp")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("Hour")
+    // Replaced by a gluten test to pass timezone through config.
+    .exclude("TruncTimestamp")
     // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
     // Replaced by a gluten test to pass timezone through config.
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
index f4a9dfd56a92..2ea3c355b988 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.GlutenTestsTrait
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -475,4 +475,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
       }
     }
   }
+
+  private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
+    checkEvaluation(
+      TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
+      expected)
+    checkEvaluation(
+      TruncTimestamp(
+        NonFoldableLiteral.create(fmt, StringType),
+        Literal.create(input, TimestampType)),
+      expected)
+    // SPARK-38990: ensure that evaluation with input rows also works
+    val catalystInput = CatalystTypeConverters.convertToCatalyst(input)
+    val inputRow = InternalRow(UTF8String.fromString(fmt), catalystInput)
+    checkEvaluation(
+      TruncTimestamp(
+        BoundReference(ordinal = 0, dataType = StringType, nullable = true),
+        BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
+      expected,
+      inputRow
+    )
+  }
+
+  testGluten("TruncTimestamp") {
+    withDefaultTimeZone(UTC) {
+      val inputDate = Timestamp.valueOf("2015-07-22 05:30:06")
+
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
+        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-01-01 00:00:00"))
+        }
+
+        Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        Seq("DAY", "day", "DD", "dd").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 00:00:00"))
+        }
+
+        Seq("HOUR", "hour").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:00:00"))
+        }
+
+        Seq("MINUTE", "minute").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:00"))
+        }
+
+        Seq("SECOND", "second").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:06"))
+        }
+
+        Seq("WEEK", "week").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-20 00:00:00"))
+        }
+
+        Seq("QUARTER", "quarter").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        testTruncTimestamp(null, "MON", null)
+      }
+    }
+  }
 }
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 03f56b46010a..4c8b0bbd240b 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -106,6 +106,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_unix_timestamp")
     // Replaced by a gluten test to pass timezone through config.
     .exclude("Hour")
+    // Replaced by a gluten test to pass timezone through config.
+    .exclude("TruncTimestamp")
     // Unsupported format: yyyy-MM-dd HH:mm:ss.SSS
     .exclude("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError")
     // Replaced by a gluten test to pass timezone through config.
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
index f4a9dfd56a92..2ea3c355b988 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.GlutenTestsTrait
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -475,4 +475,67 @@ class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTr
       }
     }
   }
+
+  private def testTruncTimestamp(input: Timestamp, fmt: String, expected: Timestamp): Unit = {
+    checkEvaluation(
+      TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)),
+      expected)
+    checkEvaluation(
+      TruncTimestamp(
+        NonFoldableLiteral.create(fmt, StringType),
+        Literal.create(input, TimestampType)),
+      expected)
+    // SPARK-38990: ensure that evaluation with input rows also works
+    val catalystInput = CatalystTypeConverters.convertToCatalyst(input)
+    val inputRow = InternalRow(UTF8String.fromString(fmt), catalystInput)
+    checkEvaluation(
+      TruncTimestamp(
+        BoundReference(ordinal = 0, dataType = StringType, nullable = true),
+        BoundReference(ordinal = 1, dataType = TimestampType, nullable = true)),
+      expected,
+      inputRow
+    )
+  }
+
+  testGluten("TruncTimestamp") {
+    withDefaultTimeZone(UTC) {
+      val inputDate = Timestamp.valueOf("2015-07-22 05:30:06")
+
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> UTC_OPT.get) {
+        Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-01-01 00:00:00"))
+        }
+
+        Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        Seq("DAY", "day", "DD", "dd").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 00:00:00"))
+        }
+
+        Seq("HOUR", "hour").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:00:00"))
+        }
+
+        Seq("MINUTE", "minute").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:00"))
+        }
+
+        Seq("SECOND", "second").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-22 05:30:06"))
+        }
+
+        Seq("WEEK", "week").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-20 00:00:00"))
+        }
+
+        Seq("QUARTER", "quarter").foreach {
+          fmt => testTruncTimestamp(inputDate, fmt, Timestamp.valueOf("2015-07-01 00:00:00"))
+        }
+
+        testTruncTimestamp(null, "MON", null)
+      }
+    }
+  }
 }