From 59166c5407d6bff94f64b60fa8445b7d30547bf6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 12 Oct 2024 01:19:02 -0700 Subject: [PATCH] fix: Fallback to Spark if named_struct contains duplicate field names --- .../apache/comet/serde/QueryPlanSerde.scala | 5 +++++ .../apache/comet/CometExpressionSuite.scala | 22 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 51b32b7df..c5ca14df5 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -2453,6 +2453,11 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim } case struct @ CreateNamedStruct(_) => + if (struct.names.length != struct.names.distinct.length) { + withInfo(expr, "CreateNamedStruct with duplicate field names are not supported") + return None + } + val valExprs = struct.valExprs.map(exprToProto(_, inputs, binding)) if (valExprs.forall(_.isDefined)) { diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 16bc15b84..6874a5ceb 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.{CometTestBase, DataFrame, Row} import org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps import org.apache.spark.sql.comet.CometProjectExec -import org.apache.spark.sql.execution.{ColumnarToRowExec, InputAdapter, WholeStageCodegenExec} +import org.apache.spark.sql.execution.{ColumnarToRowExec, InputAdapter, ProjectExec, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -2062,6 +2062,26 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("named_struct with duplicate field names") { + Seq(true, false).foreach { dictionaryEnabled => + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllTypes(path, dictionaryEnabled = dictionaryEnabled, 10000) + withParquetTable(path.toString, "tbl") { + checkSparkAnswerAndOperator( + "SELECT named_struct('a', _1, 'a', _2) FROM tbl", + ProjectExec.getClass) + checkSparkAnswerAndOperator( + "SELECT named_struct('a', _1, 'a', 2) FROM tbl", + ProjectExec.getClass) + checkSparkAnswerAndOperator( + "SELECT named_struct('a', named_struct('b', _1, 'b', _2)) FROM tbl", + ProjectExec.getClass) + } + } + } + } + test("to_json") { Seq(true, false).foreach { dictionaryEnabled => withParquetTable(