Skip to content

Commit ca07f74

Browse files
committed
Generalize nested column selection
* MISC: Change exception in `ParquetRowConverter.fieldConverters` to RuntimeException
1 parent 9709e96 commit ca07f74

File tree

2 files changed

+36
-31
lines changed

2 files changed

+36
-31
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ private[parquet] class ParquetRowConverter(
186186
}
187187
parquetType.getFields.asScala.map { parquetField =>
188188
val fieldIndex = catalystFieldNameToIndex.getOrElse(parquetField.getName,
189-
throw new IllegalArgumentException(
189+
throw new RuntimeException(
190190
s"${parquetField.getName} does not exist. " +
191191
s"Available: ${catalystType.fieldNames.mkString(", ")}")
192192
)

sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -843,36 +843,41 @@ class FileBasedDataSourceSuite extends QueryTest
843843
}
844844
}
845845

846-
test("SPARK-31116: Select nested parquet with case insensitive mode") {
847-
Seq("true", "false").foreach { nestedSchemaPruningEnabled =>
848-
withSQLConf(
849-
SQLConf.CASE_SENSITIVE.key -> "false",
850-
SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedSchemaPruningEnabled) {
851-
withTempPath { dir =>
852-
val path = dir.getCanonicalPath
853-
854-
// Prepare values for testing nested parquet data
855-
spark
856-
.range(1L)
857-
.selectExpr("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn")
858-
.write.parquet(path)
859-
860-
val exactSchema = "StructColumn struct<lowercase: LONG, camelCase: LONG>"
861-
862-
checkAnswer(spark.read.schema(exactSchema).parquet(path), Row(Row(0, 1)))
863-
864-
// In case-insensitive mode, the case of Parquet's column names is ignored
865-
val innerColumnCaseInsensitiveSchema =
866-
"StructColumn struct<Lowercase: LONG, camelcase: LONG>"
867-
checkAnswer(
868-
spark.read.schema(innerColumnCaseInsensitiveSchema).parquet(path),
869-
Row(Row(0, 1)))
870-
871-
val rootColumnCaseInsensitiveSchema =
872-
"structColumn struct<lowercase: LONG, camelCase: LONG>"
873-
checkAnswer(
874-
spark.read.schema(rootColumnCaseInsensitiveSchema).parquet(path),
875-
Row(Row(0, 1)))
846+
test("SPARK-31116: Select nested schema with case insensitive mode") {
847+
// This test case failed only for Parquet. ORC is added for test coverage parity.
848+
Seq("orc", "parquet").foreach { format =>
849+
Seq("true", "false").foreach { nestedSchemaPruningEnabled =>
850+
withSQLConf(
851+
SQLConf.CASE_SENSITIVE.key -> "false",
852+
SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> nestedSchemaPruningEnabled) {
853+
withTempPath { dir =>
854+
val path = dir.getCanonicalPath
855+
856+
// Prepare values for testing nested parquet data
857+
spark
858+
.range(1L)
859+
.selectExpr("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn")
860+
.write
861+
.format(format)
862+
.save(path)
863+
864+
val exactSchema = "StructColumn struct<lowercase: LONG, camelCase: LONG>"
865+
866+
checkAnswer(spark.read.schema(exactSchema).format(format).load(path), Row(Row(0, 1)))
867+
868+
// In case-insensitive mode, the case of Parquet's column names is ignored
869+
val innerColumnCaseInsensitiveSchema =
870+
"StructColumn struct<Lowercase: LONG, camelcase: LONG>"
871+
checkAnswer(
872+
spark.read.schema(innerColumnCaseInsensitiveSchema).format(format).load(path),
873+
Row(Row(0, 1)))
874+
875+
val rootColumnCaseInsensitiveSchema =
876+
"structColumn struct<lowercase: LONG, camelCase: LONG>"
877+
checkAnswer(
878+
spark.read.schema(rootColumnCaseInsensitiveSchema).format(format).load(path),
879+
Row(Row(0, 1)))
880+
}
876881
}
877882
}
878883
}

0 commit comments

Comments
 (0)