Skip to content

Commit 8b94edb

Browse files
stefankandic authored and ilicmarkodb committed
[SPARK-49013] Change key in collationsMap for Map and Array types in scala
### What changes were proposed in this pull request?
When deserializing a map or array that is not part of a struct field, the key in the collations map should be just `{"element": collation}` instead of `{".element": collation}` (and likewise `key`/`value` rather than `.key`/`.value` for maps).

### Why are the changes needed?
To be consistent with the behavior on the PySpark side (apache#46737).

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Unit tests.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#47497 from stefankandic/complexTypeDeSer.

Authored-by: Stefan Kandic <stefan.kandic@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent f97c111 commit 8b94edb

File tree

2 files changed

+55
-3
lines changed

2 files changed

+55
-3
lines changed

sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ object DataType {
223223
("elementType", t: JValue),
224224
("type", JString("array"))) =>
225225
assertValidTypeForCollations(fieldPath, "array", collationsMap)
226-
val elementType = parseDataType(t, fieldPath + ".element", collationsMap)
226+
val elementType = parseDataType(t, appendFieldToPath(fieldPath, "element"), collationsMap)
227227
ArrayType(elementType, n)
228228

229229
case JSortedObject(
@@ -232,8 +232,8 @@ object DataType {
232232
("valueContainsNull", JBool(n)),
233233
("valueType", v: JValue)) =>
234234
assertValidTypeForCollations(fieldPath, "map", collationsMap)
235-
val keyType = parseDataType(k, fieldPath + ".key", collationsMap)
236-
val valueType = parseDataType(v, fieldPath + ".value", collationsMap)
235+
val keyType = parseDataType(k, appendFieldToPath(fieldPath, "key"), collationsMap)
236+
val valueType = parseDataType(v, appendFieldToPath(fieldPath, "value"), collationsMap)
237237
MapType(keyType, valueType, n)
238238

239239
case JSortedObject(
@@ -304,6 +304,13 @@ object DataType {
304304
}
305305
}
306306

307+
/**
308+
* Appends a field name to a given path, using a dot separator if the path is not empty.
309+
*/
310+
private def appendFieldToPath(basePath: String, fieldName: String): String = {
311+
if (basePath.isEmpty) fieldName else s"$basePath.$fieldName"
312+
}
313+
307314
/**
308315
* Returns a map of field path to collation name.
309316
*/

sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.sql.types
1919

2020
import com.fasterxml.jackson.core.JsonParseException
21+
import org.json4s.jackson.JsonMethods
2122

2223
import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException}
2324
import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution}
@@ -1001,6 +1002,50 @@ class DataTypeSuite extends SparkFunSuite {
10011002
)
10021003
}
10031004

1005+
test("parse array type with collation metadata") {
1006+
val unicodeCollationId = CollationFactory.collationNameToId("UNICODE")
1007+
val arrayJson =
1008+
s"""
1009+
|{
1010+
| "type": "array",
1011+
| "elementType": "string",
1012+
| "containsNull": true
1013+
|}
1014+
|""".stripMargin
1015+
1016+
val collationsMap = Map("element" -> "UNICODE")
1017+
1018+
// Parse without collations map
1019+
assert(DataType.parseDataType(JsonMethods.parse(arrayJson)) === ArrayType(StringType))
1020+
1021+
val parsedWithCollations = DataType.parseDataType(
1022+
JsonMethods.parse(arrayJson), collationsMap = collationsMap)
1023+
assert(parsedWithCollations === ArrayType(StringType(unicodeCollationId)))
1024+
}
1025+
1026+
test("parse map type with collation metadata") {
1027+
val unicodeCollationId = CollationFactory.collationNameToId("UNICODE")
1028+
val mapJson =
1029+
s"""
1030+
|{
1031+
| "type": "map",
1032+
| "keyType": "string",
1033+
| "valueType": "string",
1034+
| "valueContainsNull": true
1035+
|}
1036+
|""".stripMargin
1037+
1038+
val collationsMap = Map("key" -> "UNICODE", "value" -> "UNICODE")
1039+
1040+
// Parse without collations map
1041+
assert(DataType.parseDataType(JsonMethods.parse(mapJson)) === MapType(StringType, StringType))
1042+
1043+
val parsedWithCollations = DataType.parseDataType(
1044+
JsonMethods.parse(mapJson), collationsMap = collationsMap)
1045+
assert(parsedWithCollations ===
1046+
MapType(StringType(unicodeCollationId), StringType(unicodeCollationId)))
1047+
}
1048+
10041049
test("SPARK-48680: Add CharType and VarcharType to DataTypes JAVA API") {
10051050
assert(DataTypes.createCharType(1) === CharType(1))
10061051
assert(DataTypes.createVarcharType(100) === VarcharType(100))

0 commit comments

Comments
 (0)