Skip to content

Commit 6e509fd

Browse files
icexellossHyukjinKwon
authored andcommitted
[SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in ArrowColumnVector
## What changes were proposed in this pull request? This PR changes usage of `MapVector` in Spark codebase to use `NullableMapVector`. `MapVector` is an internal Arrow class that is not supposed to be used directly. We should use `NullableMapVector` instead. ## How was this patch tested? Existing test. Author: Li Jin <ice.xelloss@gmail.com> Closes #20239 from icexelloss/arrow-map-vector. (cherry picked from commit 4e6f8fb) Signed-off-by: hyukjinkwon <gurwls223@gmail.com>
1 parent 79ccd0c commit 6e509fd

File tree

2 files changed

+46
-3
lines changed

2 files changed

+46
-3
lines changed

sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,8 @@ public ArrowColumnVector(ValueVector vector) {
247247

248248
childColumns = new ArrowColumnVector[1];
249249
childColumns[0] = new ArrowColumnVector(listVector.getDataVector());
250-
} else if (vector instanceof MapVector) {
251-
MapVector mapVector = (MapVector) vector;
250+
} else if (vector instanceof NullableMapVector) {
251+
NullableMapVector mapVector = (NullableMapVector) vector;
252252
accessor = new StructAccessor(mapVector);
253253

254254
childColumns = new ArrowColumnVector[mapVector.size()];
@@ -553,9 +553,16 @@ final int getArrayOffset(int rowId) {
553553
}
554554
}
555555

556+
/**
557+
* Any call to "get" method will throw UnsupportedOperationException.
558+
*
559+
* Access struct values in a ArrowColumnVector doesn't use this accessor. Instead, it uses getStruct() method defined
560+
* in the parent class. Any call to "get" method in this class is a bug in the code.
561+
*
562+
*/
556563
private static class StructAccessor extends ArrowVectorAccessor {
557564

558-
StructAccessor(MapVector vector) {
565+
StructAccessor(NullableMapVector vector) {
559566
super(vector);
560567
}
561568
}

sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,42 @@ class ArrowColumnVectorSuite extends SparkFunSuite {
322322
allocator.close()
323323
}
324324

325+
test("non nullable struct") {
326+
val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
327+
val schema = new StructType().add("int", IntegerType).add("long", LongType)
328+
val vector = ArrowUtils.toArrowField("struct", schema, nullable = false, null)
329+
.createVector(allocator).asInstanceOf[NullableMapVector]
330+
331+
vector.allocateNew()
332+
val intVector = vector.getChildByOrdinal(0).asInstanceOf[IntVector]
333+
val longVector = vector.getChildByOrdinal(1).asInstanceOf[BigIntVector]
334+
335+
vector.setIndexDefined(0)
336+
intVector.setSafe(0, 1)
337+
longVector.setSafe(0, 1L)
338+
339+
vector.setIndexDefined(1)
340+
intVector.setSafe(1, 2)
341+
longVector.setNull(1)
342+
343+
vector.setValueCount(2)
344+
345+
val columnVector = new ArrowColumnVector(vector)
346+
assert(columnVector.dataType === schema)
347+
assert(columnVector.numNulls === 0)
348+
349+
val row0 = columnVector.getStruct(0, 2)
350+
assert(row0.getInt(0) === 1)
351+
assert(row0.getLong(1) === 1L)
352+
353+
val row1 = columnVector.getStruct(1, 2)
354+
assert(row1.getInt(0) === 2)
355+
assert(row1.isNullAt(1))
356+
357+
columnVector.close()
358+
allocator.close()
359+
}
360+
325361
test("struct") {
326362
val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
327363
val schema = new StructType().add("int", IntegerType).add("long", LongType)

0 commit comments

Comments
 (0)