Skip to content

Commit acbc138

Browse files
authored
GH-79: Move splitAndTransferValidityBuffer to BaseValueVector (#777)
## What's Changed

Move `splitAndTransferValidityBuffer` up to `BaseValueVector`. This PR does not touch the implementation of this function in `StructVector`: that class does not derive from `BaseValueVector`, so some amount of duplication there is probably fine. Closes #79
1 parent 1795832 commit acbc138

14 files changed

+182
-705
lines changed

vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java

Lines changed: 15 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
4949

5050
protected final Field field;
5151
private int allocationMonitor;
52-
protected ArrowBuf validityBuffer;
5352
protected ArrowBuf valueBuffer;
54-
protected int valueCount;
5553

5654
/**
5755
* Constructs a new instance.
@@ -87,7 +85,7 @@ public String getName() {
8785

8886
/* TODO:
8987
* Once the entire hierarchy has been refactored, move common functions
90-
* like getNullCount(), splitAndTransferValidityBuffer to top level
88+
* like getNullCount() to top level
9189
* base class BaseValueVector.
9290
*
9391
* Along with this, some class members (validityBuffer) can also be
@@ -342,9 +340,9 @@ private void allocateBytes(int valueCount) {
342340
* slice the source buffer so we have to explicitly allocate the validityBuffer of the target
343341
* vector. This is unlike the databuffer which we can always slice for the target vector.
344342
*/
345-
private void allocateValidityBuffer(final int validityBufferSize) {
346-
validityBuffer = allocator.buffer(validityBufferSize);
347-
validityBuffer.readerIndex(0);
343+
@Override
344+
protected void allocateValidityBuffer(final long validityBufferSize) {
345+
super.allocateValidityBuffer(validityBufferSize);
348346
refreshValueCapacity();
349347
}
350348

@@ -656,72 +654,18 @@ private void splitAndTransferValueBuffer(
656654
target.refreshValueCapacity();
657655
}
658656

659-
/**
660-
* Validity buffer has multiple cases of split and transfer depending on the starting position of
661-
* the source index.
662-
*/
663-
private void splitAndTransferValidityBuffer(
664-
int startIndex, int length, BaseFixedWidthVector target) {
665-
int firstByteSource = BitVectorHelper.byteIndex(startIndex);
666-
int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
667-
int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length);
668-
int offset = startIndex % 8;
669-
670-
if (length > 0) {
671-
if (offset == 0) {
672-
/* slice */
673-
if (target.validityBuffer != null) {
674-
target.validityBuffer.getReferenceManager().release();
675-
}
676-
ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
677-
target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
678-
target.refreshValueCapacity();
679-
} else {
680-
/* Copy data
681-
* When the first bit starts from the middle of a byte (offset != 0),
682-
* copy data from src BitVector.
683-
* Each byte in the target is composed by a part in i-th byte,
684-
* another part in (i+1)-th byte.
685-
*/
686-
target.allocateValidityBuffer(byteSizeTarget);
687-
688-
for (int i = 0; i < byteSizeTarget - 1; i++) {
689-
byte b1 =
690-
BitVectorHelper.getBitsFromCurrentByte(
691-
this.validityBuffer, firstByteSource + i, offset);
692-
byte b2 =
693-
BitVectorHelper.getBitsFromNextByte(
694-
this.validityBuffer, firstByteSource + i + 1, offset);
695-
696-
target.validityBuffer.setByte(i, (b1 + b2));
697-
}
698-
699-
/* Copying the last piece is done in the following manner:
700-
* if the source vector has 1 or more bytes remaining, we copy
701-
* the last piece as a byte formed by shifting data
702-
* from the current byte and the next byte.
703-
*
704-
* if the source vector has no more bytes remaining
705-
* (we are at the last byte), we copy the last piece as a byte
706-
* by shifting data from the current byte.
707-
*/
708-
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
709-
byte b1 =
710-
BitVectorHelper.getBitsFromCurrentByte(
711-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
712-
byte b2 =
713-
BitVectorHelper.getBitsFromNextByte(
714-
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
715-
716-
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
717-
} else {
718-
byte b1 =
719-
BitVectorHelper.getBitsFromCurrentByte(
720-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
721-
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
722-
}
723-
}
657+
@Override
658+
protected void sliceAndTransferValidityBuffer(
659+
int startIndex, int length, BaseValueVector target) {
660+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
661+
final int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length);
662+
663+
if (target.validityBuffer != null) {
664+
target.validityBuffer.getReferenceManager().release();
724665
}
666+
ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
667+
target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
668+
((BaseFixedWidthVector) target).refreshValueCapacity();
725669
}
726670

727671
/*----------------------------------------------------------------*

vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java

Lines changed: 12 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,8 @@ public abstract class BaseLargeVariableWidthVector extends BaseValueVector
5252
/* protected members */
5353
public static final int OFFSET_WIDTH = 8; /* 8 byte unsigned int to track offsets */
5454
protected static final byte[] emptyByteArray = new byte[] {};
55-
protected ArrowBuf validityBuffer;
5655
protected ArrowBuf valueBuffer;
5756
protected ArrowBuf offsetBuffer;
58-
protected int valueCount;
5957
protected int lastSet;
6058
protected final Field field;
6159

@@ -501,10 +499,9 @@ private ArrowBuf allocateOffsetBuffer(final long size) {
501499
}
502500

503501
/* allocate validity buffer */
504-
private void allocateValidityBuffer(final long size) {
505-
validityBuffer = allocator.buffer(size);
506-
validityBuffer.readerIndex(0);
507-
initValidityBuffer();
502+
@Override
503+
protected void allocateValidityBuffer(final long size) {
504+
super.allocateValidityBuffer(size);
508505
}
509506

510507
/**
@@ -809,69 +806,17 @@ private void splitAndTransferOffsetBuffer(
809806
target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
810807
}
811808

812-
/*
813-
* Transfer the validity.
814-
*/
815-
private void splitAndTransferValidityBuffer(
816-
int startIndex, int length, BaseLargeVariableWidthVector target) {
817-
int firstByteSource = BitVectorHelper.byteIndex(startIndex);
818-
int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
819-
int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length);
820-
int offset = startIndex % 8;
809+
@Override
810+
protected void sliceAndTransferValidityBuffer(
811+
int startIndex, int length, BaseValueVector target) {
812+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
813+
final int byteSizeTarget = BitVectorHelper.getValidityBufferSizeFromCount(length);
821814

822-
if (length > 0) {
823-
if (offset == 0) {
824-
// slice
825-
if (target.validityBuffer != null) {
826-
target.validityBuffer.getReferenceManager().release();
827-
}
828-
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
829-
target.validityBuffer.getReferenceManager().retain();
830-
} else {
831-
/* Copy data
832-
* When the first bit starts from the middle of a byte (offset != 0),
833-
* copy data from src BitVector.
834-
* Each byte in the target is composed by a part in i-th byte,
835-
* another part in (i+1)-th byte.
836-
*/
837-
target.allocateValidityBuffer(byteSizeTarget);
838-
839-
for (int i = 0; i < byteSizeTarget - 1; i++) {
840-
byte b1 =
841-
BitVectorHelper.getBitsFromCurrentByte(
842-
this.validityBuffer, firstByteSource + i, offset);
843-
byte b2 =
844-
BitVectorHelper.getBitsFromNextByte(
845-
this.validityBuffer, firstByteSource + i + 1, offset);
846-
847-
target.validityBuffer.setByte(i, (b1 + b2));
848-
}
849-
/* Copying the last piece is done in the following manner:
850-
* if the source vector has 1 or more bytes remaining, we copy
851-
* the last piece as a byte formed by shifting data
852-
* from the current byte and the next byte.
853-
*
854-
* if the source vector has no more bytes remaining
855-
* (we are at the last byte), we copy the last piece as a byte
856-
* by shifting data from the current byte.
857-
*/
858-
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
859-
byte b1 =
860-
BitVectorHelper.getBitsFromCurrentByte(
861-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
862-
byte b2 =
863-
BitVectorHelper.getBitsFromNextByte(
864-
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
865-
866-
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
867-
} else {
868-
byte b1 =
869-
BitVectorHelper.getBitsFromCurrentByte(
870-
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
871-
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
872-
}
873-
}
815+
if (target.validityBuffer != null) {
816+
target.validityBuffer.getReferenceManager().release();
874817
}
818+
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
819+
target.validityBuffer.getReferenceManager().retain();
875820
}
876821

877822
/*----------------------------------------------------------------*

vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public abstract class BaseValueVector implements ValueVector {
4848

4949
protected volatile FieldReader fieldReader;
5050

51+
protected ArrowBuf validityBuffer;
52+
53+
protected int valueCount;
54+
5155
protected BaseValueVector(BufferAllocator allocator) {
5256
this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null");
5357
}
@@ -255,4 +259,116 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
255259
public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
256260
throw new UnsupportedOperationException();
257261
}
262+
263+
/**
264+
* Transfer the validity buffer from `validityBuffer` to the target vector's `validityBuffer`.
265+
* Start at `startIndex` and copy `length` number of elements. If the starting index is byte
265+
* aligned (a multiple of 8), the buffer is sliced from that index and ownership is transferred. If not,
267+
* individual bytes are copied.
268+
*
269+
* @param startIndex starting index
270+
* @param length number of elements to be copied
271+
* @param target target vector
272+
*/
273+
protected void splitAndTransferValidityBuffer(
274+
int startIndex, int length, BaseValueVector target) {
275+
int offset = startIndex % 8;
276+
277+
if (length <= 0) {
278+
return;
279+
}
280+
if (offset == 0) {
281+
sliceAndTransferValidityBuffer(startIndex, length, target);
282+
} else {
283+
copyValidityBuffer(startIndex, length, target);
284+
}
285+
}
286+
287+
/**
288+
* If the start index is byte aligned (a multiple of 8), slice `validityBuffer` and transfer ownership to
289+
* `target`'s `validityBuffer`.
290+
*
291+
* @param startIndex starting index
292+
* @param length number of elements to be copied
293+
* @param target target vector
294+
*/
295+
protected void sliceAndTransferValidityBuffer(
296+
int startIndex, int length, BaseValueVector target) {
297+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
298+
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
299+
300+
if (target.validityBuffer != null) {
301+
target.validityBuffer.getReferenceManager().release();
302+
}
303+
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
304+
target.validityBuffer.getReferenceManager().retain(1);
305+
}
306+
307+
/**
308+
* Allocate new validity buffer for `target` and copy bytes from `validityBuffer`. Precise details
309+
* in the comments below.
310+
*
311+
* @param startIndex starting index
312+
* @param length number of elements to be copied
313+
* @param target target vector
314+
*/
315+
protected void copyValidityBuffer(int startIndex, int length, BaseValueVector target) {
316+
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
317+
final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
318+
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
319+
final int offset = startIndex % 8;
320+
321+
/* Copy data
322+
* When the first bit starts from the middle of a byte (offset != 0),
323+
* copy data from src BitVector.
324+
* Each byte in the target is composed by a part in i-th byte,
325+
* another part in (i+1)-th byte.
326+
*/
327+
target.allocateValidityBuffer(byteSizeTarget);
328+
329+
for (int i = 0; i < byteSizeTarget - 1; i++) {
330+
byte b1 =
331+
BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
332+
byte b2 =
333+
BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
334+
335+
target.validityBuffer.setByte(i, (b1 + b2));
336+
}
337+
338+
/* Copying the last piece is done in the following manner:
339+
* if the source vector has 1 or more bytes remaining, we copy
340+
* the last piece as a byte formed by shifting data
341+
* from the current byte and the next byte.
342+
*
343+
* if the source vector has no more bytes remaining
344+
* (we are at the last byte), we copy the last piece as a byte
345+
* by shifting data from the current byte.
346+
*/
347+
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
348+
byte b1 =
349+
BitVectorHelper.getBitsFromCurrentByte(
350+
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
351+
byte b2 =
352+
BitVectorHelper.getBitsFromNextByte(
353+
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
354+
355+
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
356+
} else {
357+
byte b1 =
358+
BitVectorHelper.getBitsFromCurrentByte(
359+
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
360+
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
361+
}
362+
}
363+
364+
/**
365+
* Allocate new validity buffer for when the bytes need to be copied over.
366+
*
367+
* @param byteSizeTarget desired size of the buffer
368+
*/
369+
protected void allocateValidityBuffer(long byteSizeTarget) {
370+
validityBuffer = allocator.buffer(byteSizeTarget);
371+
validityBuffer.readerIndex(0);
372+
validityBuffer.setZero(0, validityBuffer.capacity());
373+
}
258374
}

0 commit comments

Comments
 (0)