-
Notifications
You must be signed in to change notification settings - Fork 69
GH-79: Move splitAndTransferValidityBuffer
to BaseValueVector
#777
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1521cf2
4123e3b
86887d5
260eae5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,6 +48,10 @@ public abstract class BaseValueVector implements ValueVector { | |
|
||
protected volatile FieldReader fieldReader; | ||
|
||
protected ArrowBuf validityBuffer; | ||
|
||
protected int valueCount; | ||
|
||
protected BaseValueVector(BufferAllocator allocator) { | ||
this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null"); | ||
} | ||
|
@@ -255,4 +259,116 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { | |
public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
/** | ||
* Transfer the validity buffer from `validityBuffer` to the target vector's `validityBuffer`. | ||
* Start at `startIndex` and copy `length` number of elements. If the starting index is 8 byte | ||
* aligned, then the buffer is sliced from that index and ownership is transferred. If not, | ||
* individual bytes are copied. | ||
* | ||
* @param startIndex starting index | ||
* @param length number of elements to be copied | ||
* @param target target vector | ||
*/ | ||
protected void splitAndTransferValidityBuffer( | ||
int startIndex, int length, BaseValueVector target) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Accepting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Checked that UnionVector doesn't extend it either (which is good - as it doesn't have a validity buffer) |
||
int offset = startIndex % 8; | ||
|
||
if (length <= 0) { | ||
return; | ||
} | ||
if (offset == 0) { | ||
sliceAndTransferValidityBuffer(startIndex, length, target); | ||
} else { | ||
copyValidityBuffer(startIndex, length, target); | ||
} | ||
} | ||
|
||
/** | ||
* If the start index is 8 byte aligned, slice `validityBuffer` and transfer ownership to | ||
* `target`'s `validityBuffer`. | ||
* | ||
* @param startIndex starting index | ||
* @param length number of elements to be copied | ||
* @param target target vector | ||
*/ | ||
protected void sliceAndTransferValidityBuffer( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This implementation is the one being used by the vectors inside the |
||
int startIndex, int length, BaseValueVector target) { | ||
final int firstByteSource = BitVectorHelper.byteIndex(startIndex); | ||
final int byteSizeTarget = getValidityBufferSizeFromCount(length); | ||
|
||
if (target.validityBuffer != null) { | ||
target.validityBuffer.getReferenceManager().release(); | ||
} | ||
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); | ||
target.validityBuffer.getReferenceManager().retain(1); | ||
} | ||
|
||
/** | ||
* Allocate new validity buffer for `target` and copy bytes from `validityBuffer`. Precise details | ||
* in the comments below. | ||
* | ||
* @param startIndex starting index | ||
* @param length number of elements to be copied | ||
* @param target target vector | ||
*/ | ||
protected void copyValidityBuffer(int startIndex, int length, BaseValueVector target) { | ||
final int firstByteSource = BitVectorHelper.byteIndex(startIndex); | ||
final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); | ||
final int byteSizeTarget = getValidityBufferSizeFromCount(length); | ||
final int offset = startIndex % 8; | ||
|
||
/* Copy data | ||
* When the first bit starts from the middle of a byte (offset != 0), | ||
* copy data from src BitVector. | ||
* Each byte in the target is composed by a part in i-th byte, | ||
* another part in (i+1)-th byte. | ||
*/ | ||
target.allocateValidityBuffer(byteSizeTarget); | ||
|
||
for (int i = 0; i < byteSizeTarget - 1; i++) { | ||
byte b1 = | ||
BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); | ||
byte b2 = | ||
BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); | ||
|
||
target.validityBuffer.setByte(i, (b1 + b2)); | ||
} | ||
|
||
/* Copying the last piece is done in the following manner: | ||
* if the source vector has 1 or more bytes remaining, we copy | ||
* the last piece as a byte formed by shifting data | ||
* from the current byte and the next byte. | ||
* | ||
* if the source vector has no more bytes remaining | ||
* (we are at the last byte), we copy the last piece as a byte | ||
* by shifting data from the current byte. | ||
*/ | ||
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { | ||
byte b1 = | ||
BitVectorHelper.getBitsFromCurrentByte( | ||
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); | ||
byte b2 = | ||
BitVectorHelper.getBitsFromNextByte( | ||
this.validityBuffer, firstByteSource + byteSizeTarget, offset); | ||
|
||
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); | ||
} else { | ||
byte b1 = | ||
BitVectorHelper.getBitsFromCurrentByte( | ||
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); | ||
target.validityBuffer.setByte(byteSizeTarget - 1, b1); | ||
} | ||
} | ||
|
||
/** | ||
* Allocate new validity buffer for when the bytes need to be copied over. | ||
* | ||
* @param byteSizeTarget desired size of the buffer | ||
*/ | ||
protected void allocateValidityBuffer(long byteSizeTarget) { | ||
validityBuffer = allocator.buffer(byteSizeTarget); | ||
validityBuffer.readerIndex(0); | ||
validityBuffer.setZero(0, validityBuffer.capacity()); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given many of these are very similar maybe they should delegate to a base implementation with
super.allocateValidityBuffer
then call any vector-specific logic at the end?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, there is a slight behavior change though -- the validity buffer in
BaseFixedWidthVector
was not zeroing out its elements post allocation, it is now. Should be fine though I am thinking.