Skip to content

Commit db8b6aa

Browse files
authored
apacheGH-48: Implement VectorAppender for BaseVariableWidthViewVector (apache#454)
Fixes apache#48.
1 parent c69ae1a commit db8b6aa

File tree

5 files changed

+218
-28
lines changed

5 files changed

+218
-28
lines changed

vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
2020

2121
import java.util.HashSet;
22+
import java.util.List;
23+
import org.apache.arrow.memory.ArrowBuf;
2224
import org.apache.arrow.memory.util.MemoryUtil;
2325
import org.apache.arrow.util.Preconditions;
2426
import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -91,7 +93,6 @@ public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) {
9193
deltaVector.getDataBuffer(),
9294
deltaVector.getValueCount(),
9395
targetVector.getDataBuffer());
94-
9596
} else {
9697
MemoryUtil.copyMemory(
9798
deltaVector.getDataBuffer().memoryAddress(),
@@ -247,8 +248,66 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) {
247248
}
248249

249250
@Override
250-
public ValueVector visit(BaseVariableWidthViewVector left, Void value) {
251-
throw new UnsupportedOperationException("View vectors are not supported.");
251+
/**
 * Appends every value of {@code deltaVector} to the end of {@code targetVector}.
 *
 * <p>The steps run in a fixed order: grow the target's validity and view buffers until the
 * combined value count fits, concatenate the validity bits, add the delta's data buffers to the
 * target's data-buffer list (retaining each one first), copy the delta's view entries behind the
 * target's existing entries, and finally shift the buffer index stored in each copied entry
 * whose value length exceeds {@code INLINE_SIZE} by the number of data buffers the target held
 * before the append.
 *
 * @param deltaVector the view vector whose values are appended; its type is checked against the
 *     target via {@code typeVisitor}
 * @param value unused
 * @return {@code targetVector}, unchanged when {@code deltaVector} holds no values
 */
public ValueVector visit(BaseVariableWidthViewVector deltaVector, Void value) {
252+
Preconditions.checkArgument(
253+
typeVisitor.equals(deltaVector),
254+
"The targetVector to append must have the same type as the targetVector being appended");
255+
256+
if (deltaVector.getValueCount() == 0) {
257+
return targetVector; // nothing to append, return
258+
}
259+
260+
// Appended values will occupy indices [oldTargetValueCount, newValueCount) of the target.
int oldTargetValueCount = targetVector.getValueCount();
261+
int newValueCount = oldTargetValueCount + deltaVector.getValueCount();
262+
263+
// make sure there is enough capacity
264+
while (targetVector.getValueCapacity() < newValueCount) {
265+
// Do not call BaseVariableWidthViewVector#reAlloc() here,
266+
// because reallocViewDataBuffer() is always unnecessary
267+
((BaseVariableWidthViewVector) targetVector).reallocValidityBuffer();
268+
((BaseVariableWidthViewVector) targetVector).reallocViewBuffer();
269+
}
270+
271+
// append validity buffer
// The target's validity buffer is passed both as the first argument and as the last one;
// NOTE(review): presumably the last argument is the output, i.e. the bits are merged in place.
272+
BitVectorHelper.concatBits(
273+
targetVector.getValidityBuffer(),
274+
oldTargetValueCount,
275+
deltaVector.getValidityBuffer(),
276+
deltaVector.getValueCount(),
277+
targetVector.getValidityBuffer());
278+
279+
// append data buffers
280+
BaseVariableWidthViewVector targetViewVector = (BaseVariableWidthViewVector) targetVector;
281+
List<ArrowBuf> targetDataBuffers = targetViewVector.getDataBuffers();
282+
// Captured before addAll below: this is the amount by which copied buffer ids are shifted.
final int oldTargetDataBufferCount = targetDataBuffers.size();
283+
List<ArrowBuf> deltaVectorDataBuffers = deltaVector.getDataBuffers();
284+
// The delta's buffers are not copied; each one is retained and then added to the target's list.
// NOTE(review): assumes getDataBuffers() returns the target's live internal list, otherwise
// the addAll below would not affect the target - confirm.
deltaVectorDataBuffers.forEach(buf -> buf.getReferenceManager().retain());
285+
targetDataBuffers.addAll(deltaVectorDataBuffers);
286+
287+
// append view buffer
288+
ArrowBuf targetViewBuffer = targetVector.getDataBuffer();
289+
// Raw copy of ELEMENT_SIZE * deltaValueCount bytes from the start of the delta's buffer to the
// byte offset ELEMENT_SIZE * oldTargetValueCount in the target's buffer.
MemoryUtil.copyMemory(
290+
deltaVector.getDataBuffer().memoryAddress(),
291+
targetViewBuffer.memoryAddress()
292+
+ (long) BaseVariableWidthViewVector.ELEMENT_SIZE * oldTargetValueCount,
293+
(long) BaseVariableWidthViewVector.ELEMENT_SIZE * deltaVector.getValueCount());
294+
295+
// update view buffer
// Only set entries longer than INLINE_SIZE are rewritten; the int located LENGTH_WIDTH +
// PREFIX_WIDTH bytes into such an entry is read as a buffer id and increased by the number of
// data buffers the target had before the append.
296+
for (int i = oldTargetValueCount; i < newValueCount; i++) {
297+
if (targetViewVector.isSet(i) > 0
298+
&& targetViewVector.getValueLength(i) > BaseVariableWidthViewVector.INLINE_SIZE) {
299+
long start =
300+
(long) i * BaseVariableWidthViewVector.ELEMENT_SIZE
301+
+ BaseVariableWidthViewVector.LENGTH_WIDTH
302+
+ BaseVariableWidthViewVector.PREFIX_WIDTH;
303+
// shift buf id
304+
int bufferId = targetViewBuffer.getInt(start);
305+
targetViewBuffer.setInt(start, bufferId + oldTargetDataBufferCount);
306+
}
307+
}
308+
309+
// The value count is published only after all buffers have been updated.
targetVector.setValueCount(newValueCount);
310+
return targetVector;
252311
}
253312

254313
@Override

vector/src/test/java/org/apache/arrow/vector/TestUtils.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.arrow.vector;
1818

19+
import java.util.Random;
1920
import org.apache.arrow.memory.BufferAllocator;
2021
import org.apache.arrow.vector.types.Types.MinorType;
2122
import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -52,4 +53,13 @@ public static <T> T newVector(
5253
Class<T> c, String name, MinorType type, BufferAllocator allocator) {
5354
return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null));
5455
}
56+
57+
public static String generateRandomString(int length) {
58+
Random random = new Random();
59+
StringBuilder sb = new StringBuilder(length);
60+
for (int i = 0; i < length; i++) {
61+
sb.append(random.nextInt(10)); // 0-9
62+
}
63+
return sb.toString();
64+
}
5565
}

vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ public void testDataBufferBasedAllocationInSameBuffer() {
160160
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
161161
viewVarCharVector.allocateNew(48, 4);
162162
final int valueCount = 4;
163-
String str4 = generateRandomString(34);
163+
String str4 = TestUtils.generateRandomString(34);
164164
viewVarCharVector.set(0, STR1);
165165
viewVarCharVector.set(1, STR2);
166166
viewVarCharVector.set(2, STR3);
@@ -216,7 +216,7 @@ public void testDataBufferBasedAllocationInOtherBuffer() {
216216
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
217217
viewVarCharVector.allocateNew(48, 4);
218218
final int valueCount = 4;
219-
String str4 = generateRandomString(35);
219+
String str4 = TestUtils.generateRandomString(35);
220220
viewVarCharVector.set(0, STR1);
221221
viewVarCharVector.set(1, STR2);
222222
viewVarCharVector.set(2, STR3);
@@ -271,7 +271,7 @@ public void testDataBufferBasedAllocationInOtherBuffer() {
271271
public void testSetSafe() {
272272
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
273273
viewVarCharVector.allocateNew(1, 1);
274-
byte[] str6 = generateRandomString(40).getBytes();
274+
byte[] str6 = TestUtils.generateRandomString(40).getBytes();
275275
final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5, str6);
276276

277277
// set data to a position out of capacity index
@@ -305,8 +305,8 @@ public void testMixedAllocation() {
305305
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
306306
viewVarCharVector.allocateNew(128, 6);
307307
final int valueCount = 6;
308-
String str4 = generateRandomString(35);
309-
String str6 = generateRandomString(40);
308+
String str4 = TestUtils.generateRandomString(35);
309+
String str6 = TestUtils.generateRandomString(40);
310310
viewVarCharVector.set(0, STR1);
311311
viewVarCharVector.set(1, STR2);
312312
viewVarCharVector.set(2, STR3);
@@ -405,7 +405,7 @@ public void testSetNullableViewVarCharHolder() {
405405
setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), stringHolder);
406406
}
407407

408-
String longString = generateRandomString(128);
408+
String longString = TestUtils.generateRandomString(128);
409409
setAndCheck(viewVarCharVector, 6, longString.getBytes(), stringHolder);
410410
}
411411
}
@@ -441,7 +441,7 @@ public void testSetNullableViewVarBinaryHolder() {
441441
setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
442442
}
443443

444-
String longString = generateRandomString(128);
444+
String longString = TestUtils.generateRandomString(128);
445445
setAndCheck(viewVarBinaryVector, 6, longString.getBytes(), holder);
446446
}
447447
}
@@ -1169,7 +1169,7 @@ public void testOverwriteShortFromLongString() {
11691169
vector.setValueCount(5);
11701170

11711171
// overwrite index 2 with a long string
1172-
String longString = generateRandomString(128);
1172+
String longString = TestUtils.generateRandomString(128);
11731173
byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
11741174
// since the append-only approach is used and the remaining capacity
11751175
// is not enough to store the new string; a new buffer will be allocated.
@@ -1373,7 +1373,7 @@ public void testOverwriteLongFromALongerLongString() {
13731373
// since a new buffer is added to the dataBuffers
13741374
final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
13751375
final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
1376-
String longerString = generateRandomString(35);
1376+
String longerString = TestUtils.generateRandomString(35);
13771377
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
13781378
assertTrue(remainingCapacity < longerStringBytes.length);
13791379

@@ -1406,7 +1406,7 @@ public void testOverwriteLongFromALongerLongString() {
14061406
// the remaining capacity is enough to store in the same data buffer
14071407
final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
14081408
final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
1409-
String longerString = generateRandomString(24);
1409+
String longerString = TestUtils.generateRandomString(24);
14101410
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
14111411
assertTrue(remainingCapacity > longerStringBytes.length);
14121412

@@ -1505,7 +1505,7 @@ public void testSafeOverwriteShortFromLongString() {
15051505
vector.setValueCount(5);
15061506

15071507
// overwrite index 2 with a long string
1508-
String longString = generateRandomString(128);
1508+
String longString = TestUtils.generateRandomString(128);
15091509
byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
15101510

15111511
vector.setSafe(2, longStringBytes);
@@ -1671,7 +1671,7 @@ public void testSafeOverwriteLongFromALongerLongString() {
16711671
vector.setSafe(2, STR7);
16721672
vector.setValueCount(3);
16731673

1674-
String longerString = generateRandomString(35);
1674+
String longerString = TestUtils.generateRandomString(35);
16751675
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
16761676

16771677
vector.setSafe(1, longerStringBytes);
@@ -1697,7 +1697,7 @@ public void testSafeOverwriteLongFromALongerLongString() {
16971697
vector.setSafe(4, STR6);
16981698
vector.setValueCount(5);
16991699

1700-
String longerString = generateRandomString(24);
1700+
String longerString = TestUtils.generateRandomString(24);
17011701
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
17021702

17031703
vector.setSafe(2, longerStringBytes);
@@ -1869,7 +1869,7 @@ public void testCopyFromWithNulls(
18691869
// to avoid re-allocation. This is to test copyFrom() without re-allocation.
18701870
final int numberOfValues = initialCapacity / 2 / ViewVarCharVector.ELEMENT_SIZE;
18711871

1872-
final String prefixString = generateRandomString(12);
1872+
final String prefixString = TestUtils.generateRandomString(12);
18731873

18741874
for (int i = 0; i < numberOfValues; i++) {
18751875
if (i % 3 == 0) {
@@ -1965,7 +1965,7 @@ public void testCopyFromSafeWithNulls(
19651965

19661966
final int numberOfValues = initialCapacity / ViewVarCharVector.ELEMENT_SIZE;
19671967

1968-
final String prefixString = generateRandomString(12);
1968+
final String prefixString = TestUtils.generateRandomString(12);
19691969

19701970
for (int i = 0; i < numberOfValues; i++) {
19711971
if (i % 3 == 0) {
@@ -2746,7 +2746,7 @@ private void testSplitAndTransferWithMultipleDataBuffersHelper(
27462746
*/
27472747
@Test
27482748
public void testSplitAndTransferWithMultipleDataBuffers() {
2749-
final String str4 = generateRandomString(35);
2749+
final String str4 = TestUtils.generateRandomString(35);
27502750
final byte[][] data = new byte[][] {STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)};
27512751
final int startIndex = 1;
27522752
final int length = 3;
@@ -2851,13 +2851,4 @@ public void testVectorLoadUnloadOnMixedTypes() {
28512851
}
28522852
}
28532853
}
2854-
2855-
private String generateRandomString(int length) {
2856-
Random random = new Random();
2857-
StringBuilder sb = new StringBuilder(length);
2858-
for (int i = 0; i < length; i++) {
2859-
sb.append(random.nextInt(10)); // 0-9
2860-
}
2861-
return sb.toString();
2862-
}
28632854
}

vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.apache.arrow.vector.VarBinaryVector;
6161
import org.apache.arrow.vector.VarCharVector;
6262
import org.apache.arrow.vector.VariableWidthFieldVector;
63+
import org.apache.arrow.vector.ViewVarCharVector;
6364
import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
6465
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
6566
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
@@ -606,6 +607,18 @@ public static void setVector(VarCharVector vector, String... values) {
606607
vector.setValueCount(length);
607608
}
608609

610+
/** Populate values for ViewVarCharVector. */
611+
public static void setVector(ViewVarCharVector vector, String... values) {
612+
final int length = values.length;
613+
vector.allocateNewSafe();
614+
for (int i = 0; i < length; i++) {
615+
if (values[i] != null) {
616+
vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
617+
}
618+
}
619+
vector.setValueCount(length);
620+
}
621+
609622
/** Populate values for LargeVarCharVector. */
610623
public static void setVector(LargeVarCharVector vector, String... values) {
611624
final int length = values.length;

0 commit comments

Comments
 (0)