Skip to content

Commit baca69c

Browse files
committed
Add methods to count the number of null values in the vector
1 parent cfb544d commit baca69c

File tree

6 files changed

+51
-0
lines changed

6 files changed

+51
-0
lines changed

java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ protected BaseAccessor() { }
7272
// Default implementation: no index is ever reported as null.
public boolean isNull(int index) {
7373
return false;
7474
}
75+
76+
// Default implementation: reports zero nulls, in line with isNull(int) above always returning false.
@Override
77+
public int getNullCount() {
78+
return 0;
79+
}
7580
}
7681

7782
public abstract static class BaseMutator implements ValueVector.Mutator {

java/vector/src/main/java/org/apache/arrow/vector/BitVector.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,21 @@ public final void get(int index, NullableBitHolder holder) {
379379
holder.isSet = 1;
380380
holder.value = get(index);
381381
}
382+
383+
/**
384+
* Counts the bits set to 1 in the first {@code allocationSizeInBytes} bytes of {@code data}.
* NOTE(review): despite the method name, this counts set (1) bits. A caller that treats a 0 bit
* as "null" would get the non-null count from this method - confirm the intended convention.
* NOTE(review): the loop scans the whole allocation rather than only the bytes covering the
* value count; this presumably relies on unused bits being 0 - confirm.
385+
* @return the number of bits set to 1
386+
*/
387+
public final int getNullCount() {
388+
int count = 0;
389+
for (int i = 0; i < allocationSizeInBytes; ++i) {
390+
byte byteValue = data.getByte(i);
391+
// A byte is sign-extended when widened to int, so a byte such as 11111111_b (-1) would be
392+
// reported as having 32 bits set. Count only the low 7 bits, then add the sign bit back.
// (byteValue >> 7) is -1 when the sign bit is set and 0 otherwise, so subtracting it adds 1
// exactly when the most significant bit of the byte is 1.
393+
count += Integer.bitCount(byteValue & 0x7F) - (byteValue >> 7);
394+
}
395+
return count;
396+
}
382397
}
383398

384399
/**

java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@ interface Accessor {
180180
* Returns true if the value at the given index is null, false otherwise.
181181
*/
182182
boolean isNull(int index);
183+
184+
/**
185+
* Returns the number of null values in the vector.
* NOTE(review): presumably this should equal the number of indices for which
* {@link #isNull(int)} returns true - confirm that every implementation agrees.
186+
*/
187+
int getNullCount();
183188
}
184189

185190
/**

java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ public int getValueCount() {
6969
// Every index is reported as null.
public boolean isNull(int index) {
7070
return true;
7171
}
72+
73+
// Always reports zero nulls.
// NOTE(review): isNull(int) above returns true for every index, so this is only consistent if
// the vector never holds any values - confirm against getValueCount().
@Override
74+
public int getNullCount() {
75+
return 0;
76+
}
7277
};
7378

7479
private final Mutator defaultMutator = new Mutator() {

java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,11 @@ public Object getObject(int index) {
310310
// An entry is null when its bit in the `bits` vector is 0.
public boolean isNull(int index) {
311311
return bits.getAccessor().get(index) == 0;
312312
}
313+
314+
// Delegates to getNullCount() of the `bits` vector's accessor.
// NOTE(review): isNull(int) above treats a 0 bit as null, while BitVector's getNullCount() is
// documented as counting bits set to 1. As written this looks like it returns the number of
// NON-null entries - confirm and invert here or in BitVector.
@Override
315+
public int getNullCount() {
316+
return bits.getAccessor().getNullCount();
317+
}
313318
}
314319

315320
public class Mutator extends BaseRepeatedMutator {

java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,8 @@ public void testBitVector() {
301301
assertEquals(0, accessor.get(100));
302302
assertEquals(1, accessor.get(1022));
303303

304+
assertEquals(2, accessor.getNullCount());
305+
304306
// test setting the same value twice
305307
m.set(0, 1);
306308
m.set(0, 1);
@@ -315,8 +317,22 @@ public void testBitVector() {
315317
assertEquals(0, accessor.get(0));
316318
assertEquals(1, accessor.get(1));
317319

320+
// should not change
321+
assertEquals(2, accessor.getNullCount());
322+
318323
// Ensure unallocated space returns 0
319324
assertEquals(0, accessor.get(3));
325+
326+
m.set(1, 0);
327+
m.set(1022, 0);
328+
329+
assertEquals(0, accessor.getNullCount());
330+
331+
for (int i = 0; i < 1024; ++i) {
332+
m.set(i, 1);
333+
}
334+
335+
assertEquals(1024, accessor.getNullCount());
320336
}
321337
}
322338

0 commit comments

Comments
 (0)