Skip to content

Commit 4e87d88

Browse files
committed
ARROW-398: Java file format requires bitmaps of all 1's to be written when there are no nulls
1 parent 3b946b8 commit 4e87d88

File tree

11 files changed

+96
-30
lines changed

11 files changed

+96
-30
lines changed

java/vector/src/main/codegen/templates/NullableValueVectors.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public List<FieldVector> getChildrenFromFields() {
144144

145145
@Override
146146
public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
147-
org.apache.arrow.vector.BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
147+
org.apache.arrow.vector.BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
148148
bits.valueCount = fieldNode.getLength();
149149
}
150150

java/vector/src/main/codegen/templates/UnionVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ public List<FieldVector> getChildrenFromFields() {
105105

106106
@Override
107107
public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
108-
BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
108+
BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
109109
this.valueCount = fieldNode.getLength();
110110
}
111111

java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.List;
2222

2323
import org.apache.arrow.memory.BufferAllocator;
24+
import org.apache.arrow.vector.schema.ArrowFieldNode;
2425

2526
import io.netty.buffer.ArrowBuf;
2627

@@ -29,13 +30,13 @@ public abstract class BaseDataValueVector extends BaseValueVector implements Buf
2930

3031
protected final static byte[] emptyByteArray = new byte[]{}; // Nullable vectors use this
3132

32-
public static void load(List<BufferBacked> vectors, List<ArrowBuf> buffers) {
33+
public static void load(ArrowFieldNode fieldNode, List<BufferBacked> vectors, List<ArrowBuf> buffers) {
3334
int expectedSize = vectors.size();
3435
if (buffers.size() != expectedSize) {
3536
throw new IllegalArgumentException("Illegal buffer count, expected " + expectedSize + ", got: " + buffers.size());
3637
}
3738
for (int i = 0; i < expectedSize; i++) {
38-
vectors.get(i).load(buffers.get(i));
39+
vectors.get(i).load(fieldNode, buffers.get(i));
3940
}
4041
}
4142

@@ -106,7 +107,7 @@ public ArrowBuf getBuffer() {
106107
}
107108

108109
@Override
109-
public void load(ArrowBuf data) {
110+
public void load(ArrowFieldNode fieldNode, ArrowBuf data) {
110111
this.data.release();
111112
this.data = data.retain(allocator);
112113
}

java/vector/src/main/java/org/apache/arrow/vector/BitVector.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.arrow.vector.complex.reader.FieldReader;
2323
import org.apache.arrow.vector.holders.BitHolder;
2424
import org.apache.arrow.vector.holders.NullableBitHolder;
25+
import org.apache.arrow.vector.schema.ArrowFieldNode;
2526
import org.apache.arrow.vector.types.Types.MinorType;
2627
import org.apache.arrow.vector.types.pojo.Field;
2728
import org.apache.arrow.vector.util.OversizedAllocationException;
@@ -48,6 +49,34 @@ public BitVector(String name, BufferAllocator allocator) {
4849
super(name, allocator);
4950
}
5051

52+
@Override
53+
public void load(ArrowFieldNode fieldNode, ArrowBuf data) {
54+
// When the vector is all nulls or all defined, the content of the buffer can be omitted
55+
if (data.readableBytes() == 0 && fieldNode.getLength() != 0) {
56+
data.release();
57+
allocateNew(fieldNode.getLength());
58+
int n = getSizeFromCount(fieldNode.getLength());
59+
if (fieldNode.getNullCount() == 0) {
60+
// all defined
61+
// create an all 1s buffer
62+
for (int i = 0; i < n; ++i) {
63+
this.data.setByte(i, 0xFF);
64+
}
65+
} else if (fieldNode.getNullCount() == fieldNode.getLength()) {
66+
// all null
67+
// create an all 0s buffer
68+
for (int i = 0; i < n; ++i) {
69+
this.data.setByte(i, 0x00);
70+
}
71+
} else {
72+
throw new IllegalArgumentException("The buffer can be empty only if there's no data or it's all null or all defined");
73+
}
74+
this.data.writerIndex(n);
75+
} else {
76+
super.load(fieldNode, data);
77+
}
78+
}
79+
5180
@Override
5281
public Field getField() {
5382
throw new UnsupportedOperationException("internal vector");

java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,16 @@
1717
*/
1818
package org.apache.arrow.vector;
1919

20+
import org.apache.arrow.vector.schema.ArrowFieldNode;
21+
2022
import io.netty.buffer.ArrowBuf;
2123

2224
/**
2325
* Content is backed by a buffer and can be loaded/unloaded
2426
*/
2527
public interface BufferBacked {
2628

27-
void load(ArrowBuf data);
29+
void load(ArrowFieldNode fieldNode, ArrowBuf data);
2830

2931
ArrowBuf unLoad();
3032

java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,6 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
130130
*/
131131
FieldReader getReader();
132132

133-
/**
134-
* Get the metadata for this field. Used in serialization
135-
*
136-
* @return FieldMetadata for this field.
137-
*/
138-
// SerializedField getMetadata();
139-
140133
/**
141134
* Returns the number of bytes that is used by this vector instance.
142135
*/
@@ -166,16 +159,6 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
166159
*/
167160
ArrowBuf[] getBuffers(boolean clear);
168161

169-
/**
170-
* Load the data provided in the buffer. Typically used when deserializing from the wire.
171-
*
172-
* @param metadata
173-
* Metadata used to decode the incoming buffer.
174-
* @param buffer
175-
* The buffer that contains the ValueVector.
176-
*/
177-
// void load(SerializedField metadata, DrillBuf buffer);
178-
179162
/**
180163
* An abstraction that is used to read from this vector instance.
181164
*/

java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ private void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buf
8181
try {
8282
vector.loadFieldBuffers(fieldNode, ownBuffers);
8383
} catch (RuntimeException e) {
84-
e.printStackTrace();
8584
throw new IllegalArgumentException("Could not load buffers for field " +
8685
field + " error message" + e.getMessage(), e);
8786
}

java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public List<FieldVector> getChildrenFromFields() {
9393

9494
@Override
9595
public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
96-
BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
96+
BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
9797
}
9898

9999
@Override

java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public NullableMapVector(String name, BufferAllocator allocator, CallBack callBa
6262

6363
@Override
6464
public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
65-
BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
65+
BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
6666
this.valueCount = fieldNode.getLength();
6767
}
6868

java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717
*/
1818
package org.apache.arrow.vector;
1919

20+
import static java.util.Arrays.asList;
21+
import static org.junit.Assert.assertEquals;
22+
import static org.junit.Assert.assertNotNull;
23+
import static org.junit.Assert.assertNull;
24+
2025
import java.io.IOException;
26+
import java.util.Collections;
2127
import java.util.List;
2228

2329
import org.apache.arrow.memory.BufferAllocator;
@@ -29,12 +35,17 @@
2935
import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
3036
import org.apache.arrow.vector.complex.writer.BigIntWriter;
3137
import org.apache.arrow.vector.complex.writer.IntWriter;
38+
import org.apache.arrow.vector.schema.ArrowFieldNode;
3239
import org.apache.arrow.vector.schema.ArrowRecordBatch;
40+
import org.apache.arrow.vector.types.pojo.ArrowType;
41+
import org.apache.arrow.vector.types.pojo.Field;
3342
import org.apache.arrow.vector.types.pojo.Schema;
3443
import org.junit.AfterClass;
3544
import org.junit.Assert;
3645
import org.junit.Test;
3746

47+
import io.netty.buffer.ArrowBuf;
48+
3849
public class TestVectorUnloadLoad {
3950

4051
static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
@@ -88,6 +99,51 @@ public void testUnloadLoad() throws IOException {
8899
}
89100
}
90101

102+
/**
103+
* The validity buffer can be empty if:
104+
* - all values are defined
105+
* - all values are null
106+
* @throws IOException
107+
*/
108+
@Test
109+
public void testLoadEmptyValidityBuffer() throws IOException {
110+
Schema schema = new Schema(asList(
111+
new Field("intDefined", true, new ArrowType.Int(32, true), Collections.<Field>emptyList()),
112+
new Field("intNull", true, new ArrowType.Int(32, true), Collections.<Field>emptyList())
113+
));
114+
int count = 10;
115+
ArrowBuf validity = allocator.getEmpty();
116+
ArrowBuf values = allocator.buffer(count * 4); // integers
117+
for (int i = 0; i < count; i++) {
118+
values.setInt(i * 4, i);
119+
}
120+
try (
121+
ArrowRecordBatch recordBatch = new ArrowRecordBatch(count, asList(new ArrowFieldNode(count, 0), new ArrowFieldNode(count, count)), asList(validity, values, validity, values));
122+
BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
123+
VectorSchemaRoot newRoot = new VectorSchemaRoot(schema, finalVectorsAllocator);
124+
) {
125+
126+
// load it
127+
VectorLoader vectorLoader = new VectorLoader(newRoot);
128+
129+
vectorLoader.load(recordBatch);
130+
131+
FieldReader intDefinedReader = newRoot.getVector("intDefined").getReader();
132+
FieldReader intNullReader = newRoot.getVector("intNull").getReader();
133+
for (int i = 0; i < count; i++) {
134+
intDefinedReader.setPosition(i);
135+
intNullReader.setPosition(i);
136+
Integer defined = intDefinedReader.readInteger();
137+
assertNotNull("#" + i, defined);
138+
assertEquals("#" + i, i, defined.intValue());
139+
Integer nullVal = intNullReader.readInteger();
140+
assertNull("#" + i, nullVal);
141+
}
142+
} finally {
143+
values.release();
144+
}
145+
}
146+
91147
public static VectorUnloader newVectorUnloader(FieldVector root) {
92148
Schema schema = new Schema(root.getField().getChildren());
93149
int valueCount = root.getAccessor().getValueCount();

0 commit comments

Comments
 (0)