Skip to content

Commit

Permalink
[core] Fix bug: zindexer should deal well with column null value. (a…
Browse files Browse the repository at this point in the history
  • Loading branch information
leaves12138 authored Jan 18, 2024
1 parent c8180f5 commit 0050167
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
Expand Down Expand Up @@ -145,48 +146,57 @@ public static class TypeVisitor implements DataTypeVisitor<ZProcessFunction>, Se
private final int fieldIndex;
private final int varTypeSize;

private final byte[] nullVarBytes;

/**
 * Creates a visitor bound to one field of the row.
 *
 * <p>The byte array handed back for a null field value is chosen once here: the shared
 * {@code NULL_BYTES} when {@code varTypeSize} equals {@code PRIMITIVE_BUFFER_SIZE}, otherwise
 * a dedicated all-zero array that is exactly {@code varTypeSize} bytes long.
 *
 * @param index position of the field this visitor reads from each row
 * @param varTypeSize number of bytes used for variable-length types
 */
public TypeVisitor(int index, int varTypeSize) {
    this.fieldIndex = index;
    this.varTypeSize = varTypeSize;

    if (varTypeSize != PRIMITIVE_BUFFER_SIZE) {
        // Null placeholder sized to match the non-null output of variable-length fields.
        byte[] zeros = new byte[varTypeSize];
        Arrays.fill(zeros, (byte) 0x00);
        this.nullVarBytes = zeros;
    } else {
        this.nullVarBytes = NULL_BYTES;
    }
}

@Override
public ZProcessFunction visit(CharType charType) {
return (row, reuse) -> {
BinaryString binaryString = row.getString(fieldIndex);

return row.isNullAt(fieldIndex)
? NULL_BYTES
: ZOrderByteUtils.byteTruncateOrFill(
MemorySegmentUtils.getBytes(
binaryString.getSegments(),
binaryString.getOffset(),
Math.min(
varTypeSize,
binaryString.getSizeInBytes())),
varTypeSize,
reuse)
.array();
if (row.isNullAt(fieldIndex)) {
return nullVarBytes;
} else {
BinaryString binaryString = row.getString(fieldIndex);

return ZOrderByteUtils.byteTruncateOrFill(
MemorySegmentUtils.getBytes(
binaryString.getSegments(),
binaryString.getOffset(),
Math.min(varTypeSize, binaryString.getSizeInBytes())),
varTypeSize,
reuse)
.array();
}
};
}

@Override
public ZProcessFunction visit(VarCharType varCharType) {
return (row, reuse) -> {
BinaryString binaryString = row.getString(fieldIndex);

return row.isNullAt(fieldIndex)
? NULL_BYTES
: ZOrderByteUtils.byteTruncateOrFill(
MemorySegmentUtils.getBytes(
binaryString.getSegments(),
binaryString.getOffset(),
Math.min(
varTypeSize,
binaryString.getSizeInBytes())),
varTypeSize,
reuse)
.array();
if (row.isNullAt(fieldIndex)) {
return nullVarBytes;
} else {
BinaryString binaryString = row.getString(fieldIndex);

return ZOrderByteUtils.byteTruncateOrFill(
MemorySegmentUtils.getBytes(
binaryString.getSegments(),
binaryString.getOffset(),
Math.min(varTypeSize, binaryString.getSizeInBytes())),
varTypeSize,
reuse)
.array();
}
};
}

Expand All @@ -206,7 +216,7 @@ public ZProcessFunction visit(BooleanType booleanType) {
public ZProcessFunction visit(BinaryType binaryType) {
return (row, reuse) ->
row.isNullAt(fieldIndex)
? NULL_BYTES
? nullVarBytes
: ZOrderByteUtils.byteTruncateOrFill(
row.getBinary(fieldIndex), varTypeSize, reuse)
.array();
Expand All @@ -216,7 +226,7 @@ public ZProcessFunction visit(BinaryType binaryType) {
public ZProcessFunction visit(VarBinaryType varBinaryType) {
return (row, reuse) ->
row.isNullAt(fieldIndex)
? NULL_BYTES
? nullVarBytes
: ZOrderByteUtils.byteTruncateOrFill(
row.getBinary(fieldIndex), varTypeSize, reuse)
.array();
Expand Down Expand Up @@ -380,5 +390,7 @@ public byte[] zvalue(InternalRow o) {
}
}

interface ZProcessFunction extends BiFunction<InternalRow, ByteBuffer, byte[]>, Serializable {}
/** Process function interface. */
public interface ZProcessFunction
extends BiFunction<InternalRow, ByteBuffer, byte[]>, Serializable {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,39 @@ public void testZIndexerForVarchar() {
}
}

/**
 * Indexes rows whose first varchar column holds a random string and whose second varchar
 * column is null, and checks the result byte by byte against an interleaving in which the
 * null column contributes {@code varTypeSize} zero bytes.
 */
@Test
public void testZIndexerForVarcharWithNull() {
    int varTypeSize = 10;
    RowType rowType = RowType.of(new VarCharType(), new VarCharType());

    ZIndexer zIndexer = new ZIndexer(rowType, Arrays.asList("f0", "f1"), varTypeSize);
    zIndexer.open();

    // Expected encoding of the null column: varTypeSize bytes, all zero.
    byte[] nullBytes = new byte[varTypeSize];
    Arrays.fill(nullBytes, (byte) 0x00);

    for (int round = 0; round < 1000; round++) {
        BinaryString value = BinaryString.fromString(randomString(varTypeSize + 1));
        InternalRow row = GenericRow.of(value, null);

        byte[] actual = zIndexer.index(row);

        // Build the expected z-value from the per-column byte encodings.
        ByteBuffer buffer = ByteBuffer.allocate(varTypeSize);
        ZOrderByteUtils.stringToOrderedBytes(value.toString(), varTypeSize, buffer);
        byte[][] columns = new byte[2][];
        columns[0] = Arrays.copyOf(buffer.array(), varTypeSize);
        columns[1] = nullBytes;

        int totalBytes = columns.length * varTypeSize;
        byte[] expected = ZOrderByteUtils.interleaveBits(columns, totalBytes);

        for (int pos = 0; pos < totalBytes; pos++) {
            Assertions.assertThat(actual[pos]).isEqualTo(expected[pos]);
        }
    }
}

public static String randomString(int length) {
byte[] buffer = new byte[length];

Expand Down

0 comments on commit 0050167

Please sign in to comment.