Skip to content

Commit 328e339

Browse files
jbewing authored and bbeaudreault committed
HubSpot Backport: HBASE-28012 Avoid CellUtil.cloneRow in BufferedEncodedSeeker (apache#5347)
Signed-off-by: Duo Zhang <zhangduo@apache.org> (cherry picked from commit 2fb2ae1)
1 parent 723af51 commit 328e339

File tree

3 files changed

+223
-23
lines changed

3 files changed

+223
-23
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java

Lines changed: 134 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -87,21 +87,100 @@ public ByteBuffer decodeKeyValues(DataInputStream source,
8787
// Having this as static is fine but if META is having DBE then we should
8888
// change this.
8989
public static int compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) {
90-
return Bytes.compareTo(left.getRowArray(), left.getRowOffset() + rowCommonPrefix,
91-
left.getRowLength() - rowCommonPrefix, right.getRowArray(),
92-
right.getRowOffset() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix);
90+
if (left instanceof ByteBufferExtendedCell) {
91+
ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
92+
if (right instanceof ByteBufferExtendedCell) {
93+
ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
94+
return ByteBufferUtils.compareTo(bbLeft.getRowByteBuffer(),
95+
bbLeft.getRowPosition() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix,
96+
bbRight.getRowByteBuffer(), bbRight.getRowPosition() + rowCommonPrefix,
97+
right.getRowLength() - rowCommonPrefix);
98+
} else {
99+
return ByteBufferUtils.compareTo(bbLeft.getRowByteBuffer(),
100+
bbLeft.getRowPosition() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix,
101+
right.getRowArray(), right.getRowOffset() + rowCommonPrefix,
102+
right.getRowLength() - rowCommonPrefix);
103+
}
104+
} else {
105+
if (right instanceof ByteBufferExtendedCell) {
106+
ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
107+
return ByteBufferUtils.compareTo(left.getRowArray(), left.getRowOffset() + rowCommonPrefix,
108+
left.getRowLength() - rowCommonPrefix, bbRight.getRowByteBuffer(),
109+
bbRight.getRowPosition() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix);
110+
} else {
111+
return Bytes.compareTo(left.getRowArray(), left.getRowOffset() + rowCommonPrefix,
112+
left.getRowLength() - rowCommonPrefix, right.getRowArray(),
113+
right.getRowOffset() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix);
114+
}
115+
}
93116
}
94117

95118
public static int compareCommonFamilyPrefix(Cell left, Cell right, int familyCommonPrefix) {
96-
return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix,
97-
left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
98-
right.getFamilyOffset() + familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix);
119+
if (left instanceof ByteBufferExtendedCell) {
120+
ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
121+
if (right instanceof ByteBufferExtendedCell) {
122+
ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
123+
return ByteBufferUtils.compareTo(bbLeft.getFamilyByteBuffer(),
124+
bbLeft.getFamilyPosition() + familyCommonPrefix,
125+
left.getFamilyLength() - familyCommonPrefix, bbRight.getFamilyByteBuffer(),
126+
bbRight.getFamilyPosition() + familyCommonPrefix,
127+
right.getFamilyLength() - familyCommonPrefix);
128+
} else {
129+
return ByteBufferUtils.compareTo(bbLeft.getFamilyByteBuffer(),
130+
bbLeft.getFamilyPosition() + familyCommonPrefix,
131+
left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
132+
right.getFamilyOffset() + familyCommonPrefix,
133+
right.getFamilyLength() - familyCommonPrefix);
134+
}
135+
} else {
136+
if (right instanceof ByteBufferExtendedCell) {
137+
ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
138+
return ByteBufferUtils.compareTo(left.getFamilyArray(),
139+
left.getFamilyOffset() + familyCommonPrefix, left.getFamilyLength() - familyCommonPrefix,
140+
bbRight.getFamilyByteBuffer(), bbRight.getFamilyPosition() + familyCommonPrefix,
141+
right.getFamilyLength() - familyCommonPrefix);
142+
} else {
143+
return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix,
144+
left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
145+
right.getFamilyOffset() + familyCommonPrefix,
146+
right.getFamilyLength() - familyCommonPrefix);
147+
}
148+
}
99149
}
100150

101151
public static int compareCommonQualifierPrefix(Cell left, Cell right, int qualCommonPrefix) {
102-
return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix,
103-
left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
104-
right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength() - qualCommonPrefix);
152+
if (left instanceof ByteBufferExtendedCell) {
153+
ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
154+
if (right instanceof ByteBufferExtendedCell) {
155+
ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
156+
return ByteBufferUtils.compareTo(bbLeft.getQualifierByteBuffer(),
157+
bbLeft.getQualifierPosition() + qualCommonPrefix,
158+
left.getQualifierLength() - qualCommonPrefix, bbRight.getQualifierByteBuffer(),
159+
bbRight.getQualifierPosition() + qualCommonPrefix,
160+
right.getQualifierLength() - qualCommonPrefix);
161+
} else {
162+
return ByteBufferUtils.compareTo(bbLeft.getQualifierByteBuffer(),
163+
bbLeft.getQualifierPosition() + qualCommonPrefix,
164+
left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
165+
right.getQualifierOffset() + qualCommonPrefix,
166+
right.getQualifierLength() - qualCommonPrefix);
167+
}
168+
} else {
169+
if (right instanceof ByteBufferExtendedCell) {
170+
ByteBufferExtendedCell bbRight = (ByteBufferExtendedCell) right;
171+
return ByteBufferUtils.compareTo(left.getQualifierArray(),
172+
left.getQualifierOffset() + qualCommonPrefix,
173+
left.getQualifierLength() - qualCommonPrefix, bbRight.getQualifierByteBuffer(),
174+
bbRight.getQualifierPosition() + qualCommonPrefix,
175+
right.getQualifierLength() - qualCommonPrefix);
176+
} else {
177+
return Bytes.compareTo(left.getQualifierArray(),
178+
left.getQualifierOffset() + qualCommonPrefix,
179+
left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
180+
right.getQualifierOffset() + qualCommonPrefix,
181+
right.getQualifierLength() - qualCommonPrefix);
182+
}
183+
}
105184
}
106185

107186
protected static class SeekerState {
@@ -954,25 +1033,57 @@ private int compareTypeBytes(Cell key, Cell right) {
9541033
return 0;
9551034
}
9561035

957-
private static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) {
958-
return Bytes.findCommonPrefix(left.getRowArray(), right.getRowArray(),
959-
left.getRowLength() - rowCommonPrefix, right.getRowLength() - rowCommonPrefix,
960-
left.getRowOffset() + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix);
1036+
// These findCommonPrefix* methods rely on the fact that keyOnlyKv is the "right" cell argument
1037+
// and always on-heap
1038+
1039+
private static int findCommonPrefixInRowPart(Cell left, KeyValue.KeyOnlyKeyValue right,
1040+
int rowCommonPrefix) {
1041+
if (left instanceof ByteBufferExtendedCell) {
1042+
ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
1043+
return ByteBufferUtils.findCommonPrefix(bbLeft.getRowByteBuffer(),
1044+
bbLeft.getRowPosition() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix,
1045+
right.getRowArray(), right.getRowOffset() + rowCommonPrefix,
1046+
right.getRowLength() - rowCommonPrefix);
1047+
} else {
1048+
return Bytes.findCommonPrefix(left.getRowArray(), right.getRowArray(),
1049+
left.getRowLength() - rowCommonPrefix, right.getRowLength() - rowCommonPrefix,
1050+
left.getRowOffset() + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix);
1051+
}
9611052
}
9621053

963-
private static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) {
964-
return Bytes.findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(),
965-
left.getFamilyLength() - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix,
966-
left.getFamilyOffset() + familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix);
1054+
private static int findCommonPrefixInFamilyPart(Cell left, KeyValue.KeyOnlyKeyValue right,
1055+
int familyCommonPrefix) {
1056+
if (left instanceof ByteBufferExtendedCell) {
1057+
ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
1058+
return ByteBufferUtils.findCommonPrefix(bbLeft.getFamilyByteBuffer(),
1059+
bbLeft.getFamilyPosition() + familyCommonPrefix,
1060+
left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
1061+
right.getFamilyOffset() + familyCommonPrefix,
1062+
right.getFamilyLength() - familyCommonPrefix);
1063+
} else {
1064+
return Bytes.findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(),
1065+
left.getFamilyLength() - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix,
1066+
left.getFamilyOffset() + familyCommonPrefix,
1067+
right.getFamilyOffset() + familyCommonPrefix);
1068+
}
9671069
}
9681070

969-
private static int findCommonPrefixInQualifierPart(Cell left, Cell right,
1071+
private static int findCommonPrefixInQualifierPart(Cell left, KeyValue.KeyOnlyKeyValue right,
9701072
int qualifierCommonPrefix) {
971-
return Bytes.findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(),
972-
left.getQualifierLength() - qualifierCommonPrefix,
973-
right.getQualifierLength() - qualifierCommonPrefix,
974-
left.getQualifierOffset() + qualifierCommonPrefix,
975-
right.getQualifierOffset() + qualifierCommonPrefix);
1073+
if (left instanceof ByteBufferExtendedCell) {
1074+
ByteBufferExtendedCell bbLeft = (ByteBufferExtendedCell) left;
1075+
return ByteBufferUtils.findCommonPrefix(bbLeft.getQualifierByteBuffer(),
1076+
bbLeft.getQualifierPosition() + qualifierCommonPrefix,
1077+
left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierArray(),
1078+
right.getQualifierOffset() + qualifierCommonPrefix,
1079+
right.getQualifierLength() - qualifierCommonPrefix);
1080+
} else {
1081+
return Bytes.findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(),
1082+
left.getQualifierLength() - qualifierCommonPrefix,
1083+
right.getQualifierLength() - qualifierCommonPrefix,
1084+
left.getQualifierOffset() + qualifierCommonPrefix,
1085+
right.getQualifierOffset() + qualifierCommonPrefix);
1086+
}
9761087
}
9771088

9781089
private void moveToPrevious() {

hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -803,6 +803,30 @@ public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLeng
803803
return result;
804804
}
805805

806+
/**
807+
* Find length of common prefix between a region of a ByteBuffer and a region of a byte array.
808+
* @param left ByteBuffer to be compared.
809+
* @param leftOffset Offset in left ByteBuffer.
810+
* @param leftLength Number of bytes to compare in the left ByteBuffer, starting at leftOffset.
811+
* @param right Array to be compared
812+
* @param rightOffset Offset in right Array.
813+
* @param rightLength Number of bytes to compare in the right array, starting at rightOffset.
814+
*/
815+
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
816+
int rightOffset, int rightLength) {
817+
int length = Math.min(leftLength, rightLength);
818+
int result = 0;
819+
820+
while (
821+
result < length
822+
&& ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result]
823+
) {
824+
result++;
825+
}
826+
827+
return result;
828+
}
829+
806830
/**
807831
* Check whether two parts in the same buffer are equal.
808832
* @param buffer In which buffer there are parts

hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
import java.util.concurrent.ThreadLocalRandom;
3434
import org.apache.hadoop.conf.Configuration;
3535
import org.apache.hadoop.hbase.ArrayBackedTag;
36+
import org.apache.hadoop.hbase.ByteBufferKeyValue;
3637
import org.apache.hadoop.hbase.Cell;
3738
import org.apache.hadoop.hbase.CellComparatorImpl;
3839
import org.apache.hadoop.hbase.CellUtil;
@@ -230,6 +231,59 @@ public void testSeekingOnSample() throws IOException {
230231
LOG.info("Done");
231232
}
232233

234+
@Test
235+
public void testSeekingToOffHeapKeyValueInSample() throws IOException {
236+
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
237+
238+
// create all seekers
239+
List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<>();
240+
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
241+
LOG.info("Encoding: " + encoding);
242+
DataBlockEncoder encoder = encoding.getEncoder();
243+
if (encoder == null) {
244+
continue;
245+
}
246+
LOG.info("Encoder: " + encoder);
247+
ByteBuffer encodedBuffer = encodeKeyValues(encoding, sampleKv,
248+
getEncodingContext(conf, Compression.Algorithm.NONE, encoding), this.useOffheapData);
249+
HFileContext meta =
250+
new HFileContextBuilder().withHBaseCheckSum(false).withIncludesMvcc(includesMemstoreTS)
251+
.withIncludesTags(includesTags).withCompression(Compression.Algorithm.NONE).build();
252+
DataBlockEncoder.EncodedSeeker seeker =
253+
encoder.createSeeker(encoder.newDataBlockDecodingContext(conf, meta));
254+
seeker.setCurrentBuffer(new SingleByteBuff(encodedBuffer));
255+
encodedSeekers.add(seeker);
256+
}
257+
LOG.info("Testing it!");
258+
// test it!
259+
// try a few random seeks
260+
Random rand = ThreadLocalRandom.current();
261+
for (boolean seekBefore : new boolean[] { false, true }) {
262+
for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
263+
int keyValueId;
264+
if (!seekBefore) {
265+
keyValueId = rand.nextInt(sampleKv.size());
266+
} else {
267+
keyValueId = rand.nextInt(sampleKv.size() - 1) + 1;
268+
}
269+
270+
KeyValue keyValue = sampleKv.get(keyValueId);
271+
checkSeekingConsistency(encodedSeekers, seekBefore, buildOffHeapKeyValue(keyValue));
272+
}
273+
}
274+
275+
// check edge cases
276+
LOG.info("Checking edge cases");
277+
checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
278+
for (boolean seekBefore : new boolean[] { false, true }) {
279+
checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
280+
KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
281+
Cell lastMidKv = PrivateCellUtil.createLastOnRowCol(midKv);
282+
checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
283+
}
284+
LOG.info("Done");
285+
}
286+
233287
static ByteBuffer encodeKeyValues(DataBlockEncoding encoding, List<KeyValue> kvs,
234288
HFileBlockEncodingContext encodingContext, boolean useOffheapData) throws IOException {
235289
DataBlockEncoder encoder = encoding.getEncoder();
@@ -438,4 +492,15 @@ private void testAlgorithm(byte[] encodedData, ByteBuffer unencodedDataBuf,
438492
assertEquals("Encoding -> decoding gives different results for " + encoder,
439493
Bytes.toStringBinary(unencodedDataBuf), Bytes.toStringBinary(actualDataset));
440494
}
495+
496+
private static ByteBufferKeyValue buildOffHeapKeyValue(KeyValue keyValue) throws IOException {
497+
ByteArrayOutputStream out = new ByteArrayOutputStream();
498+
keyValue.write(out, false);
499+
byte[] bytes = out.toByteArray();
500+
ByteBuffer bb = ByteBuffer.allocateDirect(bytes.length);
501+
bb.put(bytes);
502+
bb.flip();
503+
504+
return new ByteBufferKeyValue(bb, 0, bytes.length);
505+
}
441506
}

0 commit comments

Comments
 (0)