Skip to content

Commit 6e273e8

Browse files
titsuki authored and openinx committed
HBASE-22833 MultiRowRangeFilter should provide a method for creating… (#493)
* HBASE-22833: MultiRowRangeFilter should provide a method for creating a filter which is functionally equivalent to multiple prefix filters
* Delete superfluous comments
* Add description for MultiRowRangeFilter constructor
* Add null check for rowKeyPrefixes
* Fix checkstyle

Signed-off-by: huzheng <openinx@gmail.com>
1 parent 0481b04 commit 6e273e8

File tree

4 files changed

+131
-47
lines changed

4 files changed

+131
-47
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientUtil.java

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
*/
1818
package org.apache.hadoop.hbase.client;
1919

20-
import org.apache.yetus.audience.InterfaceAudience;
20+
import java.util.Arrays;
21+
import org.apache.hadoop.hbase.HConstants;
2122
import org.apache.hadoop.hbase.util.Bytes;
23+
import org.apache.yetus.audience.InterfaceAudience;
2224

2325
@InterfaceAudience.Private
2426
public class ClientUtil {
@@ -31,4 +33,46 @@ public static boolean areScanStartRowAndStopRowEqual(byte[] startRow, byte[] sto
3133
public static Cursor createCursor(byte[] row) {
3234
return new Cursor(row);
3335
}
36+
37+
/**
38+
* <p>When scanning for a prefix the scan should stop immediately after the last row that
39+
* has the specified prefix. This method calculates the closest next rowKey immediately following
40+
* the given rowKeyPrefix.</p>
41+
* <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
42+
* <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
43+
* simply increment the last byte of the array.
44+
* But if your application uses real binary rowids you may run into the scenario that your
45+
* prefix is something like:</p>
46+
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
47+
* Then this stopRow needs to be fed into the actual scan<br/>
48+
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
49+
* This method calculates the correct stop row value for this use case.
50+
*
51+
* @param rowKeyPrefix the rowKey<u>Prefix</u>.
52+
* @return the closest next rowKey immediately following the given rowKeyPrefix.
53+
*/
54+
public static byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
55+
// Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
56+
// Search for the place where the trailing 0xFFs start
57+
int offset = rowKeyPrefix.length;
58+
while (offset > 0) {
59+
if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
60+
break;
61+
}
62+
offset--;
63+
}
64+
65+
if (offset == 0) {
66+
// We got an 0xFFFF... (only FFs) stopRow value which is
67+
// the last possible prefix before the end of the table.
68+
// So set it to stop at the 'end of the table'
69+
return HConstants.EMPTY_END_ROW;
70+
}
71+
72+
// Copy the right length of the original
73+
byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
74+
// And increment the last one
75+
newStopRow[newStopRow.length - 1]++;
76+
return newStopRow;
77+
}
3478
}

hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import java.io.IOException;
2323
import java.util.ArrayList;
24-
import java.util.Arrays;
2524
import java.util.HashMap;
2625
import java.util.List;
2726
import java.util.Map;
@@ -563,53 +562,11 @@ public Scan setRowPrefixFilter(byte[] rowPrefix) {
563562
setStopRow(HConstants.EMPTY_END_ROW);
564563
} else {
565564
this.setStartRow(rowPrefix);
566-
this.setStopRow(calculateTheClosestNextRowKeyForPrefix(rowPrefix));
565+
this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
567566
}
568567
return this;
569568
}
570569

571-
/**
572-
* <p>When scanning for a prefix the scan should stop immediately after the last row that
573-
* has the specified prefix. This method calculates the closest next rowKey immediately following
574-
* the given rowKeyPrefix.</p>
575-
* <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
576-
* <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
577-
* simply increment the last byte of the array.
578-
* But if your application uses real binary rowids you may run into the scenario that your
579-
* prefix is something like:</p>
580-
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
581-
* Then this stopRow needs to be fed into the actual scan<br/>
582-
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
583-
* This method calculates the correct stop row value for this use case.
584-
*
585-
* @param rowKeyPrefix the rowKey<u>Prefix</u>.
586-
* @return the closest next rowKey immediately following the given rowKeyPrefix.
587-
*/
588-
private byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
589-
// Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
590-
// Search for the place where the trailing 0xFFs start
591-
int offset = rowKeyPrefix.length;
592-
while (offset > 0) {
593-
if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
594-
break;
595-
}
596-
offset--;
597-
}
598-
599-
if (offset == 0) {
600-
// We got an 0xFFFF... (only FFs) stopRow value which is
601-
// the last possible prefix before the end of the table.
602-
// So set it to stop at the 'end of the table'
603-
return HConstants.EMPTY_END_ROW;
604-
}
605-
606-
// Copy the right length of the original
607-
byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
608-
// And increment the last one
609-
newStopRow[newStopRow.length - 1]++;
610-
return newStopRow;
611-
}
612-
613570
/**
614571
* Get all available versions.
615572
* @return this

hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@
2626
import org.apache.hadoop.hbase.CellUtil;
2727
import org.apache.hadoop.hbase.HConstants;
2828
import org.apache.hadoop.hbase.PrivateCellUtil;
29-
import org.apache.yetus.audience.InterfaceAudience;
29+
import org.apache.hadoop.hbase.client.ClientUtil;
3030
import org.apache.hadoop.hbase.exceptions.DeserializationException;
31+
import org.apache.hadoop.hbase.util.Bytes;
32+
import org.apache.yetus.audience.InterfaceAudience;
3133
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
3234
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
3335
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
34-
import org.apache.hadoop.hbase.util.Bytes;
3536

3637
/**
3738
* Filter to support scan multiple row key ranges. It can construct the row key ranges from the
@@ -71,6 +72,33 @@ public MultiRowRangeFilter(List<RowRange> list) {
7172
this.ranges = new RangeIteration(rangeList);
7273
}
7374

75+
/**
76+
* Constructor for creating a <code>MultiRowRangeFilter</code> from multiple rowkey prefixes.
77+
*
78+
* As the <code>MultiRowRangeFilter</code> class javadoc explains (see solution 1 of the first
79+
* statement), a filter list that scans the row keys matching a set of prefixes (e.g., a
80+
* <code>FilterList</code> composed of multiple <code>PrefixFilter</code>s) is inefficient; this
81+
* constructor builds an equivalent filter from row ranges instead.
82+
*
83+
* @param rowKeyPrefixes the row key prefixes, each given as a byte array; must not be null
84+
*/
85+
public MultiRowRangeFilter(byte[][] rowKeyPrefixes) {
86+
this(createRangeListFromRowKeyPrefixes(rowKeyPrefixes));
87+
}
88+
89+
private static List<RowRange> createRangeListFromRowKeyPrefixes(byte[][] rowKeyPrefixes) {
90+
if (rowKeyPrefixes == null) {
91+
throw new IllegalArgumentException("Invalid rowkey prefixes");
92+
}
93+
94+
List<RowRange> list = new ArrayList<>();
95+
for (byte[] rowKeyPrefix: rowKeyPrefixes) {
96+
byte[] stopRow = ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowKeyPrefix);
97+
list.add(new RowRange(rowKeyPrefix, true, stopRow, false));
98+
}
99+
return list;
100+
}
101+
74102
public List<RowRange> getRowRanges() {
75103
// Used by hbase-rest
76104
return this.rangeList;

hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,61 @@ public static void tearDownAfterClass() throws Exception {
8282
TEST_UTIL.shutdownMiniCluster();
8383
}
8484

85+
@Test
86+
public void testRowKeyPrefixWithEmptyPrefix() throws IOException {
87+
byte[] prefix = {};
88+
byte[][] rowKeyPrefixes = new byte[1][];
89+
rowKeyPrefixes[0] = prefix;
90+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
91+
List<RowRange> actualRanges = filter.getRowRanges();
92+
List<RowRange> expectedRanges = new ArrayList<>();
93+
expectedRanges.add(
94+
new RowRange(HConstants.EMPTY_START_ROW, true, HConstants.EMPTY_END_ROW, false)
95+
);
96+
assertRangesEqual(expectedRanges, actualRanges);
97+
}
98+
99+
@Test
100+
public void testRowKeyPrefixWithLastIncrementablePrefix() throws IOException {
101+
byte[] prefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
102+
byte[][] rowKeyPrefixes = new byte[1][];
103+
rowKeyPrefixes[0] = prefix;
104+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
105+
List<RowRange> actualRanges = filter.getRowRanges();
106+
List<RowRange> expectedRanges = new ArrayList<>();
107+
final byte[] expectedStop = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
108+
expectedRanges.add(new RowRange(prefix, true, expectedStop , false));
109+
assertRangesEqual(expectedRanges, actualRanges);
110+
}
111+
112+
@Test
113+
public void testRowKeyPrefixWithoutLastIncrementablePrefix() throws IOException {
114+
byte[] prefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
115+
byte[][] rowKeyPrefixes = new byte[1][];
116+
rowKeyPrefixes[0] = prefix;
117+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
118+
List<RowRange> actualRanges = filter.getRowRanges();
119+
List<RowRange> expectedRanges = new ArrayList<>();
120+
final byte[] expectedStop = {(byte) 0x12, (byte) 0x24};
121+
expectedRanges.add(new RowRange(prefix, true, expectedStop , false));
122+
assertRangesEqual(expectedRanges, actualRanges);
123+
}
124+
125+
@Test
126+
public void testRowKeyPrefixWithMergablePrefix() throws IOException {
127+
byte[] prefix1 = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
128+
byte[] prefix2 = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
129+
byte[][] rowKeyPrefixes = new byte[2][];
130+
rowKeyPrefixes[0] = prefix1;
131+
rowKeyPrefixes[1] = prefix2;
132+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
133+
List<RowRange> actualRanges = filter.getRowRanges();
134+
List<RowRange> expectedRanges = new ArrayList<>();
135+
final byte[] expectedStop = {(byte) 0x12, (byte) 0x24};
136+
expectedRanges.add(new RowRange(prefix1, true, expectedStop , false));
137+
assertRangesEqual(expectedRanges, actualRanges);
138+
}
139+
85140
@Test
86141
public void testRanges() throws IOException {
87142
byte[] key1Start = new byte[] {-3};

0 commit comments

Comments
 (0)