Skip to content

Commit d2793f1

Browse files
titsuki authored and openinx committed
HBASE-22833 MultiRowRangeFilter should provide a method for creating… (#493)
* HBASE-22833: MultiRowRangeFilter should provide a method for creating a filter which is functionally equivalent to multiple prefix filters * Delete superfluous comments * Add description for MultiRowRangeFilter constructor * Add null check for rowKeyPrefixes * Fix checkstyle Signed-off-by: huzheng <openinx@gmail.com>
1 parent 4b34d24 commit d2793f1

File tree

4 files changed

+129
-44
lines changed

4 files changed

+129
-44
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientUtil.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.client;
1919

20+
import java.util.Arrays;
21+
import org.apache.hadoop.hbase.HConstants;
2022
import org.apache.hadoop.hbase.classification.InterfaceAudience;
2123
import org.apache.hadoop.hbase.util.Bytes;
2224

@@ -31,4 +33,46 @@ public static boolean areScanStartRowAndStopRowEqual(byte[] startRow, byte[] sto
3133
public static Cursor createCursor(byte[] row) {
3234
return new Cursor(row);
3335
}
36+
37+
/**
38+
* <p>When scanning for a prefix the scan should stop immediately after the last row that
39+
* has the specified prefix. This method calculates the closest next rowKey immediately following
40+
* the given rowKeyPrefix.</p>
41+
* <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
42+
* <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
43+
* simply increment the last byte of the array.
44+
* But if your application uses real binary rowids you may run into the scenario that your
45+
* prefix is something like:</p>
46+
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
47+
* Then this stopRow needs to be fed into the actual scan<br/>
48+
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
49+
* This method calculates the correct stop row value for this usecase.
50+
*
51+
* @param rowKeyPrefix the rowKey<u>Prefix</u>.
52+
* @return the closest next rowKey immediately following the given rowKeyPrefix.
53+
*/
54+
public static byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
55+
// Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
56+
// Search for the place where the trailing 0xFFs start
57+
int offset = rowKeyPrefix.length;
58+
while (offset > 0) {
59+
if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
60+
break;
61+
}
62+
offset--;
63+
}
64+
65+
if (offset == 0) {
66+
// We got an 0xFFFF... (only FFs) stopRow value which is
67+
// the last possible prefix before the end of the table.
68+
// So set it to stop at the 'end of the table'
69+
return HConstants.EMPTY_END_ROW;
70+
}
71+
72+
// Copy the right length of the original
73+
byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
74+
// And increment the last one
75+
newStopRow[newStopRow.length - 1]++;
76+
return newStopRow;
77+
}
3478
}

hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import java.io.IOException;
2323
import java.util.ArrayList;
24-
import java.util.Arrays;
2524
import java.util.HashMap;
2625
import java.util.List;
2726
import java.util.Map;
@@ -530,53 +529,11 @@ public Scan setRowPrefixFilter(byte[] rowPrefix) {
530529
setStopRow(HConstants.EMPTY_END_ROW);
531530
} else {
532531
this.setStartRow(rowPrefix);
533-
this.setStopRow(calculateTheClosestNextRowKeyForPrefix(rowPrefix));
532+
this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
534533
}
535534
return this;
536535
}
537536

538-
/**
539-
* <p>When scanning for a prefix the scan should stop immediately after the last row that
540-
* has the specified prefix. This method calculates the closest next rowKey immediately following
541-
* the given rowKeyPrefix.</p>
542-
* <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
543-
* <p>If the prefix is an 'ASCII' string put into a byte[] then this is easy because you can
544-
* simply increment the last byte of the array.
545-
* But if your application uses real binary rowids you may run into the scenario that your
546-
* prefix is something like:</p>
547-
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
548-
* Then this stopRow needs to be fed into the actual scan<br/>
549-
* &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (Notice that it is shorter now)<br/>
550-
* This method calculates the correct stop row value for this usecase.
551-
*
552-
* @param rowKeyPrefix the rowKey<u>Prefix</u>.
553-
* @return the closest next rowKey immediately following the given rowKeyPrefix.
554-
*/
555-
private byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
556-
// Essentially we are treating it like an 'unsigned very very long' and doing +1 manually.
557-
// Search for the place where the trailing 0xFFs start
558-
int offset = rowKeyPrefix.length;
559-
while (offset > 0) {
560-
if (rowKeyPrefix[offset - 1] != (byte) 0xFF) {
561-
break;
562-
}
563-
offset--;
564-
}
565-
566-
if (offset == 0) {
567-
// We got an 0xFFFF... (only FFs) stopRow value which is
568-
// the last possible prefix before the end of the table.
569-
// So set it to stop at the 'end of the table'
570-
return HConstants.EMPTY_END_ROW;
571-
}
572-
573-
// Copy the right length of the original
574-
byte[] newStopRow = Arrays.copyOfRange(rowKeyPrefix, 0, offset);
575-
// And increment the last one
576-
newStopRow[newStopRow.length - 1]++;
577-
return newStopRow;
578-
}
579-
580537
/**
581538
* Get all available versions.
582539
* @return this

hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.hadoop.hbase.KeyValueUtil;
2929
import org.apache.hadoop.hbase.classification.InterfaceAudience;
3030
import org.apache.hadoop.hbase.classification.InterfaceStability;
31+
import org.apache.hadoop.hbase.client.ClientUtil;
3132
import org.apache.hadoop.hbase.exceptions.DeserializationException;
3233
import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
3334
import org.apache.hadoop.hbase.util.ByteStringer;
@@ -77,6 +78,33 @@ public MultiRowRangeFilter(List<RowRange> list) throws IOException {
7778
this.ranges = new RangeIteration(rangeList);
7879
}
7980

81+
/**
82+
* Constructor for creating a <code>MultiRowRangeFilter</code> from multiple rowkey prefixes.
83+
*
84+
* As <code>MultiRowRangeFilter</code> javadoc says (See the solution 1 of the first statement),
85+
* if you try to create a filter list that scans row keys corresponding to given prefixes (e.g.,
86+
* <code>FilterList</code> composed of multiple <code>PrefixFilter</code>s), this constructor
87+
* provides a way to avoid creating an inefficient one.
88+
*
89+
* @param rowKeyPrefixes the rowkey prefixes, one byte array per prefix; a row range is created for each prefix. Must not be null.
90+
*/
91+
public MultiRowRangeFilter(byte[][] rowKeyPrefixes) throws IOException {
92+
this(createRangeListFromRowKeyPrefixes(rowKeyPrefixes));
93+
}
94+
95+
private static List<RowRange> createRangeListFromRowKeyPrefixes(byte[][] rowKeyPrefixes) {
96+
if (rowKeyPrefixes == null) {
97+
throw new IllegalArgumentException("Invalid rowkey prefixes");
98+
}
99+
100+
List<RowRange> list = new ArrayList<>();
101+
for (byte[] rowKeyPrefix: rowKeyPrefixes) {
102+
byte[] stopRow = ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowKeyPrefix);
103+
list.add(new RowRange(rowKeyPrefix, true, stopRow, false));
104+
}
105+
return list;
106+
}
107+
80108
public List<RowRange> getRowRanges() {
81109
// Used by hbase-rest
82110
return this.rangeList;

hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.commons.logging.LogFactory;
2929
import org.apache.hadoop.hbase.Cell;
3030
import org.apache.hadoop.hbase.HBaseTestingUtility;
31+
import org.apache.hadoop.hbase.HConstants;
3132
import org.apache.hadoop.hbase.client.HTable;
3233
import org.apache.hadoop.hbase.client.Put;
3334
import org.apache.hadoop.hbase.client.Result;
@@ -69,6 +70,61 @@ public static void tearDownAfterClass() throws Exception {
6970
TEST_UTIL.shutdownMiniCluster();
7071
}
7172

73+
@Test
74+
public void testRowKeyPrefixWithEmptyPrefix() throws IOException {
75+
byte[] prefix = {};
76+
byte[][] rowKeyPrefixes = new byte[1][];
77+
rowKeyPrefixes[0] = prefix;
78+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
79+
List<RowRange> actualRanges = filter.getRowRanges();
80+
List<RowRange> expectedRanges = new ArrayList<>();
81+
expectedRanges.add(
82+
new RowRange(HConstants.EMPTY_START_ROW, true, HConstants.EMPTY_END_ROW, false)
83+
);
84+
assertRangesEqual(expectedRanges, actualRanges);
85+
}
86+
87+
@Test
88+
public void testRowKeyPrefixWithLastIncrementablePrefix() throws IOException {
89+
byte[] prefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
90+
byte[][] rowKeyPrefixes = new byte[1][];
91+
rowKeyPrefixes[0] = prefix;
92+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
93+
List<RowRange> actualRanges = filter.getRowRanges();
94+
List<RowRange> expectedRanges = new ArrayList<>();
95+
final byte[] expectedStop = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
96+
expectedRanges.add(new RowRange(prefix, true, expectedStop , false));
97+
assertRangesEqual(expectedRanges, actualRanges);
98+
}
99+
100+
@Test
101+
public void testRowKeyPrefixWithoutLastIncrementablePrefix() throws IOException {
102+
byte[] prefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
103+
byte[][] rowKeyPrefixes = new byte[1][];
104+
rowKeyPrefixes[0] = prefix;
105+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
106+
List<RowRange> actualRanges = filter.getRowRanges();
107+
List<RowRange> expectedRanges = new ArrayList<>();
108+
final byte[] expectedStop = {(byte) 0x12, (byte) 0x24};
109+
expectedRanges.add(new RowRange(prefix, true, expectedStop , false));
110+
assertRangesEqual(expectedRanges, actualRanges);
111+
}
112+
113+
@Test
114+
public void testRowKeyPrefixWithMergablePrefix() throws IOException {
115+
byte[] prefix1 = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
116+
byte[] prefix2 = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
117+
byte[][] rowKeyPrefixes = new byte[2][];
118+
rowKeyPrefixes[0] = prefix1;
119+
rowKeyPrefixes[1] = prefix2;
120+
MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);
121+
List<RowRange> actualRanges = filter.getRowRanges();
122+
List<RowRange> expectedRanges = new ArrayList<>();
123+
final byte[] expectedStop = {(byte) 0x12, (byte) 0x24};
124+
expectedRanges.add(new RowRange(prefix1, true, expectedStop , false));
125+
assertRangesEqual(expectedRanges, actualRanges);
126+
}
127+
72128
@Test
73129
public void testRanges() throws IOException {
74130
byte[] key1Start = new byte[] {-3};

0 commit comments

Comments
 (0)