Skip to content

HBASE-22833 MultiRowRangeFilter should provide a method for creating… #493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
*/
package org.apache.hadoop.hbase.client;

import org.apache.yetus.audience.InterfaceAudience;
import java.util.Arrays;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Private
public class ClientUtil {
Expand All @@ -31,4 +33,46 @@ public static boolean areScanStartRowAndStopRowEqual(byte[] startRow, byte[] sto
public static Cursor createCursor(byte[] row) {
  // Thin factory: the row key is passed straight to the Cursor constructor.
  final Cursor cursor = new Cursor(row);
  return cursor;
}

/**
 * Computes the closest row key that immediately follows all row keys starting with the given
 * prefix, i.e. the exclusive stop row to use when scanning for that prefix.
 * <p><b>IMPORTANT: the argument is a rowKey<u>Prefix</u>, the result is a rowKey.</b></p>
 * <p>When the prefix does not end in 0xFF the result is simply the prefix with its last byte
 * incremented. Trailing 0xFF bytes cannot be incremented, so they are dropped and the byte
 * in front of them is incremented instead. For example the prefix</p>
 * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
 * yields the (shorter) row key<br/>
 * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b><br/>
 * A prefix that is empty or consists only of 0xFF bytes has no such successor; in that case
 * {@link HConstants#EMPTY_END_ROW} is returned.
 *
 * @param rowKeyPrefix the rowKey<u>Prefix</u>.
 * @return the closest next rowKey immediately following the given rowKeyPrefix.
 */
public static byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
  // Treat the prefix as one big unsigned number and add 1 by hand: walk back from the end
  // to the last byte that can still be incremented (anything other than 0xFF).
  int lastIncrementable = rowKeyPrefix.length - 1;
  while (lastIncrementable >= 0 && rowKeyPrefix[lastIncrementable] == (byte) 0xFF) {
    lastIncrementable--;
  }

  if (lastIncrementable < 0) {
    // Nothing but 0xFF bytes (or no bytes at all): this is the last possible prefix, so the
    // scan has to run to the 'end of the table'.
    return HConstants.EMPTY_END_ROW;
  }

  // Keep everything up to and including the incrementable byte, then bump that byte.
  byte[] nextRowKey = Arrays.copyOf(rowKeyPrefix, lastIncrementable + 1);
  nextRowKey[lastIncrementable] += 1;
  return nextRowKey;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -549,53 +548,11 @@ public Scan setRowPrefixFilter(byte[] rowPrefix) {
setStopRow(HConstants.EMPTY_END_ROW);
} else {
this.setStartRow(rowPrefix);
this.setStopRow(calculateTheClosestNextRowKeyForPrefix(rowPrefix));
this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
}
return this;
}

/**
 * <p>Derives the stop row for a prefix scan: the closest rowKey immediately following the
 * given rowKeyPrefix, so that the scan ends right after the last row carrying that prefix.</p>
 * <p><b>IMPORTANT: This converts a rowKey<u>Prefix</u> into a rowKey</b>.</p>
 * <p>For an 'ASCII' style prefix this amounts to incrementing the last byte. With real binary
 * rowids the prefix may end in 0xFF bytes, for instance:</p>
 * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x23, 0xFF, 0xFF }</b><br/>
 * in which case the stopRow that has to be used for the scan is<br/>
 * &nbsp;&nbsp;&nbsp;<b>{ 0x12, 0x24 }</b> (note that it is shorter than the prefix)<br/>
 *
 * @param rowKeyPrefix the rowKey<u>Prefix</u>.
 * @return the closest next rowKey immediately following the given rowKeyPrefix.
 */
private byte[] calculateTheClosestNextRowKeyForPrefix(byte[] rowKeyPrefix) {
  final byte maxByte = (byte) 0xFF;
  // Manual '+1' on an arbitrarily long unsigned value: scan from the end, skipping the
  // trailing 0xFF bytes, and increment the first byte that is not 0xFF.
  for (int i = rowKeyPrefix.length - 1; i >= 0; i--) {
    if (rowKeyPrefix[i] == maxByte) {
      continue;
    }
    // Drop the trailing 0xFF bytes and increment the last remaining byte.
    byte[] stopRow = Arrays.copyOfRange(rowKeyPrefix, 0, i + 1);
    stopRow[i]++;
    return stopRow;
  }
  // Only 0xFF bytes (or an empty prefix): this is the last possible prefix before the end
  // of the table, so stop at the 'end of the table'.
  return HConstants.EMPTY_END_ROW;
}

/**
* Get all available versions.
* @return this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.ClientUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.util.Bytes;

/**
* Filter to support scan multiple row key ranges. It can construct the row key ranges from the
Expand Down Expand Up @@ -71,6 +72,33 @@ public MultiRowRangeFilter(List<RowRange> list) {
this.ranges = new RangeIteration(rangeList);
}

/**
* Constructor for creating a <code>MultiRowRangeFilter</code> from multiple rowkey prefixes.
*
* As the <code>MultiRowRangeFilter</code> class javadoc explains (see solution 1 of the first
* statement), a filter list built to scan the row keys matching several prefixes (e.g., a
* <code>FilterList</code> composed of multiple <code>PrefixFilter</code>s) is inefficient.
* This constructor provides a way to avoid creating such an inefficient filter list.
*
* @param rowKeyPrefixes the row key prefixes to scan for, each given as a byte array; the
*   array itself must not be null
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need a careful javadoc here that says why we need the public method ... because exposing a MultiRowRangeFilter constructor with rowKeyPrefixes looks very strange if there is no doc here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I added an explanation for this constructor.

*/
public MultiRowRangeFilter(byte[][] rowKeyPrefixes) {
// Turn every prefix into a row range, then delegate to the List<RowRange> constructor.
this(createRangeListFromRowKeyPrefixes(rowKeyPrefixes));
}

private static List<RowRange> createRangeListFromRowKeyPrefixes(byte[][] rowKeyPrefixes) {
if (rowKeyPrefixes == null) {
throw new IllegalArgumentException("Invalid rowkey prefixes");
}

List<RowRange> list = new ArrayList<>();
for (byte[] rowKeyPrefix: rowKeyPrefixes) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need some arguments check here ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I added null check.

byte[] stopRow = ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowKeyPrefix);
list.add(new RowRange(rowKeyPrefix, true, stopRow, false));
}
return list;
}

public List<RowRange> getRowRanges() {
// Used by hbase-rest
return this.rangeList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,61 @@ public static void tearDownAfterClass() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}

@Test
public void testRowKeyPrefixWithEmptyPrefix() throws IOException {
  // A zero-length prefix is expected to produce a single range running from
  // EMPTY_START_ROW (inclusive) to EMPTY_END_ROW (exclusive).
  byte[][] rowKeyPrefixes = { new byte[0] };
  MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);

  RowRange wholeRange =
    new RowRange(HConstants.EMPTY_START_ROW, true, HConstants.EMPTY_END_ROW, false);
  List<RowRange> expected = new ArrayList<>();
  expected.add(wholeRange);

  assertRangesEqual(expected, filter.getRowRanges());
}

@Test
public void testRowKeyPrefixWithLastIncrementablePrefix() throws IOException {
  // The last byte (0xFE) can be incremented in place, so the expected stop row has the same
  // length as the prefix.
  final byte[] startRow = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
  final byte[] stopRow = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};

  MultiRowRangeFilter filter = new MultiRowRangeFilter(new byte[][] { startRow });

  List<RowRange> expected = new ArrayList<>();
  expected.add(new RowRange(startRow, true, stopRow, false));
  assertRangesEqual(expected, filter.getRowRanges());
}

@Test
public void testRowKeyPrefixWithoutLastIncrementablePrefix() throws IOException {
  // The prefix ends in 0xFF bytes, which cannot be incremented: the expected stop row drops
  // them and increments the preceding byte (0x23 -> 0x24).
  final byte[] startRow = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
  final byte[] stopRow = {(byte) 0x12, (byte) 0x24};

  MultiRowRangeFilter filter = new MultiRowRangeFilter(new byte[][] { startRow });

  List<RowRange> expected = new ArrayList<>();
  expected.add(new RowRange(startRow, true, stopRow, false));
  assertRangesEqual(expected, filter.getRowRanges());
}

@Test
public void testRowKeyPrefixWithMergablePrefix() throws IOException {
  // Two prefixes whose ranges touch: the first one stops exactly where the second one
  // starts, so a single range from the first prefix to { 0x12, 0x24 } is expected.
  final byte[] lowerPrefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFE};
  final byte[] upperPrefix = {(byte) 0x12, (byte) 0x23, (byte) 0xFF, (byte) 0xFF};
  final byte[] mergedStop = {(byte) 0x12, (byte) 0x24};

  byte[][] rowKeyPrefixes = { lowerPrefix, upperPrefix };
  MultiRowRangeFilter filter = new MultiRowRangeFilter(rowKeyPrefixes);

  List<RowRange> expected = new ArrayList<>();
  expected.add(new RowRange(lowerPrefix, true, mergedStop, false));
  assertRangesEqual(expected, filter.getRowRanges());
}

@Test
public void testRanges() throws IOException {
byte[] key1Start = new byte[] {-3};
Expand Down