Skip to content

Commit 688a388

Browse files
committed
HBASE-22777 Add a multi-region merge (for fixing overlaps)
Makes MergeTableRegionsProcedure (MTRP) merge more than just two regions at a time. This is compatible because MTRP was written with the expectation that it would one day merge more than two regions at a time. Changes the hardcoded assumption that merge parent regions are named mergeA and mergeB in columns on the resultant region. Instead, the merged region can have N columns, one for each parent merged; the column qualifiers all begin with 'merge'. Most of the code below is undoing the assumption that a merge has only two parents.
1 parent f213e1c commit 688a388

File tree

27 files changed

+1103
-745
lines changed

27 files changed

+1103
-745
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java

Lines changed: 270 additions & 186 deletions
Large diffs are not rendered by default.

hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionInfo.java

Lines changed: 34 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
*
33
* Licensed to the Apache Software Foundation (ASF) under one
44
* or more contributor license agreements. See the NOTICE file
@@ -70,8 +70,7 @@
7070
*/
7171
@InterfaceAudience.Public
7272
public interface RegionInfo {
73-
public static final RegionInfo UNDEFINED =
74-
RegionInfoBuilder.newBuilder(TableName.valueOf("__UNDEFINED__")).build();
73+
RegionInfo UNDEFINED = RegionInfoBuilder.newBuilder(TableName.valueOf("__UNDEFINED__")).build();
7574
/**
7675
* Separator used to demarcate the encodedName in a region name
7776
* in the new format. See description on new format above.
@@ -141,11 +140,16 @@ public interface RegionInfo {
141140
}
142141

143142
int replicaDiff = lhs.getReplicaId() - rhs.getReplicaId();
144-
if (replicaDiff != 0) return replicaDiff;
143+
if (replicaDiff != 0) {
144+
return replicaDiff;
145+
}
145146

146-
if (lhs.isOffline() == rhs.isOffline())
147+
if (lhs.isOffline() == rhs.isOffline()) {
147148
return 0;
148-
if (lhs.isOffline() == true) return -1;
149+
}
150+
if (lhs.isOffline()) {
151+
return -1;
152+
}
149153

150154
return 1;
151155
};
@@ -224,8 +228,6 @@ public interface RegionInfo {
224228
boolean isMetaRegion();
225229

226230
/**
227-
* @param rangeStartKey
228-
* @param rangeEndKey
229231
* @return true if the given inclusive range of rows is fully contained
230232
* by this region. For example, if the region is foo,a,g and this is
231233
* passed ["b","c"] or ["a","c"] it will return true, but if this is passed
@@ -235,7 +237,6 @@ public interface RegionInfo {
235237
boolean containsRange(byte[] rangeStartKey, byte[] rangeEndKey);
236238

237239
/**
238-
* @param row
239240
* @return true if the given row falls in this region.
240241
*/
241242
boolean containsRow(byte[] row);
@@ -339,9 +340,7 @@ static TableName getTable(final byte [] regionName) {
339340

340341
/**
341342
* Gets the start key from the specified region name.
342-
* @param regionName
343343
* @return Start key.
344-
* @throws java.io.IOException
345344
*/
346345
static byte[] getStartKey(final byte[] regionName) throws IOException {
347346
return parseRegionName(regionName)[1];
@@ -362,7 +361,6 @@ static boolean isEncodedRegionName(byte[] regionName) throws IOException {
362361
}
363362

364363
/**
365-
* @param bytes
366364
* @return A deserialized {@link RegionInfo}
367365
* or null if we failed to deserialize or the passed bytes are null
368366
*/
@@ -373,9 +371,6 @@ static RegionInfo parseFromOrNull(final byte [] bytes) {
373371
}
374372

375373
/**
376-
* @param bytes
377-
* @param offset
378-
* @param len
379374
* @return A deserialized {@link RegionInfo} or null
380375
* if we failed to deserialize or the passed bytes are null
381376
*/
@@ -392,7 +387,6 @@ static RegionInfo parseFromOrNull(final byte [] bytes, int offset, int len) {
392387
/**
393388
* @param bytes A pb RegionInfo serialized with a pb magic prefix.
394389
* @return A deserialized {@link RegionInfo}
395-
* @throws DeserializationException
396390
*/
397391
@InterfaceAudience.Private
398392
static RegionInfo parseFrom(final byte [] bytes) throws DeserializationException {
@@ -405,7 +399,6 @@ static RegionInfo parseFrom(final byte [] bytes) throws DeserializationException
405399
* @param offset starting point in the byte array
406400
* @param len length to read on the byte array
407401
* @return A deserialized {@link RegionInfo}
408-
* @throws DeserializationException
409402
*/
410403
@InterfaceAudience.Private
411404
static RegionInfo parseFrom(final byte [] bytes, int offset, int len)
@@ -426,30 +419,28 @@ static RegionInfo parseFrom(final byte [] bytes, int offset, int len)
426419
}
427420

428421
/**
429-
* Check whether two regions are adjacent
430-
* @param regionA
431-
* @param regionB
422+
* Check whether two regions are adjacent; i.e. one lies just before or just
423+
* after the other in a table.
432424
* @return true if two regions are adjacent
433425
*/
434426
static boolean areAdjacent(RegionInfo regionA, RegionInfo regionB) {
435427
if (regionA == null || regionB == null) {
436428
throw new IllegalArgumentException(
437429
"Can't check whether adjacent for null region");
438430
}
431+
if (!regionA.getTable().equals(regionB.getTable())) {
432+
return false;
433+
}
439434
RegionInfo a = regionA;
440435
RegionInfo b = regionB;
441436
if (Bytes.compareTo(a.getStartKey(), b.getStartKey()) > 0) {
442437
a = regionB;
443438
b = regionA;
444439
}
445-
if (Bytes.compareTo(a.getEndKey(), b.getStartKey()) == 0) {
446-
return true;
447-
}
448-
return false;
440+
return Bytes.equals(a.getEndKey(), b.getStartKey());
449441
}
450442

451443
/**
452-
* @param ri
453444
* @return This instance serialized as protobuf w/ a magic pb prefix.
454445
* @see #parseFrom(byte[])
455446
*/
@@ -473,7 +464,6 @@ static String prettyPrint(final String encodedRegionName) {
473464

474465
/**
475466
* Make a region name of passed parameters.
476-
* @param tableName
477467
* @param startKey Can be null
478468
* @param regionid Region id (Usually timestamp from when region was created).
479469
* @param newFormat should we create the region name in the new format
@@ -487,7 +477,6 @@ static String prettyPrint(final String encodedRegionName) {
487477

488478
/**
489479
* Make a region name of passed parameters.
490-
* @param tableName
491480
* @param startKey Can be null
492481
* @param id Region id (Usually timestamp from when region was created).
493482
* @param newFormat should we create the region name in the new format
@@ -501,10 +490,8 @@ static String prettyPrint(final String encodedRegionName) {
501490

502491
/**
503492
* Make a region name of passed parameters.
504-
* @param tableName
505493
* @param startKey Can be null
506494
* @param regionid Region id (Usually timestamp from when region was created).
507-
* @param replicaId
508495
* @param newFormat should we create the region name in the new format
509496
* (such that it contains its encoded name?).
510497
* @return Region name made of passed tableName, startKey, id and replicaId
@@ -517,7 +504,6 @@ static String prettyPrint(final String encodedRegionName) {
517504

518505
/**
519506
* Make a region name of passed parameters.
520-
* @param tableName
521507
* @param startKey Can be null
522508
* @param id Region id (Usually timestamp from when region was created).
523509
* @param newFormat should we create the region name in the new format
@@ -531,10 +517,8 @@ static String prettyPrint(final String encodedRegionName) {
531517

532518
/**
533519
* Make a region name of passed parameters.
534-
* @param tableName
535520
* @param startKey Can be null
536521
* @param id Region id (Usually timestamp from when region was created).
537-
* @param replicaId
538522
* @param newFormat should we create the region name in the new format
539523
* @return Region name made of passed tableName, startKey, id and replicaId
540524
*/
@@ -593,7 +577,7 @@ static String prettyPrint(final String encodedRegionName) {
593577
b[offset++] = ENC_SEPARATOR;
594578
System.arraycopy(md5HashBytes, 0, b, offset, MD5_HEX_LENGTH);
595579
offset += MD5_HEX_LENGTH;
596-
b[offset++] = ENC_SEPARATOR;
580+
b[offset] = ENC_SEPARATOR;
597581
}
598582

599583
return b;
@@ -612,9 +596,7 @@ static RegionInfo createMobRegionInfo(TableName tableName) {
612596

613597
/**
614598
* Separate elements of a regionName.
615-
* @param regionName
616599
* @return Array of byte[] containing tableName, startKey and id
617-
* @throws IOException
618600
*/
619601
static byte [][] parseRegionName(final byte[] regionName)
620602
throws IOException {
@@ -693,7 +675,6 @@ static RegionInfo createMobRegionInfo(TableName tableName) {
693675
* be used to read back the instances.
694676
* @param infos RegionInfo objects to serialize
695677
* @return This instance serialized as a delimited protobuf w/ a magic pb prefix.
696-
* @throws IOException
697678
*/
698679
static byte[] toDelimitedByteArray(RegionInfo... infos) throws IOException {
699680
byte[][] bytes = new byte[infos.length][];
@@ -715,9 +696,7 @@ static byte[] toDelimitedByteArray(RegionInfo... infos) throws IOException {
715696
/**
716697
* Use this instead of {@link RegionInfo#toByteArray(RegionInfo)} when writing to a stream and you want to use
717698
* the pb mergeDelimitedFrom (w/o the delimiter, pb reads to EOF which may not be what you want).
718-
* @param ri
719699
* @return This instance serialized as a delimited protobuf w/ a magic pb prefix.
720-
* @throws IOException
721700
*/
722701
static byte [] toDelimitedByteArray(RegionInfo ri) throws IOException {
723702
return ProtobufUtil.toDelimitedByteArray(ProtobufUtil.toRegionInfo(ri));
@@ -727,9 +706,7 @@ static byte[] toDelimitedByteArray(RegionInfo... infos) throws IOException {
727706
* Parses a RegionInfo instance from the passed in stream.
728707
* Presumes the RegionInfo was serialized to the stream with
729708
* {@link #toDelimitedByteArray(RegionInfo)}.
730-
* @param in
731709
* @return An instance of RegionInfo.
732-
* @throws IOException
733710
*/
734711
static RegionInfo parseFrom(final DataInputStream in) throws IOException {
735712
// I need to be able to move back in the stream if this is not a pb
@@ -757,28 +734,23 @@ static RegionInfo parseFrom(final DataInputStream in) throws IOException {
757734
* @param offset the start offset into the byte[] buffer
758735
* @param length how far we should read into the byte[] buffer
759736
* @return All the RegionInfos that are in the byte array. Keeps reading till we hit the end.
760-
* @throws IOException
761737
*/
762738
static List<RegionInfo> parseDelimitedFrom(final byte[] bytes, final int offset,
763739
final int length) throws IOException {
764740
if (bytes == null) {
765741
throw new IllegalArgumentException("Can't build an object with empty bytes array");
766742
}
767-
DataInputBuffer in = new DataInputBuffer();
768743
List<RegionInfo> ris = new ArrayList<>();
769-
try {
744+
try (DataInputBuffer in = new DataInputBuffer()) {
770745
in.reset(bytes, offset, length);
771746
while (in.available() > 0) {
772747
RegionInfo ri = parseFrom(in);
773748
ris.add(ri);
774749
}
775-
} finally {
776-
in.close();
777750
}
778751
return ris;
779752
}
780753

781-
782754
/**
783755
* @return True if this is first Region in Table
784756
*/
@@ -794,10 +766,20 @@ default boolean isLast() {
794766
}
795767

796768
/**
797-
* @return True if regions are adjacent, if 'after' next. Does not do tablename compare.
769+
* @return True if region is next, adjacent but 'after' this one.
770+
* @see #isAdjacent(RegionInfo)
771+
* @see #areAdjacent(RegionInfo, RegionInfo)
798772
*/
799773
default boolean isNext(RegionInfo after) {
800-
return Bytes.equals(getEndKey(), after.getStartKey());
774+
return getTable().equals(after.getTable()) && Bytes.equals(getEndKey(), after.getStartKey());
775+
}
776+
777+
/**
778+
* @return True if region is adjacent, either just before or just after this one.
779+
* @see #isNext(RegionInfo)
780+
*/
781+
default boolean isAdjacent(RegionInfo other) {
782+
return getTable().equals(other.getTable()) && areAdjacent(this, other);
801783
}
802784

803785
/**
@@ -808,11 +790,13 @@ default boolean isDegenerate() {
808790
}
809791

810792
/**
811-
* @return True if an overlap in region range. Does not do tablename compare.
812-
* Does not check if <code>other</code> has degenerate range.
793+
* @return True if an overlap in region range.
813794
* @see #isDegenerate()
814795
*/
815796
default boolean isOverlap(RegionInfo other) {
797+
if (!getTable().equals(other.getTable())) {
798+
return false;
799+
}
816800
int startKeyCompare = Bytes.compareTo(getStartKey(), other.getStartKey());
817801
if (startKeyCompare == 0) {
818802
return true;

hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -499,11 +499,31 @@ public enum OperationStatusCode {
499499
/** The upper-half split region column qualifier */
500500
public static final byte [] SPLITB_QUALIFIER = Bytes.toBytes("splitB");
501501

502-
/** The lower-half merge region column qualifier */
503-
public static final byte[] MERGEA_QUALIFIER = Bytes.toBytes("mergeA");
502+
/**
503+
* Merge qualifier prefix.
504+
* We used to only allow two regions to merge: mergeA and mergeB.
505+
* Now we allow many to merge. Each region to merge will be referenced
506+
* in a column whose qualifier starts with this prefix.
507+
*/
508+
public static final String MERGE_QUALIFIER_PREFIX_STR = "merge";
509+
public static final byte [] MERGE_QUALIFIER_PREFIX =
510+
Bytes.toBytes(MERGE_QUALIFIER_PREFIX_STR);
504511

505-
/** The upper-half merge region column qualifier */
506-
public static final byte[] MERGEB_QUALIFIER = Bytes.toBytes("mergeB");
512+
/**
513+
* The lower-half merge region column qualifier
514+
* @deprecated Since 2.3.0 and 2.2.1. Not used anymore. Instead we look for
515+
* the {@link #MERGE_QUALIFIER_PREFIX_STR} prefix.
516+
*/
517+
@Deprecated
518+
public static final byte[] MERGEA_QUALIFIER = Bytes.toBytes(MERGE_QUALIFIER_PREFIX_STR + "A");
519+
520+
/**
521+
* The upper-half merge region column qualifier
522+
* @deprecated Since 2.3.0 and 2.2.1. Not used anymore. Instead we look for
523+
* the {@link #MERGE_QUALIFIER_PREFIX_STR} prefix.
524+
*/
525+
@Deprecated
526+
public static final byte[] MERGEB_QUALIFIER = Bytes.toBytes(MERGE_QUALIFIER_PREFIX_STR + "B");
507527

508528
/** The catalog family as a string*/
509529
public static final String TABLE_FAMILY_STR = "table";

hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,25 @@ public static boolean matchingQualifier(final Cell left, final byte[] buf, final
758758
left.getQualifierLength(), buf, offset, length);
759759
}
760760

761+
/**
762+
* Finds if the start of the qualifier part of the Cell matches <code>startsWith</code>
763+
* @param left the cell with which we need to match the qualifier
764+
* @param startsWith the bytes the qualifier is expected to start with; must not be null or empty
765+
* @return true if the qualifier has the same starting bytes as <code>startsWith</code>, false otherwise
766+
*/
767+
public static boolean qualifierStartsWith(final Cell left, final byte[] startsWith) {
768+
if (startsWith == null || startsWith.length == 0) {
769+
throw new IllegalArgumentException("Cannot pass an empty startsWith");
770+
}
771+
if (left instanceof ByteBufferExtendedCell) {
772+
return ByteBufferUtils.equals(((ByteBufferExtendedCell) left).getQualifierByteBuffer(),
773+
((ByteBufferExtendedCell) left).getQualifierPosition(), startsWith.length,
774+
startsWith, 0, startsWith.length);
775+
}
776+
return Bytes.equals(left.getQualifierArray(), left.getQualifierOffset(),
777+
startsWith.length, startsWith, 0, startsWith.length);
778+
}
779+
761780
public static boolean matchingColumn(final Cell left, final byte[] fam, final int foffset,
762781
final int flength, final byte[] qual, final int qoffset, final int qlength) {
763782
if (!matchingFamily(left, fam, foffset, flength)) {

0 commit comments

Comments
 (0)