From 71e8db15fb9d31c751221b03b469da6b3e01dc99 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 13 Sep 2011 02:39:41 +0000 Subject: [PATCH] HBASE-4321. Add a more comprehensive region split calculator for future use in hbck. git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1170013 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../apache/hadoop/hbase/util/KeyRange.java | 29 ++ .../hbase/util/RegionSplitCalculator.java | 162 ++++++++ .../hbase/util/TestRegionSplitCalculator.java | 350 ++++++++++++++++++ 4 files changed, 543 insertions(+) create mode 100644 src/main/java/org/apache/hadoop/hbase/util/KeyRange.java create mode 100644 src/main/java/org/apache/hadoop/hbase/util/RegionSplitCalculator.java create mode 100644 src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitCalculator.java diff --git a/CHANGES.txt b/CHANGES.txt index 8dacddc43777..378896a8bb58 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -584,6 +584,8 @@ Release 0.90.5 - Unreleased easier (Jonathan Hsieh) HBASE-4272. Add -metaonly flag to hbck feature to only inspect and try to repair META and ROOT. (todd) + HBASE-4321. Add a more comprehensive region split calculator for future use + in hbck. (Jonathan Hsieh) Release 0.90.4 - August 10, 2011 diff --git a/src/main/java/org/apache/hadoop/hbase/util/KeyRange.java b/src/main/java/org/apache/hadoop/hbase/util/KeyRange.java new file mode 100644 index 000000000000..958edef8d5a4 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/util/KeyRange.java @@ -0,0 +1,29 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +/** + * A key range use in split coverage. + */ +public interface KeyRange { + abstract byte[] getStartKey(); + + abstract byte[] getEndKey(); +} diff --git a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitCalculator.java b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitCalculator.java new file mode 100644 index 000000000000..69a87ab07a37 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitCalculator.java @@ -0,0 +1,162 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.util.Collection; +import java.util.Comparator; +import java.util.Map.Entry; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.TreeMultimap; + +/** + * This is a generic region split calculator. It requires Ranges that provide + * start, end, and a comparator. It works in two phases -- the first adds ranges + * and rejects backwards ranges. Then one calls calcRegions to generate the + * multimap that has a start split key as a key and possibly multiple Ranges as + * members. + * + * To traverse, one normally would get the split set, and iterate through the + * calcRegions. Normal regions would have only one entry, holes would have zero, + * and any overlaps would have multiple entries. + * + * The interface is a bit cumbersome currently but is exposed this way so that + * clients can choose how to iterate through the region splits. + * + * @param + */ +public class RegionSplitCalculator { + final static Log LOG = LogFactory.getLog(RegionSplitCalculator.class); + + private final Comparator rangeCmp; + /** + * This contains a sorted set of all the possible split points + * + * Invariant: once populated this has 0 entries if empty or at most n+1 values + * where n == number of added ranges. + */ + private final TreeSet splits = new TreeSet(BYTES_COMPARATOR); + + /** + * This is a map from start key to regions with the same start key. + * + * Invariant: This always have n values in total + */ + private final Multimap starts = ArrayListMultimap.create(); + + /** + * SPECIAL CASE + */ + private final static byte[] ENDKEY = null; + + public RegionSplitCalculator(Comparator cmp) { + rangeCmp = cmp; + } + + public final static Comparator BYTES_COMPARATOR = new ByteArrayComparator() { + @Override + public int compare(byte[] l, byte[] r) { + if (l == null && r == null) + return 0; + if (l == null) + return 1; + if (r == null) + return -1; + return super.compare(l, r); + } + }; + + /** + * SPECIAL CASE wrapper for empty end key + * + * @return ENDKEY if end key is empty, else normal endkey. + */ + private byte[] specialEndKey(R range) { + byte[] end = range.getEndKey(); + if (end.length == 0) { + return ENDKEY; + } + return end; + } + + /** + * Adds an edge to the split calculator + * + * @return true if is included, false if backwards/invalid + */ + public boolean add(R range) { + byte[] start = range.getStartKey(); + byte[] end = specialEndKey(range); + + if (end != ENDKEY && Bytes.compareTo(start, end) > 0) { + // don't allow backwards edges + LOG.debug("attempted to add backwards edge: " + Bytes.toString(start) + + " " + Bytes.toString(end)); + return false; + } + + splits.add(start); + splits.add(end); + starts.put(start, range); + return true; + } + + /** + * Generates a coverage multimap from split key to Regions that start with the + * split key. + * + * @return coverage multimap + */ + public Multimap calcCoverage() { + // This needs to be sorted to force the use of the comparator on the values, + // otherwise byte array comparison isn't used + Multimap regions = TreeMultimap.create(BYTES_COMPARATOR, + rangeCmp); + + // march through all splits from the start points + for (Entry> start : starts.asMap().entrySet()) { + byte[] key = start.getKey(); + for (R r : start.getValue()) { + regions.put(key, r); + + for (byte[] coveredSplit : splits.subSet(r.getStartKey(), + specialEndKey(r))) { + regions.put(coveredSplit, r); + } + } + } + return regions; + } + + public TreeSet getSplits() { + return splits; + } + + public Multimap getStarts() { + return starts; + } + +} diff --git a/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitCalculator.java b/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitCalculator.java new file mode 100644 index 000000000000..4dc2a8188ae9 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitCalculator.java @@ -0,0 +1,350 @@ +/** + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.assertEquals; + +import java.util.Collection; +import java.util.Comparator; +import java.util.SortedSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.junit.Test; + +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.Multimap; + +public class TestRegionSplitCalculator { + final static Log LOG = LogFactory.getLog(TestRegionSplitCalculator.class); + + /** + * This is range uses a user specified start and end keys. It also has an + * extra time based tiebreaker so that different ranges with the same + * start/end key pair count as different regions. + */ + static class SimpleRange implements KeyRange { + byte[] start, end; + long tiebreaker; + + SimpleRange(byte[] start, byte[] end) { + this.start = start; + this.end = end; + this.tiebreaker = System.nanoTime(); + } + + @Override + public byte[] getStartKey() { + return start; + } + + @Override + public byte[] getEndKey() { + return end; + } + + public String toString() { + return "[" + Bytes.toString(start) + ", " + Bytes.toString(end) + "]"; + } + } + + Comparator cmp = new Comparator() { + @Override + public int compare(SimpleRange sr1, SimpleRange sr2) { + ComparisonChain cc = ComparisonChain.start(); + cc = cc.compare(sr1.getStartKey(), sr2.getStartKey(), + Bytes.BYTES_COMPARATOR); + cc = cc.compare(sr1.getEndKey(), sr2.getEndKey(), + RegionSplitCalculator.BYTES_COMPARATOR); + cc = cc.compare(sr1.tiebreaker, sr2.tiebreaker); + return cc.result(); + } + }; + + /** + * Check the "depth" (number of regions included at a split) of a generated + * split calculation + */ + void checkDepths(SortedSet splits, + Multimap regions, Integer... depths) { + assertEquals(splits.size(), depths.length); + int i = 0; + for (byte[] k : splits) { + Collection rs = regions.get(k); + int sz = rs == null ? 0 : rs.size(); + assertEquals((int) depths[i], sz); + i++; + } + } + + /** + * This dumps data in a visually reasonable way for visual debugging. It has + * the basic iteration structure. + */ + String dump(SortedSet splits, Multimap regions) { + // we display this way because the last end key should be displayed as well. + StringBuilder sb = new StringBuilder(); + for (byte[] k : splits) { + sb.append(Bytes.toString(k) + ":\t"); + for (SimpleRange r : regions.get(k)) { + sb.append(r.toString() + "\t"); + } + sb.append("\n"); + } + String s = sb.toString(); + LOG.info("\n" + s); + return s; + } + + @Test + public void testSplitCalculator() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")); + SimpleRange b = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("C")); + SimpleRange c = new SimpleRange(Bytes.toBytes("C"), Bytes.toBytes("D")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + sc.add(c); + + Multimap regions = sc.calcCoverage(); + LOG.info("Standard"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 1, 1, 0); + assertEquals(res, "A:\t[A, B]\t\n" + "B:\t[B, C]\t\n" + "C:\t[C, D]\t\n" + + "D:\t\n"); + } + + @Test + public void testSplitCalculatorNoEdge() { + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + + Multimap regions = sc.calcCoverage(); + LOG.info("Empty"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions); + assertEquals(res, ""); + } + + @Test + public void testSplitCalculatorSingleEdge() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + + Multimap regions = sc.calcCoverage(); + LOG.info("Single edge"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 0); + assertEquals(res, "A:\t[A, B]\t\n" + "B:\t\n"); + } + + @Test + public void testSplitCalculatorDegenerateEdge() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("A")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + + Multimap regions = sc.calcCoverage(); + LOG.info("Single empty edge"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1); + assertEquals(res, "A:\t[A, A]\t\n"); + } + + @Test + public void testSplitCalculatorCoverSplit() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")); + SimpleRange b = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("C")); + SimpleRange c = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + sc.add(c); + + Multimap regions = sc.calcCoverage(); + LOG.info("AC covers AB, BC"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 2, 2, 0); + assertEquals(res, "A:\t[A, B]\t[A, C]\t\n" + "B:\t[A, C]\t[B, C]\t\n" + + "C:\t\n"); + } + + @Test + public void testSplitCalculatorOverEndpoint() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")); + SimpleRange b = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("C")); + SimpleRange c = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("D")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + sc.add(c); + + Multimap regions = sc.calcCoverage(); + LOG.info("AB, BD covers BC"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 2, 1, 0); + assertEquals(res, "A:\t[A, B]\t\n" + "B:\t[B, C]\t[B, D]\t\n" + + "C:\t[B, D]\t\n" + "D:\t\n"); + } + + @Test + public void testSplitCalculatorHoles() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")); + SimpleRange b = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("C")); + SimpleRange c = new SimpleRange(Bytes.toBytes("E"), Bytes.toBytes("F")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + sc.add(c); + + Multimap regions = sc.calcCoverage(); + LOG.info("Hole between C and E"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 1, 0, 1, 0); + assertEquals(res, "A:\t[A, B]\t\n" + "B:\t[B, C]\t\n" + "C:\t\n" + + "E:\t[E, F]\t\n" + "F:\t\n"); + } + + @Test + public void testSplitCalculatorOverreach() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")); + SimpleRange b = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("D")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + + Multimap regions = sc.calcCoverage(); + LOG.info("AC and BD overlap but share no start/end keys"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 2, 1, 0); + assertEquals(res, "A:\t[A, C]\t\n" + "B:\t[A, C]\t[B, D]\t\n" + + "C:\t[B, D]\t\n" + "D:\t\n"); + } + + @Test + public void testSplitCalculatorFloor() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")); + SimpleRange b = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + + Multimap regions = sc.calcCoverage(); + LOG.info("AC and AB overlap in the beginning"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 2, 1, 0); + assertEquals(res, "A:\t[A, B]\t[A, C]\t\n" + "B:\t[A, C]\t\n" + "C:\t\n"); + } + + @Test + public void testSplitCalculatorCeil() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")); + SimpleRange b = new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("C")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + + Multimap regions = sc.calcCoverage(); + LOG.info("AC and BC overlap in the end"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 2, 0); + assertEquals(res, "A:\t[A, C]\t\n" + "B:\t[A, C]\t[B, C]\t\n" + "C:\t\n"); + } + + @Test + public void testSplitCalculatorEq() { + SimpleRange a = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")); + SimpleRange b = new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + sc.add(b); + + Multimap regions = sc.calcCoverage(); + LOG.info("AC and AC overlap completely"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 2, 0); + assertEquals(res, "A:\t[A, C]\t[A, C]\t\n" + "C:\t\n"); + } + + @Test + public void testSplitCalculatorBackwards() { + SimpleRange a = new SimpleRange(Bytes.toBytes("C"), Bytes.toBytes("A")); + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(a); + + Multimap regions = sc.calcCoverage(); + LOG.info("CA is backwards"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions); // expect nothing + assertEquals(res, ""); + } + + @Test + public void testComplex() { + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("Am"))); + sc.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("C"))); + sc.add(new SimpleRange(Bytes.toBytes("Am"), Bytes.toBytes("C"))); + sc.add(new SimpleRange(Bytes.toBytes("D"), Bytes.toBytes("E"))); + sc.add(new SimpleRange(Bytes.toBytes("F"), Bytes.toBytes("G"))); + sc.add(new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes("E"))); + sc.add(new SimpleRange(Bytes.toBytes("H"), Bytes.toBytes("I"))); + sc.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B"))); + + Multimap regions = sc.calcCoverage(); + LOG.info("Something fairly complex"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 3, 3, 3, 1, 2, 0, 1, 0, 1, 0); + assertEquals(res, "A:\t[A, Am]\t[A, B]\t[A, C]\t\n" + + "Am:\t[A, B]\t[A, C]\t[Am, C]\t\n" + + "B:\t[A, C]\t[Am, C]\t[B, E]\t\n" + "C:\t[B, E]\t\n" + + "D:\t[B, E]\t[D, E]\t\n" + "E:\t\n" + "F:\t[F, G]\t\n" + "G:\t\n" + + "H:\t[H, I]\t\n" + "I:\t\n"); + } + + @Test + public void testBeginEndMarker() { + RegionSplitCalculator sc = new RegionSplitCalculator( + cmp); + sc.add(new SimpleRange(Bytes.toBytes(""), Bytes.toBytes("A"))); + sc.add(new SimpleRange(Bytes.toBytes("A"), Bytes.toBytes("B"))); + sc.add(new SimpleRange(Bytes.toBytes("B"), Bytes.toBytes(""))); + + Multimap regions = sc.calcCoverage(); + LOG.info("Special cases -- empty"); + String res = dump(sc.getSplits(), regions); + checkDepths(sc.getSplits(), regions, 1, 1, 1, 0); + assertEquals(res, ":\t[, A]\t\n" + "A:\t[A, B]\t\n" + "B:\t[B, ]\t\n" + + "null:\t\n"); + } +}