Skip to content

Commit b8ba3f7

Browse files
authored
HBASE-26255 Add an option to use region location from meta table in TableSnapshotInputFormat (#3661)
Signed-off-by: Anoop Sam John <anoopsamjohn@apache.org>
1 parent fae6261 commit b8ba3f7

File tree

2 files changed

+80
-12
lines changed

2 files changed

+80
-12
lines changed

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,14 @@
3131
import org.apache.hadoop.fs.Path;
3232
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
3333
import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
34+
import org.apache.hadoop.hbase.HRegionLocation;
3435
import org.apache.hadoop.hbase.PrivateCellUtil;
3536
import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
37+
import org.apache.hadoop.hbase.client.Connection;
38+
import org.apache.hadoop.hbase.client.ConnectionFactory;
3639
import org.apache.hadoop.hbase.client.IsolationLevel;
3740
import org.apache.hadoop.hbase.client.RegionInfo;
41+
import org.apache.hadoop.hbase.client.RegionLocator;
3842
import org.apache.hadoop.hbase.client.Result;
3943
import org.apache.hadoop.hbase.client.Scan;
4044
import org.apache.hadoop.hbase.client.TableDescriptor;
@@ -101,6 +105,15 @@ public class TableSnapshotInputFormatImpl {
101105
"hbase.TableSnapshotInputFormat.locality.enabled";
102106
public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT = true;
103107

108+
/**
109+
* Whether to derive each snapshot region's split location from the region location stored in meta.
110+
* This is much faster than computing HDFS block locations for the splits.
111+
*/
112+
public static final String SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION =
113+
"hbase.TableSnapshotInputFormat.locality.by.region.location";
114+
115+
public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT = false;
116+
104117
/**
105118
* In some scenarios, scan a limited number of rows on each InputSplit for sampling data extraction
106119
*/
@@ -392,17 +405,49 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
392405
SNAPSHOT_INPUTFORMAT_SCAN_METRICS_ENABLED_DEFAULT);
393406
scan.setScanMetricsEnabled(scanMetricsEnabled);
394407

408+
boolean useRegionLoc = conf.getBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION,
409+
SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT);
410+
411+
Connection connection = null;
412+
RegionLocator regionLocator = null;
413+
if (localityEnabled && useRegionLoc) {
414+
Configuration newConf = new Configuration(conf);
415+
newConf.setInt("hbase.hconnection.threads.max", 1);
416+
try {
417+
connection = ConnectionFactory.createConnection(newConf);
418+
regionLocator = connection.getRegionLocator(htd.getTableName());
419+
420+
/* Get all locations for the table and cache them */
421+
regionLocator.getAllRegionLocations();
422+
} finally {
423+
if (connection != null) {
424+
connection.close();
425+
}
426+
}
427+
}
428+
395429
List<InputSplit> splits = new ArrayList<>();
396430
for (RegionInfo hri : regionManifests) {
397431
// load region descriptor
432+
List<String> hosts = null;
433+
if (localityEnabled) {
434+
if (regionLocator != null) {
435+
/* Get Location from the local cache */
436+
HRegionLocation
437+
location = regionLocator.getRegionLocation(hri.getStartKey(), false);
438+
439+
hosts = new ArrayList<>(1);
440+
hosts.add(location.getHostname());
441+
} else {
442+
hosts = calculateLocationsForInputSplit(conf, htd, hri, tableDir);
443+
}
444+
}
398445

399446
if (numSplits > 1) {
400447
byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true);
401448
for (int i = 0; i < sp.length - 1; i++) {
402449
if (PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), sp[i],
403450
sp[i + 1])) {
404-
List<String> hosts =
405-
calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled);
406451

407452
Scan boundedScan = new Scan(scan);
408453
if (scan.getStartRow().length == 0) {
@@ -425,8 +470,7 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
425470
} else {
426471
if (PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
427472
hri.getStartKey(), hri.getEndKey())) {
428-
List<String> hosts =
429-
calculateLocationsForInputSplit(conf, htd, hri, tableDir, localityEnabled);
473+
430474
splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
431475
}
432476
}
@@ -440,14 +484,9 @@ public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
440484
* only when localityEnabled is true.
441485
*/
442486
private static List<String> calculateLocationsForInputSplit(Configuration conf,
443-
TableDescriptor htd, RegionInfo hri, Path tableDir, boolean localityEnabled)
444-
throws IOException {
445-
if (localityEnabled) { // care block locality
446-
return getBestLocations(conf,
447-
HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
448-
} else { // do not care block locality
449-
return null;
450-
}
487+
TableDescriptor htd, RegionInfo hri, Path tableDir)
488+
throws IOException {
489+
return getBestLocations(conf, HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
451490
}
452491

453492
/**

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT;
2121
import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY;
2222
import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT;
23+
import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION;
24+
import static org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl.SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT;
2325
import static org.mockito.Mockito.mock;
2426
import static org.mockito.Mockito.when;
2527

@@ -198,6 +200,18 @@ public void testInitTableSnapshotMapperJobConfig() throws Exception {
198200
}
199201
}
200202

203+
@Test
204+
public void testWithMockedMapReduceSingleRegionByRegionLocation() throws Exception {
205+
Configuration conf = UTIL.getConfiguration();
206+
conf.setBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION, true);
207+
try {
208+
testWithMockedMapReduce(UTIL, name.getMethodName() + "Snapshot", 1, 1, 1,
209+
true);
210+
} finally {
211+
conf.unset(SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION);
212+
}
213+
}
214+
201215
@Override
202216
public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
203217
String snapshotName, Path tmpTableDir) throws Exception {
@@ -218,6 +232,8 @@ public void testWithMockedMapReduce(HBaseTestingUtil util, String snapshotName,
218232

219233
Configuration conf = util.getConfiguration();
220234
conf.setBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY, setLocalityEnabledTo);
235+
conf.setBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION,
236+
SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT);
221237
Job job = new Job(conf);
222238
Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
223239
Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow()); // limit the scan
@@ -406,13 +422,26 @@ private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumS
406422
job.getConfiguration().getBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY,
407423
SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT);
408424

425+
boolean byRegionLoc =
426+
job.getConfiguration().getBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION,
427+
SNAPSHOT_INPUTFORMAT_LOCALITY_BY_REGION_LOCATION_DEFAULT);
409428
for (int i = 0; i < splits.size(); i++) {
410429
// validate input split
411430
InputSplit split = splits.get(i);
412431
Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
413432
TableSnapshotRegionSplit snapshotRegionSplit = (TableSnapshotRegionSplit) split;
414433
if (localityEnabled) {
415434
Assert.assertTrue(split.getLocations() != null && split.getLocations().length != 0);
435+
if (byRegionLoc) {
436+
// When it uses region location from meta, the hostname will be "localhost",
437+
// whereas the location derived from HDFS block locations is "127.0.0.1".
438+
Assert.assertEquals(1, split.getLocations().length);
439+
Assert.assertTrue("Not using region location!",
440+
split.getLocations()[0].equals("localhost"));
441+
} else {
442+
Assert.assertTrue("Not using region location!",
443+
split.getLocations()[0].equals("127.0.0.1"));
444+
}
416445
} else {
417446
Assert.assertTrue(split.getLocations() != null && split.getLocations().length == 0);
418447
}

0 commit comments

Comments
 (0)