Skip to content

Commit 00a719e

Browse files
frostruan and huiruan authored
HBASE-27305 add an option to skip file splitting when bulkload hfiles (#4709)
Co-authored-by: huiruan <huiruan@tencent.com> Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent f238a92 commit 00a719e

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ public class BulkLoadHFilesTool extends Configured implements BulkLoadHFiles, To
124124
*/
125125
public static final String BULK_LOAD_HFILES_BY_FAMILY = "hbase.mapreduce.bulkload.by.family";
126126

127+
public static final String FAIL_IF_NEED_SPLIT_HFILE =
128+
"hbase.loadincremental.fail.if.need.split.hfile";
129+
127130
// We use a '.' prefix which is ignored when walking directory trees
128131
// above. It is invalid family name.
129132
static final String TMP_DIR = ".tmp";
@@ -141,6 +144,7 @@ public class BulkLoadHFilesTool extends Configured implements BulkLoadHFiles, To
141144

142145
private List<String> clusterIds = new ArrayList<>();
143146
private boolean replicate = true;
147+
private boolean failIfNeedSplitHFile = false;
144148

145149
public BulkLoadHFilesTool(Configuration conf) {
146150
// make a copy, just to be sure we're not overriding someone else's config
@@ -159,6 +163,7 @@ public void initialize() {
159163
nrThreads =
160164
conf.getInt("hbase.loadincremental.threads.max", Runtime.getRuntime().availableProcessors());
161165
bulkLoadByFamily = conf.getBoolean(BULK_LOAD_HFILES_BY_FAMILY, false);
166+
failIfNeedSplitHFile = conf.getBoolean(FAIL_IF_NEED_SPLIT_HFILE, false);
162167
}
163168

164169
// Initialize a thread pool
@@ -699,6 +704,11 @@ CacheConfig.DISABLED, true, getConf())) {
699704
Bytes.compareTo(last.get(), startEndKeys.get(firstKeyRegionIdx).getSecond()) < 0 || Bytes
700705
.equals(startEndKeys.get(firstKeyRegionIdx).getSecond(), HConstants.EMPTY_BYTE_ARRAY);
701706
if (!lastKeyInRange) {
707+
if (failIfNeedSplitHFile) {
708+
throw new IOException(
709+
"The key range of hfile=" + hfilePath + " fits into no region. " + "And because "
710+
+ FAIL_IF_NEED_SPLIT_HFILE + " was set to true, we just skip the next steps.");
711+
}
702712
int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
703713
int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) / 2;
704714
// make sure the splitPoint is valid in case region overlap occur, maybe the splitPoint bigger

hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestBulkLoadHFiles.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static org.apache.hadoop.hbase.HBaseTestingUtil.countRows;
2121
import static org.junit.Assert.assertArrayEquals;
2222
import static org.junit.Assert.assertEquals;
23+
import static org.junit.Assert.assertThrows;
2324
import static org.junit.Assert.assertTrue;
2425
import static org.junit.Assert.fail;
2526

@@ -782,4 +783,29 @@ protected CompletableFuture<Collection<LoadQueueItem>> tryAtomicRegionLoad(
782783
util.getConfiguration().setBoolean(BulkLoadHFilesTool.BULK_LOAD_HFILES_BY_FAMILY, false);
783784
}
784785
}
786+
787+
@Test
788+
public void testFailIfNeedSplitHFile() throws IOException {
789+
TableName tableName = TableName.valueOf(tn.getMethodName());
790+
Table table = util.createTable(tableName, FAMILY);
791+
792+
util.loadTable(table, FAMILY);
793+
794+
FileSystem fs = util.getTestFileSystem();
795+
Path sfPath = new Path(fs.getWorkingDirectory(), new Path(Bytes.toString(FAMILY), "file"));
796+
HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, QUALIFIER,
797+
Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
798+
799+
util.getAdmin().split(tableName);
800+
util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1);
801+
802+
Configuration config = new Configuration(util.getConfiguration());
803+
config.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);
804+
BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);
805+
806+
String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() };
807+
assertThrows(IOException.class, () -> tool.run(args));
808+
util.getHBaseCluster().getRegions(tableName)
809+
.forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size()));
810+
}
785811
}

0 commit comments

Comments
 (0)