
Commit 046cb69

HBASE-29716 Include sequence ID on incremental backup HFiles

1 parent 64a249f

3 files changed: 160 additions & 31 deletions

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java

Lines changed: 1 addition & 0 deletions
@@ -431,6 +431,7 @@ protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws
     conf.set(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ";");
     conf.setBoolean(HFileOutputFormat2.TABLE_NAME_WITH_NAMESPACE_INCLUSIVE_KEY, true);
     conf.setBoolean(WALPlayer.MULTI_TABLES_SUPPORT, true);
+    conf.setBoolean(HFileOutputFormat2.SET_MAX_SEQ_ID_KEY, true);
     conf.setBoolean(HFileOutputFormat2.DISK_BASED_SORTING_ENABLED_KEY, true);
     conf.set(JOB_NAME_CONF_KEY, jobname);
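The new flag is consumed by the WALPlayer bulk-output job that walToHFiles launches. For illustration, a minimal standalone sketch of the same wiring (the output path, WAL directory, and table name below are hypothetical; the constants are the ones set above and exercised in the TestWALPlayer change further down):

  // Sketch only: run WALPlayer to HFiles with MAX_SEQ_ID metadata enabled.
  Configuration conf = HBaseConfiguration.create();
  conf.setBoolean(WALPlayer.MULTI_TABLES_SUPPORT, true);
  conf.setBoolean(HFileOutputFormat2.SET_MAX_SEQ_ID_KEY, true);
  conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, "/tmp/wal-bulk-out"); // hypothetical output dir
  int rc = ToolRunner.run(conf, new WALPlayer(conf),
    new String[] { "/hbase/WALs", "ns:my_table" }); // hypothetical WAL dir and table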

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java

Lines changed: 45 additions & 31 deletions
@@ -21,6 +21,7 @@
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY;
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAX_SEQ_ID_KEY;
 
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
@@ -209,6 +210,13 @@ protected static byte[] combineTableNameSuffix(byte[] tableName, byte[] suffix)
   public static final String REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY =
     REMOTE_CLUSTER_CONF_PREFIX + HConstants.ZOOKEEPER_ZNODE_PARENT;
 
+  /**
+   * Set the MAX_SEQ_ID metadata on the resulting HFile. This will ensure the HFiles will be sorted
+   * properly when read by tools such as the ClientSideRegionScanner. Will have no effect if the
+   * HFile is bulkloaded, as the sequence ID generated when bulkloading will override this metadata.
+   */
+  public static final String SET_MAX_SEQ_ID_KEY = "hbase.hfileoutputformat.set.max.seq.id";
+
   public static final String STORAGE_POLICY_PROPERTY = HStore.BLOCK_STORAGE_POLICY_KEY;
   public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
 
@@ -270,7 +278,7 @@ static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWrit
 
     return new RecordWriter<ImmutableBytesWritable, V>() {
       // Map of families to writers and how much has been output on the writer.
-      private final Map<byte[], WriterLength> writers = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+      private final Map<byte[], WriterInfo> writers = new TreeMap<>(Bytes.BYTES_COMPARATOR);
       private final Map<byte[], byte[]> previousRows = new TreeMap<>(Bytes.BYTES_COMPARATOR);
       private final long now = EnvironmentEdgeManager.currentTime();
      private byte[] tableNameBytes = writeMultipleTables ? null : Bytes.toBytes(writeTableNames);
@@ -300,10 +308,10 @@ public void write(ImmutableBytesWritable row, V cell) throws IOException {
         }
         byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
 
-        WriterLength wl = this.writers.get(tableAndFamily);
+        WriterInfo wi = this.writers.get(tableAndFamily);
 
         // If this is a new column family, verify that the directory exists
-        if (wl == null) {
+        if (wi == null) {
           Path writerPath = null;
           if (writeMultipleTables) {
             Path tableRelPath = getTableRelativePath(tableNameBytes);
@@ -317,14 +325,14 @@ public void write(ImmutableBytesWritable row, V cell) throws IOException {
 
         // This can only happen once a row is finished though
         if (
-          wl != null && wl.written + length >= maxsize
+          wi != null && wi.written + length >= maxsize
             && Bytes.compareTo(this.previousRows.get(family), rowKey) != 0
         ) {
-          rollWriters(wl);
+          rollWriters(wi);
         }
 
         // create a new WAL writer, if necessary
-        if (wl == null || wl.writer == null) {
+        if (wi == null || wi.writer == null) {
           InetSocketAddress[] favoredNodes = null;
           if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
             HRegionLocation loc = null;
@@ -355,14 +363,15 @@ public void write(ImmutableBytesWritable row, V cell) throws IOException {
              }
            }
          }
-          wl = getNewWriter(tableNameBytes, family, conf, favoredNodes);
+          wi = getNewWriter(tableNameBytes, family, conf, favoredNodes);
 
        }
 
        // we now have the proper WAL writer. full steam ahead
        PrivateCellUtil.updateLatestStamp(cell, this.now);
-        wl.writer.append(kv);
-        wl.written += length;
+        wi.writer.append(kv);
+        wi.written += length;
+        wi.maxSequenceId = Math.max(kv.getSequenceId(), wi.maxSequenceId);
 
        // Copy the row so we know when a row transition.
        this.previousRows.put(family, rowKey);
@@ -378,24 +387,25 @@ private Path getTableRelativePath(byte[] tableNameBytes) {
         return tableRelPath;
       }
 
-      private void rollWriters(WriterLength writerLength) throws IOException {
-        if (writerLength != null) {
-          closeWriter(writerLength);
+      private void rollWriters(WriterInfo writerInfo) throws IOException {
+        if (writerInfo != null) {
+          closeWriter(writerInfo);
         } else {
-          for (WriterLength wl : this.writers.values()) {
-            closeWriter(wl);
+          for (WriterInfo wi : this.writers.values()) {
+            closeWriter(wi);
           }
         }
       }
 
-      private void closeWriter(WriterLength wl) throws IOException {
-        if (wl.writer != null) {
+      private void closeWriter(WriterInfo wi) throws IOException {
+        if (wi.writer != null) {
           LOG.info(
-            "Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
-          close(wl.writer);
-          wl.writer = null;
+            "Writer=" + wi.writer.getPath() + ((wi.written == 0) ? "" : ", wrote=" + wi.written));
+          close(wi.writer, wi);
+          wi.writer = null;
         }
-        wl.written = 0;
+        wi.written = 0;
+        wi.maxSequenceId = -1;
       }
 
       private Configuration createRemoteClusterConf(Configuration conf) {
@@ -435,19 +445,19 @@ private Configuration createRemoteClusterConf(Configuration conf) {
 
       /*
        * Create a new StoreFile.Writer.
-       * @return A WriterLength, containing a new StoreFile.Writer.
+       * @return A WriterInfo, containing a new StoreFile.Writer.
        */
       @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
           justification = "Not important")
-      private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration conf,
+      private WriterInfo getNewWriter(byte[] tableName, byte[] family, Configuration conf,
         InetSocketAddress[] favoredNodes) throws IOException {
         byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
         Path familydir = new Path(outputDir, Bytes.toString(family));
         if (writeMultipleTables) {
           familydir =
             new Path(outputDir, new Path(getTableRelativePath(tableName), Bytes.toString(family)));
         }
-        WriterLength wl = new WriterLength();
+        WriterInfo wi = new WriterInfo();
         Algorithm compression = overriddenCompression;
         compression = compression == null ? compressionMap.get(tableAndFamily) : compression;
         compression = compression == null ? defaultCompression : compression;
@@ -474,23 +484,26 @@ private WriterInfo getNewWriter(byte[] tableName, byte[] family, Configuration
 
         HFileContext hFileContext = contextBuilder.build();
         if (null == favoredNodes) {
-          wl.writer =
+          wi.writer =
             new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, fs).withOutputDir(familydir)
               .withBloomType(bloomType).withFileContext(hFileContext).build();
         } else {
-          wl.writer = new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, new HFileSystem(fs))
+          wi.writer = new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, new HFileSystem(fs))
             .withOutputDir(familydir).withBloomType(bloomType).withFileContext(hFileContext)
             .withFavoredNodes(favoredNodes).build();
         }
 
-        this.writers.put(tableAndFamily, wl);
-        return wl;
+        this.writers.put(tableAndFamily, wi);
+        return wi;
       }
 
-      private void close(final StoreFileWriter w) throws IOException {
+      private void close(final StoreFileWriter w, final WriterInfo wl) throws IOException {
         if (w != null) {
           w.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTime()));
           w.appendFileInfo(BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
+          if (conf.getBoolean(SET_MAX_SEQ_ID_KEY, false) && wl.maxSequenceId >= 0) {
+            w.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(wl.maxSequenceId));
+          }
           w.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
           w.appendFileInfo(EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
           w.appendTrackedTimestampsToMetadata();
@@ -500,8 +513,8 @@ private void close(final StoreFileWriter w) throws IOException {
 
       @Override
       public void close(TaskAttemptContext c) throws IOException, InterruptedException {
-        for (WriterLength wl : this.writers.values()) {
-          close(wl.writer);
+        for (WriterInfo wi : this.writers.values()) {
+          close(wi.writer, wi);
         }
       }
     };
@@ -524,8 +537,9 @@ static void configureStoragePolicy(final Configuration conf, final FileSystem fs
   /*
    * Data structure to hold a Writer and amount of data written on it.
   */
-  static class WriterLength {
+  static class WriterInfo {
     long written = 0;
+    long maxSequenceId = -1;
     StoreFileWriter writer = null;
   }
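To verify what the change writes, the MAX_SEQ_ID file-info entry can be read back from a produced HFile. A hedged sketch, assuming the HBase 2.x reader API (HFile.createReader / getHFileInfo); this helper is not part of the commit:

  // Sketch only: inspect the MAX_SEQ_ID metadata on an HFile written with
  // SET_MAX_SEQ_ID_KEY=true. `fs`, `hfilePath`, and `conf` are assumed to exist.
  try (HFile.Reader reader = HFile.createReader(fs, hfilePath, CacheConfig.DISABLED, true, conf)) {
    byte[] raw = reader.getHFileInfo().get(HStoreFile.MAX_SEQ_ID_KEY);
    long maxSeqId = raw == null ? -1 : Bytes.toLong(raw); // -1 means metadata absent
    System.out.println("MAX_SEQ_ID = " + maxSeqId);
  }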

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java

Lines changed: 114 additions & 0 deletions
@@ -36,7 +36,10 @@
 import java.util.concurrent.ThreadLocalRandom;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -45,11 +48,15 @@
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.WALPlayer.WALKeyValueMapper;
 import org.apache.hadoop.hbase.regionserver.TestRecoveredEdits;
@@ -268,6 +275,113 @@ public void testWALPlayerBulkLoadWithOverriddenTimestamps() throws Exception {
     });
   }
 
+  /**
+   * Tests that the sequence IDs of cells are retained in the resulting HFile and usable by a
+   * RegionScanner. It does this by running the WALPlayer multiple times and using a RegionScanner
+   * to read the files; without sequence IDs, the files will be sorted by size or name and will not
+   * always return the correct result.
+   */
+  @Test
+  public void testMaxSeqIdHFileMetadata() throws Exception {
+    final int numEdits = 20;
+    final int flushInterval = 10;
+
+    // Phase 1: Setup test data and configuration
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final byte[] family = Bytes.toBytes("family");
+    final byte[] column = Bytes.toBytes("c1");
+    final byte[] row = Bytes.toBytes("row");
+    final Table table = TEST_UTIL.createTable(tableName, family);
+
+    long now = EnvironmentEdgeManager.currentTime();
+    {
+      Put put = new Put(row);
+      put.addColumn(family, column, now, column);
+      table.put(put);
+    }
+
+    String walInputDir = new Path(cluster.getMaster().getMasterFileSystem().getWALRootDir(),
+      HConstants.HREGION_LOGDIR_NAME).toString();
+    String walPlayerOutputRoot = "/tmp/" + name.getMethodName();
+
+    Configuration walPlayerConfig = new Configuration(TEST_UTIL.getConfiguration());
+    walPlayerConfig.setBoolean(WALPlayer.MULTI_TABLES_SUPPORT, true);
+    walPlayerConfig.setBoolean(HFileOutputFormat2.SET_MAX_SEQ_ID_KEY, true);
+
+    // Phase 2: Write edits with periodic WAL rolling and WALPlayer execution
+    int walPlayerRunCount = 0;
+    byte[] lastVal = null;
+
+    for (int i = 0; i < numEdits; i++) {
+      lastVal = new byte[12];
+      ThreadLocalRandom.current().nextBytes(lastVal);
+
+      Put put = new Put(row);
+      put.addColumn(family, column, now, lastVal);
+      table.put(put);
+
+      // Roll WALs and run WALPlayer every flushInterval iterations
+      if (i > 0 && (i % flushInterval == 0) || i + 1 == numEdits) {
+        WAL log = cluster.getRegionServer(0).getWAL(null);
+        log.rollWriter();
+
+        walPlayerRunCount++;
+        String walPlayerRunDir = walPlayerOutputRoot + "/run_" + walPlayerRunCount;
+        Configuration runConfig = new Configuration(walPlayerConfig);
+        runConfig.set(WALPlayer.BULK_OUTPUT_CONF_KEY, walPlayerRunDir);
+
+        WALPlayer player = new WALPlayer(runConfig);
+        assertEquals(0, ToolRunner.run(runConfig, player,
+          new String[] { walInputDir, tableName.getNameAsString() }));
+      }
+    }
+
+    table.close();
+
+    final byte[] finalLastVal = lastVal;
+
+    // Phase 3: Collect all generated HFiles into proper structure for region scanner
+    TableDescriptor htd = TEST_UTIL.getAdmin().getDescriptor(tableName);
+    RegionInfo regionInfo = cluster.getRegions(tableName).get(0).getRegionInfo();
+    FileSystem fs = cluster.getRegionServer(0).getFileSystem();
+
+    Path regionOutPath = CommonFSUtils.getRegionDir(new Path(walPlayerOutputRoot),
+      htd.getTableName(), regionInfo.getEncodedName());
+    Path familyOutPath = new Path(regionOutPath, new String(family));
+    fs.mkdirs(familyOutPath);
+
+    // Copy all HFiles from each WALPlayer run
+    for (int i = 1; i <= walPlayerRunCount; i++) {
+      Path walPlayerRunPath = new Path(walPlayerOutputRoot, "run_" + i);
+      RemoteIterator<LocatedFileStatus> files =
+        fs.listFiles(new Path(walPlayerRunPath, tableName.getNamespaceAsString()), true);
+
+      while (files.hasNext()) {
+        LocatedFileStatus fileStatus = files.next();
+        // Skip hidden/metadata files (starting with '.')
+        if (fileStatus.isFile() && !fileStatus.getPath().getName().startsWith(".")) {
+          FileUtil.copy(fs, fileStatus.getPath(), fs,
+            new Path(familyOutPath, fileStatus.getPath().getName()), false, walPlayerConfig);
+        }
+      }
+    }
+
+    // Phase 4: Verify sequence IDs are preserved correctly
+    Scan scan = new Scan();
+    try (ClientSideRegionScanner scanner = new ClientSideRegionScanner(walPlayerConfig, fs,
+      new Path(walPlayerOutputRoot), htd, regionInfo, scan, null)) {
+
+      // Verify exactly one row returned
+      Result result = scanner.next();
+      assertThat(result, notNullValue());
+      assertThat(result.listCells(), notNullValue());
+
+      // Verify the value with highest sequence ID (from last iteration) wins
+      byte[] value = CellUtil.cloneValue(result.getColumnLatestCell(family, column));
+      assertThat(Bytes.toStringBinary(value), equalTo(Bytes.toStringBinary(finalLastVal)));
+    }
+  }
+
   /**
    * Simple end-to-end test
   */
*/
