Skip to content

Commit 3a658b6

Browse files
author
ukumawat
committed
HBASE-28951 add server shortName in temp recovered edits files
1 parent ea9babf commit 3a658b6

File tree

3 files changed

+93
-12
lines changed

3 files changed

+93
-12
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractRecoveredEditsOutputSink.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
import java.io.EOFException;
2424
import java.io.IOException;
25+
import java.net.URLEncoder;
26+
import java.nio.charset.StandardCharsets;
2527
import java.util.ArrayList;
2628
import java.util.List;
2729
import java.util.Map;
@@ -30,8 +32,10 @@
3032
import org.apache.hadoop.fs.Path;
3133
import org.apache.hadoop.hbase.Cell;
3234
import org.apache.hadoop.hbase.CellUtil;
35+
import org.apache.hadoop.hbase.ServerName;
3336
import org.apache.hadoop.hbase.TableName;
3437
import org.apache.hadoop.hbase.log.HBaseMarkers;
38+
import org.apache.hadoop.hbase.util.Addressing;
3539
import org.apache.hadoop.hbase.util.Bytes;
3640
import org.apache.hadoop.ipc.RemoteException;
3741
import org.apache.yetus.audience.InterfaceAudience;
@@ -57,11 +61,9 @@ protected RecoveredEditsWriter createRecoveredEditsWriter(TableName tableName, b
5761
long seqId) throws IOException {
5862
// If multiple workers are splitting a WAL at the same time, each should use a unique file name to
5963
// avoid conflict
60-
long workerStartCode =
61-
walSplitter.rsServices != null ? walSplitter.rsServices.getServerName().getStartCode() : 0L;
6264
Path regionEditsPath = getRegionSplitEditsPath(tableName, region, seqId,
6365
walSplitter.getFileBeingSplit().getPath().getName(), walSplitter.getTmpDirName(),
64-
walSplitter.conf, workerStartCode);
66+
walSplitter.conf, getWorkerNameComponent());
6567

6668
if (walSplitter.walFS.exists(regionEditsPath)) {
6769
LOG.warn("Found old edits file. It could be the "
@@ -78,6 +80,16 @@ protected RecoveredEditsWriter createRecoveredEditsWriter(TableName tableName, b
7880
return new RecoveredEditsWriter(region, regionEditsPath, w, seqId);
7981
}
8082

83+
private String getWorkerNameComponent() {
84+
if (walSplitter.rsServices == null) {
85+
return "";
86+
}
87+
return URLEncoder.encode(
88+
walSplitter.rsServices.getServerName().toShortString()
89+
.replace(Addressing.HOSTNAME_PORT_SEPARATOR, ServerName.SERVERNAME_SEPARATOR),
90+
StandardCharsets.UTF_8);
91+
}
92+
8193
/**
8294
* abortRecoveredEditsWriter closes the editsWriter, but does not rename and finalize the
8395
* recovered edits WAL files. Please see HBASE-28569.
@@ -143,6 +155,8 @@ protected Path closeRecoveredEditsWriterAndFinalizeEdits(RecoveredEditsWriter ed
143155
private boolean deleteTmpIfDstHasNoLessEntries(RecoveredEditsWriter editsWriter, Path dst)
144156
throws IOException {
145157
if (walSplitter.walFS.exists(dst) && !isDstHasFewerEntries(editsWriter, dst)) {
158+
LOG.info("Destination {} already have no fewer entries so deleting tmp recovered edits file {}", dst,
159+
editsWriter.path);
146160
if (!walSplitter.walFS.delete(editsWriter.path, false)) {
147161
LOG.warn("Failed deleting of {}", editsWriter.path);
148162
throw new IOException("Failed deleting of " + editsWriter.path);

hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitUtil.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,17 +151,18 @@ public static void moveWAL(FileSystem fs, Path p, Path targetDir) throws IOExcep
151151
* /hbase/some_table/2323432434/recovered.edits/2332. This method also ensures existence of
152152
* RECOVERED_EDITS_DIR under the region creating it if necessary. And also set storage policy for
153153
* RECOVERED_EDITS_DIR if WAL_STORAGE_POLICY is configured.
154-
* @param tableName the table name
155-
* @param encodedRegionName the encoded region name
156-
* @param seqId the sequence id which used to generate file name
157-
* @param fileNameBeingSplit the file being split currently. Used to generate tmp file name.
158-
* @param tmpDirName of the directory used to sideline old recovered edits file
159-
* @param conf configuration
154+
* @param tableName the table name
155+
* @param encodedRegionName the encoded region name
156+
* @param seqId the sequence id which is used to generate the file name
157+
* @param fileNameBeingSplit the file being split currently. Used to generate tmp file name.
158+
* @param tmpDirName name of the directory used to sideline old recovered edits files
159+
* @param conf configuration
160+
* @param workerNameComponent the worker name component for the file name
160161
* @return Path to file into which to dump split log edits.
161162
*/
162163
@SuppressWarnings("deprecation")
163164
static Path getRegionSplitEditsPath(TableName tableName, byte[] encodedRegionName, long seqId,
164-
String fileNameBeingSplit, String tmpDirName, Configuration conf, long workerStartCode)
165+
String fileNameBeingSplit, String tmpDirName, Configuration conf, String workerNameComponent)
165166
throws IOException {
166167
FileSystem walFS = CommonFSUtils.getWALFileSystem(conf);
167168
Path tableDir = CommonFSUtils.getWALTableDir(conf, tableName);
@@ -195,7 +196,7 @@ static Path getRegionSplitEditsPath(TableName tableName, byte[] encodedRegionNam
195196
// region's replayRecoveredEdits will not delete it
196197
String fileName = formatRecoveredEditsFileName(seqId);
197198
fileName =
198-
getTmpRecoveredEditsFileName(fileName + "-" + fileNameBeingSplit + "-" + workerStartCode);
199+
getTmpRecoveredEditsFileName(fileName + "-" + fileNameBeingSplit + "-" + workerNameComponent);
199200
return new Path(dir, fileName);
200201
}
201202

hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestWALSplit.java

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
import org.apache.hadoop.hbase.coordination.SplitLogWorkerCoordination;
6767
import org.apache.hadoop.hbase.master.SplitLogManager;
6868
import org.apache.hadoop.hbase.regionserver.HRegion;
69+
import org.apache.hadoop.hbase.regionserver.LastSequenceId;
70+
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
6971
import org.apache.hadoop.hbase.regionserver.wal.AbstractProtobufWALReader;
7072
import org.apache.hadoop.hbase.regionserver.wal.FaultyProtobufWALStreamReader;
7173
import org.apache.hadoop.hbase.regionserver.wal.InstrumentedLogWriter;
@@ -105,6 +107,7 @@
105107
import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
106108

107109
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
110+
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos;
108111
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
109112

110113
/**
@@ -372,6 +375,69 @@ private void loop(final Writer writer) {
372375
}
373376
}
374377

378+
//If another worker is assigned to split a WAl and last worker is still running, both should not impact each other's progress
379+
@Test
380+
public void testTwoWorkerSplittingSameWAL() throws IOException, InterruptedException {
381+
int numWriter = 1, entries = 10;
382+
generateWALs(numWriter, entries, -1, 0);
383+
FileStatus logfile = fs.listStatus(WALDIR)[0];
384+
FileSystem spiedFs = Mockito.spy(fs);
385+
RegionServerServices zombieRSServices = Mockito.mock(RegionServerServices.class);
386+
RegionServerServices newWorkerRSServices = Mockito.mock(RegionServerServices.class);
387+
Mockito.when(zombieRSServices.getServerName())
388+
.thenReturn(ServerName.valueOf("zombie-rs.abc.com,1234,1234567890"));
389+
Mockito.when(newWorkerRSServices.getServerName())
390+
.thenReturn(ServerName.valueOf("worker-rs.abc.com,1234,1234569870"));
391+
Thread zombieWorker = new SplitWALWorker(logfile, spiedFs, zombieRSServices);
392+
Thread newWorker = new SplitWALWorker(logfile, spiedFs, newWorkerRSServices);
393+
zombieWorker.start();
394+
newWorker.start();
395+
newWorker.join();
396+
zombieWorker.join();
397+
398+
for (String region : REGIONS) {
399+
Path[] logfiles = getLogForRegion(TABLE_NAME, region);
400+
assertEquals("wrong number of split files for region", numWriter, logfiles.length);
401+
402+
int count = 0;
403+
for (Path lf : logfiles) {
404+
count += countWAL(lf);
405+
}
406+
assertEquals("wrong number of edits for region " + region, entries, count);
407+
}
408+
}
409+
410+
private class SplitWALWorker extends Thread implements LastSequenceId {
411+
final FileStatus logfile;
412+
final FileSystem fs;
413+
final RegionServerServices rsServices;
414+
415+
public SplitWALWorker(FileStatus logfile, FileSystem fs, RegionServerServices rsServices) {
416+
super(rsServices.getServerName().toShortString());
417+
setDaemon(true);
418+
this.fs = fs;
419+
this.logfile = logfile;
420+
this.rsServices = rsServices;
421+
}
422+
423+
@Override
424+
public void run() {
425+
try {
426+
boolean ret =
427+
WALSplitter.splitLogFile(HBASEDIR, logfile, fs, conf, null, this, null, wals, rsServices);
428+
assertTrue("Both splitting should pass", ret);
429+
} catch (IOException e) {
430+
LOG.warn(getName() + " Worker exiting " + e);
431+
}
432+
}
433+
434+
@Override
435+
public ClusterStatusProtos.RegionStoreSequenceIds getLastSequenceId(byte[] encodedRegionName) {
436+
return ClusterStatusProtos.RegionStoreSequenceIds.newBuilder()
437+
.setLastFlushedSequenceId(HConstants.NO_SEQNUM).build();
438+
}
439+
}
440+
375441
/**
376442
* @see "https://issues.apache.org/jira/browse/HBASE-3020"
377443
*/
@@ -403,7 +469,7 @@ public void testOldRecoveredEditsFileSidelined() throws IOException {
403469
private Path createRecoveredEditsPathForRegion() throws IOException {
404470
byte[] encoded = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
405471
Path p = WALSplitUtil.getRegionSplitEditsPath(TableName.META_TABLE_NAME, encoded, 1,
406-
FILENAME_BEING_SPLIT, TMPDIRNAME, conf, 0L);
472+
FILENAME_BEING_SPLIT, TMPDIRNAME, conf, "");
407473
return p;
408474
}
409475

0 commit comments

Comments
 (0)