Skip to content

Commit d0b2c58

Browse files
committed
HDFS-12516. Suppress the fsnamesystem lock warning on nn startup. Contributed by Ajay Kumar.
1 parent 415e5a1 commit d0b2c58

File tree

3 files changed

+76
-5
lines changed

3 files changed

+76
-5
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1095,7 +1095,7 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
10951095
if (!success) {
10961096
fsImage.close();
10971097
}
1098-
writeUnlock("loadFSImage");
1098+
writeUnlock("loadFSImage", true);
10991099
}
11001100
imageLoadComplete();
11011101
}
@@ -1586,6 +1586,11 @@ public void writeUnlock() {
15861586
public void writeUnlock(String opName) {
15871587
this.fsLock.writeUnlock(opName);
15881588
}
1589+
1590+
public void writeUnlock(String opName, boolean suppressWriteLockReport) {
1591+
this.fsLock.writeUnlock(opName, suppressWriteLockReport);
1592+
}
1593+
15891594
@Override
15901595
public boolean hasWriteLock() {
15911596
return this.fsLock.isWriteLockedByCurrentThread();

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,34 @@ public void writeLockInterruptibly() throws InterruptedException {
207207
}
208208
}
209209

210+
/**
211+
* Unlocks the FSNamesystem write lock. This internally calls {@link
212+
* FSNamesystemLock#writeUnlock(String, boolean)}
213+
*/
210214
public void writeUnlock() {
211-
writeUnlock(OP_NAME_OTHER);
215+
writeUnlock(OP_NAME_OTHER, false);
212216
}
213217

218+
/**
219+
* Unlocks the FSNamesystem write lock. This internally calls {@link
220+
* FSNamesystemLock#writeUnlock(String, boolean)}
221+
*
222+
* @param opName Operation name.
223+
*/
214224
public void writeUnlock(String opName) {
215-
final boolean needReport = coarseLock.getWriteHoldCount() == 1 &&
216-
coarseLock.isWriteLockedByCurrentThread();
225+
writeUnlock(opName, false);
226+
}
227+
228+
/**
229+
* Unlocks the FSNamesystem write lock.
230+
*
231+
* @param opName Operation name
232+
* @param suppressWriteLockReport When false, the event of the write lock
233+
* being held for a long time is recorded in the logs and metrics.
234+
*/
235+
public void writeUnlock(String opName, boolean suppressWriteLockReport) {
236+
final boolean needReport = !suppressWriteLockReport && coarseLock
237+
.getWriteHoldCount() == 1 && coarseLock.isWriteLockedByCurrentThread();
217238
final long currentTimeNanos = timer.monotonicNowNanos();
218239
final long writeLockIntervalNanos =
219240
currentTimeNanos - writeLockHeldTimeStampNanos;

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.concurrent.TimeUnit;
3939
import java.util.concurrent.TimeoutException;
4040
import java.util.regex.Pattern;
41+
import org.slf4j.LoggerFactory;
4142

4243
import static org.junit.Assert.*;
4344
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_FSLOCK_FAIR_KEY;
@@ -347,7 +348,7 @@ public void testDetailedHoldMetrics() throws Exception {
347348

348349
fsLock.writeLock();
349350
timer.advance(1);
350-
fsLock.writeUnlock("baz");
351+
fsLock.writeUnlock("baz", false);
351352

352353
MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder();
353354
rates.snapshot(rb, true);
@@ -360,4 +361,48 @@ public void testDetailedHoldMetrics() throws Exception {
360361
assertCounter("FSNWriteLockBazNanosNumOps", 1L, rb);
361362
}
362363

364+
/**
365+
* Tests that the FSNamesystem write lock report is suppressed when the lock
366+
* is held for a long time.
367+
*/
368+
@Test(timeout = 45000)
369+
public void testFSWriteLockReportSuppressed() throws Exception {
370+
final long writeLockReportingThreshold = 1L;
371+
final long writeLockSuppressWarningInterval = 10L;
372+
Configuration conf = new Configuration();
373+
conf.setLong(
374+
DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY,
375+
writeLockReportingThreshold);
376+
conf.setTimeDuration(DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY,
377+
writeLockSuppressWarningInterval, TimeUnit.MILLISECONDS);
378+
379+
final FakeTimer timer = new FakeTimer();
380+
final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null, timer);
381+
timer.advance(writeLockSuppressWarningInterval);
382+
383+
LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
384+
GenericTestUtils
385+
.setLogLevel(LoggerFactory.getLogger(FSNamesystem.class.getName()),
386+
org.slf4j.event.Level.INFO);
387+
388+
// Should trigger the write lock report
389+
fsnLock.writeLock();
390+
timer.advance(writeLockReportingThreshold + 100);
391+
fsnLock.writeUnlock();
392+
assertTrue(logs.getOutput().contains(
393+
"FSNamesystem write lock held for"));
394+
395+
logs.clearOutput();
396+
397+
// Suppress report if the write lock is held for a long time
398+
fsnLock.writeLock();
399+
timer.advance(writeLockReportingThreshold + 100);
400+
fsnLock.writeUnlock("testFSWriteLockReportSuppressed", true);
401+
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
402+
assertFalse(logs.getOutput().contains(
403+
"Number of suppressed write-lock reports:"));
404+
assertFalse(logs.getOutput().contains(
405+
"FSNamesystem write lock held for"));
406+
}
407+
363408
}

0 commit comments

Comments
 (0)