@@ -122,7 +122,7 @@ enum RunningState {
122
122
private final DataNode dn ;
123
123
private final DNConf dnConf ;
124
124
private long prevBlockReportId ;
125
- private long fullBlockReportLeaseId ;
125
+ private volatile long fullBlockReportLeaseId ;
126
126
private final SortedSet <Integer > blockReportSizes =
127
127
Collections .synchronizedSortedSet (new TreeSet <>());
128
128
private final int maxDataLength ;
@@ -483,6 +483,8 @@ List<DatanodeCommand> blockReport(long fullBrLeaseId) throws IOException {
483
483
(nCmds + " commands: " + Joiner .on ("; " ).join (cmds )))) +
484
484
"." );
485
485
}
486
+ scheduler .updateLastBlockReportTime (monotonicNow ());
487
+ scheduler .scheduleNextBlockReport ();
486
488
return cmds .size () == 0 ? null : cmds ;
487
489
}
488
490
@@ -770,10 +772,7 @@ private void offerService() throws Exception {
770
772
LOG .info ("Forcing a full block report to " + nnAddr );
771
773
}
772
774
if ((fullBlockReportLeaseId != 0 ) || forceFullBr ) {
773
- fbrExecutorService .submit (new FBRTaskHandler (fullBlockReportLeaseId ));
774
- fullBlockReportLeaseId = 0 ;
775
- scheduler .updateLastBlockReportTime (monotonicNow ());
776
- scheduler .scheduleNextBlockReport ();
775
+ fbrExecutorService .submit (new FBRTaskHandler ());
777
776
}
778
777
779
778
if (!dn .areCacheReportsDisabledForTests ()) {
@@ -971,20 +970,25 @@ void reportRemoteBadBlock(DatanodeInfo dnInfo, ExtendedBlock block)
971
970
972
971
/**
 * Re-registers this actor with its NameNode.
 *
 * In the added (+) version below, nothing happens unless shouldRun() is
 * true, and the registration work is additionally gated on
 * scheduler.shouldReRegister(); when that gate is closed only an info
 * message is logged. When it is open the method re-fetches the namespace
 * info, clears pending IBRs if the NameNode state is STANDBY or OBSERVER,
 * calls register(nsInfo), records the re-register time, schedules a
 * heartbeat and finally invokes the fault-injector hook.
 *
 * @throws IOException declared by this method; presumably propagated from
 *         retrieveNamespaceInfo() or register() - confirm against those
 *         methods, which are not visible here
 */
void reRegister () throws IOException {
973
972
if (shouldRun ()) {
974
- // re-retrieve namespace info to make sure that, if the NN
975
- // was restarted, we still match its version (HDFS-2120)
976
- NamespaceInfo nsInfo = retrieveNamespaceInfo ();
977
- // HDFS-9917,Standby NN IBR can be very huge if standby namenode is down
978
- // for sometime.
979
- if (state == HAServiceState .STANDBY || state == HAServiceState .OBSERVER ) {
980
- ibrManager .clearIBRs ();
973
+ if (scheduler .shouldReRegister ()) {
974
+ // re-retrieve namespace info to make sure that, if the NN
975
+ // was restarted, we still match its version (HDFS-2120)
976
+ NamespaceInfo nsInfo = retrieveNamespaceInfo ();
977
+ // HDFS-9917: the Standby NN IBR backlog can be very large if the standby
978
+ // namenode has been down for some time, so drop the pending IBRs.
979
+ if (state == HAServiceState .STANDBY || state == HAServiceState .OBSERVER ) {
980
+ ibrManager .clearIBRs ();
981
+ }
982
+ // HDFS-15113: register and trigger an FBR after clearing the IBRs, to
983
+ // avoid missing some block reports to the Standby until the next FBR.
984
+ // Now re-register.
985
+ register (nsInfo );
986
+ scheduler .setReRegisterTime (monotonicNow ());
987
+ scheduler .scheduleHeartbeat ();
988
+ DataNodeFaultInjector .get ().blockUtilSendFullBlockReport ();
989
+ } else {
990
+ LOG .info ("DNA_REGISTER execution interval is too short. Skip." );
981
991
}
982
- // HDFS-15113, register and trigger FBR after clean IBR to avoid missing
983
- // some blocks report to Standby util next FBR.
984
- // and re-register
985
- register (nsInfo );
986
- scheduler .scheduleHeartbeat ();
987
- DataNodeFaultInjector .get ().blockUtilSendFullBlockReport ();
988
992
}
989
993
}
990
994
@@ -1195,10 +1199,7 @@ public void run() {
1195
1199
1196
1200
final class FBRTaskHandler implements Runnable {
1197
1201
1198
- private long fullBlockReportLeaseId ;
1199
-
1200
- private FBRTaskHandler (long fullBlockReportLeaseId ) {
1201
- this .fullBlockReportLeaseId = fullBlockReportLeaseId ;
1202
+ private FBRTaskHandler () {
1202
1203
}
1203
1204
1204
1205
@ Override
@@ -1207,10 +1208,12 @@ public void run() {
1207
1208
List <DatanodeCommand > cmds = null ;
1208
1209
try {
1209
1210
synchronized (sendBRLock ) {
1210
- cmds = blockReport (this . fullBlockReportLeaseId );
1211
+ cmds = blockReport (fullBlockReportLeaseId );
1211
1212
}
1213
+ fullBlockReportLeaseId = 0 ;
1212
1214
commandProcessingThread .enqueue (cmds );
1213
1215
} catch (Throwable t ) {
1216
+ fullBlockReportLeaseId = 0 ;
1214
1217
LOG .warn ("InterruptedException in FBR Task Handler." , t );
1215
1218
sleepAndLogInterrupts (5000 , "offering FBR service" );
1216
1219
synchronized (ibrManager ) {
@@ -1260,6 +1263,7 @@ static class Scheduler {
1260
1263
private final long lifelineIntervalMs ;
1261
1264
private volatile long blockReportIntervalMs ;
1262
1265
private volatile long outliersReportIntervalMs ;
1266
+ private long reRegisterTime = 0 ;
1263
1267
1264
1268
Scheduler (long heartbeatIntervalMs , long lifelineIntervalMs ,
1265
1269
long blockReportIntervalMs , long outliersReportIntervalMs ) {
@@ -1445,6 +1449,14 @@ long getOutliersReportIntervalMs() {
1445
1449
return this .outliersReportIntervalMs ;
1446
1450
}
1447
1451
1452
/**
 * Tells whether enough time has passed since the last recorded
 * re-registration to allow another one.
 *
 * @return true if the difference between monotonicNow() and the stored
 *         reRegisterTime is strictly greater than three heartbeat intervals
 */
+ private boolean shouldReRegister () {
1453
+ return monotonicNow () - reRegisterTime > this .heartbeatIntervalMs * 3 ;
1454
+ }
1455
+
1456
/**
 * Stores the timestamp from which shouldReRegister() measures elapsed time.
 *
 * @param reRegisterTime the new timestamp; the caller in reRegister()
 *        passes monotonicNow()
 */
+ public void setReRegisterTime (long reRegisterTime ) {
1457
+ this .reRegisterTime = reRegisterTime ;
1458
+ }
1459
+
1448
1460
/**
1449
1461
* Wrapped for testing.
1450
1462
* @return
0 commit comments