26
26
import com .google .errorprone .annotations .RestrictedApi ;
27
27
import com .google .protobuf .Descriptors ;
28
28
import com .google .protobuf .Service ;
29
+ import io .opentelemetry .api .trace .Span ;
30
+ import io .opentelemetry .api .trace .StatusCode ;
31
+ import io .opentelemetry .context .Scope ;
29
32
import java .io .IOException ;
30
33
import java .io .InterruptedIOException ;
31
34
import java .lang .reflect .Constructor ;
213
216
import org .apache .hadoop .hbase .security .AccessDeniedException ;
214
217
import org .apache .hadoop .hbase .security .SecurityConstants ;
215
218
import org .apache .hadoop .hbase .security .UserProvider ;
219
+ import org .apache .hadoop .hbase .trace .TraceUtil ;
216
220
import org .apache .hadoop .hbase .util .Addressing ;
217
221
import org .apache .hadoop .hbase .util .Bytes ;
218
222
import org .apache .hadoop .hbase .util .EnvironmentEdgeManager ;
@@ -452,7 +456,8 @@ public class HMaster extends HRegionServer implements MasterServices {
452
456
*/
453
457
public HMaster (final Configuration conf ) throws IOException {
454
458
super (conf );
455
- try {
459
+ final Span span = TraceUtil .createSpan ("HMaster.cxtor" );
460
+ try (Scope ignored = span .makeCurrent ()) {
456
461
if (conf .getBoolean (MAINTENANCE_MODE , false )) {
457
462
LOG .info ("Detected {}=true via configuration." , MAINTENANCE_MODE );
458
463
maintenanceMode = true ;
@@ -513,11 +518,15 @@ public HMaster(final Configuration conf) throws IOException {
513
518
cachedClusterId = new CachedClusterId (this , conf );
514
519
515
520
this .regionServerTracker = new RegionServerTracker (zooKeeper , this );
521
+ span .setStatus (StatusCode .OK );
516
522
} catch (Throwable t ) {
517
523
// Make sure we log the exception. HMaster is often started via reflection and the
518
524
// cause of failed startup is lost.
525
+ TraceUtil .setError (span , t );
519
526
LOG .error ("Failed construction of Master" , t );
520
527
throw t ;
528
+ } finally {
529
+ span .end ();
521
530
}
522
531
}
523
532
@@ -540,7 +549,7 @@ protected String getUseThisHostnameInstead(Configuration conf) {
540
549
@ Override
541
550
public void run () {
542
551
try {
543
- Threads .setDaemonThreadRunning (new Thread (() -> {
552
+ Threads .setDaemonThreadRunning (new Thread (() -> TraceUtil . trace (() -> {
544
553
try {
545
554
int infoPort = putUpJettyServer ();
546
555
startActiveMasterManager (infoPort );
@@ -553,23 +562,29 @@ public void run() {
553
562
abort (error , t );
554
563
}
555
564
}
556
- }), getName () + ":becomeActiveMaster" );
565
+ }, "HMaster.becomeActiveMaster" ) ), getName () + ":becomeActiveMaster" );
557
566
// Fall in here even if we have been aborted. Need to run the shutdown services and
558
567
// the super run call will do this for us.
559
568
super .run ();
560
569
} finally {
561
- if (this .clusterSchemaService != null ) {
562
- // If on way out, then we are no longer active master.
563
- this .clusterSchemaService .stopAsync ();
564
- try {
565
- this .clusterSchemaService
566
- .awaitTerminated (getConfiguration ().getInt (HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS ,
567
- DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS ), TimeUnit .SECONDS );
568
- } catch (TimeoutException te ) {
569
- LOG .warn ("Failed shutdown of clusterSchemaService" , te );
570
+ final Span span = TraceUtil .createSpan ("HMaster exiting main loop" );
571
+ try (Scope ignored = span .makeCurrent ()) {
572
+ if (this .clusterSchemaService != null ) {
573
+ // If on way out, then we are no longer active master.
574
+ this .clusterSchemaService .stopAsync ();
575
+ try {
576
+ this .clusterSchemaService
577
+ .awaitTerminated (getConfiguration ().getInt (HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS ,
578
+ DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS ), TimeUnit .SECONDS );
579
+ } catch (TimeoutException te ) {
580
+ LOG .warn ("Failed shutdown of clusterSchemaService" , te );
581
+ }
570
582
}
583
+ this .activeMaster = false ;
584
+ span .setStatus (StatusCode .OK );
585
+ } finally {
586
+ span .end ();
571
587
}
572
- this .activeMaster = false ;
573
588
}
574
589
}
575
590
@@ -3094,36 +3109,38 @@ public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
3094
3109
* Shutdown the cluster. Master runs a coordinated stop of all RegionServers and then itself.
3095
3110
*/
3096
3111
public void shutdown () throws IOException {
3097
- if (cpHost != null ) {
3098
- cpHost .preShutdown ();
3099
- }
3112
+ TraceUtil .trace (() -> {
3113
+ if (cpHost != null ) {
3114
+ cpHost .preShutdown ();
3115
+ }
3100
3116
3101
- // Tell the servermanager cluster shutdown has been called. This makes it so when Master is
3102
- // last running server, it'll stop itself. Next, we broadcast the cluster shutdown by setting
3103
- // the cluster status as down. RegionServers will notice this change in state and will start
3104
- // shutting themselves down. When last has exited, Master can go down.
3105
- if (this .serverManager != null ) {
3106
- this .serverManager .shutdownCluster ();
3107
- }
3108
- if (this .clusterStatusTracker != null ) {
3109
- try {
3110
- this .clusterStatusTracker .setClusterDown ();
3111
- } catch (KeeperException e ) {
3112
- LOG .error ("ZooKeeper exception trying to set cluster as down in ZK" , e );
3117
+ // Tell the servermanager cluster shutdown has been called. This makes it so when Master is
3118
+ // last running server, it'll stop itself. Next, we broadcast the cluster shutdown by setting
3119
+ // the cluster status as down. RegionServers will notice this change in state and will start
3120
+ // shutting themselves down. When last has exited, Master can go down.
3121
+ if (this .serverManager != null ) {
3122
+ this .serverManager .shutdownCluster ();
3113
3123
}
3114
- }
3115
- // Stop the procedure executor. Will stop any ongoing assign, unassign, server crash etc.,
3116
- // processing so we can go down.
3117
- if (this .procedureExecutor != null ) {
3118
- this .procedureExecutor .stop ();
3119
- }
3120
- // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
3121
- // this is what we want especially if the Master is in startup phase doing call outs to
3122
- // hbase:meta, etc. when cluster is down. Without this connection close, we'd have to wait on
3123
- // the rpc to timeout.
3124
- if (this .clusterConnection != null ) {
3125
- this .clusterConnection .close ();
3126
- }
3124
+ if (this .clusterStatusTracker != null ) {
3125
+ try {
3126
+ this .clusterStatusTracker .setClusterDown ();
3127
+ } catch (KeeperException e ) {
3128
+ LOG .error ("ZooKeeper exception trying to set cluster as down in ZK" , e );
3129
+ }
3130
+ }
3131
+ // Stop the procedure executor. Will stop any ongoing assign, unassign, server crash etc.,
3132
+ // processing so we can go down.
3133
+ if (this .procedureExecutor != null ) {
3134
+ this .procedureExecutor .stop ();
3135
+ }
3136
+ // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
3137
+ // this is what we want especially if the Master is in startup phase doing call outs to
3138
+ // hbase:meta, etc. when cluster is down. Without this connection close, we'd have to wait on
3139
+ // the rpc to timeout.
3140
+ if (this .clusterConnection != null ) {
3141
+ this .clusterConnection .close ();
3142
+ }
3143
+ }, "HMaster.shutdown" );
3127
3144
}
3128
3145
3129
3146
public void stopMaster () throws IOException {
0 commit comments