@@ -491,7 +491,8 @@ func (i *Lifecycler) loop(ctx context.Context) error {
491
491
joined := false
492
492
// First, see if we exist in the cluster, update our state to match if we do,
493
493
// and add ourselves (without tokens) if we don't.
494
- if err := i .initRing (context .Background ()); err != nil {
494
+ addedInRing , err := i .initRing (context .Background ())
495
+ if err != nil {
495
496
return errors .Wrapf (err , "failed to join the ring %s" , i .RingName )
496
497
}
497
498
@@ -504,18 +505,23 @@ func (i *Lifecycler) loop(ctx context.Context) error {
504
505
}
505
506
506
507
var heartbeatTickerChan <- chan time.Time
507
- if uint64 (i .cfg .HeartbeatPeriod ) > 0 {
508
- heartbeatTicker := time .NewTicker (i .cfg .HeartbeatPeriod )
509
- heartbeatTicker .Stop ()
510
- // We are jittering for at least half of the time and max the time of the heartbeat.
511
- // If we jitter too soon, we can have problems of concurrency with autoJoin leaving the instance on ACTIVE without tokens
512
- time .AfterFunc (time .Duration (uint64 (i .cfg .HeartbeatPeriod / 2 )+ uint64 (mathrand .Int63 ())% uint64 (i .cfg .HeartbeatPeriod / 2 )), func () {
513
- i .heartbeat (ctx )
514
- heartbeatTicker .Reset (i .cfg .HeartbeatPeriod )
515
- })
516
- defer heartbeatTicker .Stop ()
517
-
518
- heartbeatTickerChan = heartbeatTicker .C
508
+ startHeartbeat := func () {
509
+ if uint64 (i .cfg .HeartbeatPeriod ) > 0 {
510
+ heartbeatTicker := time .NewTicker (i .cfg .HeartbeatPeriod )
511
+ heartbeatTicker .Stop ()
512
+ // Delay the first heartbeat by a random jitter between half of the heartbeat period and the full period.
513
+ // If the first heartbeat fires too soon, it can race with autoJoin and leave the instance ACTIVE without tokens.
514
+ time .AfterFunc (time .Duration (uint64 (i .cfg .HeartbeatPeriod / 2 )+ uint64 (mathrand .Int63 ())% uint64 (i .cfg .HeartbeatPeriod / 2 )), func () {
515
+ i .heartbeat (ctx )
516
+ heartbeatTicker .Reset (i .cfg .HeartbeatPeriod )
517
+ })
518
+ defer heartbeatTicker .Stop ()
519
+
520
+ heartbeatTickerChan = heartbeatTicker .C
521
+ }
522
+ }
523
+ if addedInRing {
524
+ startHeartbeat ()
519
525
}
520
526
521
527
for {
@@ -536,17 +542,21 @@ func (i *Lifecycler) loop(ctx context.Context) error {
536
542
if i .cfg .ObservePeriod > 0 {
537
543
// let's observe the ring. By using JOINING state, this ingester will be ignored by LEAVING
538
544
// ingesters, but we also signal that it is not fully functional yet.
539
- if err := i .autoJoin (context .Background (), JOINING ); err != nil {
545
+ if err := i .autoJoin (context .Background (), JOINING , addedInRing ); err != nil {
540
546
return errors .Wrapf (err , "failed to pick tokens in the KV store, ring: %s" , i .RingName )
541
547
}
542
548
543
549
level .Info (i .logger ).Log ("msg" , "observing tokens before going ACTIVE" , "ring" , i .RingName )
544
550
observeChan = time .After (i .cfg .ObservePeriod )
545
551
} else {
546
- if err := i .autoJoin (context .Background (), i .getPreviousState ()); err != nil {
552
+ if err := i .autoJoin (context .Background (), i .getPreviousState (), addedInRing ); err != nil {
547
553
return errors .Wrapf (err , "failed to pick tokens in the KV store, ring: %s, state: %s" , i .RingName , i .getPreviousState ())
548
554
}
549
555
}
556
+
557
+ if ! addedInRing {
558
+ startHeartbeat ()
559
+ }
550
560
}
551
561
552
562
case <- observeChan :
@@ -565,6 +575,10 @@ func (i *Lifecycler) loop(ctx context.Context) error {
565
575
if err != nil {
566
576
level .Error (i .logger ).Log ("msg" , "failed to set state" , "ring" , i .RingName , "state" , i .getPreviousState (), "err" , err )
567
577
}
578
+
579
+ if ! addedInRing {
580
+ startHeartbeat ()
581
+ }
568
582
} else {
569
583
level .Info (i .logger ).Log ("msg" , "token verification failed, observing" , "ring" , i .RingName )
570
584
// keep observing
@@ -653,12 +667,13 @@ heartbeatLoop:
653
667
// initRing is the first thing we do when we start. It:
654
668
// - add an ingester entry to the ring
655
669
// - copies out our state and tokens if they exist
656
- func (i * Lifecycler ) initRing (ctx context.Context ) error {
670
+ func (i * Lifecycler ) initRing (ctx context.Context ) ( bool , error ) {
657
671
var (
658
672
ringDesc * Desc
659
673
tokensFromFile Tokens
660
674
err error
661
675
)
676
+ addedInRing := true
662
677
663
678
if i .cfg .TokensFilePath != "" {
664
679
tokenFile , err := i .loadTokenFile ()
@@ -692,10 +707,15 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
692
707
level .Info (i .logger ).Log ("msg" , "adding tokens from file" , "num_tokens" , len (tokensFromFile ))
693
708
if len (tokensFromFile ) >= i .cfg .NumTokens && i .autoJoinOnStartup {
694
709
i .setState (i .getPreviousState ())
710
+ state := i .GetState ()
711
+ ringDesc .AddIngester (i .ID , i .Addr , i .Zone , tokensFromFile , state , registeredAt )
712
+ level .Info (i .logger ).Log ("msg" , "auto join on startup, adding with token and state" , "ring" , i .RingName , "state" , state )
713
+ return ringDesc , true , nil
695
714
}
696
- ringDesc .AddIngester (i .ID , i .Addr , i .Zone , tokensFromFile , i .GetState (), registeredAt )
697
715
i .setTokens (tokensFromFile )
698
- return ringDesc , true , nil
716
+ // Return nil instead of the ring to the CAS call, since the instance has not been added to the ring yet.
717
+ addedInRing = false
718
+ return nil , true , nil
699
719
}
700
720
701
721
// Either we are a new ingester, or consul must have restarted
@@ -760,7 +780,7 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
760
780
i .updateCounters (ringDesc )
761
781
}
762
782
763
- return err
783
+ return addedInRing , err
764
784
}
765
785
766
786
func (i * Lifecycler ) RenewTokens (ratio float64 , ctx context.Context ) {
@@ -875,7 +895,7 @@ func (i *Lifecycler) compareTokens(fromRing Tokens) bool {
875
895
}
876
896
877
897
// autoJoin selects random tokens & moves state to targetState
878
- func (i * Lifecycler ) autoJoin (ctx context.Context , targetState InstanceState ) error {
898
+ func (i * Lifecycler ) autoJoin (ctx context.Context , targetState InstanceState , alreadyInRing bool ) error {
879
899
var ringDesc * Desc
880
900
881
901
err := i .KVStore .CAS (ctx , i .RingKey , func (in interface {}) (out interface {}, retry bool , err error ) {
@@ -890,11 +910,16 @@ func (i *Lifecycler) autoJoin(ctx context.Context, targetState InstanceState) er
890
910
// At this point, we should not have any tokens, and we should be in PENDING state.
891
911
// Need to make sure we didn't change the num of tokens configured
892
912
myTokens , _ := ringDesc .TokensFor (i .ID )
913
+ if ! alreadyInRing {
914
+ myTokens = i .getTokens ()
915
+ }
893
916
needTokens := i .cfg .NumTokens - len (myTokens )
894
917
895
918
if needTokens == 0 && myTokens .Equals (i .getTokens ()) {
896
919
// Tokens have been verified. No need to change them.
897
- ringDesc .AddIngester (i .ID , i .Addr , i .Zone , i .getTokens (), i .GetState (), i .getRegisteredAt ())
920
+ state := i .GetState ()
921
+ ringDesc .AddIngester (i .ID , i .Addr , i .Zone , i .getTokens (), state , i .getRegisteredAt ())
922
+ level .Info (i .logger ).Log ("msg" , "auto joined with existing tokens" , "ring" , i .RingName , "state" , state )
898
923
return ringDesc , true , nil
899
924
}
900
925
@@ -908,7 +933,9 @@ func (i *Lifecycler) autoJoin(ctx context.Context, targetState InstanceState) er
908
933
sort .Sort (myTokens )
909
934
i .setTokens (myTokens )
910
935
911
- ringDesc .AddIngester (i .ID , i .Addr , i .Zone , i .getTokens (), i .GetState (), i .getRegisteredAt ())
936
+ state := i .GetState ()
937
+ ringDesc .AddIngester (i .ID , i .Addr , i .Zone , i .getTokens (), state , i .getRegisteredAt ())
938
+ level .Info (i .logger ).Log ("msg" , "auto joined with new tokens" , "ring" , i .RingName , "state" , state )
912
939
913
940
return ringDesc , true , nil
914
941
})
0 commit comments