
Commit 6858269

Merge pull request #228 from eggfoobar/leader_election_config
Bug 2048563: feat added leader election conventions
2 parents: 6182cf6 + b2d3ba8

4 files changed: +197 −20 lines

cmd/package-server-manager/main.go

Lines changed: 8 additions & 4 deletions
@@ -17,6 +17,7 @@ import (
     "sigs.k8s.io/controller-runtime/pkg/log/zap"
     "sigs.k8s.io/controller-runtime/pkg/manager"
 
+    "github.com/openshift/operator-framework-olm/pkg/leaderelection"
     controllers "github.com/openshift/operator-framework-olm/pkg/package-server-manager"
     //+kubebuilder:scaffold:imports
 )
@@ -59,17 +60,20 @@ func run(cmd *cobra.Command, args []string) error {
     ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
     setupLog := ctrl.Log.WithName("setup")
 
+    restConfig := ctrl.GetConfigOrDie()
+    le := leaderelection.GetLeaderElectionConfig(setupLog, restConfig, !disableLeaderElection)
+
     packageserverCSVFields := fields.Set{"metadata.name": name}
-    mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), manager.Options{
+    mgr, err := ctrl.NewManager(restConfig, manager.Options{
         Scheme:                  setupScheme(),
         Namespace:               namespace,
         MetricsBindAddress:      defaultMetricsPort,
         LeaderElection:          !disableLeaderElection,
         LeaderElectionNamespace: namespace,
         LeaderElectionID:        leaderElectionConfigmapName,
-        RetryPeriod:             timeDurationPtr(defaultRetryPeriod),
-        RenewDeadline:           timeDurationPtr(defaultRenewDeadline),
-        LeaseDuration:           timeDurationPtr(defaultLeaseDuration),
+        LeaseDuration:           &le.LeaseDuration.Duration,
+        RenewDeadline:           &le.RenewDeadline.Duration,
+        RetryPeriod:             &le.RetryPeriod.Duration,
         HealthProbeBindAddress:  healthCheckAddr,
         NewCache: cache.BuilderWithOptions(cache.Options{
             SelectorsByObject: cache.SelectorsByObject{

cmd/package-server-manager/util.go

Lines changed: 0 additions & 16 deletions
@@ -1,8 +1,6 @@
 package main
 
 import (
-    "time"
-
     configv1 "github.com/openshift/api/config/v1"
     olmv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
 
@@ -11,20 +9,6 @@ import (
     clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 )
 
-const (
-    // Note: In order for SNO to GA, controllers need to handle ~60s of API server
-    // disruptions when attempting to get and sustain leader election:
-    // - https://github.com/openshift/library-go/pull/1104#discussion_r649313822
-    // - https://bugzilla.redhat.com/show_bug.cgi?id=1985697
-    defaultRetryPeriod   = 30 * time.Second
-    defaultRenewDeadline = 60 * time.Second
-    defaultLeaseDuration = 90 * time.Second
-)
-
-func timeDurationPtr(t time.Duration) *time.Duration {
-    return &t
-}
-
 func setupScheme() *runtime.Scheme {
     scheme := runtime.NewScheme()
     utilruntime.Must(clientgoscheme.AddToScheme(scheme))
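The constants deleted here (30s/60s/90s) were a local answer to the ~60s API-server disruption requirement called out in the removed comment. As a rough sanity check that the convention values introduced by the new leaderelection package below still cover that window, the following small Go sketch computes a few derived tolerances; the derivation is my reading of the library-go calculation linked from the new package, not code from this commit.

package main

import (
    "fmt"
    "time"
)

func main() {
    // HA convention values added in the new leaderelection package.
    leaseDuration := 137 * time.Second
    renewDeadline := 107 * time.Second
    retryPeriod := 26 * time.Second

    // Renewals are attempted every retryPeriod; the last attempt that can still
    // land before the renew deadline is at floor(107/26)*26 = 104s.
    lastRetry := renewDeadline.Truncate(retryPeriod)

    fmt.Println("clock-skew buffer (lease - renew):", leaseDuration-renewDeadline) // 30s
    fmt.Println("last renew attempt before deadline:", lastRetry)                  // 1m44s
    fmt.Println("conservative downtime tolerance:", lastRetry-retryPeriod)         // 1m18s, above the ~60s from the removed note
}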
pkg/leaderelection (new file)

Lines changed: 87 additions & 0 deletions

@@ -0,0 +1,87 @@
package leaderelection

import (
    "context"
    "time"

    "github.com/go-logr/logr"

    configv1 "github.com/openshift/api/config/v1"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/client-go/rest"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

const (
    infraResourceName = "cluster"

    // Defaults follow conventions
    // https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability
    // Impl Calculations: https://github.com/openshift/library-go/commit/7e7d216ed91c3119800219c9194e5e57113d059a
    defaultLeaseDuration = 137 * time.Second
    defaultRenewDeadline = 107 * time.Second
    defaultRetryPeriod   = 26 * time.Second

    // Default leader election for SNO environments
    // Impl Calculations:
    // https://github.com/openshift/library-go/commit/2612981f3019479805ac8448b997266fc07a236a#diff-61dd95c7fd45fa18038e825205fbfab8a803f1970068157608b6b1e9e6c27248R127
    defaultSingleNodeLeaseDuration = 270 * time.Second
    defaultSingleNodeRenewDeadline = 240 * time.Second
    defaultSingleNodeRetryPeriod   = 60 * time.Second
)

var (
    defaultLeaderElectionConfig = configv1.LeaderElection{
        LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration},
        RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline},
        RetryPeriod:   metav1.Duration{Duration: defaultRetryPeriod},
    }
)

func GetLeaderElectionConfig(log logr.Logger, restConfig *rest.Config, enabled bool) (defaultConfig configv1.LeaderElection) {
    client, err := client.New(restConfig, client.Options{})
    if err != nil {
        log.Error(err, "unable to create client, using HA cluster values for leader election")
        return defaultLeaderElectionConfig
    }
    configv1.AddToScheme(client.Scheme())
    return getLeaderElectionConfig(log, client, enabled)
}

func getLeaderElectionConfig(log logr.Logger, client client.Client, enabled bool) (config configv1.LeaderElection) {
    config = defaultLeaderElectionConfig
    config.Disable = !enabled
    if enabled {
        ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Second*3))
        defer cancel()
        infra, err := getClusterInfraStatus(ctx, client)
        if err != nil {
            log.Error(err, "unable to get cluster infrastructure status, using HA cluster values for leader election")
            return
        }
        if infra != nil && infra.ControlPlaneTopology == configv1.SingleReplicaTopologyMode {
            return leaderElectionSNOConfig(config)
        }
    }
    return
}

func leaderElectionSNOConfig(config configv1.LeaderElection) configv1.LeaderElection {
    ret := *(&config).DeepCopy()
    ret.LeaseDuration.Duration = defaultSingleNodeLeaseDuration
    ret.RenewDeadline.Duration = defaultSingleNodeRenewDeadline
    ret.RetryPeriod.Duration = defaultSingleNodeRetryPeriod
    return ret
}

// Retrieve the cluster status, used to determine if we should use different leader election.
func getClusterInfraStatus(ctx context.Context, client client.Client) (*configv1.InfrastructureStatus, error) {
    infra := &configv1.Infrastructure{}
    err := client.Get(ctx, types.NamespacedName{Name: infraResourceName}, infra)
    if err != nil {
        return nil, err
    }
    return &infra.Status, nil
}
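Read together with the main.go hunk above, the consumer pattern for this package is small. Below is a condensed, hedged sketch of that wiring (the namespace and lock ID are placeholders, not values from this commit):

package main

import (
    "os"

    "github.com/openshift/operator-framework-olm/pkg/leaderelection"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/manager"
)

func main() {
    setupLog := ctrl.Log.WithName("setup")

    // Ask the package for topology-aware leader election values (SNO vs. HA)
    // and feed the resulting durations straight into the controller-runtime manager.
    restConfig := ctrl.GetConfigOrDie()
    le := leaderelection.GetLeaderElectionConfig(setupLog, restConfig, true)

    mgr, err := ctrl.NewManager(restConfig, manager.Options{
        LeaderElection:          !le.Disable,
        LeaderElectionNamespace: "my-namespace", // placeholder
        LeaderElectionID:        "my-lock-name", // placeholder
        LeaseDuration:           &le.LeaseDuration.Duration,
        RenewDeadline:           &le.RenewDeadline.Duration,
        RetryPeriod:             &le.RetryPeriod.Duration,
    })
    if err != nil {
        setupLog.Error(err, "unable to start manager")
        os.Exit(1)
    }
    _ = mgr // a real binary would call mgr.Start(ctrl.SetupSignalHandler())
}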
pkg/leaderelection (new test file)

Lines changed: 102 additions & 0 deletions

@@ -0,0 +1,102 @@
package leaderelection

import (
    "reflect"
    "testing"

    configv1 "github.com/openshift/api/config/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"

    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func TestGetLeaderElectionConfig(t *testing.T) {
    sch := runtime.NewScheme()
    configv1.AddToScheme(sch)
    testCases := []struct {
        desc         string
        enabled      bool
        clusterInfra configv1.Infrastructure
        expected     configv1.LeaderElection
    }{
        {
            desc:    "single node leader election values when ControlPlaneTopology is SingleReplicaTopologyMode",
            enabled: true,
            clusterInfra: configv1.Infrastructure{
                ObjectMeta: metav1.ObjectMeta{Name: infraResourceName},
                Status: configv1.InfrastructureStatus{
                    ControlPlaneTopology: configv1.SingleReplicaTopologyMode,
                }},
            expected: configv1.LeaderElection{
                Disable: false,
                LeaseDuration: metav1.Duration{
                    Duration: defaultSingleNodeLeaseDuration,
                },
                RenewDeadline: metav1.Duration{
                    Duration: defaultSingleNodeRenewDeadline,
                },
                RetryPeriod: metav1.Duration{
                    Duration: defaultSingleNodeRetryPeriod,
                },
            },
        },
        {
            desc:    "ha leader election values when ControlPlaneTopology is HighlyAvailableTopologyMode",
            enabled: true,
            clusterInfra: configv1.Infrastructure{
                ObjectMeta: metav1.ObjectMeta{Name: infraResourceName},
                Status: configv1.InfrastructureStatus{
                    ControlPlaneTopology: configv1.HighlyAvailableTopologyMode,
                }},
            expected: configv1.LeaderElection{
                Disable: false,
                LeaseDuration: metav1.Duration{
                    Duration: defaultLeaseDuration,
                },
                RenewDeadline: metav1.Duration{
                    Duration: defaultRenewDeadline,
                },
                RetryPeriod: metav1.Duration{
                    Duration: defaultRetryPeriod,
                },
            },
        },
        {
            desc:    "when disabled the default HA values should be returned",
            enabled: false,
            clusterInfra: configv1.Infrastructure{
                ObjectMeta: metav1.ObjectMeta{Name: infraResourceName},
                Status: configv1.InfrastructureStatus{
                    ControlPlaneTopology: configv1.SingleReplicaTopologyMode,
                }},
            expected: configv1.LeaderElection{
                Disable: true,
                LeaseDuration: metav1.Duration{
                    Duration: defaultLeaseDuration,
                },
                RenewDeadline: metav1.Duration{
                    Duration: defaultRenewDeadline,
                },
                RetryPeriod: metav1.Duration{
                    Duration: defaultRetryPeriod,
                },
            },
        },
    }

    for _, tC := range testCases {
        t.Run(tC.desc, func(t *testing.T) {
            client := fake.NewClientBuilder().
                WithRuntimeObjects(&tC.clusterInfra).WithScheme(sch).Build()

            setupLog := ctrl.Log.WithName("leaderelection_config_testing")

            result := getLeaderElectionConfig(setupLog, client, tC.enabled)
            if !reflect.DeepEqual(result, tC.expected) {
                t.Errorf("expected %+v but got %+v", tC.expected, result)
            }
        })
    }
}
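The new table-driven tests drive the unexported getLeaderElectionConfig through a fake controller-runtime client seeded with an Infrastructure object per case. Assuming the package lives under pkg/leaderelection, as the import path in main.go suggests, they can be run from the repository root with: go test ./pkg/leaderelection/...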
