Handle rolling update

If the image requested by the user changes, the operator will now start
rolling the upgrade out to the replicas.

After all the replicas have been updated, the master needs to be updated
as well. For that the user can choose between two behaviors, which can be
specified in the Spec of the cluster (a sample manifest follows the list):

- `switchover`: promote the replica with the least data still to receive
  or to reapply as the new master, and then finish the upgrade;

- `wait`: wait for the user to request a switchover, and then finish the
  rolling update process.

In the cluster status we keep track of when we asked a Pod to restart in
order to be updated, and we use that information to check whether the Pod
has actually been updated or not.

Co-authored-by: Leonardo Cecchi <leonardo.cecchi@2ndquadrant.it>
Co-authored-by: Jonathan Gonzalez V <jonathan.gonzalez@2ndquadrant.com>
Co-authored-by: Marco Nenciarini <marco.nenciarini@2ndquadrant.it>
3 people committed Mar 19, 2020
1 parent 4137ec0 commit 2361be6
Showing 17 changed files with 535 additions and 152 deletions.
56 changes: 51 additions & 5 deletions api/v1alpha1/cluster_types.go
@@ -81,30 +81,55 @@ type ClusterSpec struct {
// Resources requirements of every generated Pod
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty"`

// Whether to wait for the user to issue a switchover request or to switch
// over directly to another replica and then update the last instance
MasterUpdateStrategy MasterUpdateStrategy `json:"masterUpdateStrategy,omitempty"`
}

// ClusterStatus defines the observed state of Cluster
type ClusterStatus struct {
// Name of the image used
ImageName string `json:"imageName,omitempty"`

// Total number of instances in the cluster
Instances int32 `json:"instances,omitempty"`

// Total number of ready instances in the cluster
ReadyInstances int32 `json:"readyInstances,omitempty"`

// Total number of instances which are being updated
InstancesBeingUpdated int32 `json:"instancesBeingUpdated,omitempty"`

// ID of the latest generated node (used to avoid node name clashing)
LatestGeneratedNode int32 `json:"latestGeneratedNode,omitempty"`

// Name of the image used
ImageName string `json:"imageName,omitempty"`

// Current primary instance
CurrentPrimary string `json:"currentPrimary,omitempty"`

// Target primary instance, this is different from the previous one
// during a switchover or a failover
TargetPrimary string `json:"targetPrimary,omitempty"`

// ID of the latest generated node (used to avoid node name clashing)
LatestGeneratedNode int32 `json:"latestGeneratedNode,omitempty"`
// The status of the servers that are currently being updated
RollingUpdateStatus map[string]RollingUpdateStatus `json:"rollingUpdateStatus,omitempty"`
}

// MasterUpdateStrategy contains the strategy available to apply an update
// to the master server of the cluster
type MasterUpdateStrategy string

const (
// MasterUpdateStrategyWait means that the operator needs to wait for the
// user to manually issue a switchover request before updating the master
// server
MasterUpdateStrategyWait = "wait"

// MasterUpdateStrategySwitchover means that the operator will switchover
// to another updated replica and then update the master server
MasterUpdateStrategySwitchover = "switchover"
)

// PostgresConfiguration defines the PostgreSQL configuration
type PostgresConfiguration struct {
// PostgreSQL configuration options (postgresql.conf)
@@ -155,6 +180,16 @@ type AffinityConfiguration struct {
TopologyKey string `json:"topologyKey"`
}

// RollingUpdateStatus contains the information about an instance which is
// being updated
type RollingUpdateStatus struct {
// The image which we put into the Pod
ImageName string `json:"imageName"`

// When the update has been started
StartedAt metav1.Time `json:"startedAt,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:subresource:scale:specpath=.spec.instances,statuspath=.status.instances
Expand Down Expand Up @@ -247,6 +282,17 @@ func (cluster *Cluster) GetMaxStopDelay() int32 {
return 30
}

// GetMasterUpdateStrategy returns the cluster master update strategy,
// defaulting to switchover
func (cluster *Cluster) GetMasterUpdateStrategy() MasterUpdateStrategy {
strategy := cluster.Spec.MasterUpdateStrategy
if strategy == "" {
return MasterUpdateStrategySwitchover
}

return strategy
}

func init() {
SchemeBuilder.Register(&Cluster{}, &ClusterList{})
}
17 changes: 17 additions & 0 deletions api/v1alpha1/cluster_types_test.go
@@ -67,3 +67,20 @@ var _ = Describe("Detect persistent storage", func() {
Expect(cluster.IsUsingPersistentStorage()).To(BeTrue())
})
})

var _ = Describe("Master update strategy", func() {
It("defaults to switchover", func() {
emptyCluster := Cluster{}
Expect(emptyCluster.GetMasterUpdateStrategy()).To(BeEquivalentTo(MasterUpdateStrategySwitchover))
})

It("respect the preference of the user", func() {
cluster := Cluster{
Spec: ClusterSpec{
Instances: 0,
MasterUpdateStrategy: MasterUpdateStrategyWait,
},
}
Expect(cluster.GetMasterUpdateStrategy()).To(BeEquivalentTo(MasterUpdateStrategyWait))
})
})
25 changes: 24 additions & 1 deletion api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

26 changes: 26 additions & 0 deletions config/crd/bases/postgresql.k8s.2ndq.io_clusters.yaml
@@ -84,6 +84,11 @@ spec:
format: int32
minimum: 1
type: integer
masterUpdateStrategy:
description: Whether to wait for the user to issue a switchover request
or to switch over directly to another replica and then update the last
instance
type: string
postgresql:
description: Configuration of the PostgreSQL server
properties:
@@ -295,6 +300,10 @@ spec:
description: Total number of instances in the cluster
format: int32
type: integer
instancesBeingUpdated:
description: Total number of instances which are being updated
format: int32
type: integer
latestGeneratedNode:
description: ID of the latest generated node (used to avoid node name
clashing)
@@ -304,6 +313,23 @@
description: Total number of ready instances in the cluster
format: int32
type: integer
rollingUpdateStatus:
additionalProperties:
description: RollingUpdateStatus contains the information about an
instance which is being updated
properties:
imageName:
description: The image which we put into the Pod
type: string
startedAt:
description: When the update has been started
format: date-time
type: string
required:
- imageName
type: object
description: The status of the servers that are currently being updated
type: object
targetPrimary:
description: Target primary instance, this is different from the previous
one during a switchover or a failover
77 changes: 50 additions & 27 deletions controllers/cluster_controller.go
@@ -19,6 +19,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/2ndquadrant/cloud-native-postgresql/api/v1alpha1"
"github.com/2ndquadrant/cloud-native-postgresql/pkg/postgres"
)

const (
@@ -99,18 +100,38 @@ func (r *ClusterReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
}

if cluster.Status.CurrentPrimary != "" && cluster.Status.CurrentPrimary != cluster.Status.TargetPrimary {
r.Log.Info("Switchover initiated by PGK, waiting for the cluster to align")
r.Log.Info("Switchover in progress, waiting for the cluster to align")
// TODO: check if the TargetPrimary is active, otherwise recovery?
return ctrl.Result{}, err
}

// Update the status section of this Cluster resource
if err = r.updateResourceStatus(ctx, &cluster, childPods); err != nil {
if apierrs.IsConflict(err) {
// Let's wait for another reconciler loop, since the
// status already changed
return ctrl.Result{}, nil
}

return ctrl.Result{}, err
}

// Find if we have Pods that we are upgrading
if cluster.Status.InstancesBeingUpdated != 0 {
r.Log.V(2).Info("There are nodes being upgraded, waiting for the new image to be applied",
"clusterName", cluster.Name,
"namespace", cluster.Namespace)
return ctrl.Result{}, nil
}

// Get the replication status
var instancesStatus postgres.PostgresqlStatusList
if instancesStatus, err = r.getStatusFromInstances(ctx, childPods); err != nil {
return ctrl.Result{}, err
}

// Update the target primary name from the Pods status
if err = r.updateTargetPrimaryFromPods(ctx, &cluster, childPods); err != nil {
if err = r.updateTargetPrimaryFromPods(ctx, &cluster, instancesStatus); err != nil {
return ctrl.Result{}, err
}

@@ -141,41 +162,43 @@ func (r *ClusterReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
// Find if we have Pods that are not ready
if cluster.Status.ReadyInstances != cluster.Status.Instances {
// A pod is not ready, let's retry
r.Log.Info("Waiting for node to be ready")
r.Log.V(2).Info("Waiting for node to be ready",
"clusterName", cluster.Name,
"namespace", cluster.Namespace)
return ctrl.Result{}, nil
}

// TODO failing nodes?

// Are there missing nodes? Let's create one,
// but only if no failover/switchover is running
if cluster.Status.CurrentPrimary == cluster.Status.TargetPrimary {
if cluster.Status.Instances < cluster.Spec.Instances {
newNodeSerial, err := r.generateNodeSerial(ctx, &cluster)
if err != nil {
return ctrl.Result{}, err
}
// Is there a switchover or failover in progress?
// Let's wait for it to terminate before applying the
// following operations
if cluster.Status.TargetPrimary != cluster.Status.CurrentPrimary {
r.Log.V(2).Info("There is a switchover or a failover "+
"in progress, waiting for the operation to complete",
"clusterName", cluster.Name,
"namespace", cluster.Namespace,
"currentPrimary", cluster.Status.CurrentPrimary,
"targetPrimary", cluster.Status.TargetPrimary)
return ctrl.Result{}, nil
}

return r.joinReplicaInstance(ctx, newNodeSerial, &cluster)
// Are there missing nodes? Let's create one
if cluster.Status.Instances < cluster.Spec.Instances {
newNodeSerial, err := r.generateNodeSerial(ctx, &cluster)
if err != nil {
return ctrl.Result{}, err
}

return ctrl.Result{}, r.joinReplicaInstance(ctx, newNodeSerial, &cluster)
}

// Are there nodes to be removed? Remove one of them
if cluster.Status.Instances > cluster.Spec.Instances {
// Is there one pod to be deleted?
sacrificialPod := getSacrificialPod(childPods.Items)
if sacrificialPod == nil {
r.Log.Info("There are no instances to be sacrificed. Wait for the next sync loop")
return ctrl.Result{}, nil
}
return ctrl.Result{}, r.scaleDownCluster(ctx, &cluster, childPods)
}

r.Log.Info("Too many nodes for cluster, deleting an instance",
"cluster", cluster.Name,
"namespace", cluster.Namespace,
"pod", sacrificialPod.Name)
err = r.Delete(ctx, sacrificialPod)
if err != nil {
r.Log.Error(err, "Cannot kill the Pod to scale down")
// Check if we need to handle a rolling upgrade
if cluster.Status.ImageName != cluster.Spec.ImageName {
if err = r.upgradeCluster(ctx, &cluster, childPods, instancesStatus); err != nil {
return ctrl.Result{}, err
}
}
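
The `upgradeCluster` helper itself is outside the hunks shown here. Purely
as a hypothetical sketch of the flow described in the commit message (the
signature mirrors the call above, while `podImage`, `upgradePod` and
`mostAlignedReplica` are made-up placeholders, not functions from this
repository), it could be shaped roughly like this:

```go
// Hypothetical sketch, not the actual implementation from this commit.
// upgradeCluster restarts out-of-date replicas one at a time, recording each
// request in Status.RollingUpdateStatus, and leaves the primary for last,
// handling it according to the configured master update strategy.
func (r *ClusterReconciler) upgradeCluster(
	ctx context.Context,
	cluster *v1alpha1.Cluster,
	childPods corev1.PodList,
	instancesStatus postgres.PostgresqlStatusList,
) error {
	// Replicas first: upgrade at most one Pod per reconciliation loop.
	for i := range childPods.Items {
		pod := &childPods.Items[i]
		if pod.Name == cluster.Status.CurrentPrimary {
			continue // leave the primary for last
		}
		if podImage(pod) != cluster.Spec.ImageName {
			// Record the restart request in the status and restart the Pod
			return r.upgradePod(ctx, cluster, pod)
		}
	}

	// Every replica is up to date: handle the primary.
	switch cluster.GetMasterUpdateStrategy() {
	case v1alpha1.MasterUpdateStrategyWait:
		// Wait for the user to request a switchover before touching the primary.
		return nil
	default: // v1alpha1.MasterUpdateStrategySwitchover
		// Promote the most aligned replica; the old primary will then be
		// upgraded as a regular replica in a following loop.
		cluster.Status.TargetPrimary = mostAlignedReplica(instancesStatus)
		return r.Status().Update(ctx, cluster)
	}
}
```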
