Skip to content

Commit f66e30f

Browse files
committed
Only schedule switchover for pod migration, consider maintenance window for PGVERSION env change
1 parent e04b91d commit f66e30f

File tree

6 files changed

+39
-28
lines changed

6 files changed

+39
-28
lines changed

docs/administrator.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,9 +208,6 @@ Note that, changes in `SPILO_CONFIGURATION` env variable under `bootstrap.dcs`
208208
path are ignored for the diff. They will be applied through Patroni's REST API
209209
interface, following a restart of all instances.
210210

211-
Rolling update is postponed until the next maintenance window if any is defined
212-
under the `maintenanceWindows` cluster manifest parameter.
213-
214211
The operator also supports lazy updates of the Spilo image. In this case the
215212
StatefulSet is only updated, but no rolling update follows. This feature saves
216213
you a switchover - and hence downtime - when you know pods are re-started later

docs/reference/cluster_manifest.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ These parameters are grouped directly under the `spec` key in the manifest.
116116

117117
* **maintenanceWindows**
118118
a list which defines specific time frames when certain maintenance operations
119-
such as automatic major upgrades or rolling updates are allowed. Accepted formats
119+
such as automatic major upgrades or master pod migration are allowed. Accepted formats
120120
are "01:00-06:00" for daily maintenance windows or "Sat:00:00-04:00" for specific
121121
days, with all times in UTC.
122122

e2e/tests/test_e2e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,7 @@ def test_major_version_upgrade(self):
11871187
Test major version upgrade: with full upgrade, maintenance window, and annotation
11881188
"""
11891189
def check_version():
1190-
p = k8s.patroni_rest("acid-upgrade-test-0", "")
1190+
p = k8s.patroni_rest("acid-upgrade-test-0", "") or {}
11911191
version = p.get("server_version", 0) // 10000
11921192
return version
11931193

pkg/cluster/cluster.go

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,11 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
957957
defer c.mu.Unlock()
958958

959959
c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdating)
960+
961+
if !isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
962+
// do not apply any major version related changes yet
963+
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
964+
}
960965
c.setSpec(newSpec)
961966

962967
defer func() {
@@ -1761,35 +1766,39 @@ func (c *Cluster) GetSwitchoverSchedule() string {
17611766
}
17621767

17631768
// Switchover does a switchover (via Patroni) to a candidate pod
1764-
func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) error {
1769+
func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName, inMaintWindow bool) error {
17651770
var err error
1766-
c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate)
1767-
1768-
if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
1769-
c.logger.Infof("postponing switchover, not in maintenance window")
1770-
schedule := c.GetSwitchoverSchedule()
1771-
1772-
if err := c.patroni.Switchover(curMaster, candidate.Name, schedule); err != nil {
1773-
return fmt.Errorf("could not schedule switchover: %v", err)
1774-
}
1775-
c.logger.Infof("switchover is scheduled at %s", schedule)
1776-
return nil
1777-
}
17781771

1779-
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switching over from %q to %q", curMaster.Name, candidate)
17801772
stopCh := make(chan struct{})
17811773
ch := c.registerPodSubscriber(candidate)
17821774
defer c.unregisterPodSubscriber(candidate)
17831775
defer close(stopCh)
17841776

1785-
if err = c.patroni.Switchover(curMaster, candidate.Name, ""); err == nil {
1777+
var scheduled_at string
1778+
if inMaintWindow {
1779+
c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate)
1780+
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switching over from %q to %q", curMaster.Name, candidate)
1781+
1782+
scheduled_at = c.GetSwitchoverSchedule()
1783+
} else {
1784+
scheduled_at = ""
1785+
}
1786+
1787+
if err = c.patroni.Switchover(curMaster, candidate.Name, scheduled_at); err == nil {
1788+
if inMaintWindow {
1789+
c.logger.Infof("switchover is scheduled at %s", scheduled_at)
1790+
return nil
1791+
}
17861792
c.logger.Debugf("successfully switched over from %q to %q", curMaster.Name, candidate)
17871793
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Successfully switched over from %q to %q", curMaster.Name, candidate)
17881794
_, err = c.waitForPodLabel(ch, stopCh, nil)
17891795
if err != nil {
17901796
err = fmt.Errorf("could not get master pod label: %v", err)
17911797
}
17921798
} else {
1799+
if inMaintWindow {
1800+
return fmt.Errorf("could not schedule switchover: %v", err)
1801+
}
17931802
err = fmt.Errorf("could not switch over from %q to %q: %v", curMaster.Name, candidate, err)
17941803
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switchover from %q to %q FAILED: %v", curMaster.Name, candidate, err)
17951804
}

pkg/cluster/pod.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,16 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
280280
return fmt.Errorf("could not move pod: %v", err)
281281
}
282282

283+
scheduleSwitchover := false
284+
if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
285+
c.logger.Infof("postponing switchover, not in maintenance window")
286+
scheduleSwitchover = true
287+
}
283288
err = retryutil.Retry(1*time.Minute, 5*time.Minute,
284289
func() (bool, error) {
285-
err := c.Switchover(oldMaster, masterCandidateName)
290+
err := c.Switchover(oldMaster, masterCandidateName, scheduleSwitchover)
286291
if err != nil {
287-
c.logger.Errorf("could not failover to pod %q: %v", masterCandidateName, err)
292+
c.logger.Errorf("could not switchover to pod %q: %v", masterCandidateName, err)
288293
return false, nil
289294
}
290295
return true, nil
@@ -445,7 +450,7 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
445450
// do not recreate master now so it will keep the update flag and switchover will be retried on next sync
446451
return fmt.Errorf("skipping switchover: %v", err)
447452
}
448-
if err := c.Switchover(masterPod, masterCandidate); err != nil {
453+
if err := c.Switchover(masterPod, masterCandidate, false); err != nil {
449454
return fmt.Errorf("could not perform switch over: %v", err)
450455
}
451456
} else if newMasterPod == nil && len(replicas) == 0 {

pkg/cluster/sync.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
9797
}
9898
}
9999

100+
if !isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
101+
// do not apply any major version related changes yet
102+
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
103+
}
104+
100105
if err = c.syncStatefulSet(); err != nil {
101106
if !k8sutil.ResourceAlreadyExists(err) {
102107
err = fmt.Errorf("could not sync statefulsets: %v", err)
@@ -658,11 +663,6 @@ func (c *Cluster) syncStatefulSet() error {
658663
isSafeToRecreatePods = false
659664
}
660665

661-
if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
662-
postponeReasons = append(postponeReasons, "not in maintenance window")
663-
isSafeToRecreatePods = false
664-
}
665-
666666
// if we get here we also need to re-create the pods (either leftovers from the old
667667
// statefulset or those that got their configuration from the outdated statefulset)
668668
if len(podsToRecreate) > 0 {

0 commit comments

Comments
 (0)