Skip to content

Commit 68fe5c8

Browse files
committed
Improve the check for Pod spec changes
1 parent 3f772ae commit 68fe5c8

File tree

3 files changed

+499
-25
lines changed

3 files changed

+499
-25
lines changed

controllers/update_status.go

Lines changed: 116 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -749,35 +749,12 @@ func validateProcessGroup(
749749
return nil
750750
}
751751

752-
specHash, err := internal.GetPodSpecHash(cluster, processGroupStatus, nil)
752+
correctPodSpec, err := podSpecIsCorrect(ctx, r, cluster, pod, processGroupStatus, logger)
753753
if err != nil {
754754
return err
755755
}
756756

757-
// Check here if the Pod spec matches the desired Pod spec.
758-
incorrectPodSpec := specHash != pod.Annotations[fdbv1beta2.LastSpecKey]
759-
if !incorrectPodSpec {
760-
updated, err := r.PodLifecycleManager.PodIsUpdated(ctx, r, cluster, pod)
761-
if err != nil {
762-
return err
763-
}
764-
incorrectPodSpec = !updated
765-
if incorrectPodSpec {
766-
logger.Info(
767-
"IncorrectPodSpec",
768-
"currentSpecHash",
769-
pod.Annotations[fdbv1beta2.LastSpecKey],
770-
"desiredSpecHash",
771-
specHash,
772-
"updated",
773-
updated,
774-
"incorrectPodSpec",
775-
incorrectPodSpec,
776-
)
777-
}
778-
}
779-
780-
processGroupStatus.UpdateCondition(fdbv1beta2.IncorrectPodSpec, incorrectPodSpec)
757+
processGroupStatus.UpdateCondition(fdbv1beta2.IncorrectPodSpec, !correctPodSpec)
781758

782759
// Check the sidecar image, to ensure the sidecar is running with the desired image.
783760
sidecarImage, err := internal.GetSidecarImage(cluster, processGroupStatus.ProcessClass)
@@ -1277,3 +1254,117 @@ func updateFaultDomains(
12771254
status.ProcessGroups[idx].FaultDomain = fdbv1beta2.FaultDomain(faultDomain)
12781255
}
12791256
}
1257+
1258+
// podSpecIsCorrect returns if the desired Pod spec and the current Pod spec are matching.
1259+
func podSpecIsCorrect(ctx context.Context,
1260+
r *FoundationDBClusterReconciler,
1261+
cluster *fdbv1beta2.FoundationDBCluster,
1262+
pod *corev1.Pod,
1263+
processGroupStatus *fdbv1beta2.ProcessGroupStatus,
1264+
logger logr.Logger,
1265+
) (bool, error) {
1266+
desiredPodSpec, err := internal.GetPodSpec(cluster, processGroupStatus)
1267+
if err != nil {
1268+
return false, err
1269+
}
1270+
1271+
specHash, err := internal.GetPodSpecHash(cluster, processGroupStatus, desiredPodSpec)
1272+
if err != nil {
1273+
return false, err
1274+
}
1275+
1276+
// Check here if the Pod spec matches the desired Pod spec, if not we know that change are pending, and we don't
1277+
// have to preform additional checks.
1278+
correctPodSpec := specHash == pod.Annotations[fdbv1beta2.LastSpecKey]
1279+
if !correctPodSpec {
1280+
logger.Info(
1281+
"IncorrectPodSpec",
1282+
"currentSpecHash",
1283+
pod.Annotations[fdbv1beta2.LastSpecKey],
1284+
"desiredSpecHash",
1285+
specHash,
1286+
"reason",
1287+
"pod spec hash mismatch",
1288+
)
1289+
1290+
return false, nil
1291+
}
1292+
1293+
// Check if any of the mutable fields of the Pod have been changed outside the operators control, e.g.
1294+
// by using kubectl.
1295+
if !mutablePodFieldsAreCorrect(desiredPodSpec, pod.Spec.DeepCopy()) {
1296+
logger.Info(
1297+
"IncorrectPodSpec",
1298+
"currentSpecHash",
1299+
pod.Annotations[fdbv1beta2.LastSpecKey],
1300+
"desiredSpecHash",
1301+
specHash,
1302+
"reason",
1303+
"mutable field in pod spec was updated",
1304+
)
1305+
1306+
return false, nil
1307+
}
1308+
1309+
// Check if the Pod is pending updates, this method will return true in the default implementation.
1310+
updated, err := r.PodLifecycleManager.PodIsUpdated(ctx, r, cluster, pod)
1311+
if err != nil {
1312+
return updated, err
1313+
}
1314+
1315+
if !updated {
1316+
logger.Info(
1317+
"IncorrectPodSpec",
1318+
"currentSpecHash",
1319+
pod.Annotations[fdbv1beta2.LastSpecKey],
1320+
"desiredSpecHash",
1321+
specHash,
1322+
"reason",
1323+
"pod is pending updates",
1324+
)
1325+
1326+
return false, nil
1327+
}
1328+
1329+
return true, nil
1330+
}
1331+
1332+
// mutablePodFieldsAreCorrect checks if any of the pods mutable fields have been changed, see:
1333+
// https://kubernetes.io/docs/concepts/workloads/pods/#pod-update-and-replacement
1334+
func mutablePodFieldsAreCorrect(
1335+
desiredPodSpec *corev1.PodSpec,
1336+
currentPodSpec *corev1.PodSpec,
1337+
) bool {
1338+
for idx, container := range desiredPodSpec.Containers {
1339+
if !equality.Semantic.DeepEqual(container.Image, currentPodSpec.Containers[idx].Image) {
1340+
return false
1341+
}
1342+
}
1343+
1344+
for idx, container := range desiredPodSpec.InitContainers {
1345+
if !equality.Semantic.DeepEqual(container.Image, currentPodSpec.InitContainers[idx].Image) {
1346+
return false
1347+
}
1348+
}
1349+
1350+
if !equality.Semantic.DeepEqual(
1351+
desiredPodSpec.ActiveDeadlineSeconds,
1352+
currentPodSpec.ActiveDeadlineSeconds,
1353+
) {
1354+
return false
1355+
}
1356+
1357+
if !equality.Semantic.DeepEqual(
1358+
desiredPodSpec.TerminationGracePeriodSeconds,
1359+
currentPodSpec.TerminationGracePeriodSeconds,
1360+
) {
1361+
return false
1362+
}
1363+
1364+
// We ignore the spec.schedulingGates in the comparison here. Otherwise, a component could make the Pod ready for
1365+
// being scheduled by removing the scheduling gate which then would case a recreation of the Pod. This would
1366+
// cause the Pod to be stuck in pending or recreation.
1367+
// We could make use of the mutability of the tolerations field and remove the need for a Pod recreation when only
1368+
// tolerations are added.
1369+
return equality.Semantic.DeepEqual(desiredPodSpec.Tolerations, currentPodSpec.Tolerations)
1370+
}

0 commit comments

Comments
 (0)