Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue where a ReplicaSet name collision could cause hot loop #236

Merged
merged 1 commit into from
Oct 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions experiments/experiment.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,7 @@ func (ec *experimentContext) reconcileTemplate(template v1alpha1.TemplateSpec) {
rs := ec.templateRSs[template.Name]
if rs == nil {
// Create the ReplicaSet if necessary
if templateStatus.Status.Completed() {
// do nothing (not even pollute the logs)
} else if ec.isTerminating {
logCtx.Info("Skipping ReplicaSet creation: experiment is terminating")
} else {
if desiredReplicaCount > 0 {
newRS, err := ec.createReplicaSet(template, templateStatus.CollisionCount)
if err != nil {
logCtx.Warnf("Failed to create ReplicaSet: %v", err)
Expand All @@ -123,19 +119,23 @@ func (ec *experimentContext) reconcileTemplate(template v1alpha1.TemplateSpec) {
if newRS != nil {
ec.templateRSs[template.Name] = newRS
templateStatus.LastTransitionTime = &now
rs = newRS
}
}
templateStatus.Replicas = 0
templateStatus.UpdatedReplicas = 0
templateStatus.ReadyReplicas = 0
templateStatus.AvailableReplicas = 0
} else {
// If we get here, replicaset exists. We need to ensure it's scaled properly based on
// termination, or changed replica count
// Replicaset exists. We ensure it is scaled properly based on termination, or changed replica count
if *rs.Spec.Replicas != desiredReplicaCount {
ec.scaleReplicaSetAndRecordEvent(rs, desiredReplicaCount)
templateStatus.LastTransitionTime = &now
}
}

if rs == nil {
templateStatus.Replicas = 0
templateStatus.UpdatedReplicas = 0
templateStatus.ReadyReplicas = 0
templateStatus.AvailableReplicas = 0
} else {
templateStatus.Replicas = replicasetutil.GetActualReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs})
templateStatus.UpdatedReplicas = replicasetutil.GetActualReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs})
templateStatus.ReadyReplicas = replicasetutil.GetReadyReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs})
Expand Down
68 changes: 46 additions & 22 deletions experiments/replicaset.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,26 +89,7 @@ func (c *ExperimentController) getReplicaSetsForExperiment(experiment *v1alpha1.

// createReplicaSet creates a new replicaset based on the template
func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, collisionCount *int32) (*appsv1.ReplicaSet, error) {
newRSTemplate := *template.Template.DeepCopy()
// The labels must be different for each template because labels are used to match replicasets
// to templates. We inject the experiment and template name in the replicaset labels to ensure
// uniqueness.
replicaSetlabels := newReplicaSetLabels(ec.ex.Name, template.Name)
podTemplateSpecHash := controller.ComputeHash(&newRSTemplate, collisionCount)
newRS := appsv1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s-%s", ec.ex.Name, template.Name, podTemplateSpecHash),
Namespace: ec.ex.Namespace,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(ec.ex, controllerKind)},
Labels: replicaSetlabels,
},
Spec: appsv1.ReplicaSetSpec{
Replicas: new(int32),
MinReadySeconds: template.MinReadySeconds,
Selector: template.Selector,
Template: newRSTemplate,
},
}
newRS := newReplicaSetFromTemplate(ec.ex, template, collisionCount)

newReplicasCount := experimentutil.CalculateTemplateReplicasCount(ec.ex, template)
*(newRS.Spec.Replicas) = newReplicasCount
Expand All @@ -133,10 +114,14 @@ func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, co
// deep equal to the PodTemplateSpec of the Experiment, it's the Experiment's new ReplicaSet.
// Otherwise, this is a hash collision and we need to increment the collisionCount field in
// the status of the Experiment and requeue to try the creation in the next sync.
controllerRef := metav1.GetControllerOf(rs)
if controllerRef != nil && controllerRef.UID == ec.ex.UID && replicasetutil.PodTemplateEqualIgnoreHash(&rs.Spec.Template, &template.Template) {
if ec.isReplicaSetSemanticallyEqual(&newRS, rs) {
// NOTE: it should be impossible to get here, because the isReplicaSetSemanticallyEqual()
// helper is actually stricter than the the replicaset claim logic that builds up
// ec.templateRSs at the start of reconciliation, preventing this code path from
// happening entirely. We should consider deleting this if-block entirely.
createdRS = rs
err = nil
ec.log.Warnf("Claimed existing ReplicaSet %s with equivalent template spec", createdRS.Name)
break
}

Expand Down Expand Up @@ -183,6 +168,45 @@ func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, co
return createdRS, nil
}

// newReplicaSetFromTemplate is a helper to formulate a replicaset from an experiment's template
func newReplicaSetFromTemplate(experiment *v1alpha1.Experiment, template v1alpha1.TemplateSpec, collisionCount *int32) appsv1.ReplicaSet {
newRSTemplate := *template.Template.DeepCopy()
// The labels must be different for each template because labels are used to match replicasets
// to templates. We inject the experiment and template name in the replicaset labels to ensure
// uniqueness.
replicaSetlabels := newReplicaSetLabels(experiment.Name, template.Name)
podTemplateSpecHash := controller.ComputeHash(&newRSTemplate, collisionCount)
return appsv1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s-%s", experiment.Name, template.Name, podTemplateSpecHash),
Namespace: experiment.Namespace,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(experiment, controllerKind)},
Labels: replicaSetlabels,
},
Spec: appsv1.ReplicaSetSpec{
Replicas: new(int32),
MinReadySeconds: template.MinReadySeconds,
Selector: template.Selector,
Template: newRSTemplate,
},
}
}

// isReplicaSetSemanticallyEqual checks to see if an existing ReplicaSet is semantically equal
// to the ReplicaSet we are trying to create
func (ec *experimentContext) isReplicaSetSemanticallyEqual(newRS, existingRS *appsv1.ReplicaSet) bool {
controllerRef := metav1.GetControllerOf(existingRS)
podTemplatesEqual := replicasetutil.PodTemplateEqualIgnoreHash(&existingRS.Spec.Template, &newRS.Spec.Template)
existingLabels := existingRS.GetLabels()
newLabels := newRS.GetLabels()
return controllerRef != nil &&
controllerRef.UID == ec.ex.UID &&
podTemplatesEqual &&
existingLabels != nil &&
existingLabels[ExperimentNameLabelKey] == newLabels[ExperimentNameLabelKey] &&
existingLabels[ExperimentTemplateNameLabelKey] == newLabels[ExperimentTemplateNameLabelKey]
}

func (ec *experimentContext) scaleReplicaSetAndRecordEvent(rs *appsv1.ReplicaSet, newScale int32) (bool, *appsv1.ReplicaSet, error) {
// No need to scale
if *(rs.Spec.Replicas) == newScale {
Expand Down
36 changes: 36 additions & 0 deletions experiments/replicaset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,39 @@ func TestNameCollision(t *testing.T) {
validatePatch(t, patch, "", NoChange, templateStatuses, cond)
}
}

// TestNameCollisionWithEquivalentPodTemplateAndControllerUID verifies we consider the labels of the
// replicaset when encountering name collisions
func TestNameCollisionWithEquivalentPodTemplateAndControllerUID(t *testing.T) {
templates := generateTemplates("bar")
e := newExperiment("foo", templates, nil)
e.Status.Status = v1alpha1.AnalysisStatusPending

rs := templateToRS(e, templates[0], 0)
rs.ObjectMeta.Labels[ExperimentTemplateNameLabelKey] = "something-different" // change this to something different

f := newFixture(t, e, rs)
defer f.Close()

f.expectCreateReplicaSetAction(rs)
collisionCountPatchIndex := f.expectPatchExperimentAction(e) // update collision count
statusUpdatePatchIndex := f.expectPatchExperimentAction(e) // updates status
f.run(getKey(e, t))

{
patch := f.getPatchedExperiment(collisionCountPatchIndex)
templateStatuses := []v1alpha1.TemplateStatus{
generateTemplatesStatus("bar", 0, 0, "", nil),
}
templateStatuses[0].CollisionCount = pointer.Int32Ptr(1)
validatePatch(t, patch, "", NoChange, templateStatuses, nil)
}
{
patch := f.getPatchedExperiment(statusUpdatePatchIndex)
templateStatuses := []v1alpha1.TemplateStatus{
generateTemplatesStatus("bar", 0, 0, v1alpha1.TemplateStatusProgressing, nil),
}
cond := []v1alpha1.ExperimentCondition{*newCondition(conditions.ReplicaSetUpdatedReason, e)}
validatePatch(t, patch, "", NoChange, templateStatuses, cond)
}
}