Skip to content

Commit cb435e1

Browse files
sethp-nrmnguyen
andcommitted
✨ KCP adopts existing machines
The KCP controller identifies Machines that belong to the control plane of an existing cluster and adopts them, including finding PKI materials that may be owned by the machine's bootstrap config and pivoting their ownership to the KCP as well. Prior to adopting machines (which, if unsuccessful, will block the KCP from taking any management actions), it runs a number of safety checks including: - Ensuring the KCP has not been deleted (to prevent re-adoption of orphans, though this process races with the garbage collector) - Checking that the machine's bootstrap provider was KubeadmConfig - Verifying that the Machine is no further than one minor version off of the KCP's spec Additionally, we set a "best guess" value for the kubeadm.controlplane.cluster.x-k8s.io/hash on the adopted machine as if it were generated by a KCP in the past. The intent is that a KCP will adopt machines matching its "spec" (to the best of its ability) without modification, which in practice works well for adopting machines with the same spec'd version. Co-authored-by: mnguyen <mnguyen@newrelic.com>
1 parent 4d0192c commit cb435e1

File tree

15 files changed

+837
-97
lines changed

15 files changed

+837
-97
lines changed

controlplane/kubeadm/config/rbac/role.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ rules:
6161
- get
6262
- list
6363
- patch
64+
- update
6465
- watch
6566

6667
---

controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go

Lines changed: 164 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
kerrors "k8s.io/apimachinery/pkg/util/errors"
3434
"k8s.io/apiserver/pkg/storage/names"
3535
"k8s.io/client-go/tools/record"
36+
"k8s.io/utils/pointer"
3637
ctrl "sigs.k8s.io/controller-runtime"
3738
"sigs.k8s.io/controller-runtime/pkg/client"
3839
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -69,7 +70,7 @@ const (
6970
)
7071

7172
// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
72-
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;patch
73+
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch
7374
// +kubebuilder:rbac:groups=core,resources=configmaps,namespace=kube-system,verbs=get;list;watch;create
7475
// +kubebuilder:rbac:groups=rbac,resources=roles,namespace=kube-system,verbs=get;list;watch;create
7576
// +kubebuilder:rbac:groups=rbac,resources=rolebindings,namespace=kube-system,verbs=get;list;watch;create
@@ -86,6 +87,8 @@ type KubeadmControlPlaneReconciler struct {
8687
recorder record.EventRecorder
8788

8889
managementCluster internal.ManagementCluster
90+
91+
uncachedClient client.Reader
8992
}
9093

9194
func (r *KubeadmControlPlaneReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
@@ -110,6 +113,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(mgr ctrl.Manager, optio
110113
if r.managementCluster == nil {
111114
r.managementCluster = &internal.Management{Client: r.Client}
112115
}
116+
r.uncachedClient = mgr.GetAPIReader()
113117

114118
return nil
115119
}
@@ -227,13 +231,25 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
227231
return ctrl.Result{}, err
228232
}
229233

230-
// TODO: handle proper adoption of Machines
231-
ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.OwnedControlPlaneMachines(kcp.Name))
234+
controlPlaneMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.ControlPlaneMachines(cluster.Name))
232235
if err != nil {
233236
logger.Error(err, "failed to retrieve control plane machines for cluster")
234237
return ctrl.Result{}, err
235238
}
236239

240+
adoptableMachines := controlPlaneMachines.Filter(internal.AdoptableControlPlaneMachines(cluster.Name))
241+
if len(adoptableMachines) > 0 {
242+
// We adopt the Machines and then wait for the update event for the ownership reference to re-queue them so the cache is up-to-date
243+
err = r.AdoptMachines(ctx, kcp, adoptableMachines)
244+
return ctrl.Result{}, err
245+
}
246+
247+
ownedMachines := controlPlaneMachines.Filter(internal.OwnedControlPlaneMachines(kcp))
248+
if len(ownedMachines) != len(controlPlaneMachines) {
249+
logger.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode")
250+
return ctrl.Result{}, nil
251+
}
252+
237253
now := metav1.Now()
238254
var requireUpgrade internal.FilterableMachineCollection
239255
if kcp.Spec.UpgradeAfter != nil && kcp.Spec.UpgradeAfter.Before(&now) {
@@ -276,17 +292,12 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
276292
}
277293

278294
func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster) error {
279-
labelSelector := internal.ControlPlaneSelectorForCluster(cluster.Name)
280-
selector, err := metav1.LabelSelectorAsSelector(labelSelector)
281-
if err != nil {
282-
// Since we are building up the LabelSelector above, this should not fail
283-
return errors.Wrap(err, "failed to parse label selector")
284-
}
295+
selector := internal.ControlPlaneSelectorForCluster(cluster.Name)
285296
// Copy label selector to its status counterpart in string format.
286297
// This is necessary for CRDs including scale subresources.
287298
kcp.Status.Selector = selector.String()
288299

289-
ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.OwnedControlPlaneMachines(kcp.Name))
300+
ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.OwnedControlPlaneMachines(kcp))
290301
if err != nil {
291302
return errors.Wrap(err, "failed to get list of owned machines")
292303
}
@@ -724,7 +735,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu
724735
logger.Error(err, "failed to retrieve machines for cluster")
725736
return ctrl.Result{}, err
726737
}
727-
ownedMachines := allMachines.Filter(internal.OwnedControlPlaneMachines(kcp.Name))
738+
ownedMachines := allMachines.Filter(internal.OwnedControlPlaneMachines(kcp))
728739

729740
// If no control plane machines remain, remove the finalizer
730741
if len(ownedMachines) == 0 {
@@ -834,3 +845,145 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
834845

835846
return nil
836847
}
848+
849+
func (r *KubeadmControlPlaneReconciler) AdoptMachines(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, machines internal.FilterableMachineCollection) error {
850+
// We do an uncached full quorum read against the KCP to avoid re-adopting Machines the garbage collector just intentionally orphaned
851+
// See https://github.com/kubernetes/kubernetes/issues/42639
852+
uncached := controlplanev1.KubeadmControlPlane{}
853+
err := r.uncachedClient.Get(ctx, client.ObjectKey{Namespace: kcp.Namespace, Name: kcp.Name}, &uncached)
854+
if err != nil {
855+
return fmt.Errorf("can't recheck DeletionTimestamp: %v", err)
856+
}
857+
if !uncached.DeletionTimestamp.IsZero() {
858+
return fmt.Errorf("%v/%v has just been deleted at %v", kcp.GetNamespace(), kcp.GetName(), kcp.GetDeletionTimestamp())
859+
}
860+
861+
kcpVersion, err := semver.ParseTolerant(kcp.Spec.Version)
862+
if err != nil {
863+
return errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version)
864+
}
865+
866+
for _, m := range machines {
867+
ref := m.Spec.Bootstrap.ConfigRef
868+
869+
// TODO instead of returning error here, we should instead Event and add a watch on potentially adoptable Machines
870+
if ref == nil || ref.Kind != "KubeadmConfig" {
871+
return fmt.Errorf("Unable to adopt Machine %v/%v: expected a ConfigRef of kind KubeadmConfig but instead found %v", m.Namespace, m.Name, ref)
872+
}
873+
874+
// TODO instead of returning error here, we should instead Event and add a watch on potentially adoptable Machines
875+
if ref.Namespace != "" && ref.Namespace != kcp.Namespace {
876+
return fmt.Errorf("Could not adopt resources from KubeadmConfig %v/%v: cannot adopt across namespaces", ref.Namespace, ref.Name)
877+
}
878+
879+
if m.Spec.Version == nil {
880+
// if the machine's version is not immediately apparent, assume the operator knows what they're doing
881+
continue
882+
}
883+
884+
machineVersion, err := semver.ParseTolerant(*m.Spec.Version)
885+
if err != nil {
886+
return errors.Wrapf(err, "failed to parse kubernetes version %q", *m.Spec.Version)
887+
}
888+
889+
dist := func(a, b uint64) uint64 {
890+
if a > b {
891+
return a - b
892+
}
893+
return b - a
894+
}
895+
if kcpVersion.Major != machineVersion.Major || dist(kcpVersion.Minor, machineVersion.Minor) > 1 {
896+
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "AdoptionFailed", "Could not adopt Machine %s/%s: its version (%q) is outside supported +/- one minor version skew from KCP's (%q)", m.Namespace, m.Name, *m.Spec.Version, kcp.Spec.Version)
897+
// avoid returning an error here so we don't cause the KCP controller to spin until the operator clarifies their intent
898+
return nil
899+
}
900+
}
901+
902+
for _, m := range machines {
903+
ref := m.Spec.Bootstrap.ConfigRef
904+
obj := &bootstrapv1.KubeadmConfig{}
905+
err := r.Client.Get(ctx, client.ObjectKey{Name: ref.Name, Namespace: kcp.Namespace}, obj)
906+
if err != nil {
907+
return err
908+
}
909+
910+
err = r.AdoptOwnedSecrets(ctx, kcp, obj)
911+
if err != nil {
912+
return err
913+
}
914+
915+
patchHelper, err := patch.NewHelper(m, r.Client)
916+
if err != nil {
917+
return err
918+
}
919+
920+
m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{
921+
APIVersion: controlplanev1.GroupVersion.String(),
922+
Kind: "KubeadmControlPlane",
923+
Name: kcp.Name,
924+
UID: kcp.UID,
925+
Controller: pointer.BoolPtr(true),
926+
BlockOwnerDeletion: pointer.BoolPtr(true),
927+
}))
928+
929+
// 0. get machine.Spec.Version - the easy answer
930+
machineKubernetesVersion := ""
931+
if m.Spec.Version != nil {
932+
machineKubernetesVersion = *m.Spec.Version
933+
}
934+
935+
// 1. hash the version (kubernetes version) and kubeadm_controlplane's Spec.infrastructureTemplate
936+
newSpec := controlplanev1.KubeadmControlPlaneSpec{
937+
Version: machineKubernetesVersion,
938+
InfrastructureTemplate: kcp.Spec.InfrastructureTemplate,
939+
}
940+
newConfigurationHash := hash.Compute(&newSpec)
941+
// 2. add kubeadm.controlplane.cluster.x-k8s.io/hash as a label in each machine
942+
m.Labels["kubeadm.controlplane.cluster.x-k8s.io/hash"] = newConfigurationHash
943+
944+
// Note that ValidateOwnerReferences() will reject this patch if another
945+
// OwnerReference exists with controller=true.
946+
if err := patchHelper.Patch(ctx, m); err != nil {
947+
return err
948+
}
949+
}
950+
return nil
951+
}
952+
953+
func (r *KubeadmControlPlaneReconciler) AdoptOwnedSecrets(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, currentOwner metav1.Object) error {
954+
secrets := corev1.SecretList{}
955+
err := r.Client.List(ctx, &secrets, client.InNamespace(kcp.Namespace))
956+
957+
if err != nil {
958+
return errors.Wrap(err, "error finding secrets for adoption")
959+
}
960+
961+
for _, s := range secrets.Items {
962+
if !util.PointsTo(s.GetOwnerReferences(), currentOwner) {
963+
continue
964+
}
965+
// avoid taking ownership of the bootstrap data secret
966+
if s.Name == currentOwner.GetName() {
967+
continue
968+
}
969+
970+
ss := corev1.Secret{}
971+
s.DeepCopyInto(&ss)
972+
973+
ss.SetOwnerReferences(util.ReconcileOwnerRef(ss.GetOwnerReferences(), metav1.OwnerReference{
974+
APIVersion: controlplanev1.GroupVersion.String(),
975+
Kind: "KubeadmControlPlane",
976+
Name: kcp.Name,
977+
UID: kcp.UID,
978+
Controller: pointer.BoolPtr(true),
979+
BlockOwnerDeletion: pointer.BoolPtr(true),
980+
}, currentOwner))
981+
982+
err := r.Client.Update(ctx, &ss)
983+
if err != nil {
984+
return errors.Wrapf(err, "error changing secret %v ownership from KubeadmConfig/%v to KubeadmControlPlane/%v", s.Name, currentOwner.GetName(), kcp.Name)
985+
}
986+
}
987+
988+
return nil
989+
}

0 commit comments

Comments
 (0)