Skip to content

Commit 24fe81b

Browse files
authored
Revert "Revert "fix(platform): delete machine failed & move machine operation to provider (#1435)" (#1451)"
This reverts commit cad879c.
1 parent 083b789 commit 24fe81b

File tree

8 files changed

+126
-77
lines changed

8 files changed

+126
-77
lines changed

pkg/platform/controller/cluster/cluster_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ func (c *Controller) needsUpdate(old *platformv1.Cluster, new *platformv1.Cluste
152152
return true
153153
}
154154

155-
if old.Status.Phase == platformv1.ClusterRunning && new.Status.Phase == platformv1.ClusterTerminating {
155+
if old.Status.Phase != platformv1.ClusterTerminating && new.Status.Phase == platformv1.ClusterTerminating {
156156
return true
157157
}
158158

@@ -281,7 +281,7 @@ func (c *Controller) reconcile(ctx context.Context, key string, cluster *platfor
281281
log.FromContext(ctx).Info("Cluster has been terminated. Attempting to cleanup resources")
282282
err = c.deleter.Delete(ctx, key)
283283
if err == nil {
284-
log.FromContext(ctx).Info("Machine has been successfully deleted")
284+
log.FromContext(ctx).Info("Cluster has been successfully deleted")
285285
}
286286
default:
287287
log.FromContext(ctx).Info("unknown cluster phase", "status.phase", cluster.Status.Phase)

pkg/platform/controller/cluster/deletion/cluster_deleter.go

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ import (
2424

2525
"k8s.io/apimachinery/pkg/api/errors"
2626
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27-
"k8s.io/apimachinery/pkg/fields"
2827
utilerrors "k8s.io/apimachinery/pkg/util/errors"
2928
"k8s.io/apimachinery/pkg/util/sets"
29+
3030
v1clientset "tkestack.io/tke/api/client/clientset/versioned/typed/platform/v1"
3131
platformv1 "tkestack.io/tke/api/platform/v1"
3232
clusterprovider "tkestack.io/tke/pkg/platform/provider/cluster"
@@ -244,7 +244,6 @@ var deleteResourceFuncs = []deleteResourceFunc{
244244
deleteIPAM,
245245
deleteTappControllers,
246246
deleteClusterProvider,
247-
deleteMachine,
248247
}
249248

250249
// deleteAllContent will use the client to delete each resource identified in cluster.
@@ -433,29 +432,3 @@ func deleteClusterCredential(ctx context.Context, deleter *clusterDeleter, clust
433432
return nil
434433
}
435434
*/
436-
437-
func deleteMachine(ctx context.Context, deleter *clusterDeleter, cluster *platformv1.Cluster) error {
438-
log.FromContext(ctx).Info("deleteMachine doing")
439-
440-
fieldSelector := fields.OneTermEqualSelector("spec.clusterName", cluster.Name).String()
441-
machineList, err := deleter.platformClient.Machines().List(ctx, metav1.ListOptions{FieldSelector: fieldSelector})
442-
if err != nil {
443-
return err
444-
}
445-
if len(machineList.Items) == 0 {
446-
return nil
447-
}
448-
background := metav1.DeletePropagationForeground
449-
deleteOpt := metav1.DeleteOptions{PropagationPolicy: &background}
450-
for _, machine := range machineList.Items {
451-
if err := deleter.platformClient.Machines().Delete(ctx, machine.Name, deleteOpt); err != nil {
452-
if !errors.IsNotFound(err) {
453-
return err
454-
}
455-
}
456-
}
457-
458-
log.FromContext(ctx).Info("deleteMachine done")
459-
460-
return nil
461-
}

pkg/platform/controller/machine/deletion/machine_deleter.go

Lines changed: 9 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,11 @@ import (
2626
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2727
utilerrors "k8s.io/apimachinery/pkg/util/errors"
2828
"k8s.io/apimachinery/pkg/util/sets"
29+
2930
v1clientset "tkestack.io/tke/api/client/clientset/versioned/typed/platform/v1"
30-
platformv1 "tkestack.io/tke/api/platform/v1"
3131
v1 "tkestack.io/tke/api/platform/v1"
3232
clusterprovider "tkestack.io/tke/pkg/platform/provider/cluster"
3333
machineprovider "tkestack.io/tke/pkg/platform/provider/machine"
34-
"tkestack.io/tke/pkg/util/apiclient"
3534
"tkestack.io/tke/pkg/util/log"
3635
)
3736

@@ -101,7 +100,7 @@ func (d *machineDeleter) Delete(ctx context.Context, name string) error {
101100

102101
// ensure that the status is up to date on the machine
103102
// if we get a not found error, we assume the machine is truly gone
104-
machine, err = d.retryOnConflictError(machine, d.updateMachineStatusFunc)
103+
machine, err = d.retryOnConflictError(ctx, machine, d.updateMachineStatusFunc)
105104
if err != nil {
106105
if errors.IsNotFound(err) {
107106
return nil
@@ -126,7 +125,7 @@ func (d *machineDeleter) Delete(ctx context.Context, name string) error {
126125
}
127126

128127
// we have removed content, so mark it finalized by us
129-
machine, err = d.retryOnConflictError(machine, d.finalizeMachine)
128+
machine, err = d.retryOnConflictError(ctx, machine, d.finalizeMachine)
130129
if err != nil {
131130
// in normal practice, this should not be possible, but if a deployment is running
132131
// two controllers to do machine deletion that share a common finalizer token it's
@@ -159,15 +158,15 @@ func (d *machineDeleter) deleteMachine(machine *v1.Machine) error {
159158
}
160159

161160
// updateMachineFunc is a function that makes an update to a machine
162-
type updateMachineFunc func(machine *v1.Machine) (*v1.Machine, error)
161+
type updateMachineFunc func(ctx context.Context, machine *v1.Machine) (*v1.Machine, error)
163162

164163
// retryOnConflictError retries the specified fn if there was a conflict error
165164
// it will return an error if the UID for an object changes across retry operations.
166165
// TODO RetryOnConflict should be a generic concept in client code
167-
func (d *machineDeleter) retryOnConflictError(machine *v1.Machine, fn updateMachineFunc) (result *v1.Machine, err error) {
166+
func (d *machineDeleter) retryOnConflictError(ctx context.Context, machine *v1.Machine, fn updateMachineFunc) (result *v1.Machine, err error) {
168167
latestMachine := machine
169168
for {
170-
result, err = fn(latestMachine)
169+
result, err = fn(ctx, latestMachine)
171170
if err == nil {
172171
return result, nil
173172
}
@@ -186,7 +185,7 @@ func (d *machineDeleter) retryOnConflictError(machine *v1.Machine, fn updateMach
186185
}
187186

188187
// updateMachineStatusFunc will verify that the status of the machine is correct
189-
func (d *machineDeleter) updateMachineStatusFunc(machine *v1.Machine) (*v1.Machine, error) {
188+
func (d *machineDeleter) updateMachineStatusFunc(ctx context.Context, machine *v1.Machine) (*v1.Machine, error) {
190189
if machine.DeletionTimestamp.IsZero() || machine.Status.Phase == v1.MachineTerminating {
191190
return machine, nil
192191
}
@@ -203,7 +202,7 @@ func finalized(machine *v1.Machine) bool {
203202
}
204203

205204
// finalizeMachine removes the specified finalizerToken and finalizes the machine
206-
func (d *machineDeleter) finalizeMachine(machine *v1.Machine) (*v1.Machine, error) {
205+
func (d *machineDeleter) finalizeMachine(ctx context.Context, machine *v1.Machine) (*v1.Machine, error) {
207206
machineFinalize := v1.Machine{}
208207
machineFinalize.ObjectMeta = machine.ObjectMeta
209208
machineFinalize.Spec = machine.Spec
@@ -225,7 +224,7 @@ func (d *machineDeleter) finalizeMachine(machine *v1.Machine) (*v1.Machine, erro
225224
Name(machineFinalize.Name).
226225
SubResource("finalize").
227226
Body(&machineFinalize).
228-
Do(context.Background()).
227+
Do(ctx).
229228
Into(machine)
230229

231230
if err != nil {
@@ -241,7 +240,6 @@ type deleteResourceFunc func(ctx context.Context, deleter *machineDeleter, machi
241240

242241
var deleteResourceFuncs = []deleteResourceFunc{
243242
deleteMachineProvider,
244-
deleteNode,
245243
}
246244

247245
// deleteAllContent will use the client to delete each resource identified in machine.
@@ -287,39 +285,3 @@ func deleteMachineProvider(ctx context.Context, deleter *machineDeleter, machine
287285

288286
return nil
289287
}
290-
291-
func deleteNode(ctx context.Context, deleter *machineDeleter, machine *v1.Machine) error {
292-
log.FromContext(ctx).Info("deleteNode doing")
293-
294-
cluster, err := clusterprovider.GetV1ClusterByName(context.Background(), deleter.platformClient, machine.Spec.ClusterName, clusterprovider.AdminUsername)
295-
if err != nil {
296-
return err
297-
}
298-
if cluster.Status.Phase == platformv1.ClusterTerminating {
299-
return nil
300-
}
301-
clientset, err := cluster.Clientset()
302-
if err != nil {
303-
return err
304-
}
305-
306-
node, err := apiclient.GetNodeByMachineIP(ctx, clientset, machine.Spec.IP)
307-
if err != nil {
308-
if !errors.IsNotFound(err) {
309-
return err
310-
}
311-
log.FromContext(ctx).Info("deleteNode done")
312-
return nil
313-
}
314-
315-
err = clientset.CoreV1().Nodes().Delete(context.Background(), node.Name, metav1.DeleteOptions{})
316-
if err != nil {
317-
if !errors.IsNotFound(err) {
318-
return err
319-
}
320-
}
321-
322-
log.FromContext(ctx).Info("deleteNode done")
323-
324-
return nil
325-
}

pkg/platform/controller/machine/machine_controller.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"k8s.io/client-go/tools/cache"
3535
"k8s.io/client-go/util/retry"
3636
"k8s.io/client-go/util/workqueue"
37+
3738
platformversionedclient "tkestack.io/tke/api/client/clientset/versioned/typed/platform/v1"
3839
platformv1informer "tkestack.io/tke/api/client/informers/externalversions/platform/v1"
3940
platformv1lister "tkestack.io/tke/api/client/listers/platform/v1"
@@ -123,6 +124,10 @@ func (c *Controller) needsUpdate(oldMachine *platformv1.Machine, newMachine *pla
123124
return true
124125
}
125126

127+
if oldMachine.Status.Phase != platformv1.MachineTerminating && newMachine.Status.Phase == platformv1.MachineTerminating {
128+
return true
129+
}
130+
126131
// Control the synchronization interval through the health detection interval
127132
// to avoid version conflicts caused by concurrent modification
128133
healthCondition := newMachine.GetCondition(conditionTypeHealthCheck)

pkg/platform/provider/baremetal/cluster/delete.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,14 @@ package cluster
2020

2121
import (
2222
"context"
23+
"time"
2324

2425
"k8s.io/apimachinery/pkg/api/errors"
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
"k8s.io/apimachinery/pkg/fields"
28+
"k8s.io/apimachinery/pkg/util/wait"
29+
30+
platformv1client "tkestack.io/tke/api/client/clientset/versioned/typed/platform/v1"
2631
"tkestack.io/tke/pkg/platform/provider/baremetal/phases/kubeadm"
2732
"tkestack.io/tke/pkg/platform/provider/util/mark"
2833
typesv1 "tkestack.io/tke/pkg/platform/types/v1"
@@ -75,3 +80,44 @@ func (p *Provider) EnsureRemoveNode(ctx context.Context, c *v1.Cluster) error {
7580
}
7681
return nil
7782
}
83+
84+
func (p *Provider) EnsureRemoveMachine(ctx context.Context, c *v1.Cluster) error {
85+
log.FromContext(ctx).Info("delete machine start")
86+
fieldSelector := fields.OneTermEqualSelector("spec.clusterName", c.Name).String()
87+
machineList, err := p.platformClient.Machines().List(ctx, metav1.ListOptions{FieldSelector: fieldSelector})
88+
if err != nil {
89+
return err
90+
}
91+
if len(machineList.Items) == 0 {
92+
return nil
93+
}
94+
for _, machine := range machineList.Items {
95+
if err := p.platformClient.Machines().Delete(ctx, machine.Name, metav1.DeleteOptions{}); err != nil {
96+
if errors.IsNotFound(err) {
97+
return nil
98+
}
99+
return err
100+
}
101+
102+
if err = wait.PollImmediate(5*time.Second, 5*time.Minute, waitForMachineDelete(ctx, p.platformClient, machine.Name)); err != nil {
103+
return err
104+
}
105+
}
106+
107+
log.FromContext(ctx).Info("delete machine done")
108+
109+
return nil
110+
}
111+
112+
func waitForMachineDelete(ctx context.Context, c platformv1client.PlatformV1Interface, machineName string) wait.ConditionFunc {
113+
return func() (done bool, err error) {
114+
115+
if _, err := c.Machines().Get(ctx, machineName, metav1.GetOptions{}); err != nil {
116+
if errors.IsNotFound(err) {
117+
return true, nil
118+
}
119+
}
120+
121+
return false, nil
122+
}
123+
}

pkg/platform/provider/baremetal/cluster/provider.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ func NewProvider() (*Provider, error) {
154154
p.EnsureRemoveNode,
155155
},
156156
DeleteHandlers: []clusterprovider.Handler{
157+
p.EnsureRemoveMachine,
157158
p.EnsureCleanClusterMark,
158159
},
159160
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package machine
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"k8s.io/apimachinery/pkg/api/errors"
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"k8s.io/apimachinery/pkg/util/wait"
10+
"k8s.io/client-go/kubernetes"
11+
12+
platformv1 "tkestack.io/tke/api/platform/v1"
13+
typesv1 "tkestack.io/tke/pkg/platform/types/v1"
14+
"tkestack.io/tke/pkg/util/apiclient"
15+
"tkestack.io/tke/pkg/util/log"
16+
)
17+
18+
func (p *Provider) EnsureRemoveNode(ctx context.Context, machine *platformv1.Machine, cluster *typesv1.Cluster) error {
19+
log.FromContext(ctx).Info("deleteNode doing")
20+
21+
if cluster.Status.Phase == platformv1.ClusterTerminating {
22+
return nil
23+
}
24+
25+
clientset, err := cluster.Clientset()
26+
if err != nil {
27+
return err
28+
}
29+
30+
node, err := apiclient.GetNodeByMachineIP(ctx, clientset, machine.Spec.IP)
31+
if err != nil {
32+
return err
33+
}
34+
err = clientset.CoreV1().Nodes().Delete(ctx, node.Name, metav1.DeleteOptions{})
35+
if err != nil {
36+
if errors.IsNotFound(err) {
37+
return nil
38+
}
39+
return err
40+
}
41+
if err = wait.PollImmediate(5*time.Second, 5*time.Minute, waitForNodeDelete(ctx, clientset, node.Name)); err != nil {
42+
return err
43+
}
44+
45+
log.FromContext(ctx).Info("deleteNode done")
46+
return nil
47+
}
48+
49+
func waitForNodeDelete(ctx context.Context, c kubernetes.Interface, nodeName string) wait.ConditionFunc {
50+
return func() (done bool, err error) {
51+
if _, err := c.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}); err != nil {
52+
if errors.IsNotFound(err) {
53+
return true, nil
54+
}
55+
}
56+
57+
return false, nil
58+
}
59+
}

pkg/platform/provider/baremetal/machine/provider.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ func NewProvider() (*Provider, error) {
9494
p.EnsureUpgrade,
9595
p.EnsurePostUpgradeHook,
9696
},
97+
DeleteHandlers: []machineprovider.Handler{
98+
p.EnsureRemoveNode,
99+
},
97100
}
98101

99102
cfg, err := config.New(constants.ConfigFile)

0 commit comments

Comments
 (0)