Skip to content
This repository was archived by the owner on Aug 12, 2025. It is now read-only.

Commit 1b2c144

Browse files
committed
Make PacketMachine controller re-entrant when crashes happen during creation process
1 parent 34f9668 commit 1b2c144

File tree

2 files changed

+53
-25
lines changed

2 files changed

+53
-25
lines changed

controllers/packetmachine_controller.go

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@ import (
2323
"strings"
2424
"time"
2525

26-
corev1 "k8s.io/api/core/v1"
27-
2826
"github.com/go-logr/logr"
29-
"github.com/google/uuid"
3027
"github.com/packethost/packngo"
3128
"github.com/pkg/errors"
32-
29+
corev1 "k8s.io/api/core/v1"
3330
apierrors "k8s.io/apimachinery/pkg/api/errors"
3431
"k8s.io/apimachinery/pkg/runtime"
3532
"k8s.io/client-go/tools/record"
33+
infrastructurev1alpha3 "sigs.k8s.io/cluster-api-provider-packet/api/v1alpha3"
34+
packet "sigs.k8s.io/cluster-api-provider-packet/pkg/cloud/packet"
35+
"sigs.k8s.io/cluster-api-provider-packet/pkg/cloud/packet/scope"
3636
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
3737
capierrors "sigs.k8s.io/cluster-api/errors"
3838
"sigs.k8s.io/cluster-api/util"
@@ -42,11 +42,6 @@ import (
4242
"sigs.k8s.io/controller-runtime/pkg/handler"
4343
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4444
"sigs.k8s.io/controller-runtime/pkg/source"
45-
46-
packet "sigs.k8s.io/cluster-api-provider-packet/pkg/cloud/packet"
47-
"sigs.k8s.io/cluster-api-provider-packet/pkg/cloud/packet/scope"
48-
49-
infrastructurev1alpha3 "sigs.k8s.io/cluster-api-provider-packet/api/v1alpha3"
5045
)
5146

5247
const (
@@ -173,6 +168,7 @@ func (r *PacketMachineReconciler) SetupWithManager(mgr ctrl.Manager) error {
173168

174169
func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *scope.MachineScope, clusterScope *scope.ClusterScope, logger logr.Logger) (ctrl.Result, error) {
175170
logger.Info("Reconciling PacketMachine")
171+
176172
packetmachine := machineScope.PacketMachine
177173
// If the PacketMachine is in an error state, return early.
178174
if packetmachine.Status.ErrorReason != nil || packetmachine.Status.ErrorMessage != nil {
@@ -195,28 +191,50 @@ func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *s
195191
}
196192

197193
providerID := machineScope.GetInstanceID()
194+
195+
defaultTags := []string{
196+
packet.GenerateClusterTag(clusterScope.Name()),
197+
packet.GenerateMachineNameTag(machineScope.Name()),
198+
packet.GenerateNamespaceTag(machineScope.Namespace()),
199+
}
200+
198201
var (
199202
dev *packngo.Device
200203
addrs []corev1.NodeAddress
201204
err error
202205
controlPlaneEndpoint packngo.IPAddressReservation
203206
)
204-
// if we have no provider ID, then we are creating
207+
205208
if providerID != "" {
209+
// If we already have a providerID, then retrieve the device using the
210+
// providerID. This means that the Machine has already been created
211+
// and bootstrapped as a Node into the cluster.
206212
dev, err = r.PacketClient.GetDevice(providerID)
207213
if err != nil {
208214
return ctrl.Result{}, err
209215
}
210216
}
217+
211218
if dev == nil {
219+
// We don't yet have a providerID, check to see if we've already
220+
// created a device by using the tags that we assign to devices
221+
// on creation.
222+
dev, err = r.PacketClient.GetDeviceByTags(
223+
machineScope.PacketCluster.Spec.ProjectID,
224+
defaultTags,
225+
)
226+
if err != nil {
227+
return ctrl.Result{}, err
228+
}
229+
}
230+
231+
if dev == nil {
232+
// We weren't able to find a device by either providerID or by tags,
233+
// so we need to create a new device.
212234
createDeviceReq := packet.CreateDeviceRequest{
235+
ExtraTags: defaultTags,
213236
MachineScope: machineScope,
214237
}
215-
mUID := uuid.New().String()
216-
tags := []string{
217-
packet.GenerateMachineTag(mUID),
218-
packet.GenerateClusterTag(clusterScope.Name()),
219-
}
220238

221239
// when the node is a control plane we should check if the elastic ip
222240
// for this cluster is not assigned. If it is free we can prepare the
@@ -236,14 +254,14 @@ func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *s
236254
createDeviceReq.ControlPlaneEndpoint = controlPlaneEndpoint.Address
237255
}
238256

239-
createDeviceReq.ExtraTags = tags
240-
241257
dev, err = r.PacketClient.NewDevice(createDeviceReq)
242258

243259
switch {
244260
// TODO: find a better way than parsing the error messages for this.
245261
case err != nil && strings.Contains(err.Error(), " no available hardware reservations "):
246-
// Do not treat an error indicating there are no hardware reservations available as fatal
262+
// Do not treat an error indicating there are no hardware
263+
// reservations available as fatal; we should continue to retry
264+
// device creation until a reservation is available
247265
return ctrl.Result{}, fmt.Errorf("failed to create machine %s: %w", machineScope.Name(), err)
248266
case err != nil:
249267
errs := fmt.Errorf("failed to create machine %s: %w", machineScope.Name(), err)

pkg/cloud/packet/util.go

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,26 @@ import (
2121
)
2222

2323
const (
24-
MachineUIDTag = "cluster-api-provider-packet:machine-uid"
25-
clusterIDTag = "cluster-api-provider-packet:cluster-id"
26-
AnnotationUID = "cluster.k8s.io/machine-uid"
24+
MachineUIDTag = "cluster-api-provider-packet:machine-uid"
25+
machineNameTag = "cluster-api-provider-packet:machine-name"
26+
clusterIDTag = "cluster-api-provider-packet:cluster-id"
27+
namespaceTag = "cluster-api-provider-packet:namespace"
2728
)
2829

29-
func GenerateMachineTag(ID string) string {
30-
return fmt.Sprintf("%s:%s", MachineUIDTag, ID)
30+
func GenerateMachineTag(id string) string {
31+
return fmt.Sprintf("%s:%s", MachineUIDTag, id)
3132
}
32-
func GenerateClusterTag(ID string) string {
33-
return fmt.Sprintf("%s:%s", clusterIDTag, ID)
33+
34+
func GenerateMachineNameTag(name string) string {
35+
return fmt.Sprintf("%s:%s", machineNameTag, name)
36+
}
37+
38+
func GenerateClusterTag(clusterName string) string {
39+
return fmt.Sprintf("%s:%s", clusterIDTag, clusterName)
40+
}
41+
42+
func GenerateNamespaceTag(namespace string) string {
43+
return fmt.Sprintf("%s:%s", namespaceTag, namespace)
3444
}
3545

3646
// ItemsInList checks if all items are in the list

0 commit comments

Comments
 (0)