Skip to content

Commit 6932142

Browse files
Improve Machine remediation logs
1 parent ce2d67d commit 6932142

File tree

2 files changed

+11
-18
lines changed

2 files changed

+11
-18
lines changed

internal/controllers/machinehealthcheck/machinehealthcheck_controller.go

+7-5
Original file line numberDiff line numberDiff line change
@@ -377,10 +377,11 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
377377
// mark for remediation
378378
errList := []error{}
379379
for _, t := range unhealthy {
380+
logger := logger.WithValues("Machine", klog.KObj(t.Machine), "Node", klog.KObj(t.Node))
380381
condition := conditions.Get(t.Machine, clusterv1.MachineHealthCheckSucceededCondition)
381382

382383
if annotations.IsPaused(cluster, t.Machine) {
383-
logger.Info("Machine has failed health check, but machine is paused so skipping remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
384+
logger.Info("Machine has failed health check, but machine is paused so skipping remediation", "reason", condition.Reason, "message", condition.Message)
384385
} else {
385386
if m.Spec.RemediationTemplate != nil {
386387
// If external remediation request already exists,
@@ -424,15 +425,15 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
424425
// the same Machine, users are in charge of setting health checks and remediation properly.
425426
to.SetName(t.Machine.Name)
426427

427-
logger.Info("Target has failed health check, creating an external remediation request", "remediation request name", to.GetName(), "target", t.string(), "reason", condition.Reason, "message", condition.Message)
428+
logger.Info("Machine has failed health check, creating an external remediation request", "remediation request name", to.GetName(), "reason", condition.Reason, "message", condition.Message)
428429
// Create the external clone.
429430
if err := r.Client.Create(ctx, to); err != nil {
430431
conditions.MarkFalse(m, clusterv1.ExternalRemediationRequestAvailableCondition, clusterv1.ExternalRemediationRequestCreationFailedReason, clusterv1.ConditionSeverityError, err.Error())
431432
errList = append(errList, errors.Wrapf(err, "error creating remediation request for machine %q in namespace %q within cluster %q", t.Machine.Name, t.Machine.Namespace, t.Machine.Spec.ClusterName))
432433
return errList
433434
}
434435
} else {
435-
logger.Info("Target has failed health check, marking for remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
436+
logger.Info("Machine has failed health check, marking for remediation", "reason", condition.Reason, "message", condition.Message)
436437
// NOTE: MHC is responsible for creating MachineOwnerRemediatedCondition if missing or to trigger another remediation if the previous one is completed;
437438
// instead, if a remediation is already in progress, the remediation owner is responsible for completing the process and MHC should not overwrite the condition.
438439
if !conditions.Has(t.Machine, clusterv1.MachineOwnerRemediatedCondition) || conditions.IsTrue(t.Machine, clusterv1.MachineOwnerRemediatedCondition) {
@@ -449,8 +450,9 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
449450
t.Machine,
450451
corev1.EventTypeNormal,
451452
EventMachineMarkedUnhealthy,
452-
"Machine %v has been marked as unhealthy",
453-
t.string(),
453+
"Machine %s has been marked as unhealthy by %s",
454+
klog.KObj(t.Machine),
455+
klog.KObj(t.MHC),
454456
)
455457
}
456458
return errList

internal/controllers/machinehealthcheck/machinehealthcheck_targets.go

+4-13
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,6 @@ type healthCheckTarget struct {
6363
nodeMissing bool
6464
}
6565

66-
func (t *healthCheckTarget) string() string {
67-
return fmt.Sprintf("%s/%s/%s/%s",
68-
t.MHC.GetNamespace(),
69-
t.MHC.GetName(),
70-
t.Machine.GetName(),
71-
t.nodeName(),
72-
)
73-
}
74-
7566
// Get the node name if the target has a node.
7667
func (t *healthCheckTarget) nodeName() string {
7768
if t.Node != nil {
@@ -302,7 +293,7 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
302293
var healthy []healthCheckTarget
303294

304295
for _, t := range targets {
305-
logger := logger.WithValues("target", t.string())
296+
logger := logger.WithValues("Machine", klog.KObj(t.Machine), "Node", klog.KObj(t.Node))
306297
logger.V(3).Info("Health checking target")
307298
needsRemediation, nextCheck := t.needsRemediation(logger, timeoutForMachineToHaveNode)
308299

@@ -312,13 +303,13 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
312303
}
313304

314305
if nextCheck > 0 {
315-
logger.V(3).Info("Target is likely to go unhealthy", "timeUntilUnhealthy", nextCheck.Truncate(time.Second).String())
306+
logger.V(3).Info("Machine is likely to go unhealthy", "timeUntilUnhealthy", nextCheck.Truncate(time.Second).String())
316307
r.recorder.Eventf(
317308
t.Machine,
318309
corev1.EventTypeNormal,
319310
EventDetectedUnhealthy,
320-
"Machine %v has unhealthy node %v",
321-
t.string(),
311+
"Machine %s has unhealthy node %s",
312+
t.Machine.Name,
322313
t.nodeName(),
323314
)
324315
nextCheckTimes = append(nextCheckTimes, nextCheck)

0 commit comments

Comments
 (0)