Skip to content

Commit fde2ec3

Browse files
committed
Add functions for attaching errors to spans
The "spancheck" linter reminds us to call "Span.RecordError" when returning an error. Two functions help with that: "tracing.Check" and "tracing.Escape".
1 parent 7d4d44e commit fde2ec3

File tree

9 files changed

+172
-42
lines changed

9 files changed

+172
-42
lines changed

.golangci.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,11 @@ linters-settings:
100100
no-unaliased: true
101101

102102
spancheck:
103+
checks: [end, record-error]
103104
extra-start-span-signatures:
104105
- 'github.com/crunchydata/postgres-operator/internal/tracing.Start:opentelemetry'
106+
ignore-check-signatures:
107+
- 'tracing.Escape'
105108

106109
issues:
107110
exclude-generated: strict

internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ func (r *CrunchyBridgeClusterReconciler) Reconcile(ctx context.Context, req ctrl
133133
// NotFound cannot be fixed by requeuing so ignore it. During background
134134
// deletion, we receive delete events from crunchybridgecluster's dependents after
135135
// crunchybridgecluster is deleted.
136-
return ctrl.Result{}, client.IgnoreNotFound(err)
136+
return ctrl.Result{}, tracing.Escape(span, client.IgnoreNotFound(err))
137137
}
138138

139139
// Get and validate connection secret for requests
@@ -152,12 +152,12 @@ func (r *CrunchyBridgeClusterReconciler) Reconcile(ctx context.Context, req ctrl
152152
// is not being deleted.
153153
if result, err := r.handleDelete(ctx, crunchybridgecluster, key); err != nil {
154154
log.Error(err, "deleting")
155-
return ctrl.Result{}, err
155+
return ctrl.Result{}, tracing.Escape(span, err)
156156
} else if result != nil {
157157
if log := log.V(1); log.Enabled() {
158158
log.Info("deleting", "result", fmt.Sprintf("%+v", *result))
159159
}
160-
return *result, err
160+
return *result, tracing.Escape(span, err)
161161
}
162162

163163
// Wonder if there's a better way to handle adding/checking/removing statuses
@@ -190,7 +190,7 @@ func (r *CrunchyBridgeClusterReconciler) Reconcile(ctx context.Context, req ctrl
190190
// Check if a cluster with the same name already exists
191191
controllerResult, err := r.handleDuplicateClusterName(ctx, key, team, crunchybridgecluster)
192192
if err != nil || controllerResult != nil {
193-
return *controllerResult, err
193+
return *controllerResult, tracing.Escape(span, err)
194194
}
195195

196196
// if we've gotten here then no cluster exists with that name and we're missing the ID, ergo, create cluster
@@ -204,26 +204,26 @@ func (r *CrunchyBridgeClusterReconciler) Reconcile(ctx context.Context, req ctrl
204204
// Get Cluster
205205
err = r.handleGetCluster(ctx, key, crunchybridgecluster)
206206
if err != nil {
207-
return ctrl.Result{}, err
207+
return ctrl.Result{}, tracing.Escape(span, err)
208208
}
209209

210210
// Get Cluster Status
211211
err = r.handleGetClusterStatus(ctx, key, crunchybridgecluster)
212212
if err != nil {
213-
return ctrl.Result{}, err
213+
return ctrl.Result{}, tracing.Escape(span, err)
214214
}
215215

216216
// Get Cluster Upgrade
217217
err = r.handleGetClusterUpgrade(ctx, key, crunchybridgecluster)
218218
if err != nil {
219-
return ctrl.Result{}, err
219+
return ctrl.Result{}, tracing.Escape(span, err)
220220
}
221221

222222
// Reconcile roles and their secrets
223223
err = r.reconcilePostgresRoles(ctx, key, crunchybridgecluster)
224224
if err != nil {
225225
log.Error(err, "issue reconciling postgres user roles/secrets")
226-
return ctrl.Result{}, err
226+
return ctrl.Result{}, tracing.Escape(span, err)
227227
}
228228

229229
// For now, we skip updating until the upgrade status is cleared.

internal/controller/pgupgrade/pgupgrade_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
9898
ctx, span := tracing.Start(ctx, "reconcile-pgupgrade")
9999
log := logging.FromContext(ctx)
100100
defer span.End()
101+
defer func(s tracing.Span) { _ = tracing.Escape(s, err) }(span)
101102

102103
// Retrieve the upgrade from the client cache, if it exists. A deferred
103104
// function below will send any changes to its Status field.

internal/controller/postgrescluster/controller.go

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,8 @@ func (r *Reconciler) Reconcile(
8080
// cluster is deleted.
8181
if err = client.IgnoreNotFound(err); err != nil {
8282
log.Error(err, "unable to fetch PostgresCluster")
83-
span.RecordError(err)
8483
}
85-
return runtime.ErrorWithBackoff(err)
84+
return runtime.ErrorWithBackoff(tracing.Escape(span, err))
8685
}
8786

8887
// Set any defaults that may not have been stored in the API. No DeepCopy
@@ -107,9 +106,8 @@ func (r *Reconciler) Reconcile(
107106
// Check for and handle deletion of cluster. Return early if it is being
108107
// deleted or there was an error.
109108
if result, err := r.handleDelete(ctx, cluster); err != nil {
110-
span.RecordError(err)
111109
log.Error(err, "deleting")
112-
return runtime.ErrorWithBackoff(err)
110+
return runtime.ErrorWithBackoff(tracing.Escape(span, err))
113111

114112
} else if result != nil {
115113
if log := log.V(1); log.Enabled() {
@@ -130,7 +128,7 @@ func (r *Reconciler) Reconcile(
130128
// specifically allow reconciliation if the cluster is shutdown to
131129
// facilitate upgrades, otherwise return
132130
if !initialize.FromPointer(cluster.Spec.Shutdown) {
133-
return runtime.ErrorWithBackoff(err)
131+
return runtime.ErrorWithBackoff(tracing.Escape(span, err))
134132
}
135133
}
136134
// Issue Warning Event if postgres version is EOL according to PostgreSQL:
@@ -154,7 +152,7 @@ func (r *Reconciler) Reconcile(
154152
path := field.NewPath("spec", "standby")
155153
err := field.Invalid(path, cluster.Name, "Standby requires a host or repoName to be enabled")
156154
r.Recorder.Event(cluster, corev1.EventTypeWarning, "InvalidStandbyConfiguration", err.Error())
157-
return runtime.ErrorWithBackoff(err)
155+
return runtime.ErrorWithBackoff(tracing.Escape(span, err))
158156
}
159157

160158
var (
@@ -208,7 +206,7 @@ func (r *Reconciler) Reconcile(
208206

209207
ObservedGeneration: cluster.GetGeneration(),
210208
})
211-
return runtime.ErrorWithBackoff(patchClusterStatus())
209+
return runtime.ErrorWithBackoff(tracing.Escape(span, patchClusterStatus()))
212210
} else {
213211
meta.RemoveStatusCondition(&cluster.Status.Conditions, v1beta1.PostgresClusterProgressing)
214212
}
@@ -228,7 +226,7 @@ func (r *Reconciler) Reconcile(
228226

229227
ObservedGeneration: cluster.GetGeneration(),
230228
})
231-
return runtime.ErrorWithBackoff(patchClusterStatus())
229+
return runtime.ErrorWithBackoff(tracing.Escape(span, patchClusterStatus()))
232230
} else {
233231
meta.RemoveStatusCondition(&cluster.Status.Conditions, v1beta1.PostgresClusterProgressing)
234232
}
@@ -259,7 +257,8 @@ func (r *Reconciler) Reconcile(
259257
// return is no longer needed, and reconciliation can proceed normally.
260258
returnEarly, err := r.reconcileDirMoveJobs(ctx, cluster)
261259
if err != nil || returnEarly {
262-
return runtime.ErrorWithBackoff(errors.Join(err, patchClusterStatus()))
260+
return runtime.ErrorWithBackoff(tracing.Escape(span,
261+
errors.Join(err, patchClusterStatus())))
263262
}
264263
}
265264
if err == nil {
@@ -309,7 +308,7 @@ func (r *Reconciler) Reconcile(
309308
// can proceed normally.
310309
returnEarly, err := r.reconcileDataSource(ctx, cluster, instances, clusterVolumes, rootCA, backupsSpecFound)
311310
if err != nil || returnEarly {
312-
return runtime.ErrorWithBackoff(errors.Join(err, patchClusterStatus()))
311+
return runtime.ErrorWithBackoff(tracing.Escape(span, errors.Join(err, patchClusterStatus())))
313312
}
314313
}
315314
if err == nil {
@@ -401,7 +400,7 @@ func (r *Reconciler) Reconcile(
401400

402401
log.V(1).Info("reconciled cluster")
403402

404-
return result, errors.Join(err, patchClusterStatus())
403+
return result, tracing.Escape(span, errors.Join(err, patchClusterStatus()))
405404
}
406405

407406
// deleteControlled safely deletes object when it is controlled by cluster.

internal/controller/postgrescluster/instance.go

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -807,8 +807,7 @@ func (r *Reconciler) rolloutInstance(
807807
err = errors.New("unable to switchover")
808808
}
809809

810-
span.RecordError(err)
811-
return err
810+
return tracing.Escape(span, err)
812811
}
813812

814813
// When the cluster has only one instance for failover, perform a series of
@@ -840,8 +839,7 @@ func (r *Reconciler) rolloutInstance(
840839
logging.FromContext(ctx).V(1).Info("attempted checkpoint",
841840
"duration", elapsed, "stdout", stdout, "stderr", stderr)
842841

843-
span.RecordError(err)
844-
return elapsed, err
842+
return elapsed, tracing.Escape(span, err)
845843
}
846844

847845
duration, err := checkpoint(ctx)
@@ -950,8 +948,7 @@ func (r *Reconciler) rolloutInstances(
950948
}
951949
}
952950

953-
span.RecordError(err)
954-
return err
951+
return tracing.Escape(span, err)
955952
}
956953

957954
// scaleDownInstances removes extra instances from a cluster until it matches
@@ -1081,20 +1078,23 @@ func (r *Reconciler) scaleUpInstances(
10811078
// While there are fewer instances than specified, generate another empty one
10821079
// and append it.
10831080
for len(instances) < int(*set.Replicas) {
1084-
_, span := tracing.Start(ctx, "generate-instance-name")
1085-
next := naming.GenerateInstance(cluster, set)
1086-
// if there are any available instance names (as determined by observing any PVCs for the
1087-
// instance set that are not currently associated with an instance, e.g. in the event the
1088-
// instance STS was deleted), then reuse them instead of generating a new name
1089-
if len(availableInstanceNames) > 0 {
1090-
next.Name = availableInstanceNames[0]
1091-
availableInstanceNames = availableInstanceNames[1:]
1092-
} else {
1093-
for instanceNames.Has(next.Name) {
1094-
next = naming.GenerateInstance(cluster, set)
1081+
next := func() metav1.ObjectMeta {
1082+
_, span := tracing.Start(ctx, "generate-instance-name")
1083+
defer span.End()
1084+
n := naming.GenerateInstance(cluster, set)
1085+
// if there are any available instance names (as determined by observing any PVCs for the
1086+
// instance set that are not currently associated with an instance, e.g. in the event the
1087+
// instance STS was deleted), then reuse them instead of generating a new name
1088+
if len(availableInstanceNames) > 0 {
1089+
n.Name = availableInstanceNames[0]
1090+
availableInstanceNames = availableInstanceNames[1:]
1091+
} else {
1092+
for instanceNames.Has(n.Name) {
1093+
n = naming.GenerateInstance(cluster, set)
1094+
}
10951095
}
1096-
}
1097-
span.End()
1096+
return n
1097+
}()
10981098

10991099
instanceNames.Insert(next.Name)
11001100
instances = append(instances, &appsv1.StatefulSet{ObjectMeta: next})

internal/controller/standalone_pgadmin/controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ func (r *PGAdminReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
9393
// NotFound cannot be fixed by requeuing so ignore it. During background
9494
// deletion, we receive delete events from pgadmin's dependents after
9595
// pgadmin is deleted.
96-
return ctrl.Result{}, client.IgnoreNotFound(err)
96+
return ctrl.Result{}, tracing.Escape(span, client.IgnoreNotFound(err))
9797
}
9898

9999
// Write any changes to the pgadmin status on the way out.
@@ -148,7 +148,7 @@ func (r *PGAdminReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
148148
log.V(1).Info("Reconciled pgAdmin")
149149
}
150150

151-
return ctrl.Result{}, err
151+
return ctrl.Result{}, tracing.Escape(span, err)
152152
}
153153

154154
// The owner reference created by controllerutil.SetControllerReference blocks

internal/naming/dns.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,10 @@ func KubernetesClusterDomain(ctx context.Context) string {
7979
api := "kubernetes.default.svc"
8080
cname, err := net.DefaultResolver.LookupCNAME(ctx, api)
8181

82-
if err == nil {
82+
if tracing.Check(span, err) {
8383
return strings.TrimPrefix(cname, api+".")
8484
}
8585

86-
span.RecordError(err)
8786
// The kubeadm default is "cluster.local" and is adequate when not running
8887
// in an actual Kubernetes cluster.
8988
return "cluster.local."

internal/tracing/errors.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2021 - 2024 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package tracing
6+
7+
import (
8+
semconv "go.opentelemetry.io/otel/semconv/v1.27.0"
9+
"go.opentelemetry.io/otel/trace"
10+
)
11+
12+
// Check returns true when err is nil. Otherwise, it adds err as an exception
13+
// event on s and returns false. If you intend to return err, consider using
14+
// [Escape] instead.
15+
//
16+
// See: https://opentelemetry.io/docs/specs/semconv/exceptions/exceptions-spans
17+
func Check(s Span, err error) bool {
18+
if err == nil {
19+
return true
20+
}
21+
if s.IsRecording() {
22+
s.RecordError(err)
23+
}
24+
return false
25+
}
26+
27+
// Escape adds non-nil err as an escaped exception event on s and returns err.
28+
// See: https://opentelemetry.io/docs/specs/semconv/exceptions/exceptions-spans
29+
func Escape(s Span, err error) error {
30+
if err != nil && s.IsRecording() {
31+
s.RecordError(err, trace.WithAttributes(semconv.ExceptionEscaped(true)))
32+
}
33+
return err
34+
}

internal/tracing/errors_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright 2021 - 2024 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package tracing
6+
7+
import (
8+
"context"
9+
"errors"
10+
"testing"
11+
12+
"go.opentelemetry.io/otel/sdk/trace"
13+
"go.opentelemetry.io/otel/sdk/trace/tracetest"
14+
semconv "go.opentelemetry.io/otel/semconv/v1.27.0"
15+
"gotest.tools/v3/assert"
16+
)
17+
18+
func TestCheck(t *testing.T) {
19+
recorder := tracetest.NewSpanRecorder()
20+
tracer := trace.NewTracerProvider(
21+
trace.WithSpanProcessor(recorder),
22+
).Tracer("")
23+
24+
{
25+
_, span := tracer.Start(context.Background(), "")
26+
assert.Assert(t, Check(span, nil))
27+
span.End()
28+
29+
spans := recorder.Ended()
30+
assert.Equal(t, len(spans), 1)
31+
assert.Equal(t, len(spans[0].Events()), 0, "expected no events")
32+
}
33+
34+
{
35+
_, span := tracer.Start(context.Background(), "")
36+
assert.Assert(t, !Check(span, errors.New("msg")))
37+
span.End()
38+
39+
spans := recorder.Ended()
40+
assert.Equal(t, len(spans), 2)
41+
assert.Equal(t, len(spans[1].Events()), 1, "expected one event")
42+
43+
event := spans[1].Events()[0]
44+
assert.Equal(t, event.Name, semconv.ExceptionEventName)
45+
46+
attrs := event.Attributes
47+
assert.Equal(t, len(attrs), 2)
48+
assert.Equal(t, string(attrs[0].Key), "exception.type")
49+
assert.Equal(t, string(attrs[1].Key), "exception.message")
50+
assert.Equal(t, attrs[0].Value.AsInterface(), "*errors.errorString")
51+
assert.Equal(t, attrs[1].Value.AsInterface(), "msg")
52+
}
53+
}
54+
55+
func TestEscape(t *testing.T) {
56+
recorder := tracetest.NewSpanRecorder()
57+
tracer := trace.NewTracerProvider(
58+
trace.WithSpanProcessor(recorder),
59+
).Tracer("")
60+
61+
{
62+
_, span := tracer.Start(context.Background(), "")
63+
assert.NilError(t, Escape(span, nil))
64+
span.End()
65+
66+
spans := recorder.Ended()
67+
assert.Equal(t, len(spans), 1)
68+
assert.Equal(t, len(spans[0].Events()), 0, "expected no events")
69+
}
70+
71+
{
72+
_, span := tracer.Start(context.Background(), "")
73+
expected := errors.New("somesuch")
74+
assert.Assert(t, errors.Is(Escape(span, expected), expected),
75+
"expected to unwrap the original error")
76+
span.End()
77+
78+
spans := recorder.Ended()
79+
assert.Equal(t, len(spans), 2)
80+
assert.Equal(t, len(spans[1].Events()), 1, "expected one event")
81+
82+
event := spans[1].Events()[0]
83+
assert.Equal(t, event.Name, semconv.ExceptionEventName)
84+
85+
attrs := event.Attributes
86+
assert.Equal(t, len(attrs), 3)
87+
assert.Equal(t, string(attrs[0].Key), "exception.escaped")
88+
assert.Equal(t, string(attrs[1].Key), "exception.type")
89+
assert.Equal(t, string(attrs[2].Key), "exception.message")
90+
assert.Equal(t, attrs[0].Value.AsInterface(), true)
91+
assert.Equal(t, attrs[1].Value.AsInterface(), "*errors.errorString")
92+
assert.Equal(t, attrs[2].Value.AsInterface(), "somesuch")
93+
}
94+
}

0 commit comments

Comments
 (0)