Skip to content

Commit

Permalink
fix: child span not acquiring transaction lock in some cases (#1487)
Browse files Browse the repository at this point in the history
* fix: acquire lock before adding dropped spans

Spans from the same transaction share the dropped spans map.
If they end concurrency a race condition could happen when updating
the map.
Add a RW lock to prevent that.

* Revert "fix: acquire lock before adding dropped spans"

This reverts commit 7fe3968.

* fix: child span not acquiring transaction lock in some cases

The previous fix didn't cover all cases. The issue is deeper and affects more than the
transaction data. We need to always acquire the tx lock before ending the span.

* test: add test case for dropped spans race condition

* fix: also acquire transactiondata lock

Co-authored-by: Marc Lopez Rubio <marc5.12@outlook.com>

* test: end parent and transaction

Co-authored-by: Marc Lopez Rubio <marc5.12@outlook.com>

* test: close tracer once test ends

Co-authored-by: Marc Lopez Rubio <marc5.12@outlook.com>

---------

Co-authored-by: Marc Lopez Rubio <marc5.12@outlook.com>
  • Loading branch information
kruskall and marclop committed Jul 26, 2023
1 parent ded2651 commit 1c14a96
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
8 changes: 8 additions & 0 deletions span.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,14 @@ func (s *Span) End() {

if s.Type != parentType || s.Subtype != parentSubtype {
s.dropWhen(true)
if s.tx != nil {
s.tx.mu.Lock()
defer s.tx.mu.Unlock()
if !s.tx.ended() {
s.tx.TransactionData.mu.Lock()
defer s.tx.TransactionData.mu.Unlock()
}
}
s.end()
return
}
Expand Down
36 changes: 36 additions & 0 deletions span_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,42 @@ func TestCompressSpanSameKindConcurrent(t *testing.T) {
assert.Equal(t, 101, spanCount)
}

func TestDroppedSpanParentConcurrent(t *testing.T) {
// This test verifies there aren't any deadlocks on calling
// span.End(), Parent.End() and tx.End().
tracer := apmtest.NewRecordingTracer()
defer tracer.Close()
tracer.SetExitSpanMinDuration(0)

tx := tracer.StartTransaction("name", "type")
ctx := apm.ContextWithTransaction(context.Background(), tx)

parent, _ := apm.StartSpanOptions(ctx, "parent", "request", apm.SpanOptions{
ExitSpan: true,
})
ctx = apm.ContextWithSpan(ctx, parent)

var wg sync.WaitGroup
count := 100
wg.Add(count)
for i := 0; i < count; i++ {
go func(i int) {
child, _ := apm.StartSpanOptions(ctx, fmt.Sprint(i), fmt.Sprintf("request%d", i), apm.SpanOptions{
ExitSpan: true,
})
child.Context.SetDestinationService(apm.DestinationServiceSpanContext{Resource: fmt.Sprintf("foo%d", i)})
child.End()
wg.Done()
}(i)
}

// Wait until all the spans have ended.
wg.Wait()
parent.End()
tx.End()
tracer.Flush(nil)
}

func TestCompressSpanPrematureEnd(t *testing.T) {
// This test cases assert that the cached spans are sent when the span or
// tx that holds their cache is ended and the cache isn't lost.
Expand Down

0 comments on commit 1c14a96

Please sign in to comment.