diff --git a/CHANGELOG.md b/CHANGELOG.md index 717a990bc7a..d33d430d270 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Improve `go.opentelemetry.io/otel/trace.TraceState`'s performance. (#4722) - Improve `go.opentelemetry.io/otel/propagation.TraceContext`'s performance. (#4721) - Improve `go.opentelemetry.io/otel/baggage` performance. (#4743) +- Improve performance of the `(*Set).Filter` method in `go.opentelemetry.io/otel/attribute` when the passed filter does not filter out any attributes from the set. (#4774) - `Member.String` in `go.opentelemetry.io/otel/baggage` percent-encodes only when necessary. (#4775) ### Fixed diff --git a/attribute/set.go b/attribute/set.go index 9f9303d4f15..7e6765b06b7 100644 --- a/attribute/set.go +++ b/attribute/set.go @@ -279,52 +279,75 @@ func NewSetWithSortableFiltered(kvs []KeyValue, tmp *Sortable, filter Filter) (S position-- kvs[offset], kvs[position] = kvs[position], kvs[offset] } + kvs = kvs[position:] + if filter != nil { - return filterSet(kvs[position:], filter) + if div := filteredToFront(kvs, filter); div != 0 { + return Set{equivalent: computeDistinct(kvs[div:])}, kvs[:div] + } } - return Set{ - equivalent: computeDistinct(kvs[position:]), - }, nil + return Set{equivalent: computeDistinct(kvs)}, nil } -// filterSet reorders kvs so that included keys are contiguous at the end of -// the slice, while excluded keys precede the included keys. -func filterSet(kvs []KeyValue, filter Filter) (Set, []KeyValue) { - var excluded []KeyValue - - // Move attributes that do not match the filter so they're adjacent before - // calling computeDistinct(). - distinctPosition := len(kvs) - - // Swap indistinct keys forward and distinct keys toward the - // end of the slice. - offset := len(kvs) - 1 - for ; offset >= 0; offset-- { - if filter(kvs[offset]) { - distinctPosition-- - kvs[offset], kvs[distinctPosition] = kvs[distinctPosition], kvs[offset] - continue +// filteredToFront filters slice in-place using keep function. All KeyValues that need to +// be removed are moved to the front. All KeyValues that need to be kept are +// moved (in-order) to the back. The index for the first KeyValue to be kept is +// returned. +func filteredToFront(slice []KeyValue, keep Filter) int { + n := len(slice) + j := n + for i := n - 1; i >= 0; i-- { + if keep(slice[i]) { + j-- + slice[i], slice[j] = slice[j], slice[i] } } - excluded = kvs[:distinctPosition] - - return Set{ - equivalent: computeDistinct(kvs[distinctPosition:]), - }, excluded + return j } // Filter returns a filtered copy of this Set. See the documentation for // NewSetWithSortableFiltered for more details. func (l *Set) Filter(re Filter) (Set, []KeyValue) { if re == nil { - return Set{ - equivalent: l.equivalent, - }, nil + return *l, nil } - // Note: This could be refactored to avoid the temporary slice - // allocation, if it proves to be expensive. - return filterSet(l.ToSlice(), re) + // Iterate in reverse to the first attribute that will be filtered out. + n := l.Len() + first := n - 1 + for ; first >= 0; first-- { + kv, _ := l.Get(first) + if !re(kv) { + break + } + } + + // No attributes will be dropped, return the immutable Set l and nil. + if first < 0 { + return *l, nil + } + + // Copy now that we know we need to return a modified set. + // + // Do not do this in-place on the underlying storage of *Set l. Sets are + // immutable and filtering should not change this. + slice := l.ToSlice() + + // Don't re-iterate the slice if only slice[0] is filtered. + if first == 0 { + // It is safe to assume len(slice) >= 1 given we found at least one + // attribute above that needs to be filtered out. + return Set{equivalent: computeDistinct(slice[1:])}, slice[:1] + } + + // Move the filtered slice[first] to the front (preserving order). + kv := slice[first] + copy(slice[1:first+1], slice[:first]) + slice[0] = kv + + // Do not re-evaluate re(slice[first+1:]). + div := filteredToFront(slice[1:first+1], re) + 1 + return Set{equivalent: computeDistinct(slice[div:])}, slice[:div] } // computeDistinct returns a Distinct using either the fixed- or diff --git a/attribute/set_test.go b/attribute/set_test.go index 4fb47752e5f..4382e9a7c54 100644 --- a/attribute/set_test.go +++ b/attribute/set_test.go @@ -130,6 +130,106 @@ func TestSetDedup(t *testing.T) { } } +func TestFiltering(t *testing.T) { + a := attribute.String("A", "a") + b := attribute.String("B", "b") + c := attribute.String("C", "c") + + tests := []struct { + name string + in []attribute.KeyValue + filter attribute.Filter + kept, drop []attribute.KeyValue + }{ + { + name: "A", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { return kv.Key == "A" }, + kept: []attribute.KeyValue{a}, + drop: []attribute.KeyValue{b, c}, + }, + { + name: "B", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { return kv.Key == "B" }, + kept: []attribute.KeyValue{b}, + drop: []attribute.KeyValue{a, c}, + }, + { + name: "C", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { return kv.Key == "C" }, + kept: []attribute.KeyValue{c}, + drop: []attribute.KeyValue{a, b}, + }, + { + name: "A||B", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { + return kv.Key == "A" || kv.Key == "B" + }, + kept: []attribute.KeyValue{a, b}, + drop: []attribute.KeyValue{c}, + }, + { + name: "B||C", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { + return kv.Key == "B" || kv.Key == "C" + }, + kept: []attribute.KeyValue{b, c}, + drop: []attribute.KeyValue{a}, + }, + { + name: "A||C", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { + return kv.Key == "A" || kv.Key == "C" + }, + kept: []attribute.KeyValue{a, c}, + drop: []attribute.KeyValue{b}, + }, + { + name: "None", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { return false }, + kept: nil, + drop: []attribute.KeyValue{a, b, c}, + }, + { + name: "All", + in: []attribute.KeyValue{a, b, c}, + filter: func(kv attribute.KeyValue) bool { return true }, + kept: []attribute.KeyValue{a, b, c}, + drop: nil, + }, + { + name: "Empty", + in: []attribute.KeyValue{}, + filter: func(kv attribute.KeyValue) bool { return true }, + kept: nil, + drop: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Run("NewSetWithFiltered", func(t *testing.T) { + fltr, drop := attribute.NewSetWithFiltered(test.in, test.filter) + assert.Equal(t, test.kept, fltr.ToSlice(), "filtered") + assert.ElementsMatch(t, test.drop, drop, "dropped") + }) + + t.Run("Set.Filter", func(t *testing.T) { + s := attribute.NewSet(test.in...) + fltr, drop := s.Filter(test.filter) + assert.Equal(t, test.kept, fltr.ToSlice(), "filtered") + assert.ElementsMatch(t, test.drop, drop, "dropped") + }) + }) + } +} + func TestUniqueness(t *testing.T) { short := []attribute.KeyValue{ attribute.String("A", "0"), @@ -225,3 +325,45 @@ func args(m reflect.Method) []reflect.Value { } return out } + +func BenchmarkFiltering(b *testing.B) { + var kvs [26]attribute.KeyValue + buf := [1]byte{'A' - 1} + for i := range kvs { + buf[0]++ // A, B, C ... Z + kvs[i] = attribute.String(string(buf[:]), "") + } + + var result struct { + set attribute.Set + dropped []attribute.KeyValue + } + + benchFn := func(fltr attribute.Filter) func(*testing.B) { + return func(b *testing.B) { + b.Helper() + b.Run("Set.Filter", func(b *testing.B) { + s := attribute.NewSet(kvs[:]...) + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + result.set, result.dropped = s.Filter(fltr) + } + }) + + b.Run("NewSetWithFiltered", func(b *testing.B) { + attrs := kvs[:] + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + result.set, result.dropped = attribute.NewSetWithFiltered(attrs, fltr) + } + }) + } + } + + b.Run("NoFilter", benchFn(nil)) + b.Run("NoFiltered", benchFn(func(attribute.KeyValue) bool { return true })) + b.Run("Filtered", benchFn(func(kv attribute.KeyValue) bool { return kv.Key == "A" })) + b.Run("AllDropped", benchFn(func(attribute.KeyValue) bool { return false })) +}