From 4b048001b82d4a0c8ac68943d077c95492852b6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Geyslan=20Greg=C3=B3rio?= Date: Sat, 10 Aug 2024 11:19:54 -0300 Subject: [PATCH] perf(changelog): optimize enforceSizeBoundary These optimizations make the enforceSizeBoundary function more efficient across various scenarios, leading to overall better performance while maintaining the same behaviour (*). Running tool: /home/gg/.goenv/versions/1.22.4/bin/go test -benchmem -run=^$ -tags ebpf -bench ^(BenchmarkEnforceSizeBoundaryOld|BenchmarkEnforceSizeBoundary)$ github.com/aquasecurity/tracee/pkg/changelog -benchtime=100000000x goos: linux goarch: amd64 pkg: github.com/aquasecurity/tracee/pkg/changelog cpu: AMD Ryzen 9 7950X 16-Core Processor | Test Case | Old (ns/op) | New (ns/op) | (%) | |------------------------|-------------|-------------|--------| | No change needed | 1.446 | 1.434 | 0.83% | | Trim excess duplicates | 30.18 | 21.97 | 27.21% | | Remove oldest entries | 27.29 | 21.39 | 21.60% | Since enforceSizeBoundary is called by setAt, the performance improvements will be reflected in the latter: -bench ^(setAtOld|setAt)$ | Test Case | Old (ns/op) | New (ns/op) | (%) | |--------------------|-------------|-------------|--------| | All Scenarios | 1267 | 1163 | 8.21% | | Within Limit | 1770 | 1747 | 1.30% | Tests were fixed and added to ensure the correctness of the implementation. Benchmarks were also added to track the performance improvements. (*) The behaviour of the function is the same, but the implementation fixes a bug that when coalescing duplicate values, the removal of the oldest entry was not being done correctly. Instead of removing the oldest entry, the function was removing the newest one. --- pkg/changelog/changelog.go | 67 +++++-- pkg/changelog/changelog_benchmark_test.go | 233 ++++++++++++++++++++++ pkg/changelog/changelog_test.go | 186 +++++++++++++++-- 3 files changed, 448 insertions(+), 38 deletions(-) create mode 100644 pkg/changelog/changelog_benchmark_test.go diff --git a/pkg/changelog/changelog.go b/pkg/changelog/changelog.go index 7bdb1e10732a..6aef4c269112 100644 --- a/pkg/changelog/changelog.go +++ b/pkg/changelog/changelog.go @@ -1,7 +1,6 @@ package changelog import ( - "slices" "time" "github.com/aquasecurity/tracee/pkg/logger" @@ -165,33 +164,57 @@ func (clv *Changelog[T]) findIndex(target time.Time) int { // enforceSizeBoundary ensures that the size of the inner array doesn't exceed the limit. // It applies two methods to reduce the log size to the maximum allowed: -// 1. Unite duplicate values that are trailing one another. +// 1. Unite duplicate values that are trailing one another, removing the oldest of the pair. // 2. Remove the oldest logs as they are likely less important. func (clv *Changelog[T]) enforceSizeBoundary() { - if len(clv.changes) > clv.maxSize { - // Get rid of oldest changes to keep max size boundary - boundaryDiff := len(clv.changes) - clv.maxSize - - // First try to unite states - clv.changes = slices.CompactFunc(clv.changes, func(i item[T], i2 item[T]) bool { - if i.value == i2.value && boundaryDiff > 0 { - delete(clv.timestamps, i2.timestamp) - boundaryDiff-- - return true - } - return false - }) - - if boundaryDiff == 0 { - return + if len(clv.changes) <= clv.maxSize { + // Nothing to do + return + } + + boundaryDiff := len(clv.changes) - clv.maxSize + changed := false + + // Compact the slice in place + writeIdx := 0 + for readIdx := 0; readIdx < len(clv.changes); readIdx++ { + nextIdx := readIdx + 1 + if nextIdx < len(clv.changes) && + clv.changes[nextIdx].value == clv.changes[readIdx].value && + boundaryDiff > 0 { + // Remove the oldest (readIdx) from the duplicate pair + delete(clv.timestamps, clv.changes[readIdx].timestamp) + boundaryDiff-- + changed = true + continue } - removedChanges := clv.changes[:boundaryDiff] - clv.changes = clv.changes[boundaryDiff:] - for _, removedChange := range removedChanges { - delete(clv.timestamps, removedChange.timestamp) + + // If elements have been removed or moved, update the map and the slice + if changed { + clv.changes[writeIdx] = clv.changes[readIdx] } + + writeIdx++ + } + + if changed { + clear(clv.changes[writeIdx:]) + clv.changes = clv.changes[:writeIdx] + } + + if len(clv.changes) <= clv.maxSize { + // Size is within limits after compaction + return + } + + // As it still exceeds maxSize, remove the oldest entries in the remaining slice + boundaryDiff = len(clv.changes) - clv.maxSize + for i := 0; i < boundaryDiff; i++ { + delete(clv.timestamps, clv.changes[i].timestamp) } + clear(clv.changes[:boundaryDiff]) + clv.changes = clv.changes[boundaryDiff:] } // returnZero returns the zero value of the type T. diff --git a/pkg/changelog/changelog_benchmark_test.go b/pkg/changelog/changelog_benchmark_test.go new file mode 100644 index 000000000000..fff34f286301 --- /dev/null +++ b/pkg/changelog/changelog_benchmark_test.go @@ -0,0 +1,233 @@ +package changelog + +import ( + "testing" + "time" +) + +func Benchmark_enforceSizeBoundary(b *testing.B) { + testCases := []struct { + name string + changelog Changelog[int] + }{ + { + name: "No change needed", + changelog: Changelog[int]{ + changes: []item[int]{ + {value: 1, timestamp: getTimeFromSec(1)}, + {value: 2, timestamp: getTimeFromSec(2)}, + }, + timestamps: map[time.Time]struct{}{ + getTimeFromSec(1): {}, + getTimeFromSec(2): {}, + }, + maxSize: 5, + }, + }, + { + name: "Trim excess with duplicates", + changelog: Changelog[int]{ + changes: []item[int]{ + {value: 1, timestamp: getTimeFromSec(1)}, + {value: 1, timestamp: getTimeFromSec(2)}, + {value: 2, timestamp: getTimeFromSec(3)}, + {value: 3, timestamp: getTimeFromSec(4)}, + {value: 3, timestamp: getTimeFromSec(5)}, + }, + timestamps: map[time.Time]struct{}{ + getTimeFromSec(1): {}, + getTimeFromSec(2): {}, + getTimeFromSec(3): {}, + getTimeFromSec(4): {}, + getTimeFromSec(5): {}, + }, + maxSize: 3, + }, + }, + { + name: "Remove oldest entries", + changelog: Changelog[int]{ + changes: []item[int]{ + {value: 1, timestamp: getTimeFromSec(1)}, + {value: 2, timestamp: getTimeFromSec(2)}, + {value: 3, timestamp: getTimeFromSec(3)}, + {value: 4, timestamp: getTimeFromSec(4)}, + }, + timestamps: map[time.Time]struct{}{ + getTimeFromSec(1): {}, + getTimeFromSec(2): {}, + getTimeFromSec(3): {}, + getTimeFromSec(4): {}, + }, + maxSize: 2, + }, + }, + } + + for _, tc := range testCases { + b.Run(tc.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + clv := tc.changelog // Create a copy for each iteration + clv.enforceSizeBoundary() + } + }) + } +} + +func Benchmark_setAt(b *testing.B) { + // Test cases where the Changelog needs to enforce the size boundary + testCasesAllScenarios := []struct { + value int + time time.Time + }{ + { + value: 42, + time: getTimeFromSec(0), + }, + { + value: 72, + time: getTimeFromSec(1), + }, + { + value: 642, + time: getTimeFromSec(2), + }, + { + value: 672, + time: getTimeFromSec(3), // will trigger removal of oldest entry + }, + { + value: 642, + time: getTimeFromSec(4), // will trigger coalescing of duplicate values + }, + { + value: 672, + time: getTimeFromSec(5), // will trigger coalescing of duplicate values + }, + { + value: 6642, + time: getTimeFromSec(6), // will trigger removal of oldest entry + }, + { + value: 672, + time: getTimeFromSec(7), // will trigger coalescing of duplicate values + }, + { + value: 642, + time: getTimeFromSec(8), // will trigger coalescing of duplicate values + }, + { + value: 6672, + time: getTimeFromSec(9), // will trigger removal of oldest entry + }, + { + value: 9642, + time: getTimeFromSec(10), // will trigger removal of oldest entry + }, + { + value: 0, + time: getTimeFromSec(0), // will just update the value + }, + { + value: 0, + time: getTimeFromSec(1), // will just update the value + }, + { + value: 0, + time: getTimeFromSec(2), // will just update the value + }, + { + value: 0, + time: getTimeFromSec(3), // will just update the value + }, + } + + b.Run("All Scenarios", func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + clv := NewChangelog[int](3) + b.StartTimer() + for _, tc := range testCasesAllScenarios { + clv.setAt(tc.value, tc.time) + } + } + }) + + // Test cases where the changelog is within the maximum size limit + testCasesWithinLimit := []struct { + value int + time time.Time + }{ + { + value: 0, + time: getTimeFromSec(0), + }, + { + value: 1, + time: getTimeFromSec(1), + }, + { + value: 2, + time: getTimeFromSec(2), + }, + { + value: 3, + time: getTimeFromSec(3), + }, + { + value: 4, + time: getTimeFromSec(4), + }, + { + value: 5, + time: getTimeFromSec(5), + }, + { + value: 6, + time: getTimeFromSec(6), + }, + { + value: 7, + time: getTimeFromSec(7), + }, + { + value: 8, + time: getTimeFromSec(8), + }, + { + value: 9, + time: getTimeFromSec(9), + }, + { + value: 10, + time: getTimeFromSec(10), + }, + { + value: 11, + time: getTimeFromSec(11), + }, + { + value: 12, + time: getTimeFromSec(12), + }, + { + value: 13, + time: getTimeFromSec(13), + }, + { + value: 14, + time: getTimeFromSec(14), + }, + } + + b.Run("Within Limit", func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + clv := NewChangelog[int](15) + b.StartTimer() + for _, tc := range testCasesWithinLimit { + clv.setAt(tc.value, tc.time) + } + } + }) +} diff --git a/pkg/changelog/changelog_test.go b/pkg/changelog/changelog_test.go index 3ef999683a72..f831aa7e5231 100644 --- a/pkg/changelog/changelog_test.go +++ b/pkg/changelog/changelog_test.go @@ -1,26 +1,25 @@ -package changelog_test +package changelog import ( + "reflect" "testing" "time" "github.com/stretchr/testify/assert" - - "github.com/aquasecurity/tracee/pkg/changelog" ) func TestChangelog(t *testing.T) { t.Parallel() t.Run("GetCurrent on an empty changelog", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) // Test GetCurrent on an empty changelog assert.Zero(t, cl.GetCurrent()) }) t.Run("Set and get", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) testVal := 42 cl.SetCurrent(testVal) @@ -28,7 +27,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Set and get on set time", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) testVal1 := 42 testVal2 := 76 testVal3 := 76 @@ -50,7 +49,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Set twice on the same time", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) testVal := 42 now := time.Now() @@ -62,13 +61,13 @@ func TestChangelog(t *testing.T) { }) t.Run("Get on an empty changelog", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) assert.Zero(t, cl.GetCurrent()) }) t.Run("Test 1 second interval among changes", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(2 * time.Second) @@ -84,7 +83,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Test 100 milliseconds interval among changes", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -100,7 +99,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Test getting all values at once", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -113,7 +112,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Pass max size wit repeated values", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -125,14 +124,14 @@ func TestChangelog(t *testing.T) { now := time.Now() assert.Equal(t, 1, cl.Get(now.Add(-300*time.Millisecond))) - assert.Equal(t, 2, cl.Get(now.Add(-200*time.Millisecond))) + assert.Equal(t, 1, cl.Get(now.Add(-200*time.Millisecond))) // oldest 2 is removed, so 1 is returned assert.Equal(t, 2, cl.Get(now.Add(-100*time.Millisecond))) assert.Equal(t, 3, cl.Get(now)) assert.Len(t, cl.GetAll(), 3) }) t.Run("Pass max size with unique values", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -151,7 +150,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Pass max size with new old value", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -173,7 +172,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Zero sized changelog", func(t *testing.T) { - cl := changelog.NewChangelog[int](0) + cl := NewChangelog[int](0) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -193,4 +192,159 @@ func TestChangelog(t *testing.T) { assert.Equal(t, 0, cl.Get(now)) assert.Empty(t, cl.GetAll()) }) + + t.Run("Test enforceSizeBoundary", func(t *testing.T) { + type TestCase struct { + name string + maxSize int + initialChanges []item[int] + expectedChanges []item[int] + expectedTimestamps map[time.Time]struct{} + } + + testCases := []TestCase{ + { + name: "No Action Required", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + }, + expectedChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + }, + expectedTimestamps: map[time.Time]struct{}{ + getTimeFromSec(42): {}, + getTimeFromSec(43): {}, + getTimeFromSec(44): {}, + }, + }, + { + name: "Basic Removal of Oldest Entries", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + {timestamp: getTimeFromSec(45), value: 4}, + }, + expectedChanges: []item[int]{ + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + {timestamp: getTimeFromSec(45), value: 4}, + }, + expectedTimestamps: map[time.Time]struct{}{ + getTimeFromSec(43): {}, + getTimeFromSec(44): {}, + getTimeFromSec(45): {}, + }, + }, + { + name: "Compacting Duplicate Values - Start", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 1}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expectedChanges: []item[int]{ + {timestamp: getTimeFromSec(43), value: 1}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expectedTimestamps: map[time.Time]struct{}{ + getTimeFromSec(43): {}, + getTimeFromSec(44): {}, + getTimeFromSec(45): {}, + }, + }, + { + name: "Compacting Duplicate Values - Middle", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expectedChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expectedTimestamps: map[time.Time]struct{}{ + getTimeFromSec(42): {}, + getTimeFromSec(44): {}, + getTimeFromSec(45): {}, + }, + }, + { + name: "Compacting Duplicate Values - End", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expectedChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expectedTimestamps: map[time.Time]struct{}{ + getTimeFromSec(42): {}, + getTimeFromSec(43): {}, + getTimeFromSec(45): {}, + }, + }, + { + name: "Combination of Compaction and Removal of Oldest Entries", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 2}, + {timestamp: getTimeFromSec(46), value: 3}, + {timestamp: getTimeFromSec(47), value: 4}, + }, + expectedChanges: []item[int]{ + {timestamp: getTimeFromSec(45), value: 2}, + {timestamp: getTimeFromSec(46), value: 3}, + {timestamp: getTimeFromSec(47), value: 4}, + }, + expectedTimestamps: map[time.Time]struct{}{ + getTimeFromSec(45): {}, + getTimeFromSec(46): {}, + getTimeFromSec(47): {}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cl := NewChangelog[int](tc.maxSize) + for _, change := range tc.initialChanges { + cl.Set(change.value, change.timestamp) + } + + cl.enforceSizeBoundary() + + eq := reflect.DeepEqual(cl.timestamps, tc.expectedTimestamps) + assert.True(t, eq) + + eq = reflect.DeepEqual(cl.changes, tc.expectedChanges) + assert.True(t, eq) + }) + } + }) +} + +func getTimeFromSec(second int) time.Time { + return time.Unix(int64(second), 0) }